Beautiful Words(2022 BSPC B)

标签：Beautiful lcp int SIGMA BSPC 2022 SA type sa

题目大意

你有一个长度为\(N\)的字符串\(A\)和一个字符串集\(S\)，问所有\(A\)的循环同构中与\(S\)中的字符串的最长公共子串长度的最大值最小是多少。\((1\le N\le10^5,\sum|s|\le10^6)\)

思路

我们可以首先把\(A\)串中从所有位置开始的与\(S\)集合中的串的最长公共子串的长度都求出来，这个是利用后缀数组\(O(N)\)就可以解决的，然后对于长度为\(N\)的每一段，都可以分为三部分，左边的，全部在里面的和右边的，里面的和右边的都可以用优先队列存一下，里面的长度优先，右边的左端点优先，当右边的不满足条件时，他会到里面，当里面的不满足条件时，他会到左边，然后因为左边只是一直递减，所以只需要存一个长度即可，最后一段的值便是三个值取个\(max\)，最后答案与所有段的值取个\(min\)即可，由于最多只会有\(N\)个最长公共子串的长度，所以这样复杂度是可以保证的，然后由于优先队列自带一个\(log\)，所以最后的复杂度是\(O(N\log)\)。优先队列的常数是真的大，哭。

代码

const int MAXLEN = 1000005;
struct SAIS
{
	// 后缀类型
#define L_TYPE 0
#define S_TYPE 1
	
	int st[MAXLEN];
	int rl[MAXLEN],rk[MAXLEN],lcp[MAXLEN];
	int n;
	//rl:后缀排序后的排名列表,rl[i]代表第i名的后缀的序号
	//rk:名次,rk[i]代表后缀i的名次
	//lcp:height数组,lcp[i]代表列表中第i个后缀和第i-1个后缀的lcp长度
	
	void init(const string&s,const int&_n)
	{
		n = _n;
		for (int i=1;i<=n;++i) st[i] = s[i-1];
		// st中的字符必须调成正数!!!!!!!!!!!!!!
	}
	
	// 判断一个字符是否为LMS字符
	inline bool is_lms_char(int *type, int x) {
		return x > 1 && type[x] == S_TYPE && type[x - 1] == L_TYPE;
	}
	
	// 判断两个LMS子串是否相同
	inline bool equal_substring(int *S, int x, int y, int *type) {
		do {
			if (S[x] != S[y])
				return false;
			x++, y++;
		} while (!is_lms_char(type, x) && !is_lms_char(type, y));
		
		return S[x] == S[y] && type[x] == type[y];
	}
	
	// 诱导排序(从*型诱导到L型、从L型诱导到S型)
	// 调用之前应将*型按要求放入SA中
	inline void induced_sort(int *S, int *SA, int *type, int *bucket, int *lbucket,
							 int *sbucket, int nn, int SIGMA) {
		for (int i = 1; i <= nn; i++)
			if (SA[i] > 1 && type[SA[i] - 1] == L_TYPE)
				SA[lbucket[S[SA[i] - 1]]++] = SA[i] - 1;
		for (int i = 0; i <= SIGMA; i++)  // Reset S-type bucket
			sbucket[i] = bucket[i];
		for (int i = nn; i >= 1; i--)
			if (SA[i] > 1 && type[SA[i] - 1] == S_TYPE)
				SA[sbucket[S[SA[i] - 1]]--] = SA[i] - 1;
	}
	
	// SA-IS主体
	// S是输入字符串，length是字符串的长度, SIGMA是字符集的大小
	int *sais(int *S, int length, int SIGMA) {
		int nn = length;
		assert(S[nn]==0);
		int *type = new int[nn + 5];  // 后缀类型
		int *position = new int[nn + 5];  // 记录LMS子串的起始位置
		int *name = new int[nn + 5];  // 记录每个LMS子串的新名称
		int *SA = new int[nn + 5];  // SA数组
		int *bucket = new int[SIGMA + 5];  // 每个字符的桶
		int *lbucket = new int[SIGMA + 5];  // 每个字符的L型桶的起始位置
		int *sbucket = new int[SIGMA + 5];  // 每个字符的S型桶的起始位置
		
		// 初始化每个桶
		memset(bucket, 0, sizeof(int) * (SIGMA + 5));
		for (int i = 1; i <= nn; i++)
			bucket[S[i]]++;
		for (int i = 0; i <= SIGMA; i++) {
			if (i==0)
			{
				bucket[i] = bucket[i];
				lbucket[i] = 1;
			}else
			{
				bucket[i] += bucket[i - 1];
				lbucket[i] = bucket[i - 1] + 1;
			}
			sbucket[i] = bucket[i];
		}
		
		// 确定后缀类型(利用引理2.1)
		type[nn] = S_TYPE;
		for (int i = nn - 1; i >= 1; i--) {
			if (S[i] < S[i + 1])
				type[i] = S_TYPE;
			else if (S[i] > S[i + 1])
				type[i] = L_TYPE;
			else
				type[i] = type[i + 1];
		}
		// 寻找每个LMS子串
		int cnt = 0;
		for (int i = 1; i <= nn; i++)
			if (is_lms_char(type,i))
				position[++cnt] = i;
		// 对LMS子串进行排序
		fill(SA, SA + nn + 3, -1);
		for (int i = 1; i <= cnt; i++)
			SA[sbucket[S[position[i]]]--] = position[i];
		induced_sort(S, SA, type, bucket, lbucket, sbucket, nn, SIGMA);
		
		// 为每个LMS子串命名
		fill(name, name + nn + 3, -1);
		int lastx = -1, namecnt = 1;  // 上一次处理的LMS子串与名称的计数
		bool flag = false;  // 这里顺便记录是否有重复的字符
		for (int i = 2; i <= nn; i++) {
			int x = SA[i];
			
			if (is_lms_char(type, x)) {
				if (lastx >= 0 && !equal_substring(S, x, lastx, type))
					namecnt++;
				// 因为只有相同的LMS子串才会有同样的名称
				if (lastx >= 0 && namecnt == name[lastx])
					flag = true;
				
				name[x] = namecnt;
				lastx = x;
			}
		}  // for
		name[nn] = 0;
		
		// 生成S1
		int *S1 = new int[cnt+5];
		int pos = 0;
		for (int i = 1; i <= nn; i++)
			if (name[i] >= 0)
				S1[++pos] = name[i];
		int *SA1;
		if (!flag) {
			// 直接计算SA1
			SA1 = new int[cnt + 5];
			
			for (int i = 1; i <= cnt; i++)
				SA1[S1[i]+1] = i;
		} else
			SA1 = sais(S1, cnt, namecnt);  // 递归计算SA1
		
		// 从SA1诱导到SA
		for (int i = 0; i <= SIGMA; i++) {
			if (i==0)
				lbucket[i] = 1;
			else
				lbucket[i] = bucket[i - 1] + 1;
			sbucket[i] = bucket[i];
		}
		fill(SA, SA + nn + 3, -1);
		for (int i = cnt; i >= 1; i--)  // 这里是逆序扫描SA1，因为SA中S型桶是倒序的
			SA[sbucket[S[position[SA1[i]]]]--] = position[SA1[i]];
		induced_sort(S, SA, type, bucket, lbucket, sbucket, nn, SIGMA);
		
		delete[] S1;
		delete[] SA1;
		delete[] bucket;
		delete[] lbucket;
		delete[] sbucket;
		delete[] position;
		delete[] type;
		delete[] name;
		// 后缀数组计算完毕
		return SA;
	}
	
	void build()
	{
		st[0]=st[n+2]=-1;
		st[n+1]=0;
		int SIGMA = 0;
		for (int i=1;i<=n;++i) SIGMA = max(SIGMA,st[i]);
		int * sa = sais(st,n+1,SIGMA);
		for (int i=2;i<=n+1;++i) rk[sa[i]]=i-1;
		delete[] sa;
		for (int i=1;i<=n;++i) rl[rk[i]]=i;
		for (int i=1,len=0;i<=n;++i)
		{
			if (len) --len;
			while (i+len<=n && rl[rk[i]-1]+len<=n && st[i+len]==st[rl[rk[i]-1]+len]) ++len;
			lcp[rk[i]]=len;
		}
	}

#undef L_TYPE
#undef R_TYPE
}sa;
string S,s[10005];
vector<int> a[200005];
struct node
{
	int left,len;
	bool operator<(const node&n)const
	{
		return len<n.len;
	}
};
struct nodee
{
	int left,len;
	bool operator<(const nodee&n)const
	{
		return left>n.left;
	}
};

void solve()
{
	int n,m;
	cin>>n>>m;
	cin>>S;
	for(int i=1;i<=n;i++)sa.st[++sa.n]=S[i-1]+m;
	for(int i=1;i<=n;i++)sa.st[++sa.n]=S[i-1]+m;
	for(int i=1;i<=m;i++)
	{
		sa.st[++sa.n]=i;
		cin>>s[i];
		for(auto it:s[i])sa.st[++sa.n]=it+m;
	}
	sa.build();
	int lcp=inf;
	for(int i=1;i<=sa.n;i++)
	{
		if(sa.rl[i]>2*n)lcp=inf;
		else
		{
			lcp=min(lcp,sa.lcp[i]);
			if(lcp)a[sa.rl[i]].push_back(lcp);
		}
	}
	lcp=0;
	for(int i=sa.n;i>=1;i--)
	{
		if(sa.rl[i]>2*n)lcp=sa.lcp[i];
		else
		{
			if(lcp)a[sa.rl[i]].push_back(lcp);
			lcp=min(lcp,sa.lcp[i]);
		}
	}
	int l=1,r=n;
	priority_queue<node>q;
	priority_queue<nodee>rig;
	int left_max_len=0;
	int ans=inf;
	for(int i=1;i<n;i++)
	{
		for(auto it:a[i])
		{
			if(i+it-1<=n)q.push({i,it});
			else rig.push({i,it});
		}
	}
	for(;r<=2*n;l++,r++)
	{
		left_max_len=max(0,left_max_len-1);
		for(auto it:a[r])
		{
			if(r+it-1<=r)q.push({r,it});
			else rig.push({r,it});
		}
		while(!rig.empty()&&rig.top().left+rig.top().len-1<=r)
		{
			q.push({rig.top().left,rig.top().len});
			rig.pop();
		}
		while(!q.empty()&&q.top().left<l)
		{
			left_max_len=max(left_max_len,q.top().len-(l-q.top().left+1));
			q.pop();
		}
		int maxx=left_max_len;
		if(!q.empty())maxx=max(maxx,q.top().len);
		if(!rig.empty())maxx=max(maxx,r-rig.top().left+1);
		ans=min(ans,maxx);
	}
	cout<<ans<<'\n';
}

标签：Beautiful,lcp,int,SIGMA,BSPC,2022,SA,type,sa
From： https://www.cnblogs.com/Jerry-Black/p/16759298.html

题目大意

思路

代码

相关文章

赞助商

阅读排行