后缀数组
后缀数组(Suffix Array)是一个通过对字符串的所有后缀经过排序后得到的数组。
实现
记 \(sa_i\) 表示排名为 \(i\) 的后缀为 \(S[sa_i, n]\) ,\(rk_i\) 表示 \(S[i, n]\) 的排名。
由此可得:
\[sa_{rk_i}=rk_{sa_i}=i \]考虑已经将每个串以前 \(2^{k - 1}\) 个字符为关键字排好序,接下来需要以前 \(2^k\) 个字符为关键字排好序。注意到 \(2^k = 2^{k - 1} + 2^{k - 1}\) ,于是只要做一次双关键字排序即可。其中第一关键字为 \(S[i, i + 2^{k - 1} - 1]\) ,第二关键字为 \(S[i + 2^{k - 1}, i + 2^k - 1]\) ,这两个信息的大小比较在上一轮排序中均可找到。
当倍增数量大于字符串数量时,就可以停止倍增。并且当所有 \(sa_i\) 都不相同时,就可以停止排序。
使用计数排序可以做到 \(O(n \log n)\) 。
#include <bits/stdc++.h>
using namespace std;
const int N = 1e6 + 7;
char str[N];
int n;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N];
inline void prework(int n) {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
}
} // namespace SA
signed main() {
scanf("%s", str + 1);
SA::prework(n = strlen(str + 1));
for (int i = 1; i <= n; ++i)
printf("%d ", SA::sa[i]);
return 0;
}
LCP
记:
\[\begin{align} lcp(i, j) &= LCP(S[sa_i, n], S[sa_j, n]) \\ height_i &= lcp(i - 1, i) \end{align} \]有:
- LCP Lemma:\(\forall 1 \leq i < j < k \leq n, lcp(i, k) = \min(lcp(i, j), lcp(j, k))\) 。
- LCP Theorem:\(\forall i \leq i < j \leq n, lcp(i, j) = \min_{k = i + 1}^j {height_k}\) 。
- LCP Corollary:\(\forall 1 \leq i \leq j < k \leq n, lcp(i, j) \geq lcp(i, k)\) 。
于是求解两个后缀的 LCP 可以转化为 \(height\) 数组上的 RMQ 问题。
设 \(h_i = height_{rk_i}\) ,则:
\[h_i \geq h_{i - 1} - 1 \]证明:在 \(sa\) 数组里面找一个后缀,设它在原字符串的下标为 \(i-1\) 。在 \(sa\) 中的前面一个后缀,它在原字符串的下标为 \(k\) 。现在,把它们两个后缀的首字母都砍掉,它们就变成了 \(i\) 和 \(k+1\) 。
当两个字符串首字母不同时,它们的 LCP 就是 \(0\) 。否则删除首字母后排名先后肯定也是不变的,且它们的 LCP 长度为 \(h_{i-1}-1\) 。而由 LCP Lemma 可知,这个LCP长度是这个区间中最小的。因此 \(h_i \geq h_{i-1}-1\) 。
令 \(k = h_i\) ,可以线性求出 \(height\) :
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k; // h[i] >= h[i - 1] - 1
int j = sa[rk[i] - 1]; // j 是 i 相邻的一个后缀
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
应用
求长为 \(n\) 的字符串的不同子串个数。
\(n \leq 10^5\)
发现每个子串都是一个后缀的前缀,考虑在排名最小的后缀处统计贡献。那么多出来的部分就是排名相邻的两个后缀的 LCP ,答案即为 \(\frac{n(n + 1)}{2} - \sum_{i = 2}^n height_i\) 。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const int N = 1e5 + 7;
char str[N];
int n;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework() {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
signed main() {
scanf("%d%s", &n, str + 1);
SA::prework();
ll ans = 1ll * n * (n + 1) / 2;
for (int i = 2; i <= n; ++i)
ans -= SA::ht[i];
printf("%lld", ans);
return 0;
}
LCS - Longest Common Substring
求两个串的最长公共子串。
\(n \leq 2.5 \times 10^5\)
把两个穿拼起来,中间塞一个无关字符,答案即为 \(height\) 数组的最大值(需要保证前后两个后缀串分别来自给出的两个串)。
#include <bits/stdc++.h>
using namespace std;
const int N = 5e5 + 7;
char s[N], p[N];
int n, m;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework(char *str, int n) {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
signed main() {
scanf("%s%s", s + 1, p + 1);
n = strlen(s + 1), m = strlen(p + 1);
s[n + 1] = '#', memcpy(s + n + 2, p + 1, sizeof(char) * m);
SA::prework(s, n + 1 + m);
int ans = 0;
for (int i = 2; i <= n + 1 + m; ++i)
if (min(SA::sa[i - 1], SA::sa[i]) <= n && max(SA::sa[i - 1], SA::sa[i]) > n)
ans = max(ans, SA::ht[i]);
printf("%d", ans);
return 0;
}
给定两个字符串,求出在两个字符串中各取出一个子串使得这两个子串相同的方案数。两个方案不同当且仅当这两个子串中有一个位置不同。
\(n \leq 2 \times 10^5\)
本质上就是算所有后缀两两之间的 LCP 和,但是需要来自两个不同串。考虑把两个串拼起来(中间塞一个无关字符)做一次,然后再减去两个串分别做一次的贡献。问题转化为 \(height\) 数组上所有子区间的最小值之和,不难用单调栈解决。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const int N = 4e5 + 7;
int L[N], R[N], sta[N];
char s[N], p[N];
int n, m;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework(char *str, int n) {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
inline ll solve(char *str, int n) {
SA::prework(str, n);
for (int i = 2, top = 0; i <= n; ++i) {
while (top && SA::ht[sta[top]] > SA::ht[i])
--top;
L[i] = top ? sta[top] + 1 : 2, sta[++top] = i;
}
for (int i = n, top = 0; i >= 2; --i) {
while (top && SA::ht[sta[top]] >= SA::ht[i])
--top;
R[i] = top ? sta[top] - 1 : n, sta[++top] = i;
}
ll res = 0;
for (int i = 2; i <= n; ++i)
res += 1ll * SA::ht[i] * (i - L[i] + 1) * (R[i] - i + 1);
return res;
}
signed main() {
scanf("%s%s", s + 1, p + 1);
n = strlen(s + 1), m = strlen(p + 1);
ll res = solve(s, n) + solve(p, m);
s[n + 1] = '#', memcpy(s + n + 2, p + 1, sizeof(char) * m);
printf("%lld", solve(s, n + 1 + m) - res);
return 0;
}
求:\(\sum_{1 \leq i < j \leq n} |S[i, n]| + |S[j, n]| - 2 \times lcp(S[i, n], S[j, n])\) 。
\(n \leq 5 \times 10^5\)
前半部分总和为 \(\frac{n(n - 1)(n + 1)}{2}\) ,后半部分和上题一样。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const int N = 5e5 + 7;
int L[N], R[N], sta[N];
char str[N];
int n;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework(char *str, int n) {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
signed main() {
scanf("%s", str + 1), n = strlen(str + 1);
SA::prework(str, n);
for (int i = 2, top = 0; i <= n; ++i) {
while (top && SA::ht[sta[top]] > SA::ht[i])
--top;
L[i] = top ? sta[top] + 1 : 2, sta[++top] = i;
}
for (int i = n, top = 0; i >= 2; --i) {
while (top && SA::ht[sta[top]] >= SA::ht[i])
--top;
R[i] = top ? sta[top] - 1 : n, sta[++top] = i;
}
ll ans = 1ll * n * (n - 1) * (n + 1) / 2;
for (int i = 2; i <= n; ++i)
ans -= 2ll * SA::ht[i] * (i - L[i] + 1) * (R[i] - i + 1);
printf("%lld", ans);
return 0;
}
给定字符串, \(m\) 次询问 \(S[a, b]\) 所有子串与 \(S[c, d]\) 的 LCP 最大值。
\(n, m \leq 10^5\)
考虑二分答案,那么对于 \(LCP \geq mid\) 的限制可以对 \(height\) 建立 ST 表后二分求出一段后缀排名的区间,\(S[a, b]\) 的限制可以转化为一段下标的区间。静态二维数点用主席树解决即可,时间复杂度 \(O(n \log^2 n)\) 。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const int N = 1e5 + 7, LOGN = 17;
char str[N];
int n, m;
template <class T = int>
inline T read() {
char c = getchar();
bool sign = (c == '-');
while (c < '0' || c > '9')
c = getchar(), sign |= (c == '-');
T x = 0;
while ('0' <= c && c <= '9')
x = (x << 1) + (x << 3) + (c & 15), c = getchar();
return sign ? (~x + 1) : x;
}
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework() {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
namespace ST {
int f[LOGN][N];
inline void prework() {
memcpy(f[0] + 1, SA::ht + 1, sizeof(int) * n);
for (int j = 1; j <= __lg(n); ++j)
for (int i = 1; i + (1 << j) - 1 <= n; ++i)
f[j][i] = min(f[j - 1][i], f[j - 1][i + (1 << (j - 1))]);
}
inline int query(int l, int r) {
int k = __lg(r - l + 1);
return min(f[k][l], f[k][r - (1 << k) + 1]);
}
} // namespace ST
namespace SMT {
const int S = 1e7 + 7;
int lc[S], rc[S], cnt[S], rt[N];
int tot;
int insert(int x, int nl, int nr, int p) {
int y = ++tot;
lc[y] = lc[x], rc[y] = rc[x], cnt[y] = cnt[x] + 1;
if (nl == nr)
return y;
int mid = (nl + nr) >> 1;
if (p <= mid)
lc[y] = insert(lc[x], nl, mid, p);
else
rc[y] = insert(rc[x], mid + 1, nr, p);
return y;
}
int query(int x, int y, int nl, int nr, int l, int r) {
if (l <= nl && nr <= r)
return cnt[y] - cnt[x];
int mid = (nl + nr) >> 1;
if (r <= mid)
return query(lc[x], lc[y], nl, mid, l, r);
else if (l > mid)
return query(rc[x], rc[y], mid + 1, nr, l, r);
else
return query(lc[x], lc[y], nl, mid, l, r) + query(rc[x], rc[y], mid + 1, nr, l, r);
}
} // namespace SMT
inline bool check(int ql, int qr, int x, int k) {
int l = 1, r = SA::rk[x] - 1, hl = SA::rk[x];
while (l <= r) {
int mid = (l + r) >> 1;
if (ST::query(mid + 1, SA::rk[x]) >= k)
hl = mid, r = mid - 1;
else
l = mid + 1;
}
int hr = SA::rk[x];
l = SA::rk[x] + 1, r = n;
while (l <= r) {
int mid = (l + r) >> 1;
if (ST::query(SA::rk[x] + 1, mid) >= k)
hr = mid, l = mid + 1;
else
r = mid - 1;
}
return SMT::query(SMT::rt[hr], SMT::rt[hl - 1], 1, n, ql, qr);
}
signed main() {
n = read(), m = read();
scanf("%s", str + 1);
SA::prework(), ST::prework();
for (int i = 1; i <= n; ++i)
SMT::rt[i] = SMT::insert(SMT::rt[i - 1], 1, n, SA::sa[i]);
while (m--) {
int a = read(), b = read(), c = read(), d = read(),
l = 0, r = min(b - a + 1, d - c + 1), ans = 0;
while (l <= r) {
int mid = (l + r) >> 1;
if (check(a, b - mid + 1, c, mid))
ans = mid, l = mid + 1;
else
r = mid - 1;
}
printf("%d\n", ans);
}
return 0;
}
给定长度为 \(n\) 的字符串与权值 \(a_{1 \sim n}\) 。对于所有 \(i \in [1, n]\) ,求有多少对后缀满足 LCP 长度 \(\geq i\) 以及满足条件的两个后缀权值乘积的最大值。
\(n \leq 3 \times 10^5\)
考虑倒序枚举 \(i\) ,则每次都会将 \(height = i\) 的位置的两个后缀合并为一个连通块,两个问题的答案都可以通过并查集维护连通块来处理,时间复杂度 \(O(n \log n)\) 。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const ll inf = 1e18;
const int N = 3e5 + 7;
vector<int> ht[N];
pair<ll, ll> ans[N];
int a[N];
char str[N];
pair<ll, ll> Answer = make_pair(0ll, -inf);
int n;
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework() {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
namespace DSU {
int fa[N], siz[N], mx[N], mn[N];
inline void prework(int n) {
iota(fa + 1, fa + 1 + n, 1);
fill(siz + 1, siz + 1 + n, 1);
}
inline int find(int x) {
while (x != fa[x])
fa[x] = fa[fa[x]], x = fa[x];
return x;
}
inline void merge(int x, int y) {
int fx = find(x), fy = find(y);
Answer.first += 1ll * siz[fx] * siz[fy];
Answer.second = max(Answer.second, max(1ll * mx[fx] * mx[fy], 1ll * mn[fx] * mn[fy]));
mx[fx] = max(mx[fx], mx[fy]), mn[fx] = min(mn[fx], mn[fy]);
siz[fx] += siz[fy], fa[fy] = fx;
}
} // namespace DSU
signed main() {
scanf("%d%s", &n, str + 1);
for (int i = 1; i <= n; ++i)
scanf("%d", a + i);
SA::prework(), DSU::prework(n);
for (int i = 1; i <= n; ++i)
DSU::mx[i] = DSU::mn[i] = a[SA::sa[i]];
for (int i = 2; i <= n; ++i)
ht[SA::ht[i]].emplace_back(i);
for (int i = n - 1; ~i; --i) {
for (int x : ht[i])
DSU::merge(x, x - 1);
ans[i] = Answer;
}
for (int i = 0; i < n; ++i)
printf("%lld %lld\n", ans[i].first, ans[i].second == -inf ? 0 : ans[i].second);
return 0;
}
现有一个字符串 \(S\) ,从中划出 \(n_a\) 个子串作为 A 类串,再划出 \(n_b\) 个子串作为 B 类串。
额外给定 \(m\) 组支配关系,每组支配关系 \((x, y)\) 表示第 \(x\) 个 A 类串支配第 \(y\) 个 B 类串。
求长度最大的串 \(T\) 长度,满足存在一个串 \(T\) 的分割 \(T = t_1 + t_2 + \cdots + t_k\) 满足:
- 分割中的每个串 \(t_i\) 均为 A 类串。
- 对于分割中所有相邻的串 \(t_i, t_{i + 1}\) ,都存在一个 \(t_i\) 支配的 B 类串为 \(t_{i + 1}\) 的前缀。
特别地,若存在无限长的 \(T\) ,输出 \(-1\) 。
\(|S|, n_a, n_b \leq 2 \times 10^5\)
题目转化为:给定一些从 A 类区间连向 B 类区间的边,一个 B 区间能连向一个 A 区间当且仅当前者是后者的前缀。求这张图上的最长路(或判断无限长)。
显然若有环则答案可以无限大,否则直接在 DAG 上 DP 即可。
考虑优化 B 区间到 A 区间的边,发现包含某个 B 串作为前缀的 A 串需要满足两个条件:
- 以二者左端点开始的后缀的 LCP 长度不小于 B 串长度:转化为对应着后缀数组上的一段区间。
- A 串长度不小于 B 串长度:按照长度从大到小的顺序加入每个串即可。
使用主席树优化建图,时间复杂度 \(O((n + n_a + n_b) \log n)\) 。
#include <bits/stdc++.h>
typedef long long ll;
using namespace std;
const int N = 2e5 + 7, LOGN = 19, S = N << 5;
struct Interval {
int l, r;
} a[N], b[N];
struct Node {
int x, len, op, id;
inline Node() {}
inline Node(int _x, int _len, int _op, int _id) : x(_x), len(_len), op(_op), id(_id) {}
inline bool operator < (const Node &rhs) const {
return len == rhs.len ? op < rhs.op : len > rhs.len;
}
};
struct Graph {
vector<int> e[S];
int indeg[S];
inline void clear(int n) {
for (int i = 1; i <= n; ++i)
e[i].clear(), indeg[i] = 0;
}
inline void insert(int u, int v) {
e[u].emplace_back(v), ++indeg[v];
}
} G;
ll f[S];
char str[N];
int n, na, nb, m, tot;
template <class T = int>
inline T read() {
char c = getchar();
bool sign = (c == '-');
while (c < '0' || c > '9')
c = getchar(), sign |= (c == '-');
T x = 0;
while ('0' <= c && c <= '9')
x = (x << 1) + (x << 3) + (c & 15), c = getchar();
return sign ? (~x + 1) : x;
}
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
inline void prework() {
int m = 1 << 9;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = str[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && str[i + k] == str[j + k])
++k;
ht[rk[i]] = k;
}
}
} // namespace SA
namespace ST {
int f[LOGN][N];
inline void prework() {
memcpy(f[0] + 1, SA::ht + 1, sizeof(int) * n);
for (int j = 1; j <= __lg(n); ++j)
for (int i = 1; i + (1 << j) - 1 <= n; ++i)
f[j][i] = min(f[j - 1][i], f[j - 1][i + (1 << (j - 1))]);
}
inline int query(int l, int r) {
int k = __lg(r - l + 1);
return min(f[k][l], f[k][r - (1 << k) + 1]);
}
} // namespace ST
namespace SMT {
int lc[S], rc[S];
int root, tot;
inline void clear() {
G.clear(tot), root = 0;
for (; tot; --tot)
lc[tot] = rc[tot] = 0;
}
int insert(int x, int nl, int nr, int p, int k) {
int y = ++tot;
lc[y] = lc[x], rc[y] = rc[x];
if (x)
G.insert(y, x);
if (nl == nr) {
G.insert(y, k);
return y;
}
int mid = (nl + nr) >> 1;
if (p <= mid)
G.insert(y, lc[y] = insert(lc[x], nl, mid, p, k));
else
G.insert(y, rc[y] = insert(rc[x], mid + 1, nr, p, k));
return y;
}
void update(int x, int nl, int nr, int l, int r, int k) {
if (!x)
return;
if (l <= nl && nr <= r) {
G.insert(k, x);
return;
}
int mid = (nl + nr) >> 1;
if (l <= mid)
update(lc[x], nl, mid, l, r, k);
if (r > mid)
update(rc[x], mid + 1, nr, l, r, k);
}
} // namespace SMT
inline ll TopoSort() {
memset(f + 1, 0, sizeof(ll) * SMT::tot);
queue<int> q;
ll ans = 0;
int cnt = 0;
for (int i = 1; i <= SMT::tot; ++i)
if (!G.indeg[i])
q.emplace(i);
while (!q.empty()) {
int u = q.front();
q.pop(), ++cnt;
ans = max(ans, f[u] += (u <= na ? a[u].r - a[u].l + 1 : 0));
for (int v : G.e[u]) {
f[v] = max(f[v], f[u]), --G.indeg[v];
if (!G.indeg[v])
q.emplace(v);
}
}
return cnt == SMT::tot ? ans : -1;
}
signed main() {
int T = read();
while (T--) {
scanf("%s", str + 1), n = strlen(str + 1);
SA::prework(), ST::prework();
vector<Node> nd;
na = read();
for (int i = 1; i <= na; ++i) {
a[i].l = read(), a[i].r = read();
nd.emplace_back(SA::rk[a[i].l], a[i].r - a[i].l + 1, 0, i);
}
nb = read();
for (int i = 1; i <= nb; ++i) {
b[i].l = read(), b[i].r = read();
nd.emplace_back(SA::rk[b[i].l], b[i].r - b[i].l + 1, 1, na + i);
}
sort(nd.begin(), nd.end());
SMT::clear(), SMT::tot = na + nb;
for (Node it : nd) {
if (it.op) {
int l = it.x, r = it.x;
for (int j = LOGN - 1; ~j; --j) {
if (l - (1 << j) >= 1 && ST::query(l - (1 << j) + 1, it.x) >= it.len)
l -= 1 << j;
if (r + (1 << j) <= n && ST::query(it.x + 1, r + (1 << j)) >= it.len)
r += 1 << j;
}
SMT::update(SMT::root, 1, n, l, r, it.id);
} else
SMT::root = SMT::insert(SMT::root, 1, n, it.x, it.id);
}
m = read();
for (int i = 1; i <= m; ++i) {
int u = read(), v = na + read();
G.insert(u, v);
}
printf("%lld\n", TopoSort());
}
return 0;
}
相似子串
定义字符串 \(A\) 与字符串 \(B\) 相似当且仅当 \(A_i = A_j \iff B_i = B_j\) 。
给定长度为 \(n\) 的数字串 \(S\) ,\(q\) 次询问与某个子串相似的子串数量,强制在线。
\(n \leq 10^5\) ,\(q \leq 5 \times 10^5\)
考虑对于一个子串,每个位置的权值定义为与上一个相同字符的下标差,若在该子串中首次出现则为 \(0\) ,则两个字符串相似当且仅当它们每个位置的权值相同。
首先对原权值构建出 \(height\) 数组,然后可以注意到截取一段子串时,最多只会有 \(10\) 个位置变为 \(0\) ,那么二分时分段比较即可,
#include <bits/stdc++.h>
using namespace std;
const int inf = 0x3f3f3f3f;
const int N = 1e5 + 7, LOGN = 17, S = 10;
struct ST {
int f[LOGN][N];
inline int &operator [] (const int &x) {
return f[0][x];
}
inline void prework(int n) {
for (int j = 1; j <= __lg(n); ++j)
for (int i = 1; i + (1 << j) - 1 <= n; ++i)
f[j][i] = min(f[j - 1][i], f[j - 1][i + (1 << (j - 1))]);
}
inline int query(int l, int r) {
if (l > r)
return inf;
int k = __lg(r - l + 1);
return min(f[k][l], f[k][r - (1 << k) + 1]);
}
} st;
vector<int> place[N];
int nxt[N][S];
int a[N], id[N], rid[N];
char str[N];
int n, q;
template <class T = int>
inline T read() {
char c = getchar();
bool sign = (c == '-');
while (c < '0' || c > '9')
c = getchar(), sign |= (c == '-');
T x = 0;
while ('0' <= c && c <= '9')
x = (x << 1) + (x << 3) + (c & 15), c = getchar();
return sign ? (~x + 1) : x;
}
namespace SA {
int sa[N], rk[N], cnt[N], id[N], oldrk[N], ht[N];
ST st;
inline void prework() {
int m = n;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[i] = a[i]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[i]]--] = i;
for (int k = 1; k < n; k <<= 1) {
int tot = 0;
for (int i = n; i > n - k; --i)
id[++tot] = i;
for (int i = 1; i <= n; ++i)
if (sa[i] > k)
id[++tot] = sa[i] - k;
memset(cnt, 0, sizeof(int) * (m + 1));
for (int i = 1; i <= n; ++i)
++cnt[rk[id[i]]];
for (int i = 1; i <= m; ++i)
cnt[i] += cnt[i - 1];
for (int i = n; i; --i)
sa[cnt[rk[id[i]]]--] = id[i];
memcpy(oldrk + 1, rk + 1, sizeof(int) * n), tot = 0;
for (int i = 1; i <= n; ++i) {
if (oldrk[sa[i]] != oldrk[sa[i - 1]] || oldrk[sa[i] + k] != oldrk[sa[i - 1] + k])
++tot;
rk[sa[i]] = tot;
}
if (tot == n)
break;
m = tot;
}
for (int i = 1, k = 0; i <= n; ++i) {
if (k)
--k;
int j = sa[rk[i] - 1];
while (i + k <= n && j + k <= n && a[i + k] == a[j + k])
++k;
ht[rk[i]] = k;
}
for (int i = 1; i <= n; ++i)
st[i] = ht[i];
st.prework(n);
}
inline int LCP(int x, int y) {
if (x == y)
return n - x + 1;
x = rk[x], y = rk[y];
if (x > y)
swap(x, y);
return st.query(x + 1, y);
}
} // namespace SA
inline int querylcp(int x, int y) {
int i = 0, curx = x, cury = y, len = 0;
while (curx <= n && cury <= n) {
int nxtx = (i < place[x].size() ? place[x][i] : n + 1),
nxty = (i < place[y].size() ? place[y][i] : n + 1),
nlen = min(nxtx - curx, nxty - cury), lcp = SA::LCP(curx, cury);
if (lcp < nlen)
return len + lcp;
len += nlen;
if (nxtx != curx + nlen || nxty != cury + nlen || nxtx > n || nxty > n)
return len;
++len, curx = nxtx + 1, cury = nxty + 1, ++i;
}
return len;
}
inline int query(int x, int len) {
x = rid[x];
int l = x, r = x;
for (int i = LOGN - 1; ~i; --i) {
if (l > (1 << i) && st.query(l - (1 << i) + 1, x) >= len)
l -= 1 << i;
if (r + (1 << i) <= n && st.query(x + 1, r + (1 << i)) >= len)
r += 1 << i;
}
return r - l + 1;
}
inline bool cmp(const int &x, const int &y) {
int len = querylcp(x, y),
cmpx = (x + len <= n ? a[x + len] : -1),
cmpy = (y + len <= n ? a[y + len] : -1);
if (cmpx == -1 || cmpy == -1)
return cmpx < cmpy;
for (int it : place[x])
if (it == x + len) {
cmpx = 0;
break;
}
for (int it : place[y])
if (it == y + len) {
cmpy = 0;
break;
}
return cmpx < cmpy;
}
signed main() {
n = read(), q = read();
scanf("%s", str + 1);
vector<int> lst(11);
for (int i = 1; i <= n; ++i) {
int idx = str[i] & 15;
a[i] = lst[idx] ? i - lst[idx] : 0;
lst[idx] = i;
}
for (int i = n; i; --i) {
memcpy(nxt[i], nxt[i + 1], sizeof(int) * S);
nxt[i][str[i] & 15] = i;
for (int j = 0; j < S; ++j)
if (nxt[i][j])
place[i].emplace_back(nxt[i][j]);
sort(place[i].begin(), place[i].end());
}
SA::prework();
iota(id + 1, id + 1 + n, 1);
stable_sort(id + 1, id + 1 + n, cmp);
for (int i = 1; i <= n; ++i)
rid[id[i]] = i;
for (int i = 2; i <= n; ++i)
st.f[0][i] = querylcp(id[i], id[i - 1]);
st.prework(n);
int lstans = 0;
while (q--) {
int l = read() ^ lstans, r = read() ^ lstans;
printf("%d\n", lstans = query(l, r - l + 1));
}
return 0;
}
标签:后缀,top,++,int,数组,sa,id,SA
From: https://www.cnblogs.com/wshcl/p/18622278/SA