题目描述
给定一个长度为 n 的字符串 S,令 Ti 表示它从第 i 个字符开始的后缀。求
\(\sum_{1\le i <j\le n}len(T_i)+len(T_j)-2*lcp(T_i,T_j)\)
说明
对于 100% 的数据,保证 2⩽n⩽500000,且均为小写字母。
思路
注意到前面那个东西是个定值,所以关键在于如何求后面那个东西
由于 \(lcp(sa[l],sa[r])=min_{i=l+1}^{r}H[i]\)
所以后面那个东西实际上就是 \(H\) 数组的所有子区间 \(min\) 之和,当然是去掉 \(H[1]\) 之后
这个东西可以用单调栈通过维护后缀 \(min\) 来求
代码
#include<iostream>
#include<cstdio>
#include<cstring>
#define maxn 500010
#define INF 1000000000
#define ll long long
using namespace std;
int n;
char c[maxn];
int tax[maxn], rk[maxn], tp[maxn], sa[maxn], M = 200;
void rsort() {
for (int i = 0; i <= M; ++i) tax[i] = 0;
for (int i = 1; i <= n; ++i) ++tax[rk[i]];
for (int i = 1; i <= M; ++i) tax[i] += tax[i - 1];
for (int i = n; i; --i) sa[tax[rk[tp[i]]]--] = tp[i];
}
int c1, H[maxn];
void SA(char *s) {
for (int i = 1; i <= n; ++i) rk[i] = s[i], tp[i] = i; rsort();
for (int k = 1; k < n; k *= 2) {
if (c1 == n) break; M = c1; c1 = 0;
for (int i = n - k + 1; i <= n; ++i) tp[++c1] = i;
for (int i = 1; i <= n; ++i) if (sa[i] > k) tp[++c1] = sa[i] - k;
rsort(); swap(tp, rk); rk[sa[1]] = c1 = 1;
for (int i = 2; i <= n; ++i) {
if (tp[sa[i - 1]] != tp[sa[i]] || tp[sa[i - 1] + k] != tp[sa[i] + k]) ++c1;
rk[sa[i]] = c1;
}
} int lcp = 0;
for (int i = 1; i <= n; ++i) {
if (lcp) --lcp;
int j = sa[rk[i] - 1];
while (s[j + lcp] == s[i + lcp]) ++lcp;
H[rk[i]] = lcp;
}
}
int st[maxn], top;
ll ans, s;
int main() {
scanf("%s", c + 1); n = strlen(c + 1); SA(c); ans = (ll) n * (n - 1) / 2 * (n + 1);
for (int i = 1; i < n; ++i) H[i] = H[i + 1]; H[n--] = 0;
for (int i = 1; i <= n; ++i) {
while (top && H[st[top]] >= H[i]) {
s -= (ll) (st[top] - st[top - 1]) * H[st[top]];
--top;
}
st[++top] = i; s += (st[top] - st[top - 1]) * H[st[top]];
ans -= 2 * s;
} cout << ans << endl;
return 0;
}