建议先学习 KMP, 阅读本篇前请至少先阅读 KMP学习笔记 的前言
\[ Upd \ in \ 2019.10.10 \]
增加一些新的理解,顺便把博客搬过来。
AC 自动机的 nxt 与 KMP 的并不一样,前者是指向一个与自己等价的地方,而 KMP 则是指向 与前一个等价的地方的后一个 ,所以才是失配后去找的地方。
还有注意只有 $ ch $ 需要开满,其他的数组只用开到 数量 * 长度
即可。
#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#define reg register
#define ll long long
using namespace std;
template <typename _Typer>
inline _Typer read() {
reg char ch = getchar();
reg _Typer init = 0, base = 1;
while ((ch < '0' || ch > '9') && ch != '-') ch = getchar();
if (ch == '-') base = -1;
else init = ch - '0';
ch = getchar();
while (ch >= '0' && ch <= '9')
init = (init<<3) + (init<<1) + ch - '0', ch = getchar();
return init * base;
}
const ll N = 1000005, INF = 1e9;
const int Sigma_Size = 26;
const char Cmod = 'a';
int ch[N][Sigma_Size];
int val[N], sz;
int ans;
struct Trie {
Trie () { sz = 1; /*memset(ch, 0, sizeof(ch)); memset(val, 0, sizeof(val));*/}
int idx(char c) { return c - Cmod;}
void insert(char *s, int v) {
int u = 0, len = strlen(s);
for (reg int i = 0; i < len; ++i) {
int c = idx(s[i]);
if (!ch[u][c]) {
// memset(ch[sz], 0, sizeof(ch[sz]));
val[sz] = 0;
ch[u][c] = sz;
++sz;
}
u = ch[u][c];
}
val[u] += v;
}
int query(char *s) {
int u = 0, len = strlen(s);
for (reg int i = 0; i < len; ++i) {
int c = idx(s[i]);
if (!ch[u][c]) return -1;
u = ch[u][c];
}
return val[u];
}
}trie;
int nex[N*Sigma_Size], lst[N*Sigma_Size];
// nex[x]=y, only when char[x]=char[y] and 0...y=somewhere...x
inline void getfail() {
queue <int> q;
nex[0] = 0;
for (reg int i = 0; i < Sigma_Size; ++i) {
int u = ch[0][i];
if (u) { nex[u] = 0; q.push(u); lst[u] = 0;}
}
while (!q.empty()) {
int r = q.front(); q.pop();
for (reg int c = 0; c < Sigma_Size; ++c) {
int u = ch[r][c];
// when r,i fail -> nex[r],i -> 0-nex[r]=x-r -> r=nex[r]
// because haven't ch[r][c](u) -> r.c=nex[r].c (@1)
if (!u) { ch[r][c] = ch[nex[r]][c]; continue;}
q.push(u);
int v = nex[r];
// 1.ch[v][c]>0 -> now.c=v.c -> nex[u(ch[r][c])]=sz(v.c)=ch[v][c]
// 2.v=0 -> no one same of now(r.c) -> now.c=0 -> nex[u]=0(ch[v][c])
nex[u] = ch[v][c];
// 1.nex[u].val>0 -> nex[u] is a word
// 2.nex[u].val=0 -> nex[u] isn't a word ->
// 1).lst[nex[u]].val>0 -> lst[nex[u]] is a word
// 2).lst[nex[u]].val=0 -> it's not a word ->
// .
// .
// .
// so, lst[x] mean the lastest word
// (if lst[x] is 0, the reason is that no word is finish with x(lst[x])) (@2)
lst[u] = val[nex[u]] ? nex[u] : lst[nex[u]];
}
}
}
inline void update(int j) {
if (j) {
ans += val[j];
val[j] = 0;
update(lst[j]);
}
}
inline void find(char *T) {
getfail();
int j = 0, len = strlen(T);
for (reg int i = 0; i < len; ++i) {
int c = trie.idx(T[i]);
// try to compare T[i](c),ch[j][c]
// if j have son is c -> some word's P[j].next is c
// else -> no word's P[j].next is c -> fail -> compare T[i],P[nex[j]]
// because j haven't son is c, ch[j][c](first) is 0 -> @1 -> ch[j][c]=ch[nex[j][c]
j = ch[j][c];
// if val[j]>0 a word have compared successful -> update
if (val[j]) update(j);
// else may be when we compare in now, some word have compared successful
// but in this node, the val is 0
// and now, if lst[j]>0, it means some word is finish with j(ch[pre[j]][c(T[i])])
// (because @2)
else if (lst[j]) update(lst[j]);
}
}
char Ts[N];
inline void scan();
inline int _main() {
scan();
find(Ts);
printf("%d\n", ans);
return 0;
}
int main() {
#define offline1
#ifdef offline
freopen("Aho-Corasick.in", "r", stdin);
freopen("Aho-Corasick.out", "w", stdout);
_main();
fclose(stdin); fclose(stdout);
#else
_main();
#endif
return 0;
}
inline void scan() {
int n = read<int>();
char Ps[N];
for (reg int i = 1; i <= n; ++i) {
scanf("%s", Ps);
trie.insert(Ps, 1);
}
scanf("%s", Ts);
}