AC自动机学习笔记

建议先学习 KMP，阅读本篇前请至少先阅读 KMP学习笔记的前言

\[ Upd \ in \ 2019.10.10 \]

增加一些新的理解，顺便把博客搬过来。

AC 自动机的 nxt 与 KMP 的并不一样，前者是指向一个与自己等价的地方，而 KMP 则是指向 与前一个等价的地方的后一个 ，所以才是失配后去找的地方。

还有注意只有 $ ch $ 需要开满，其他的数组只用开到 数量 * 长度 即可。

#include <iostream>
#include <cstdio>
#include <cstring>
#include <queue>
#define reg register
#define ll long long
using namespace std;
template <typename _Typer>
inline _Typer read() {
    reg char ch = getchar();
    reg _Typer init = 0, base = 1;
    while ((ch < '0' || ch > '9') && ch != '-') ch = getchar();
    if (ch == '-') base = -1;
    else init = ch - '0';
    ch = getchar();
    while (ch >= '0' && ch <= '9')
        init = (init<<3) + (init<<1) + ch - '0', ch = getchar();
    return init * base;
}
const ll N = 1000005, INF = 1e9;
const int Sigma_Size = 26;
const char Cmod = 'a';

int ch[N][Sigma_Size];
int val[N], sz;
int ans;
struct Trie {
    Trie () { sz = 1; /*memset(ch, 0, sizeof(ch)); memset(val, 0, sizeof(val));*/}
    int idx(char c) { return c - Cmod;}
    void insert(char *s, int v) {
        int u = 0, len = strlen(s);
        for (reg int i = 0; i < len; ++i) {
            int c = idx(s[i]);
            if (!ch[u][c]) {
//              memset(ch[sz], 0, sizeof(ch[sz]));
                val[sz] = 0;
                ch[u][c] = sz;
                ++sz;
            }
            u = ch[u][c];
        }
        val[u] += v;
    }
    int query(char *s) {
        int u = 0, len = strlen(s);
        for (reg int i = 0; i < len; ++i) {
            int c = idx(s[i]);
            if (!ch[u][c]) return -1;
            u = ch[u][c];
        }
        return val[u];
    }
}trie;

int nex[N*Sigma_Size], lst[N*Sigma_Size];
// nex[x]=y, only when char[x]=char[y] and 0...y=somewhere...x
inline void getfail() {
    queue <int> q;
    nex[0] = 0;
    for (reg int i = 0; i < Sigma_Size; ++i) {
        int u = ch[0][i];
        if (u) { nex[u] = 0; q.push(u); lst[u] = 0;}
    }
    while (!q.empty()) {
        int r = q.front(); q.pop();
        for (reg int c = 0; c < Sigma_Size; ++c) {
            int u = ch[r][c];
            // when r,i fail -> nex[r],i -> 0-nex[r]=x-r -> r=nex[r]
            // because haven't ch[r][c](u) -> r.c=nex[r].c (@1)
            if (!u) { ch[r][c] = ch[nex[r]][c]; continue;}
            q.push(u);
            int v = nex[r];
            // 1.ch[v][c]>0 -> now.c=v.c -> nex[u(ch[r][c])]=sz(v.c)=ch[v][c]
            // 2.v=0 -> no one same of now(r.c) -> now.c=0 -> nex[u]=0(ch[v][c])
            nex[u] = ch[v][c];
            // 1.nex[u].val>0 -> nex[u] is a word
            // 2.nex[u].val=0 -> nex[u] isn't a word ->
            //   1).lst[nex[u]].val>0 -> lst[nex[u]] is a word
            //   2).lst[nex[u]].val=0 -> it's not a word ->
            //      .
            //      .
            //      .
            // so, lst[x] mean the lastest word
            // (if lst[x] is 0, the reason is that no word is finish with x(lst[x])) (@2)
            lst[u] = val[nex[u]] ? nex[u] : lst[nex[u]];
        }
    }
}
inline void update(int j) {
    if (j) {
        ans += val[j];
        val[j] = 0;
        update(lst[j]);
    }
}
inline void find(char *T) {
    getfail();
    int j = 0, len = strlen(T);
    for (reg int i = 0; i < len; ++i) {
        int c = trie.idx(T[i]);
        // try to compare T[i](c),ch[j][c]
        // if j have son is c -> some word's P[j].next is c
        // else -> no word's P[j].next is c -> fail -> compare T[i],P[nex[j]]
        // because j haven't son is c, ch[j][c](first) is 0 -> @1 -> ch[j][c]=ch[nex[j][c]
        j = ch[j][c];
        // if val[j]>0 a word have compared successful -> update
        if (val[j]) update(j);
        // else may be when we compare in now, some word have compared successful
        // but in this node, the val is 0
        // and now, if lst[j]>0, it means some word is finish with j(ch[pre[j]][c(T[i])])
        // (because @2)
        else if (lst[j]) update(lst[j]);
    }
}

char Ts[N];
inline void scan();
inline int _main() {
    scan();
    find(Ts);
    printf("%d\n", ans);
    return 0;
}

int main() {
    #define offline1
    #ifdef offline
    freopen("Aho-Corasick.in", "r", stdin);
    freopen("Aho-Corasick.out", "w", stdout);
    _main();
    fclose(stdin); fclose(stdout);
    #else
    _main();
    #endif
    return 0;
}

inline void scan() {
    int n = read<int>();
    char Ps[N];
    for (reg int i = 1; i <= n; ++i) {
        scanf("%s", Ps);
        trie.insert(Ps, 1);
    }
    scanf("%s", Ts);
}