AC自动机入门

Aho-Corasick automaton，该算法在1975年产生于贝尔实验室，是著名的多模式匹配算法之一。

KMP算法很好的解决了单模式匹配问题，如果有了字典树的基础，我们可以完美的结合二者解决多模式匹配问题。

在KMP算法中，我们预先根据待匹配串自身的信息得到失配指针，使得在每次匹配不成功后，可以不再去处理模式串的已匹配过的部分，进而使得复杂度降为O（N）。

对于多模式串匹配问题，当一个模式串与待匹配串不匹配时，失配指针可以指向任意一个串，这就需要我们利用字典树来组织所有模式串并得到失配指针。

构造失配指针的详细过程可参考 http://blog.csdn.net/niushuai666/article/details/7002823

时间复杂度分析：假设有N个长不超过m的模式串，待匹配串长度为L。建字典树的复杂度为O（Nm），对于每个L的前缀，最坏情况下匹配树高（m）次，故总的复杂度为O（（N+L）*m）。

我的模板

 /*

 基于HDOJ 2222 的 AC自动机

 文本串对多个模板串的查找

 */

 #include<bits/stdc++.h>

 using namespace std;

 const int maxn = ;

 const int alp =;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 char str[];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int len = strlen(str);

     int now = root;

     for(int i=; i<len; i++)

     {

         int &tmp = ch[now][str[i]-'a'];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] ++;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i], q.push(tmp);

         }

     }

 }

 int query(char str[])

 {

     int len = strlen(str);

     int now = root;

     int ret = ;

     for(int i=; i<len; i++)

     {

         int tmp = now = ch[now][str[i]-'a'];

         while(tmp != root)

         {

             if(ed[tmp] > )

                 ret += ed[tmp],

                 ed[tmp] = ;

             tmp = fail[tmp];

         }

     }

     return ret;

 }

 int main()

 {

     int n, t;

     scanf("%d",&t);

     while(t--)

     {

         scanf("%d",&n);

         init();

         for(int i=; i<=n; i++)

             scanf("%s",str), Insert(str);

         build();

         scanf("%s",str);

         printf("%d\n",query(str));

     }

     return ;

 }

 #include<bits/stdc++.h>

 using namespace std;

 const int maxn = ;

 const int alp = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 char str[];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[], int tag)

 {

     int len = strlen(str);

     int now = root;

     for(int i=; i<len; i++)

     {

         int &tmp = ch[now][str[i]];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = tag;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i], q.push(tmp);

         }

     }

 }

 bool vis[];

 void query(char str[])

 {

     int len = strlen(str);

     int now = root;

     int ret = ;

     for(int i=; i<len; i++)

     {

         int tmp = now = ch[now][str[i]];

         while(tmp != root)

         {

             if(ed[tmp]) vis[ed[tmp]] = ;

             tmp = fail[tmp];

         }

     }

 }

 int main()

 {

     int n, m;

     while(scanf("%d",&n) == )

     {

         init();

         for(int i=; i<=n; i++)

             scanf("%s",str), Insert(str, i);

         build();

         scanf("%d",&m);

         int total = ;

         for(int i=; i<=m; i++)

         {

             memset(vis, , sizeof(vis));

             scanf("%s",str);

             query(str);

             int a[], id = ;

             for(int j=; j<=n; j++)

                 if(vis[j]) a[++id] = j;

             if(id) {

                 printf("web %d:",i);

                 for(int j=; j<=id; j++) printf(" %d", a[j]);

                 puts("");

                 total++;

             }

         }

         printf("total: %d\n",total);

     }

     return ;

 }

 #include<bits/stdc++.h>

 using namespace std;

 const int maxn = ;

 const int alp = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 char str[];

 int num[];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[], int tag)

 {

     int len = strlen(str);

     int now = root;

     for(int i=; i<len; i++)

     {

         int &tmp = ch[now][str[i]-'A'];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = tag;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i], q.push(tmp);

         }

     }

 }

 void query(char str[])

 {

     int len = strlen(str);

     int now = root;

     int ret = ;

     for(int i=; i<len; i++)

     {

         int tmp = now = ch[now][str[i]-'A'];

         while(tmp != root)

         {

             if(ed[tmp])  num[ed[tmp]] ++;

             tmp = fail[tmp];

         }

     }

 }

 char s[][];

 int main()

 {

     int n, m;

     while(scanf("%d",&n) == )

     {

         init();

         for(int i=; i<=n; i++)

             scanf("%s",s[i]), Insert(s[i], i), num[i]=;

         build();

         scanf("%s",str);

         for(int i=; str[i]; i++)

             if(str[i]<'A' || str[i] >'Z') str[i] = 'Z'+;

         query(str);

         for(int i=; i<=n; i++)

             if(num[i])

             printf("%s: %d\n",s[i], num[i]);

     }

     return ;

 }

　　给N(N<=10)个长度不超过10的DNA串，问可以构造出多少个长度为M（M<=2000000000)的DNA串不含以上串，所有串仅由'A'、'C'、'G'、'T'四个字符构成。

对于一个有向图，我们要求一点x到另一点y的长度为L不同路径的个数，构造一个矩阵，令mat[i][j]表示直接相连i和j的边的数量，那么该矩阵的L次幂的mat[x,y]就是答案。

　　这里我们可以先构造出最多101个节点的Trie图，建立AC自动机，标记给出的串。对于每个节点u，并枚举每个以上字符，若该节点没被标记并且转移后的节点v也没有被标记，那么mat[u][v]++。求出pow(mat,M)，矩阵第一行之和就是答案，因为它代表从root（空串）开始所有的长度为M的满足要求的所有串的数量和。

　　*注意，对于串"A"和"TAG",在建立自动机时，对于串"TAG"中的'A'节点也应该被标记，也就是说，每个节点的标记要考虑到失配节点的标记。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 typedef long long LL;

 using namespace std;

 const int maxn = ;

 const int N = ;

 const int alp = ;

 const LL mod = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 char str[];

 int Hash['z'];

 int root, sz;

 void mul(LL a[N][N], LL b[N][N])

 {

     LL c[N][N] = {};

     for(int k=; k<sz; k++)

         for(int i=; i<sz; i++)

             for(int j=; j<sz; j++)

             c[i][j] = (c[i][j] + a[i][k]*b[k][j]) % mod;

     memcpy(a, c, sizeof(c));

 }

 void quick_mod(LL a[N][N], int b)

 {

     LL c[N][N] = {};

     for(int i=; i<sz; i++) c[i][i] = ;

     while(b)

     {

         if(b&) mul(c,a);

         mul(a, a);

         b>>=;

     }

     memcpy(a, c, sizeof(c));

 }

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int len = strlen(str);

     int now = root;

     for(int i=; i<len; i++)

     {

         int &tmp = ch[now][Hash[str[i]]];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = ;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i], ed[tmp] |= ed[fail[tmp]], q.push(tmp);

         }

     }

 }

 void get_matrix(LL a[N][N])

 {

     for(int i=; i<sz; i++)

     {

         if(ed[i]) continue;

         for(int j=; j<; j++)

         {

             if(ed[ch[i][j]]) continue;

             a[i][ch[i][j]] ++;

         }

     }

 }

 int main()

 {

     int n, m;

     Hash['A'] = , Hash['C'] = , Hash['G'] = , Hash['T'] = ;

     while(scanf("%d %d",&n,&m) == )

     {

         init();

         for(int i=; i<=n; i++)

         {

             scanf("%s",str);

             Insert(str);

         }

         build();

         LL a[N][N] = {};

         get_matrix(a);

         quick_mod(a, m);

         LL ans = ;

         for(int i=; i<sz; i++)

             ans =(ans + a[][i]) % mod;

         printf("%I64d\n",ans);

     }

     return ;

 }

5.hdu 2243 考研路茫茫——单词情节

　　求长度不超过m的至少包含一个所给模式串的串的个数，并对2^64取模。

假设长度只能为m,上题中我们计算了不包含模式串的串的个数，而串的总数为pow(26,m)，包含模式串的串的个数就是两者只差，这是很容易得出的。
那么如何求出所有长度不超过m的串的总数呢？
对于前者，我们对上题的矩阵的边长加1，并且置最后一列全为1，这样，矩阵的第一行之和就记录了所有情况之和，由于初始第一行最后一列为1，结果减一即可。
对于后者，令sum_pow(26,x)表示26^1+26^2+26^3+...+26^x, 则有，若x为奇数转化为26^x+sum_pow(26,x-1), 若为偶数则转化为sum_pow(26,x/2)*(26^(x/2)+1),这样可在log(x)内的时间复杂度内求出。
对于模2^64,只要将所有结果保存为unsigned long long 型，自然溢出就相当于对2^64取模。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int N = ;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz++] = ;

     return sz-;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int len = strlen(str);

     int now = root;

     for(int i=; i<len; i++)

     {

         int &tmp = ch[now][str[i]-'a'];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = ;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                  ed[tmp] |= ed[fail[tmp]],

                  q.push(tmp);

         }

     }

 }

 void get_matric(ull a[N][N])

 {

     for(int i=; i<sz; i++)

     {

         if(ed[i]) continue;

         for(int j=; j<alp; j++)

         {

             if(ed[ch[i][j]]) continue;

             a[i][ch[i][j]] ++;

         }

     }

     for(int i=; i<=sz; i++) a[i][sz] = ;

 }

 ull quick_mod(ull a, ull b)

 {

     ull c = ;

     while(b) {

         if(b&) c = c * a;

         a = a * a;

         b >>= ;

     }

     return c;

 }

 ///pow(a,1)+pow(a,2)+...+pow(a,x);

 ull sum_pow(ull a, ull x)

 {

     if(x == ) return a;

     if(x&) return quick_mod(a,x) + sum_pow(a, x-);

     return sum_pow(a, x>>) * (quick_mod(a, x>>)+);

 }

 void mul(ull a[N][N], ull b[N][N])

 {

     ull c[N][N] = {};

     for(int k=; k<=sz; k++)

         for(int i=; i<=sz; i++)

             for(int j=; j<=sz; j++)

             c[i][j] += a[i][k]*b[k][j];

     memcpy(a, c, sizeof c);

 }

 void mat_pow(ull a[N][N], ull b)

 {

     ull c[N][N] = {};

     for(int i=; i<=sz; i++)

         c[i][i] = ;

     while(b){

         if(b&) mul(c, a);

         mul(a, a);

         b >>= ;

     }

     memcpy(a, c, sizeof c);

 }

 char s[];

 int main()

 {

     int i,j,k;

     ull n, m;

     while(scanf("%I64u %I64u",&n,&m) == )

     {

         init();

         for(i=; i<=n; i++) scanf("%s",s), Insert(s);

         build();

         ull a[N][N] = {};

         get_matric(a);

         mat_pow(a, m);

         ull ans = ;

         for(i=; i<=sz; i++) ans += a[][i];

         printf("%I64u\n",sum_pow(,m)-ans+);

     }

     return ;

 }

6.poj 1625 Censored!

　　给你一个不超过50的字典，然后还有不超过10个长度不超过50的非法串，问有多少个由字典组成的长度为m的串？没有取模。

我真想喷这题，没有任何思维上的新点，就是恶心+恶心。

1）.卡内存，明显可以按poj 2778的套路，构造500*500矩阵搞就行了，但是，卡内存啊。注意到m不大，那就dp吧，dp[i][j]表示串长为i时在自动机j节点上的合法串的数量，状态转移也很简单：遍历字母表，如果j节点接收某个字符后转移到k节点，那么dp[i][k] += dp[i-1][j]，初始dp[0][0] = 1。

2）.ASCALL可高达255，我用java写的更坑爹，由于java中char是2个字节，有时会把输入的两个字符当成一个字符输入。。好吧，又改成了byte型的才行。

3）.那么到底输入的p个串是由字母表构成的吗？我怎么测出来非法串还有其他字符啊，按照大家的做法，把不在字典中的字符哈希为0就稀里糊涂过了。

真的是够了，出题人的良心呢？

 import java.math.BigInteger;

 import java.util.Arrays;

 import java.util.LinkedList;

 import java.util.Queue;

 import java.util.Scanner;

 public class Main{

     static int alp;

     static final int maxn = 505;

     static int ch[][] = new int[maxn][55];

     static int ed[] = new int[maxn];

     static int fail[] = new int[maxn];

     static int Hash[] = new int[66666];

     static BigInteger dp[][] = new BigInteger[55][maxn];

     static int root, sz;

     static Queue<Integer> q = new LinkedList<Integer>();

     static int newnode() {

         Arrays.fill(ch[sz], -1);

         ed[sz++] = 0;

         return sz-1;

     }

     static void init()

     {

         sz = 0;

         root = newnode();

     }

     static void Insert(byte str[]){

         int now = root;

         for(int i=0; i<str.length; i++) {

             int t = str[i];

             t += 128;

             if(ch[now][Hash[t]] == -1)

                 ch[now][Hash[t]] = newnode();

             now = ch[now][Hash[t]];

         }

         ed[now] = 1;

     }

     static void build()

     {

         q.clear();

         fail[root] = root;

         for(int i=0; i<alp; i++){

             if(ch[root][i] == -1) ch[root][i] = root;

             else {

                 fail[ch[root][i]] = root; q.add(ch[root][i]);

             }

         }

         while(!q.isEmpty()) {

             int now = q.remove();

             for(int i=0; i<alp; i++)

             {

                 if(ch[now][i] == -1) ch[now][i] = ch[fail[now]][i];

                 else {

                     fail[ch[now][i]] = ch[fail[now]][i];

                     ed[ch[now][i]] |= ed[fail[ch[now][i]]];

                     q.add(ch[now][i]);

                 }

             }

         }

     }

     static void memset(BigInteger a[][], BigInteger x)

     {

         for(int i=0; i<a.length; i++)

             for(int j=0; j<a[i].length; j++)

                 a[i][j] = x;

     }

     public static void main(String[] args) {

         @SuppressWarnings("resource")

         Scanner in = new Scanner(System.in);

         int n, m, p;

         while(in.hasNext())

         {

             n = in.nextInt(); m = in.nextInt(); p = in.nextInt();

             Arrays.fill(Hash, 0);

             byte s[] = in.next().getBytes();

             for(int i=0; i<n; i++) {

                 int t = s[i];

                 t += 128;

                 Hash[t] = i;

             }

             init();

             alp = n;

             for(int i=1; i<=p; i++)

             {

                 byte str[] = in.next().getBytes();

                 Insert(str);

             }

             build();

             memset(dp, BigInteger.ZERO);

             dp[0][0] = BigInteger.ONE;

             for(int i=1; i<=m; i++)

                 for(int j=0; j<sz; j++)

                 {

                     if(ed[j] > 0) continue;

                     for(int k=0; k<n; k++)

                     {

                         int tmp = ch[j][k];

                         if(ed[tmp] > 0) continue;

                         dp[i][tmp] = dp[i][tmp].add(dp[i-1][j]);

                     }

                 }

             BigInteger ans = BigInteger.ZERO;

             for(int i=0; i<sz; i++) ans = ans.add(dp[m][i]);

             System.out.println(ans);

         }

     }

 }

7.hdu 2825 Wireless Password

　　给m个单词，问长为n的至少包含k个单词的串有多少个？(0<=m<=10, 1<=n<=25)

dp的状态和当前串长、Trie节点、已含单词集有关，那么dp[i][j][k]就表示当前串长为i，在Trie的j节点，状态为k（0<=k<2^10)。初始dp[0][0][0] = 1。

　　有两种转移方式：要么枚举dp[i][j][k]的前一个状态加到dp[i][j][k]，要么枚举dp[i][j][k]的后一个状态，将dp[i][j][k]的值加到后一个状态。

复杂度均为O（25*100*2048*26），但是时限比较紧，我们选择后者方式，如果dp[i][j][k]=0，直接continue,它对后续状态无贡献。再者，数据数较多，不能每次用memset清空dp数组。这样优化后大概就能A掉了。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int N = ;

 const int mod = ;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz++] = ;

     return sz-;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[], int id)

 {

     int now = root;

     for(int i=; str[i]; i++)

     {

         int &tmp = ch[now][str[i]-'a'];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] |= <<id-;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                                  ed[tmp] |= ed[fail[tmp]],

                                             q.push(tmp);

         }

     }

 }

 char str[];

 int dp[][][<<];

 int num[<<];

 int solve(int n, int m, int k)

 {

     for(int i=; i<=n; i++)

         for(int j=; j<sz; j++)

             for(int sta=; sta<(<<m); sta++)

                 dp[i][j][sta] = ;

     dp[][][] = ;

     /*****************************************

     for(int len=1; len<=n; len++)

         for(int pre=0; pre<sz; pre++)

             for(int sta=0; sta<(1<<m); sta++)

     {

         for(int a=0; a<alp; a++)

         {

             int tmp = ch[pre][a];

             if(ed[tmp] && !(sta&(1<<ed[tmp]-1))) continue;

             int &p = dp[len][tmp][sta];

             if(ed[tmp])

                 p = (p + dp[len-1][pre][sta]+dp[len-1][pre][sta^(1<<ed[tmp]-1)]) % mod;

             else p = (p + dp[len-1][pre][sta])%mod;

             if(p > 1000000000) p %= mod;

         }

     }

     *****************************************************/

     for(int len=; len<n; len++)

         for(int pre=; pre<sz; pre++)

             for(int sta=; sta<(<<m); sta++)

             {

                 if(dp[len][pre][sta] == ) continue;

                 for(int a=; a<alp; a++)

                 {

                     int tmp = ch[pre][a];

                     int &pp = dp[len+][tmp][sta|ed[tmp]];

                     pp += dp[len][pre][sta];

                     if(pp > ) pp %= mod;

                 }

             }

     int ans = ;

     for(int now=; now<sz; now++)

         for(int sta=; sta<(<<m); sta++)

             if(num[sta] >= k)

             {

                 ans += dp[n][now][sta];

                 if(ans > ) ans %= mod;

             }

     return ans%mod;

 }

 int main()

 {

     int i,j,k,m,n;

     memset(num, , sizeof(num));

     num[] = , num[] = ;

     for(int i=; i<(<<); i++)

         num[i] = num[i>>] + (i&);

     while(scanf("%d %d %d",&n,&m,&k) ==  && n+m+k)

     {

         init();

         for(i=; i<=m; i++)

             scanf("%s",str), Insert(str, i);

         build();

         printf("%d\n",solve(n,m,k));

     }

     return ;

 }

8.hdu 2296 Ring

　　给m个单词和长为L的串，问至少需要改变几个字符使得L串不包含任意一个单词。单词50个*20，L<=1000,字母均由'A''C''G''T'组成。

　　令dp[i][j]表示子串s[0,i-1]止于状态j不包含单词所需要改变字符数，初始dp数组inf, dp[0][0] = 0。那么dp[i][j] = min{dp[i-1][k]+(s[i-1] != c) | k由字符c转移到j}。同样，也可以由dp[i][j]来更新dp[i+1][k]，并且加入dp[i][j] != inf优化。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int inf = 0x3f3f3f3f;

 const int N = ;

 const int alp = ;

 const int maxn = ;

 int Hash['Z'];

 char ALP[] = {'A','C','G','T'};

 int ch[maxn][alp], ed[maxn], fail[maxn];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz++] = ;

     return sz-;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int now = root;

     for(int i=; str[i]; i++)

     {

         int &tmp = ch[now][Hash[str[i]]];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = ;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                 ed[tmp] |= ed[fail[tmp]],

                 q.push(tmp);

         }

     }

 }

 char str[];

 int dp[N][maxn];

 void solve()

 {

     int len = strlen(str);

     memset(dp, 0x3f, sizeof(dp));

     dp[][] = ;

     int now = root;

     for(int len=; str[len]; len++)

         for(int pre=; pre<sz; pre++)

     {

         if(ed[pre]) continue;

         if(dp[len][pre] == inf) continue;

         for(int a=; a<alp; a++)

         {

             int tmp = ch[pre][a];

             if(ed[tmp]) continue;

             dp[len+][tmp] = min(dp[len+][tmp], dp[len][pre]+(str[len] != ALP[a]));

         }

     }

     int ans = inf;

     for(int i=; i<sz; i++)

         if(!ed[i]) ans = min(ans, dp[len][i]);

     if(ans == inf) puts("-1");

     else printf("%d\n",ans);

 }

 int main()

 {

     int i,j,k,m,n,t;

     int cas = ;

     Hash['A'] = , Hash['C'] = , Hash['G'] = , Hash['T'] = ;

     while(scanf("%d",&n) ==  && n)

     {

         init();

         for(i=; i<=n; i++)

             scanf("%s",str), Insert(str);

         build();

         scanf("%s",str);

         printf("Case %d: ",++cas);

         solve();

     }

     return ;

 }

9.zoj 3228 Searching the String

　　给一个长串len<=10^5，给N个0/1属性的长度不超过6的字符串，N<=10^5。

　　输出每个串在长串中匹配的个数，属性为0/1分别表示可/不可重叠。

　　对所有小串建立AC自动机，每个节点保留0/1属性串匹配的个数，再记录上一次1串匹配到此处的时间，若匹配到此处找到一串，那么只有时间差大于等于串的长度才能+1并更新此时间。0串完全是模板，直接++即可。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int inf = 0x3f3f3f3f;

 const int N = ;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[maxn][], fail[maxn];

 int Hash[N];

 int root, sz;

 int tim[maxn], num[maxn][], len[maxn];

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     tim[sz] = ;

     ed[sz][] = ed[sz][] = ;

     num[sz][] = num[sz][] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[], int tag, int id)

 {

     int now = root, i;

     for(i=; str[i]; i++)

     {

         int &tmp = ch[now][str[i]-'a'];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     Hash[id] = now;

     ed[now][tag] = ;

     len[now] = i;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                 q.push(tmp);

         }

     }

 }

 char str[maxn], s[];

 int tag[N];

 void solve(int n)

 {

     int now = root;

     for(int i=; str[i]; i++)

     {

         int tmp = now = ch[now][str[i]-'a'];

         while(tmp != root)

         {

             if(ed[tmp][]) {

                 num[tmp][]++;

             }

             if(ed[tmp][] && i+-tim[tmp] >= len[tmp]) {

                 num[tmp][]++;

                 tim[tmp] = i+;

             }

             tmp = fail[tmp];

         }

     }

     for(int i=; i<=n; i++)

         printf("%d\n",num[Hash[i]][tag[i]]);

     puts("");

 }

 int main()

 {

     int i,j,k,m,n,t,cas=;

     while(scanf("%s",str) == )

     {

         scanf("%d",&n);

         init();

         for(i=; i<=n; i++)

         {

             scanf("%d %s",&m, s);

             tag[i] = m;

             Insert(s, m, i);

         }

         build();

         printf("Case %d\n",++cas);

         solve(n);

     }

     return ;

 }

10.hdu 3341 RLost's revenge

　　给N个Len<=10的单词，再给一个长不超过40的串，将此串字母位置重置，使得包含最多的单词，N<=50。所有单词仅由字母'A''C''G''T'组成。

　　我们不用考虑输入长串的形态，只记录各个字母的个数就行了。dp很好想，令dp[i][j]表示在自动机i节点，字母的使用情况为j时包含的最多的单词数量。

难点就在于：如何表示'A''C''G''T'的使用情况？

　　假设有num[0]个'A',num[1]个'C',num[2]个'G',num[3]个'T'，那么使用了a个'A'，c个'C'，g个'G'，t个'T'可表示为a*(num[1]+1)*(num[2]+1)*(num[3]+1) + c*(num[2]+1)*num[3]+1) + g*(num[3]+1) + t。字母数均分时最大且<11*11*11*11。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int inf = 0x3f3f3f3f;

 const int N = ***+;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 int Hash['Z'];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int now = root, i;

     for(i=; str[i]; i++)

     {

         int &tmp = ch[now][Hash[str[i]]];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] ++;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         ed[now] += ed[fail[now]];

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                  q.push(tmp);

         }

     }

 }

 int dp[maxn][N];

 char str[];

 int num[];

 void Max(int &a, int b) {a = max(a, b);}

 int main()

 {

     int i,j,k,m,n,t,cas=;

     Hash['A'] = , Hash['C']= , Hash['G'] = , Hash['T'] = ;

     while(scanf("%d",&n) ==  && n)

     {

         init();

         memset(num, , sizeof(num));

         for(int i=; i<=n; i++)

             scanf("%s",str), Insert(str);

         build();

         scanf("%s",str);

         for(i=; str[i]; i++) num[Hash[str[i]]]++;

         memset(dp, , sizeof(dp));

         int mod3 = num[]+;

         int mod2 = mod3 * (num[]+);

         int mod1 = mod2 * (num[]+);

         int limit = num[]*mod1+num[]*mod2+num[]*mod3+num[];

         memset(dp, -, sizeof(dp));

         dp[][] = ;

         for(int a=; a<=num[]; a++)

             for(int b=; b<=num[]; b++)

                 for(int c=; c<=num[]; c++)

                     for(int d=; d<=num[]; d++)

                     {

                         int tp = a*mod1 + b*mod2 + c*mod3 + d;

                         for(int i=; i<sz; i++)

                         {

                             if(dp[i][tp] == -) continue;

                             if(a < num[]) Max(dp[ch[i][]][tp+mod1], dp[i][tp]+ed[ch[i][]]);

                             if(b < num[]) Max(dp[ch[i][]][tp+mod2], dp[i][tp]+ed[ch[i][]]);

                             if(c < num[]) Max(dp[ch[i][]][tp+mod3], dp[i][tp]+ed[ch[i][]]);

                             if(d < num[]) Max(dp[ch[i][]][tp+], dp[i][tp]+ed[ch[i][]]);

                         }

                     }

         int ans = ;

         for(int i=; i<sz; i++)

             for(int j=; j<=limit; j++)

                 Max(ans, dp[i][j]);

         printf("Case %d: %d\n",++cas, ans);

     }

     return ;

 }

11. hdu 3247 Resource Archiver

　　给n个资源串和m病毒串，现要把资源串串起来，两串可以重叠，且不含病毒串，求串起来的最短长度。2 <= n <= 10, 1 <= m <= 1000，资源串长不超过10^5，病毒串总长不超过5w。所有串仅由'0''1'组成。

　　很直接的DP方式就是用dp[i][j]表示在Trie树i节点时，匹配情况是j的最短长度。但是6w*(1<<10)MLE。。。

翻了翻网上的题解，大都是说让j表示在第j个单词的末尾。加单词间的最短路。仔细想一想，不对啊，有的资源串的后缀包含了另一个资源，那么另一个资源串不就有多个结尾了吗？

　　还有一种解释是说把带有标记的节点拿出来，最短路+dp，尼玛，最坏情况下每个节点都有标记啊。

　　我看了看他们的代码，发现都一样，那就一个结论了：所有资源串都不是另一个资源串的前缀或后缀。好吧，那就直接用单词末尾的节点来dp吧，先BFS处理任意两个单词末节点的最短路，它的意义是第二个串长与第一个串后缀重合部分长的差，BFS不经过病毒节点就行了。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int inf = 0x3f3f3f3f;

 const int N = ;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[][maxn], fail[maxn];

 int ID[]; ///第i个串末在自动机中的编号

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[][sz] = ed[][sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[], int id, int tag)

 {

     int now = root, i;

     for(i=; str[i]; i++)

     {

         int &tmp = ch[now][str[i]-''];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[id][now] |= (<<tag-);

     if(!id) ID[tag] = now;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         ed[][now] |= ed[][fail[now]];

         ed[][now] |= ed[][fail[now]];

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                  q.push(tmp);

         }

     }

 }

 int dis[maxn];

 bool vis[maxn];

 int w[][];

 void get_dis(int s, int n)

 {

     memset(dis, 0x3f, sizeof(dis));

     memset(vis, , sizeof(vis));

     dis[ID[s]] = ;

     vis[ID[s]] = ;

     queue<int>q;

     q.push(ID[s]);

     while(!q.empty()) {

         int u = q.front(); q.pop();

         for(int i=; i<alp; i++)

         {

             int tmp = ch[u][i];

             if(ed[][tmp]) continue;

             if(vis[tmp]) continue;

             vis[tmp] = ;

             dis[tmp] = dis[u] + ;

             q.push(tmp);

         }

     }

     for(int i=; i<=n; i++)

         w[s][i] = dis[ID[i]];

 }

 char str[N];

 int len[];

 int dp[<<][];

 int main()

 {

     int i,j,k,m,n;

     int p[];

     while(scanf("%d %d",&n,&m) ==  && n+m)

     {

         init();

         int sum = ;

         for(i=; i<=n; i++)

             scanf("%s", str), len[i] =  strlen(str), Insert(str, , i);

         for(i=; i<=m; i++)

             scanf("%s", str), Insert(str, , i);

         build();

         ID[] = root;

         memset(w, 0x3f, sizeof(w));

         for(i=; i<=n; i++) get_dis(i,n);

         int ans = inf;

         /***********************************

          *枚举顺序，简单粗暴

         for(i=1; i<=n; i++) p[i] = i;

         do {

             int ret = 0;

             for(j=1; j<n; j++)

                 ret += w[p[j]][p[j+1]];

             ans = min(ans, ret+len[p[1]]);

         } while(next_permutation(p+1, p+1+n));

         ************************************/

         memset(dp, 0x3f, sizeof(dp));

         dp[][] = ;

         for(i=; i<(<<n); i++)

             for(j=; j<=n; j++)

         {

             if(dp[i][j] == inf) continue;

             for(k=; k<=n; k++)

             {

                 if(w[j][k] == inf) continue;

                 dp[i|(<<k-)][k] = min(dp[i|(<<k-)][k], dp[i][j] + w[j][k]);

             }

         }

         for(i=; i<=n; i++) ans = min(ans, dp[(<<n)-][i]);

         printf("%d\n",ans);

     }

     return ;

 }

12. zoj 3494 Detect the Viru

　　给n个01病毒串，然后问区间[L,R]内有多少个数的BCD encoding不包含病毒串。一个数的BCD encoding是指每一位数的4bit位连起来的串。

　　n<=100,病毒串长小于20，L<=R<10^200。

　　很好想的数位DP+AC自动机dp[i][j]表示在还有i位且当前在Trie数的j节点的合法数字的个数。

　　注意，如果用字符串读入L和R，要特判L。

 #include<iostream>

 #include<cstdio>

 #include<cstring>

 #include<cmath>

 #include<algorithm>

 #include<queue>

 using namespace std;

 typedef unsigned long long ull;

 const int inf = 0x3f3f3f3f;

 const int mod = ;

 const int N = ;

 const int alp = ;

 const int maxn = ;

 int ch[maxn][alp], ed[maxn], fail[maxn];

 int root, sz;

 int newnode()

 {

     memset(ch[sz], -, sizeof(ch[sz]));

     ed[sz] = ;

     return sz++;

 }

 void init()

 {

     sz = ;

     root = newnode();

 }

 void Insert(char str[])

 {

     int now = root, i;

     for(i=; str[i]; i++)

     {

         int &tmp = ch[now][str[i]-''];

         if(tmp == -) tmp = newnode();

         now = tmp;

     }

     ed[now] = ;

 }

 void build()

 {

     queue<int> q;

     fail[root] = root;

     for(int i=; i<alp; i++)

     {

         int &tmp = ch[root][i];

         if(tmp == -) tmp = root;

         else fail[tmp] = root, q.push(tmp);

     }

     while(!q.empty())

     {

         int now = q.front();

         q.pop();

         ed[now] |= ed[fail[now]];

         for(int i=; i<alp; i++)

         {

             int &tmp = ch[now][i];

             if(tmp == -) tmp = ch[fail[now]][i];

             else fail[tmp] = ch[fail[now]][i],

                  q.push(tmp);

         }

     }

 }

 int sel[][] = {,,,, ,,,, ,,,, ,,,, ,,,,

                   ,,,, ,,,, ,,,, ,,,, ,,,};

 int query(char str[])

 {

     int len = strlen(str+);

     int now = root;

     for(int i=; i<=len; i++)

     {

         int a = str[i] - '';

         for(int j=; j<; j++)

         {

             int tmp = now = ch[now][sel[a][j]];

             while(tmp) {

                 if(ed[tmp]) return ;

                 tmp = fail[tmp];

             }

         }

     }

     return ;

 }

 char s[N];

 int dp[N][maxn][];

 int DFS(int pos, int id, bool limit, bool pre)

 {

     if(!pos) return pre;

     if(!limit && dp[pos][id][pre] != -) return dp[pos][id][pre];

     int ret = ;

     int end = limit ? s[pos]-'' : ;

     for(int i=; i<=end; i++)

     {

         bool ok = true;

         int tmp = id;

         if(pre || i)

         for(int j=; j<; j++)

         {

             tmp = ch[tmp][sel[i][j]];

             if(ed[tmp]) ok = false;

         }

         if(ok) ret =(ret+DFS(pos-, tmp, limit&&i==end, pre || i))%mod;

     }

     if(!limit) dp[pos][id][pre] = ret;

     return ret;

 }

 int main()

 {

     int i,j,k,m,n,t;

     scanf("%d",&t);

     while(t--)

     {

         init();

         scanf("%d",&n);

         while(n--) scanf("%s",s+), Insert(s+);

         build();

         scanf("%s",s+);

         m = strlen(s+);

         int ans = -query(s);

         reverse(s+, s++m);

         for(i=; i<=m; i++) for(j=; j<sz; j++) dp[i][j][]=dp[i][j][]=-;

         ans += DFS(m, , , );

         scanf("%s",s+);

         m = strlen(s+);

         reverse(s+, s++m);

         for(i=; i<=m; i++) for(j=; j<sz; j++) dp[i][j][]=dp[i][j][]=-;

         ans = (DFS(m, , , ) - ans)%mod;

         printf("%d\n",(ans+mod)%mod);

     }

     return ;

 }

入门题暂时做到这里了( ^_^ )/~~。