POJ - 3693 Maximum repetition substring

题意

输入一个串,求重复次数最多的连续重复字串,如果有次数相同的,则输出字典序最小的

Sample input

ccabababc

daabbccaa

#

Sample Output

Case 1: ababab

Case 2: aa

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <algorithm>
  5 using namespace std;
  6 const int maxn = 1e5+5;
  7 char s[maxn];
  8 int sa[maxn], t[maxn], t2[maxn], c[maxn];
  9 int n;
 10 //构造字符串s的后缀数组, 每个字符值必须为0 ~ m-1
 11 void build_sa(int m) {
 12     int *x = t, *y = t2;
 13     //基数排序
 14     for(int i = 0; i < m; i++) c[i] = 0;
 15     for(int i = 0; i < n; i++) c[x[i] = s[i]]++;
 16     for(int i = 1; i < m; i++) c[i] += c[i-1];
 17     for(int i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
 18     for(int k = 1; k <= n; k <<= 1) {
 19         int p = 0;
 20         //直接利用sa数组排序第二关键字
 21         for(int i = n-k; i < n; i++) y[p++] = i;
 22         for(int i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i] - k;
 23         //基数排序第一关键字
 24         for(int i = 0; i < m; i++) c[i] = 0;
 25         for(int i = 0; i < n; i++) c[x[y[i]]]++;
 26         for(int i = 1; i < m; i++) c[i] += c[i-1];
 27         for(int i = n-1; i>= 0; i--) sa[--c[x[y[i]]]] = y[i];
 28         //根据sa和y数组计算新的x数组
 29         swap(x, y);
 30         p = 1;
 31         x[sa[0]] = 0;
 32         for(int i = 1; i < n; i++)
 33             x[sa[i]] = (y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k] ? p-1 : p++);
 34         if(p >= n) break;
 35         m = p;
 36     }
 37 }
 38
 39 int rank_[maxn]; //rank[i]代表后缀i在sa数组中的下标
 40 int height[maxn]; //height[i] 定义为sa[i-1] 和 sa[i] 的最长公共前缀
 41 //后缀j和k的LCP长度等于RMQ(height, rank[j]+1, rank[k])
 42 void get_height() {
 43     int i, j, k = 0;
 44     for(int i = 0; i < n; i++) rank_[sa[i]] = i;
 45     for(int i = 0; i < n; i++) {
 46         if(!rank_[i]) continue;
 47         int j = sa[rank_[i]-1];
 48         if(k) k--;
 49
 50         while(s[i+k] == s[j+k]) k++;
 51         height[rank_[i]] = k;
 52     }
 53 }
 54 int d[maxn][50];
 55 void rmq_init() {
 56     for(int i = 0; i < n; i++) d[i][0] = height[i];
 57     for(int j = 1; (1<<j) <= n; j++)
 58         for(int i = 0; i + (1<<j) - 1 < n; i++)
 59             d[i][j] = min(d[i][j-1], d[i+(1<<(j-1))][j-1]);
 60 }
 61 int rmq(int l, int r) {
 62     if(l == r) return n-l;
 63     if(rank_[l] > rank_[r]) swap(l, r);
 64     int L = rank_[l]+1;
 65     int R = rank_[r];
 66     int k = 0;
 67     while((1<<(k+1)) <= R-L+1) k++;
 68     return min(d[L][k], d[R-(1<<k)+1][k]);
 69 }
 70
 71 int a[maxn];
 72 int main() {
 73     int kase = 0;
 74     while(~scanf("%s", s) && s[0] != '#') {
 75         int L = strlen(s);
 76         n = L + 1;
 77         build_sa(128);
 78         get_height();
 79         rmq_init();
 80         int mx = 0;
 81         int cnt = 0;
 82         // 寻找重复次数最多的连续子串单个子串的长度,可能有多种重复次数相同的子串
 83         for(int l = 1; l <= L; l++) {
 84             for(int j = 0; j + l < L; j += l) {
 85                 int k = rmq(j, j + l); // lcp
 86                 int res = k / l + 1;
 87                 int pos = j - (l - (k % l));
 88                 if(pos >= 0 && k % l && rmq(pos, pos + l)) res++;
 89                 if(res > mx)  {
 90                     mx = res;
 91                     cnt = 0;
 92                     a[cnt++] = l;
 93                 } else if(res == mx) {
 94                     a[cnt++] = l;
 95                 }
 96             }
 97         }
 98         // 找字典序最小
 99         int len = 0, st;
100         for(int i = 1; i < n && !len; i++) {
101             for(int j = 0; j < cnt; j++) {
102                 if(rmq(sa[i], sa[i] + a[j]) >= (mx - 1) * a[j]) {
103                     len = a[j];
104                     st = sa[i];
105                     break;
106                 }
107             }
108         }
109         printf("Case %d: ", ++kase);
110         for(int i = st; i < st + len * mx; i++) {
111             printf("%c", s[i]);
112         }
113         printf("\n");
114     }
115     return 0;
116 }
12-30 13:46