题目链接:P4173 残缺的字符串

题意

给定长度为 \(m\) 的模式串和长度为 \(n\) 的目标串,两个串都带有通配符,求所有匹配的位置。

思路

FFT

带有通配符的字符串匹配问题。

设模式串为 \(p\),目标串为 \(t\),将两个串的内容都根据字母先后顺序映射到 \(1 \sim 26\)。

如果不带有通配符,那么 \(t\) 以第 \(k\) 位结束的长度为 \(|p|\) 的子串与 \(p\) 匹配时有

\[\sum_{i=0}^{|p|-1} (p[i] - t[k - |p| + 1 + i])^2 = 0\]

如果带有通配符,只需将上式稍微改一下就行。

让两个串中的所有通配符映射到 \(0\),设匹配结果为 \(f\),则有

\[f[k] = \sum_{i=0}^{|p|-1} (p[i] - t[k - |p| + 1 + i])^2 \cdot p[i] \cdot t[k - |p| + 1 + i]\]

接下来翻转 \(p\) 串 (\(FFT\) 的套路),设 \(r[|p| - i - 1] = p[i]\),则有

\[f[k] = \sum_{i=0}^{|p|-1} (r[|p| - i - 1] - t[k - |p| + 1 + i])^2 \cdot r[|p| - i - 1] \cdot t[k - |p| + 1 + i]\]

此时 \(r\) 与 \(t\) 的下标之和恒等于 \(k\)。令 \(j = |p| - i - 1\),并把 \(t\) 的下标 \(k - j\) 重新记为 \(i\),则

\[f[k] = \sum_{i + j = k} (r[j] - t[i])^2 \cdot r[j] \cdot t[i]\]

展开后有

\[f[k] = \sum_{i + j = k} (r[j]^3t[i] + t[i]^3r[j] - 2\cdot r[j]^2t[i]^2)\]

\(FFT\) 分别求一下卷积即可。

代码

#include <bits/stdc++.h>
using namespace std;
// Shared constants and working storage for the FFT-based matcher.

// The constant pi, used to build the roots of unity.
const double PI = std::acos(-1.0);
// Small tolerance constant; not referenced by the code visible in this file.
const double eps = 1e-8;
// Complex number type used for every transform buffer.
using Complex = std::complex<double>;
// Capacity of every global array below.
const int maxn = 2000000 + 10;

// Encoded pattern, stored reversed (filled in main).
Complex p[maxn];
// Encoded text (filled in main).
Complex t[maxn];
// Scratch buffers holding the two operands of one convolution.
Complex a[maxn];
Complex b[maxn];
// Spare buffers; not referenced by the code visible in this file.
Complex c[maxn];
Complex d[maxn];
// Accumulated frequency-domain sum, later transformed back in place.
Complex ans[maxn];
// Buffer that receives each input string in turn.
std::string str;
// Pattern length.
int m;
// Text length.
int n;
// Transform length; starts at 2 and is grown by get_rev().
int bit = 2;
// Bit-reversal permutation table built by get_rev().
int rev[maxn];

// Chooses the transform length and builds the bit-reversal table.
//
// `bit` is doubled until it is strictly greater than n + m, then rev[i] is
// set to the value of i with its low log2(bit) bits reversed, for every
// i in [0, bit).
void get_rev() {
    memset(rev, 0, sizeof(rev));
    while (n + m >= bit) {
        bit <<= 1;
    }
    const int top_bit = bit >> 1;
    for (int idx = 0; idx < bit; ++idx) {
        // Reversal of idx = reversal of idx/2 shifted down by one, with the
        // lowest bit of idx moved to the highest position.
        int reversed = rev[idx >> 1] >> 1;
        if (idx & 1) {
            reversed |= top_bit;
        }
        rev[idx] = reversed;
    }
}

// In-place iterative radix-2 FFT over the first `bit` entries of `a`.
//
// a  : buffer to transform; must hold at least `bit` elements.
// op : sign of the imaginary part of the roots of unity; the program calls
//      it with 1 for the forward transform and -1 for the inverse one.
//
// No 1/bit scaling is applied here; the caller divides after the inverse
// transform. Requires get_rev() to have filled `rev` for the current `bit`.
void FFT(Complex *a, int op) {
    // Reorder the input into bit-reversed order; each pair is swapped once.
    for (int idx = 0; idx < bit; ++idx) {
        const int partner = rev[idx];
        if (idx < partner) {
            swap(a[idx], a[partner]);
        }
    }
    // Merge blocks of size `half` into blocks of size 2 * half.
    for (int half = 1; half < bit; half <<= 1) {
        const double angle = PI / half;
        const Complex root = Complex(cos(angle), op * sin(angle));
        const int span = half << 1;
        for (int start = 0; start < bit; start += span) {
            Complex *lo = a + start;
            Complex *hi = lo + half;
            Complex twiddle(1, 0);
            for (int k = 0; k < half; ++k) {
                const Complex even = lo[k];
                const Complex odd = twiddle * hi[k];
                lo[k] = even + odd;
                hi[k] = even - odd;
                twiddle = twiddle * root;
            }
        }
    }
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    cin >> m >> n;
    cin >> str;
    for(int i = 0; i < m; ++i) {
        p[m - i - 1] = str[i] == '*' ? 0 : (str[i] - 'a' + 1);
    }
    cin >> str;
    for(int i = 0; i < n; ++i) {
        t[i] = str[i] == '*' ? 0 : (str[i] - 'a' + 1);
    }
    get_rev();
    for(int i = 0; i < bit; ++i) {
        a[i] = p[i] * p[i] * p[i];
        b[i] = t[i];
    }
    FFT(a, 1); FFT(b, 1);
    for(int i = 0; i < bit; ++i) {
        ans[i] += a[i] * b[i];
    }

    for(int i = 0; i < bit; ++i) {
        a[i] = p[i];
        b[i] = t[i] * t[i] * t[i];
    }
    FFT(a, 1); FFT(b, 1);
    for(int i = 0; i < bit; ++i) {
        ans[i] += a[i] * b[i];
    }

    for(int i = 0; i < bit; ++i) {
        a[i] = p[i] * p[i];
        b[i] = t[i] * t[i];
    }
    FFT(a, 1); FFT(b, 1);
    for(int i = 0; i < bit; ++i) {
        ans[i] -= a[i] * b[i] * Complex(2, 0);
    }

    FFT(ans, -1);
    queue<int> q;
    for(int i = m - 1; i < n; ++i) {
        if((int)(ans[i].real() / bit + 0.5) == 0) q.push(i - m + 2);
    }
    cout << q.size() << endl;
    while(q.size()) {
        cout << q.front() << " ";
        q.pop();
    }
    cout << endl;
    return 0;
}
01-19 05:08