【清华集训2016】石家庄的工人阶级队伍比较坚强与二次剩余

还是这道题，上次的

但是这次学了科技，回来贴板了。

这篇 blog orz

这里简单讲一下，就是互质的直接 CRT，对于素数的 K 次幂，分类讨论。

考虑递推上去（类似牛顿迭代这种）

对于 \(x ^ 2 \equiv a \pmod {p^{K-1}}\)，显然有 \(\left(x + y p^{K-1}\right)^2 \equiv a \pmod {p ^ K}\)

然后显然可以解出 \(y\) 。

实际上对于 \(p \neq 2\) 另外还有一个很优秀的做法。

先让 \(x\) 和 \(p^k\) 互质了，那么就把 \(p\) 提出，且提出的是 \(p\) 的偶数次幂。

考虑 \(x ^ 2 - a \equiv 0 \pmod p\)，即 \(x ^ 2 - a = kp\)，那么有 \(\left(x ^ 2 - a\right)^K \equiv 0 \pmod {p^K}\)。

我们设 \(\left(x - \sqrt{a}\right)^K \equiv u - v\sqrt{a} \pmod {p^K}\)，那么有 \(\left(x + \sqrt{a}\right)^K \equiv u + v\sqrt{a} \pmod {p^K}\)。

直接扩域快速幂可以算出来，可以得到 \(\sqrt{a} = u \times v^{-1} \pmod {p^K}\)。

#include <bits/stdc++.h>

const int MAXN = 531441;
typedef long long LL;
typedef long double LD;

namespace NumberTheory {
    std::mt19937_64 rd(time(0) + (unsigned long) new char);
    typedef std::pair<LL, int> fac;
    typedef std::pair<LL, LL> dbp;
    void reduce(LL & x, const LL P) { x += x >> 63 & P; }
    inline LL mul(LL x, LL y, LL P) {
        LL t = x * y - (LL) ((LD) x * y / P + 0.5) * P;
        return t + (t >> 63 & P);
    }
    inline LL fastpow(LL a, LL b, LL P, LL res = 1) {
        for (; b; b >>= 1, a = mul(a, a, P)) if (b & 1)
            res = mul(res, a, P);
        return res;
    }
    inline LL fastpow(LL a, LL b) {
        LL res = 1;
        for (; b; b >>= 1, a *= a) if (b & 1) res *= a;
        return res;
    }
    LL gcd(LL a, LL b) { return b ? gcd(b, a % b) : a; }
    namespace Prime {
        const int PL = 8;
        const LL li[PL] = {2, 3, 5, 7, 11, 13, 82, 373};
        const int MAXS = 100000;
        int pri[MAXS / 10], ptot; bool npri[MAXS + 1];
        void sieve() {
            *npri = npri[1] = true;
            for (int i = 2; i <= MAXS; ++i) {
                if (!npri[i]) pri[++ptot] = i;
                for (int j = 1; j <= ptot; ++j) {
                    int t = i * pri[j];
                    if (t > MAXS) break;
                    npri[t] = true;
                    if (i % pri[j] == 0) break;
                }
            }
        }
        bool MillerRabin(LL x) {
            if (x <= MAXS) return !npri[x];
            for (int i = 0; i != PL; ++i) {
                if (x == li[i]) return true;
                if (fastpow(li[i], x - 1, x) != 1) return false;
                LL now = x - 1;
                while (~now & 1) {
                    now >>= 1;
                    LL t = fastpow(li[i], now, x);
                    if (t != 1 && t != x - 1) return false;
                    if (t == x - 1) break;
                }
            }
            return true;
        }
    }
    namespace Factor {
        LL Pollard(LL x) {
            LL x1 = rd() % (x - 1) + 1, x2 = x1;
            LL C = rd() % (x - 1) + 1, mx = 1;
            int lst = 1, stp = 0;
            while (true) {
                x2 = mul(x2, x2, x);
                reduce(x2 += C - x, x);
                if (x1 == x2) return x;
                LL t = mul(x2 - x1 + x, mx, x);
                if (t) mx = t;
                if ((stp & 127) == 0) {
                    t = gcd(mx, x); mx = 1;
                    if (t != 1) return t;
                }
                if (++stp == lst) lst <<= 1, x1 = x2;
            }
        }
        std::map<LL, int> li;
        void _factor(LL x, int v = 1) {
            if (x == 1) return ;
            if (Prime::MillerRabin(x)) { li[x] += v; return ; }
            LL t;
            do t = Pollard(x); while (t >= x);
            int lv = 0;
            while (x % t == 0) lv += v, x /= t;
            _factor(t, lv); _factor(x, v);
        }
        std::vector<fac> factor(LL x) {
            _factor(x);
            std::vector<fac> res;
            for (auto t : li)
                res.emplace_back(t.first, t.second);
            li.clear();
            return res;
        }
    }
    struct complex {
        static LL mod, omega;
        LL real, imag;
        complex() { real = imag = 0; }
        complex(LL a, LL b) { real = a, imag = b; }
        complex operator + (complex b) {
            complex res = *this;
            reduce(res.real += b.real - mod, mod);
            reduce(res.imag += b.imag - mod, mod);
            return res;
        }
        complex operator * (complex b) {
            complex res;
            reduce(res.real = mul(real, b.real, mod) + mul(omega, mul(imag, b.imag, mod), mod) - mod, mod);
            reduce(res.imag = mul(real, b.imag, mod) + mul(imag, b.real, mod) - mod, mod);
            return res;
        }
    } ;
    LL complex::mod = 0;
    LL complex::omega = 0;
    complex fastpow(complex a, LL b, complex res = complex(1, 0)) {
        for (; b; b >>= 1, a = a * a) if (b & 1) res = res * a;
        return res;
    }
    LL exgcd(LL a, LL b, LL & x, LL & y) {
        if (!b) return x = 1, y = 0, a;
        else {
            LL t = exgcd(b, a % b, y, x);
            y -= a / b * x;
            return t;
        }
    }
    LL INV(LL x, LL mod) {
        LL ta, tb;
        exgcd(x, mod, ta, tb);
        reduce(ta, mod);
        return ta;
    }
    namespace CRT {
        void _CRT(LL & x, LL & a, LL x1, LL a1) {
            LL G = gcd(x, x1);
            const LL K = mul((a1 - a + x1) / G, INV(x / G, x1 / G), x1 / G);
            const LL mod = x / G * x1;
            reduce(a += mul(K, x, mod) - mod, mod);
            x = mod;
        }
        LL CRT(std::vector<dbp> V) {
            LL x, a; x = a = 0;
            for (auto t : V) {
                LL tx = t.first, ta = t.second;
                x ? _CRT(x, a, tx, ta) : (void) (x = tx, a = ta);
            }
            return a;
        }
    }
    namespace Quadratic {
        LL Cipolla(LL x, LL mod) {
            LL a, t;
            do
                a = rd() % mod, reduce(t = mul(a, a, mod) - x, mod);
            while (fastpow(t, mod - 1 >> 1, mod) != mod - 1) ;
            complex::mod = mod, complex::omega = t;
            complex res = fastpow(complex(a, 1), mod + 1 >> 1);
            return res.real;
        }
        LL _solvep(LL a, LL P, int K) {
            LL _ = a % P;
            if (fastpow(_, P - 1 >> 1, P) == P - 1) return -1;
            LL x = Cipolla(_, P);
            const LL mod = fastpow(P, K);
            complex::omega = a, complex::mod = mod;
            complex t1 = fastpow(complex(x, P - 1), K);
            complex t2 = fastpow(complex(x, 1), K);
            const LL iv2 = P + 1 >> 1;
            LL u = mul(t1.real + t2.real, iv2, mod);
            LL v = mul(t2.imag - t1.imag + mod, iv2, mod);
            LL res = mul(u, INV(v, mod), mod);
            return res;
        }
        LL solve2(LL a, int K) {
            LL lst = 1, x = 1, now;
            for (int i = 2; i <= K; ++i, lst <<= 1) {
                now = lst << 2;
                if (mul(x, x, now) == a % now) continue;
                reduce(x -= lst, now);
                if (mul(x, x, now) != a % now) return -1;
            }
            return x;
        }
        LL solvep(LL x, LL P, int K) {
            x %= fastpow(P, K); int t = 0;
            if (x == 0) return 0;
            while (x % P == 0) x /= P, ++t;
            if (t & 1) return -1;
            LL res = P == 2 ? solve2(x, K - t) : _solvep(x, P, K - t);
            if (res == -1) return res;
            return fastpow(P, t >> 1) * res;
        }
        std::vector<fac> li;
        void init(LL mod) { li = Factor::factor(mod); }
        LL solve(LL x) {
            std::vector<dbp> tx;
            for (auto t : li) {
                LL tv;
                tv = solvep(x, t.first, t.second);
                if (tv == -1) return -1;
                tx.emplace_back(fastpow(t.first, t.second), tv);
            }
            return CRT::CRT(tx);
        }
    }
    void init(LL mod) {
        Prime::sieve();
        Quadratic::init(mod);
    }
}

int m, T, mod, pw3[13], N;
void reduce(int & x) { x += x >> 31 & mod; }
int mul(int a, int b) { return (LL) a * b % mod; }
int fastpow(int a, int b) {
    int res = 1;
    for (; b; b >>= 1, a = mul(a, a)) if (b & 1) res = mul(res, a);
    return res;
}

int W, Ws;
void FWT(int * A, int typ) {
    const int Wn = typ == 1 ? W : Ws;
    const int Wns = typ == 1 ? Ws : W;
    for (int mid = 1; mid != N; mid *= 3)
        for (int k = 0; k != N; k += mid * 3)
            for (int l = 0; l != mid; ++l) {
                int & x = A[l + k], & y = A[l + k + mid], & z = A[l + k + mid * 2];
                int ta = (x + (unsigned) y + z) % mod;
                int tb = (x + (LL) y * Wn + (LL) z * Wns) % mod;
                int tc = (x + (LL) y * Wns + (LL) z * Wn) % mod;
                x = ta, y = tb, z = tc;
            }
}

int A[MAXN], B[MAXN], tr[20][20];
int get(int x, int y) {
    int res = 0;
    while (x) res += x % 3 == y, x /= 3;
    return res;
}
int main() {
    std::ios_base::sync_with_stdio(false), std::cin.tie(0);
    std::cin >> m >> T >> mod;
    NumberTheory::init(mod);
    pw3[0] = 1;
    for (int i = 1; i != 13; ++i) pw3[i] = pw3[i - 1] * 3;
    N = pw3[m];
    const int S3 = NumberTheory::Quadratic::solve((mod - 3 % mod) % mod);
    if (mod == 5) {
        std::cout << "2\n1\n1\n";
        return 0;
    }
    assert(S3 != -1);
    const int iv2 = mod + 1 >> 1;
    const int iv3 = mul(mod % 3 == 1 ? 1 : iv2, mod - mod / 3);
    const int liminv = fastpow(iv3, m);
    W = ((LL) S3 - 1 + mod) % mod * iv2 % mod;
    Ws = mul(W, W);
    for (int i = 0; i < N; ++i) std::cin >> A[i];
    for (int i = 0; i <= m; ++i)
        for (int j = 0; j + i <= m; ++j)
            std::cin >> tr[i][j];
    for (int i = 0; i < N; ++i)
        B[i] = tr[get(i, 1)][get(i, 2)];
    FWT(A, 1); FWT(B, 1);
    for (int i = 0; i < N; ++i)
        A[i] = mul(A[i], fastpow(B[i], T));
    FWT(A, -1);
    for (int i = 0; i < N; ++i) A[i] = mul(A[i], liminv);
    for (int i = 0; i < N; ++i) std::cout << A[i] << '\n';
    return 0;
}

那么

【清华集训2016】石家庄的工人阶级队伍比较坚强 与 二次剩余

【清华集训2016】石家庄的工人阶级队伍比较坚强与二次剩余