还是这道题,上次的
但是这次学了科技,回来贴板了。
这里简单讲一下,就是互质的直接 CRT,对于素数的 K 次幂,分类讨论。
考虑递推上去(类似牛顿迭代这种)
对于 \(x ^ 2 \equiv a \pmod {p^{K-1}}\),显然有 \(\left(x + y p^{K-1}\right)^2 \equiv a \pmod {p ^ K}\)
然后显然可以解出 \(y\) 。
实际上对于 \(p \neq 2\) 另外还有一个很优秀的做法。
先让 \(x\) 和 \(p^k\) 互质了,那么就把 \(p\) 提出,且提出的是 \(p\) 的偶数次幂。
考虑 \(x ^ 2 - a \equiv 0 \pmod p\),即 \(x ^ 2 - a = kp\),那么有 \(\left(x ^ 2 - a\right)^K \equiv 0 \pmod {p^K}\)。
我们设 \(\left(x - \sqrt{a}\right)^K \equiv u - v\sqrt{a} \pmod {p^K}\),那么有 \(\left(x + \sqrt{a}\right)^K \equiv u + v\sqrt{a} \pmod {p^K}\)。
直接扩域快速幂可以算出来,可以得到 \(\sqrt{a} = u \times v^{-1} \pmod {p^K}\)。
#include <bits/stdc++.h>
const int MAXN = 531441;
typedef long long LL;
typedef long double LD;
namespace NumberTheory {
std::mt19937_64 rd(time(0) + (unsigned long) new char);
typedef std::pair<LL, int> fac;
typedef std::pair<LL, LL> dbp;
void reduce(LL & x, const LL P) { x += x >> 63 & P; }
inline LL mul(LL x, LL y, LL P) {
LL t = x * y - (LL) ((LD) x * y / P + 0.5) * P;
return t + (t >> 63 & P);
}
inline LL fastpow(LL a, LL b, LL P, LL res = 1) {
for (; b; b >>= 1, a = mul(a, a, P)) if (b & 1)
res = mul(res, a, P);
return res;
}
inline LL fastpow(LL a, LL b) {
LL res = 1;
for (; b; b >>= 1, a *= a) if (b & 1) res *= a;
return res;
}
LL gcd(LL a, LL b) { return b ? gcd(b, a % b) : a; }
namespace Prime {
const int PL = 8;
const LL li[PL] = {2, 3, 5, 7, 11, 13, 82, 373};
const int MAXS = 100000;
int pri[MAXS / 10], ptot; bool npri[MAXS + 1];
void sieve() {
*npri = npri[1] = true;
for (int i = 2; i <= MAXS; ++i) {
if (!npri[i]) pri[++ptot] = i;
for (int j = 1; j <= ptot; ++j) {
int t = i * pri[j];
if (t > MAXS) break;
npri[t] = true;
if (i % pri[j] == 0) break;
}
}
}
bool MillerRabin(LL x) {
if (x <= MAXS) return !npri[x];
for (int i = 0; i != PL; ++i) {
if (x == li[i]) return true;
if (fastpow(li[i], x - 1, x) != 1) return false;
LL now = x - 1;
while (~now & 1) {
now >>= 1;
LL t = fastpow(li[i], now, x);
if (t != 1 && t != x - 1) return false;
if (t == x - 1) break;
}
}
return true;
}
}
namespace Factor {
LL Pollard(LL x) {
LL x1 = rd() % (x - 1) + 1, x2 = x1;
LL C = rd() % (x - 1) + 1, mx = 1;
int lst = 1, stp = 0;
while (true) {
x2 = mul(x2, x2, x);
reduce(x2 += C - x, x);
if (x1 == x2) return x;
LL t = mul(x2 - x1 + x, mx, x);
if (t) mx = t;
if ((stp & 127) == 0) {
t = gcd(mx, x); mx = 1;
if (t != 1) return t;
}
if (++stp == lst) lst <<= 1, x1 = x2;
}
}
std::map<LL, int> li;
void _factor(LL x, int v = 1) {
if (x == 1) return ;
if (Prime::MillerRabin(x)) { li[x] += v; return ; }
LL t;
do t = Pollard(x); while (t >= x);
int lv = 0;
while (x % t == 0) lv += v, x /= t;
_factor(t, lv); _factor(x, v);
}
std::vector<fac> factor(LL x) {
_factor(x);
std::vector<fac> res;
for (auto t : li)
res.emplace_back(t.first, t.second);
li.clear();
return res;
}
}
struct complex {
static LL mod, omega;
LL real, imag;
complex() { real = imag = 0; }
complex(LL a, LL b) { real = a, imag = b; }
complex operator + (complex b) {
complex res = *this;
reduce(res.real += b.real - mod, mod);
reduce(res.imag += b.imag - mod, mod);
return res;
}
complex operator * (complex b) {
complex res;
reduce(res.real = mul(real, b.real, mod) + mul(omega, mul(imag, b.imag, mod), mod) - mod, mod);
reduce(res.imag = mul(real, b.imag, mod) + mul(imag, b.real, mod) - mod, mod);
return res;
}
} ;
LL complex::mod = 0;
LL complex::omega = 0;
complex fastpow(complex a, LL b, complex res = complex(1, 0)) {
for (; b; b >>= 1, a = a * a) if (b & 1) res = res * a;
return res;
}
LL exgcd(LL a, LL b, LL & x, LL & y) {
if (!b) return x = 1, y = 0, a;
else {
LL t = exgcd(b, a % b, y, x);
y -= a / b * x;
return t;
}
}
LL INV(LL x, LL mod) {
LL ta, tb;
exgcd(x, mod, ta, tb);
reduce(ta, mod);
return ta;
}
namespace CRT {
void _CRT(LL & x, LL & a, LL x1, LL a1) {
LL G = gcd(x, x1);
const LL K = mul((a1 - a + x1) / G, INV(x / G, x1 / G), x1 / G);
const LL mod = x / G * x1;
reduce(a += mul(K, x, mod) - mod, mod);
x = mod;
}
LL CRT(std::vector<dbp> V) {
LL x, a; x = a = 0;
for (auto t : V) {
LL tx = t.first, ta = t.second;
x ? _CRT(x, a, tx, ta) : (void) (x = tx, a = ta);
}
return a;
}
}
namespace Quadratic {
LL Cipolla(LL x, LL mod) {
LL a, t;
do
a = rd() % mod, reduce(t = mul(a, a, mod) - x, mod);
while (fastpow(t, mod - 1 >> 1, mod) != mod - 1) ;
complex::mod = mod, complex::omega = t;
complex res = fastpow(complex(a, 1), mod + 1 >> 1);
return res.real;
}
LL _solvep(LL a, LL P, int K) {
LL _ = a % P;
if (fastpow(_, P - 1 >> 1, P) == P - 1) return -1;
LL x = Cipolla(_, P);
const LL mod = fastpow(P, K);
complex::omega = a, complex::mod = mod;
complex t1 = fastpow(complex(x, P - 1), K);
complex t2 = fastpow(complex(x, 1), K);
const LL iv2 = P + 1 >> 1;
LL u = mul(t1.real + t2.real, iv2, mod);
LL v = mul(t2.imag - t1.imag + mod, iv2, mod);
LL res = mul(u, INV(v, mod), mod);
return res;
}
LL solve2(LL a, int K) {
LL lst = 1, x = 1, now;
for (int i = 2; i <= K; ++i, lst <<= 1) {
now = lst << 2;
if (mul(x, x, now) == a % now) continue;
reduce(x -= lst, now);
if (mul(x, x, now) != a % now) return -1;
}
return x;
}
LL solvep(LL x, LL P, int K) {
x %= fastpow(P, K); int t = 0;
if (x == 0) return 0;
while (x % P == 0) x /= P, ++t;
if (t & 1) return -1;
LL res = P == 2 ? solve2(x, K - t) : _solvep(x, P, K - t);
if (res == -1) return res;
return fastpow(P, t >> 1) * res;
}
std::vector<fac> li;
void init(LL mod) { li = Factor::factor(mod); }
LL solve(LL x) {
std::vector<dbp> tx;
for (auto t : li) {
LL tv;
tv = solvep(x, t.first, t.second);
if (tv == -1) return -1;
tx.emplace_back(fastpow(t.first, t.second), tv);
}
return CRT::CRT(tx);
}
}
void init(LL mod) {
Prime::sieve();
Quadratic::init(mod);
}
}
int m, T, mod, pw3[13], N;
void reduce(int & x) { x += x >> 31 & mod; }
int mul(int a, int b) { return (LL) a * b % mod; }
int fastpow(int a, int b) {
int res = 1;
for (; b; b >>= 1, a = mul(a, a)) if (b & 1) res = mul(res, a);
return res;
}
int W, Ws;
void FWT(int * A, int typ) {
const int Wn = typ == 1 ? W : Ws;
const int Wns = typ == 1 ? Ws : W;
for (int mid = 1; mid != N; mid *= 3)
for (int k = 0; k != N; k += mid * 3)
for (int l = 0; l != mid; ++l) {
int & x = A[l + k], & y = A[l + k + mid], & z = A[l + k + mid * 2];
int ta = (x + (unsigned) y + z) % mod;
int tb = (x + (LL) y * Wn + (LL) z * Wns) % mod;
int tc = (x + (LL) y * Wns + (LL) z * Wn) % mod;
x = ta, y = tb, z = tc;
}
}
int A[MAXN], B[MAXN], tr[20][20];
int get(int x, int y) {
int res = 0;
while (x) res += x % 3 == y, x /= 3;
return res;
}
int main() {
std::ios_base::sync_with_stdio(false), std::cin.tie(0);
std::cin >> m >> T >> mod;
NumberTheory::init(mod);
pw3[0] = 1;
for (int i = 1; i != 13; ++i) pw3[i] = pw3[i - 1] * 3;
N = pw3[m];
const int S3 = NumberTheory::Quadratic::solve((mod - 3 % mod) % mod);
if (mod == 5) {
std::cout << "2\n1\n1\n";
return 0;
}
assert(S3 != -1);
const int iv2 = mod + 1 >> 1;
const int iv3 = mul(mod % 3 == 1 ? 1 : iv2, mod - mod / 3);
const int liminv = fastpow(iv3, m);
W = ((LL) S3 - 1 + mod) % mod * iv2 % mod;
Ws = mul(W, W);
for (int i = 0; i < N; ++i) std::cin >> A[i];
for (int i = 0; i <= m; ++i)
for (int j = 0; j + i <= m; ++j)
std::cin >> tr[i][j];
for (int i = 0; i < N; ++i)
B[i] = tr[get(i, 1)][get(i, 2)];
FWT(A, 1); FWT(B, 1);
for (int i = 0; i < N; ++i)
A[i] = mul(A[i], fastpow(B[i], T));
FWT(A, -1);
for (int i = 0; i < N; ++i) A[i] = mul(A[i], liminv);
for (int i = 0; i < N; ++i) std::cout << A[i] << '\n';
return 0;
}