题目链接
(BZOJ) https://www.lydsy.com/JudgeOnline/problem.php?id=4734
(UOJ) http://uoj.ac/problem/269
题解
似乎大家都是用神仙构造的做法构造了一个二项式反演,然而我只会拿Stirling数爆推QAQ……
首先考虑\(f(x)=x^m\)的情况,最后乘上一个系数求和即可。
\[\sum^n_{k=0}{n\choose k}k^mx^k(1-x)^{n-k}\\ =\sum^n_{k=0}\sum^m_{i=0}\begin{Bmatrix}m\\i\end{Bmatrix}k^{\underline i}x^k(1-x)^{n-k}\\ =\sum^m_{i=0}\begin{Bmatrix}m\\i\end{Bmatrix}\sum^n_{k=0}\frac{n!}{(n-k)!(k-i)!}x^k(1-x)^{n-k}\\ =\sum^m_{i=0}\begin{Bmatrix}m\\i\end{Bmatrix}n^{\underline i}\sum^n_{k=0}{n-i\choose k-i}x^k(1-x)^{n-k}\\ =\sum^m_{i=0}\begin{Bmatrix}m\\i\end{Bmatrix}n^{\underline i}x^i\]
设\(f(x)=\sum^m_{i=0}a_ix^i\), 则答案为\(\sum^m_{i=0}a_i\sum^i_{j=0}\begin{Bmatrix}i\\j\end{Bmatrix}n^{\underline j}x^j\)
嗯,直接求的话时间复杂度是\(O(m^2)\). 而这个做法看上去很难优化了,因为里面用到了斯特林数而且两维都不固定,怎么办?
斯特林数阻止了进一步的优化,因此刚才我们把幂转成了斯特林数,现在再考虑把斯特林数转回来!
首先要注意到一点就是斯特林数的基本公式\(\begin{Bmatrix}i\\j\end{Bmatrix}=\frac{1}{j!}\sum^j_{k=0}(-1)^{j-k}{j\choose k}k^i\)当\(i<j\)时也是适用的,此时等式两边都为\(0\).
于是有\[\sum^m_{i=0}a_i\sum^i_{j=0}\begin{Bmatrix}i\\j\end{Bmatrix}n^{\underline j}x^j\\ =\sum^m_{i=0}a_i\sum^i_{j=0}\sum^j_{k=0}(-1)^{j-k}{j\choose k}k^i{n\choose j}x^j\\ =\sum^m_{j=0}{n\choose j}x^j\sum^j_{k=0}(-1)^{j-k}{j\choose k}\sum^m_{k=0}a_ik^i\\=\sum^m_{j=0}{n\choose j}x^j\sum^j_{k=0}(-1)^{j-k}{j\choose k}f(k)\]
而题目里给定的恰好是\(f(x)\)在\(x=0,1,...,m\)处的点值!所以其实根本不需要插值!
推到这里做法就很显然了: 用NTT对每个\(j\)求出\(g_j=\sum^m_{j=0}(-1)^{j-k}{j\choose k}f(k)\), 然后计算\(\sum^m_{j=0}{n\choose j}x^jg_j\)即可。
时间复杂度\(O(m\log m)\).
代码
#include<bits/stdc++.h>
#define llong long long
using namespace std;
inline int read()
{
int w=1,s=0;char ch=getchar();
while(!isdigit(ch)) {if(ch=='-')w=-1;ch=getchar();}
while(isdigit(ch)) {s=s*10+ch-'0';ch=getchar();}
return w*s;
}
const int N = 1<<17;
const int lgN = 17;
const int P = 998244353;
const int G = 3;
llong quickpow(llong x,llong y)
{
llong cur = x,ret = 1ll;
for(int i=0; y; i++)
{
if(y&(1ll<<i)) {y-=(1ll<<i); ret = ret*cur%P;}
cur = cur*cur%P;
}
return ret;
}
llong mulinv(llong x) {return quickpow(x,P-2);}
namespace FFT
{
llong aux1[N+3],aux2[N+3],aux3[N+3],aux4[N+3],aux5[N+3];
int fftid[N+3];
llong sexp[N+3];
void resize(int dgr1,int dgr2,llong poly[]) {if(dgr1>dgr2) swap(dgr1,dgr2); for(int i=dgr1; i<dgr2; i++) poly[i] = 0ll;}
int getdgr(int n) {int dgr = 1; while(dgr<=n) dgr<<=1; return dgr;}
void init_fftid(int dgr)
{
int len = 0; for(int i=1; i<=lgN; i++) {if(dgr==(1<<i)) {len = i; break;}}
fftid[0] = 0; for(int i=1; i<dgr; i++) fftid[i] = (fftid[i>>1]>>1)|((i&1)<<(len-1));
}
void ntt(int dgr,int coe,llong poly[],llong ret[])
{
init_fftid(dgr);
if(poly==ret) {for(int i=0; i<dgr; i++) {if(i<fftid[i]) swap(ret[i],ret[fftid[i]]);}}
else {for(int i=0; i<dgr; i++) ret[i] = poly[fftid[i]];}
for(int i=1; i<dgr; i<<=1)
{
llong tmp = quickpow(G,(P-1)/(i<<1)); if(coe==-1) tmp = mulinv(tmp);
sexp[0] = 1ll; for(int j=1; j<i; j++) sexp[j] = sexp[j-1]*tmp%P;
for(int j=0; j<dgr; j+=(i<<1))
{
for(llong *k=ret+j,*kk=sexp; k<ret+i+j; k++,kk++)
{
llong x = (*k),y = k[i]*(*kk)%P;
(*k) = x+y>=P?x+y-P:x+y; k[i] = x-y<0?x-y+P:x-y;
}
}
}
if(coe==-1) {llong tmp = mulinv(dgr); for(int i=0; i<dgr; i++) ret[i] = ret[i]*tmp%P;}
}
void polymul(int dgr,llong poly1[],llong poly2[],llong ret[])
{
ntt(dgr<<1,1,poly1,aux1); ntt(dgr<<1,1,poly2,aux2);
for(int i=0; i<(dgr<<1); i++) ret[i] = aux1[i]*aux2[i]%P;
ntt(dgr<<1,-1,ret,ret);
}
}
using FFT::getdgr;
using FFT::resize;
using FFT::ntt;
using FFT::polymul;
llong fact[N+3],finv[N+3];
llong f[N+3],g[N+3],h[N+3];
int m; llong n,ax;
int main()
{
fact[0] = 1ll; for(int i=1; i<=N; i++) fact[i] = fact[i-1]*i%P;
finv[N] = quickpow(fact[N],P-2); for(int i=N-1; i>=0; i--) finv[i] = finv[i+1]*(i+1)%P;
scanf("%lld%d%lld",&n,&m,&ax); int dgr = getdgr(m);
for(int i=0; i<=m; i++) scanf("%lld",&f[i]),f[i] = f[i]*finv[i]%P;
for(int i=0; i<=dgr; i++) g[i] = i&1?P-finv[i]:finv[i];
polymul(dgr,f,g,h);
for(int i=0; i<=m; i++) h[i] = h[i]*fact[i]%P;
llong cur1 = 1ll,cur2 = 1ll,ans = 0ll;
for(int i=0; i<=m&&i<=n; i++)
{
llong tmp = cur1*cur2%P*h[i]%P;
ans = (ans+tmp)%P;
cur1 = cur1*mulinv(i+1ll)%P*(n-i)%P; cur2 = cur2*ax%P;
}
printf("%lld\n",ans);
return 0;
}