\[输入n,k,求\sum_{n=1}^{N}\sum_{a_1}^n...\sum_{a_k}^n[gcd(a_1,...a_k,n)==1] \\设f(d)=\sum_{a_1}^N...\sum_{a_k}^N[gcd(a_1,...a_k,N)==d] \\F(n)=\sum_{n|d}f(d)=\lfloor\frac{N}{n}\rfloor^k \ (n|N)\\f(n)=\sum_{n|d}\mu(\frac{d}{n})F(d) \\f(1)=\sum_{d=1}^N\mu(d)F(d)=\sum_{d|N}\mu(d)(\frac{N}{d})^k \\ans=\sum_{n=1}^{N}\sum_{d|n}\mu(d)(\frac{n}{d})^k=(枚举d)\sum_{d=1}^{N}\mu(d)\sum_{i=1}^{\lfloor \frac{n}{d} \rfloor}i^k\]
用分块+杜教筛求该式子,幂方可用拉格朗日插值法求也可用伯努利数去求
#include<bits/stdc++.h>
using namespace std;
typedef long long LL;
const int maxx = 1e9+10;
const int N = 1e6+10;
const int K = 1010;
const int mod = 998244353;
LL inv[K],b[K],c[K][K],tmp[K];
LL phi[N],vis[N],prime[N],mu[N],sum[N];
map<LL,LL>w;
int n,k;
void getmu()
{
int cnt=0;
phi[1]=mu[1]=1;
for(int i=2;i<N;i++)
{
if(!vis[i])prime[++cnt]=i,mu[i]=-1,phi[i]=i-1;
for(int j=1;j<=cnt&&prime[j]*i<N;j++)
{
vis[i*prime[j]]=1;
if(i%prime[j]==0)
{
phi[i*prime[j]]=phi[i]*prime[j];
break;
}
else mu[i*prime[j]]=-mu[i],phi[i*prime[j]]=phi[i]*(prime[j]-1);
}
}
for(int i=1;i<N;i++)sum[i]=sum[i-1]+mu[i];
}
int dfsmu(int x)
{
if(x<=1e6)return sum[x];
if(w[x])return w[x];
int ans=1;
for(int l=2,r;l>=0&&l<=x;l=r+1)
{
r=x/(x/l);
ans-=(r-l+1)*dfsmu(x/l);
}
return w[x]=ans;
}
void getb()
{
for(int i=0;i<K;i++)
{
c[i][0]=1;
for(int j=1;j<=i;j++)
c[i][j]=(c[i-1][j]%mod+c[i-1][j-1]%mod)%mod;
}
inv[1]=1;
for(int i=2;i<K;i++)
inv[i]=(mod-mod/i)*inv[mod%i]%mod;
b[0]=1;
for(int i=1;i<K;i++)
{
LL ans=0;
if(i==K-1)break;
for(int j=0;j<i;j++)
ans=(ans+c[i+1][j]*b[j])%mod;
ans*=-inv[i+1];
ans=(ans%mod+mod)%mod;
b[i]=ans;
}
}
LL work(int t)
{
tmp[0]=1;
for(int i=1;i<=k+1;i++)
tmp[i]=tmp[i-1]*(t+1)%mod;
LL ans=inv[k+1];
LL sum=0;
for(int i=1;i<=k+1;i++)
{
sum+=c[k+1][i]*tmp[i]%mod*b[k+1-i]%mod;
sum%=mod;
}
ans*=sum;
ans%=mod;
return ans;
}
int main()
{
getb();
getmu();
scanf("%d%d",&n,&k);
LL ans=0;
for(int l=1,r;l<=n;l=r+1)
{
r=n/(n/l);
ans=(ans+work(n/l)*(dfsmu(r)-dfsmu(l-1))%mod)%mod;
}
ans=(ans%mod+mod)%mod;
printf("%lld\n",ans);
}