题意:
统计\(k\)元组\((a_1,a_2,\cdots,a_k),1\leq a_i\leq n\)的个数,使得\(\gcd(a_1,a_2,\cdots,a_k,n)=1\)。
定义\(f(n,k)\)为满足要求的\(k\)元组个数,现在要求出\(\sum_{i=1}^n f(i,k),1\leq n\leq 10^9,1\leq k\leq 1000\)。
思路:
首先来化简一下式子,题目要求的就是:
\[\begin{aligned}&\sum_{a_1=1}^n\sum_{a_2=1}^n\cdots \sum_{a_k=1}^n [\gcd(a_1,a_2,\cdots,a_k,n)=1]\\=&\sum_{a_1=1}^n\sum_{a_2=1}^n\cdots \sum_{a_k=1}^n\sum_{d|\gcd(a_1,a_2,\cdots,a_k,n)}\mu(d)\\=&\sum_{d|n}\mu(d)\sum_{a_1=1}^{\frac{n}{d}}\sum_{a_2=1}^{\frac{n}{d}}\cdots \sum_{a_k=1}^{\frac{n}{d}}1\\=&\sum_{d|n}\mu(d) \left(\frac{n}{d}\right)^k\end{aligned}\]
套路到此结束~然后观察到这个式子其实是一个狄利克雷卷积的形式,设\(f(i)=\mu(i),g(i)=i^k\),上式即为\((f*g)(n)\)。
那么题目要求的就是这个卷积的前缀和,注意两个积性函数的卷积也是积性函数。因为\(\mu*I=\varepsilon\),所以我们再构造一个积性函数\(h=I\)(恒为\(1\)的常数函数),则\((f*g)*h=g*(\mu*I)=g*\varepsilon=g\),直接上杜教筛就行了。记\(S(n)=\sum_{i=1}^n (f*g)(i)\),最后的式子是:
\[h(1)\cdot S(n) = \sum_{i=1}^ng(i)-\sum_{d=2}^n h(d)S(\lfloor\frac{n}{d}\rfloor)\]
后半部分直接整除分块,至于\(\sum_{i=1}^ng(i)\),拉格朗日插值能在\(O(k)\)的时间复杂度解决。
代码如下(比赛的时候写得稍微有点乱):
/*
* Author: heyuhhh
* Created Time: 2019/11/29 21:03:32
*/
#include <iostream>
#include <algorithm>
#include <vector>
#include <cmath>
#include <set>
#include <map>
#include <iomanip>
// Shorthand macros for pairs and containers.
#define MP make_pair
#define fi first
#define se second
#define sz(x) (int)(x).size()
#define all(x) (x).begin(), (x).end()
// Integer sentinel with the value 0x3f3f3f3f (1061109567).
#define INF 0x3f3f3f3f
// While Local is defined, dbg(...) prints; otherwise the #else branch below
// turns dbg(...) into an empty expansion.
#define Local
#ifdef Local
// dbg(a, b) prints the stringized argument list, " -> ", then the values via err().
#define dbg(args...) do { cout << #args << " -> "; err(args); } while (0)
// Recursion terminator for err(): nothing left to print, so end the line.
void err() {
    std::cout << '\n';
}

// Prints `head` followed by one space, then handles the remaining values
// recursively; the zero-argument overload above finishes with a newline.
template <typename Head, typename... Tail>
void err(Head head, Tail... tail) {
    std::cout << head << ' ';
    err(tail...);
}
#else
#define dbg(...)
#endif
// Recursion terminator for pt(): nothing left to print, so end the line.
void pt() {
    std::cout << '\n';
}

// Prints `first` followed by one space, then the remaining values the same
// way; the zero-argument overload above finishes with a newline.
template <typename First, typename... Rest>
void pt(First first, Rest... rest) {
    std::cout << first << ' ';
    pt(rest...);
}
using namespace std;
typedef long long ll;
typedef pair<int, int> pii;
//head
// N: size of the small-argument tables (mu, p, chk, pre), which are filled for
//    indices 1 .. N-1; djs() answers every argument below N from them.
// MOD: modulus for all arithmetic (998244353; qpow(x, MOD - 2) is used as a
//    modular inverse, which relies on MOD being prime).
const int N = 1e4 + 5, MOD = 998244353;
// Current query, read in main(): upper limit n and exponent k.
int n, k;
// Modular exponentiation by repeated squaring: returns a^b mod MOD.
//
// a: base; any value is accepted and first reduced into [0, MOD), so the
//    products below stay under MOD^2 and cannot overflow a 64-bit integer.
// b: exponent; expected to be non-negative. A negative exponent yields 1
//    (the loop is skipped) rather than looping forever on `b >>= 1`.
ll qpow(ll a, ll b) {
    a %= MOD;
    if (a < 0) a += MOD;  // C++ '%' keeps the sign of the dividend
    ll ans = 1;
    while (b > 0) {
        if (b & 1) ans = ans * a % MOD;
        a = a * a % MOD;
        b >>= 1;
    }
    return ans;
}
struct Lagrange {
static const int SIZE = 1005;
ll f[SIZE], fac[SIZE], inv[SIZE], pre[SIZE], suf[SIZE];
int n;
inline void add(ll &x, int y) {
x += y;
if(x >= MOD) x -= MOD;
}
void init(int _n) {
n = _n;
fac[0] = 1;
for (int i = 1; i < SIZE; ++i) fac[i] = fac[i - 1] * i % MOD;
inv[SIZE - 1] = qpow(fac[SIZE - 1], MOD - 2);
for (int i = SIZE - 1; i >= 1; --i) inv[i - 1] = inv[i] * i % MOD;
//设置f初值,可以根据需要修改
f[0] = 0;
for (int i = 1; i <= n; ++i)
f[i] = (f[i - 1] + qpow(i, k)) % MOD;
}
ll calc(ll x) {
if (x <= n) return f[x];
pre[0] = x % MOD;
for (int i = 1; i <= n; ++i) pre[i] = pre[i - 1] * ((x - i) % MOD) % MOD;
suf[n] = (x - n) % MOD;
for (int i = n - 1; i >= 0; --i) suf[i] = suf[i + 1] * ((x - i) % MOD) % MOD;
ll res = 0;
for (int i = 0; i <= n; ++i) {
ll tmp = f[i] * inv[n - i] % MOD * inv[i] % MOD;
if (i) tmp = tmp * pre[i - 1] % MOD;
if (i < n) tmp = tmp * suf[i + 1] % MOD;
if ((n - i) & 1) tmp = MOD - tmp;
add(res, tmp);
}
return res;
}
}lagrange;
// Filled by init(): mu[i] is the Mobius function for 1 <= i < N, and
// p[1..cnt] holds the primes found by the sieve (1-based).
int mu[N], p[N];
// chk[i] is set once the sieve has marked i as composite.
bool chk[N];
// pre[i] = sum_{m <= i} sum_{d | m} mu[d] * (m / d)^k  (mod MOD); pre[0] stays 0.
int pre[N];
// Rebuilds the small-argument tables for the current global exponent k:
//   1. a linear sieve fills mu[1 .. N-1] (and p / chk along the way);
//   2. pre[i] accumulates sum_{d | m} mu[d] * (m / d)^k over m = 1 .. i, mod MOD.
void init() {
    mu[1] = 1;
    int primeCount = 0;
    for (int v = 2; v < N; ++v) {
        if (!chk[v]) {
            p[++primeCount] = v;
            mu[v] = -1;
        }
        for (int idx = 1; idx <= primeCount; ++idx) {
            const int multiple = v * p[idx];
            if (multiple > N - 1) break;
            chk[multiple] = 1;
            if (v % p[idx] == 0) {
                // p[idx]^2 divides the product, so it is not square-free.
                mu[multiple] = 0;
                break;
            }
            mu[multiple] = -mu[v];
        }
    }

    for (int m = 1; m < N; ++m) {
        int conv = 0;
        // Walk divisor pairs (d, q) with d * q == m and d <= q.
        for (int d = 1; 1ll * d * d <= m; ++d) {
            if (m % d != 0) continue;
            const int q = m / d;
            conv = (conv + 1ll * mu[d] * qpow(q, k) % MOD) % MOD;
            if (d != q) conv = (conv + 1ll * mu[q] * qpow(d, k) % MOD) % MOD;
            // mu may be -1, so bring the running value back into [0, MOD).
            if (conv < 0) conv += MOD;
        }
        pre[m] = (pre[m - 1] + conv) % MOD;
    }
}
map <int, ll> mp;
ll djs(int n) {
if(n < N) return pre[n];
if(mp.find(n) != mp.end()) return mp[n];
ll ans = lagrange.calc(n);
for(int i = 2, j; i <= n; i = j + 1) {
j = n / (n / i);
ans -= 1ll * (j - i + 1) * djs(n / i) % MOD;
if(ans < 0) ans += MOD;
}
return mp[n] = ans;
}
void run(){
lagrange.init(k + 1);
init();
int ans = djs(n);
cout << ans << '\n';
}
// Entry point: reads "n k" pairs until input is exhausted and answers each
// pair with run().
int main() {
    // Untie the C++ streams from C stdio and from each other for faster I/O.
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    cout.tie(nullptr);
    // Only affects floating-point output; run() prints an int.
    cout << fixed << setprecision(20);
    while (cin >> n >> k) {
        run();
    }
    return 0;
}