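通過枚舉 $i \bmod m$ 和 $\lfloor i/m \rfloor$。由 Lucas 定理,$\binom{n}{i} \equiv \binom{\lfloor n/m \rfloor}{\lfloor i/m \rfloor}\binom{n \bmod m}{i \bmod m} \pmod m$,所以兩部分的貢獻是可以獨立計算的。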
首先 $i^k$ 是一個完全積性函數,可以線性篩,需要計算快速冪的只有質數,大概複雜度就是 $O\!\left(\frac{m}{\ln m}\log k\right)$,就近似 $O(m)$ 了。
#include <bits/stdc++.h>
// Debug helper: prints "<expression text> = <value>" to stderr.
#define show(x) cerr << #x << " = " << x << endl
using namespace std;
// Short alias for the 64-bit type used for intermediate modular products.
typedef long long ll;
// Pair-of-ints alias; not referenced in the visible part of the file.
typedef pair<int, int> Pairs;
// Capacity of every fixed-size global table declared in this file.
const int N = 1010101;
// Returns the next byte of stdin, refilling an internal 100000-byte buffer
// with fread whenever it has been fully consumed. Returns EOF (converted to
// char) once fread delivers no more data.
inline char get(void) {
    static char chunk[100000];
    static char *cursor = chunk;  // next unread byte
    static char *limit = chunk;   // one past the last valid byte
    if (cursor == limit) {
        const size_t got = fread(chunk, 1, sizeof chunk, stdin);
        cursor = chunk;
        limit = chunk + got;
        if (got == 0) return EOF;
    }
    return *cursor++;
}
// Parses the next decimal integer from stdin (through get()) into x.
//
// Every non-digit character before the number is skipped; if any of the
// skipped characters is '-', the parsed value is negated. The first non-digit
// after the number is consumed as well.
//
// If the input ends before any digit is found, x is left at 0 and the function
// returns. (Previously this case spun forever, because get() keeps returning
// EOF and EOF is not a digit.) Since get() returns char, a raw 0xFF byte is
// indistinguishable from end of input and is treated the same way.
template<typename T>
inline void read(T &x) {
    x = 0;
    bool negative = false;
    char c = get();
    while (c < '0' || c > '9') {
        if (c == static_cast<char>(EOF)) return;  // input exhausted: nothing to parse
        if (c == '-') negative = true;
        c = get();
    }
    for (; c >= '0' && c <= '9'; c = get()) x = x * 10 + c - '0';
    if (negative) x = -x;
}
// k: exponent used for the fk table; m: modulus of all arithmetic (both read
// in main); pcnt: number of primes currently stored in prime[].
int k, m, pcnt;
// fk[i] = i^k mod m for i >= 1 (filled by pre; fk[0] stays 0).
// n[1..n[0]] = decimal digits of the big input number, n[0] = digit count.
int fk[N], n[N];
// ndm = floor(n / m) mod (m - 1), nmm = n mod m, ans = running answer;
// all three are computed in main.
int ndm, nmm, ans;
// Not referenced anywhere in the visible code.
int s[N];
// fac[i] = i! mod m; ifac[i] = modular inverse of i! (filled by pre; the
// inverse recurrence used there assumes m is prime).
int fac[N], ifac[N];
// prime[1..pcnt]: primes found by the sieve in pre; vis[x] != 0 marks x composite.
int prime[N], vis[N];
inline int pwr(int a, int b) {
int c = 1;
while (b) {
if (b & 1) c = (ll)c * a % m;
b >>= 1; a = (ll)a * a % m;
}
return c;
}
// Fills the global tables for every index in [0, n]:
//   fac[i]  = i! mod m
//   ifac[i] = inverse of i! mod m
//   fk[i]   = i^k mod m for i >= 1 (fk[0] is not written here)
// and records the primes up to n in prime[1..pcnt].
// The inverse recurrence presumably relies on m being prime and n < m; the
// only visible caller passes n = m - 1.
inline void pre(int n) {
    // Pass 1: ifac[v] temporarily holds the inverse of v itself, obtained from
    // the inverse of the smaller value m % v.
    ifac[1] = 1;
    for (int v = 2; v <= n; ++v) {
        const long long negQuot = m - m / v;
        ifac[v] = negQuot * ifac[m % v] % m;
    }

    // Pass 2: running products turn single inverses into inverse factorials,
    // while fac[] accumulates the factorials themselves.
    fac[0] = 1;
    ifac[0] = 1;
    for (int v = 1; v <= n; ++v) {
        fac[v] = (long long)fac[v - 1] * v % m;
        ifac[v] = (long long)ifac[v - 1] * ifac[v] % m;
    }

    // Pass 3: linear sieve. x -> x^k is completely multiplicative, so only
    // primes need a call to pwr; each composite is built from two known values.
    fk[1] = 1;
    for (int v = 2; v <= n; ++v) {
        if (!vis[v]) {
            prime[++pcnt] = v;
            fk[v] = pwr(v, k);
        }
        for (int idx = 1; idx <= pcnt; ++idx) {
            const int p = prime[idx];
            const int composite = p * v;
            if (composite > n) break;
            vis[composite] = 1;
            fk[composite] = (long long)fk[v] * fk[p] % m;
            // Stop at the smallest prime factor of v so every composite is
            // marked exactly once.
            if (v % p == 0) break;
        }
    }
}
// Binomial coefficient "n choose m" modulo the global modulus ::m, computed
// from the fac/ifac tables filled by pre (so n must not exceed the bound that
// was passed to pre). The parameter m shadows the global modulus, hence the
// explicit ::m below.
// Returns 0 when m is outside [0, n]; previously such arguments indexed the
// tables with a negative subscript.
inline int C(int n, int m) {
    if (m < 0 || m > n) return 0;
    const long long numerator = fac[n];
    return (int)(numerator * ifac[m] % ::m * ifac[n - m] % ::m);
}
int main(void) {
freopen("1.in", "r", stdin);
freopen("1.out", "w", stdout);
for (char c = get(); c >= '0' && c <= '9'; c = get())
n[++*n] = c - '0';
read(k); read(m); pre(m - 1);
int cur = 0;
for (int i = 1; i <= *n; i++) {
cur = cur * 10 + n[i];
nmm = (nmm * 10 + n[i]) % m;
ndm = (ndm * 10 + cur / m) % (m - 1);
cur %= m;
}
for (int j = 0; j <= nmm; j++)
ans = (ans + (ll)pwr(fk[j] - fk[nmm - j] + m, 2) * C(nmm, j) % m) % m;
ans = (ll)ans * pwr(2, ndm) % m;
cout << ans << endl;
return 0;
}