闲话
没有社论
因为我不觉得我能把这题剖析得多明白
但总之我切了(
昨天没有闲话
我也不知道为什么 但总之想写的时候已经八点多了
然后写了一半就回去睡觉了
然后今天补完了
翻今天T3原题时看到一道题
CF1051F
是不是很眼熟?
然后在打url
已存在相同的友好地址名:chitchat221103
??? 瞪了半分钟
哦wssb
杂题
我趣 我终于切了(
但是感觉全是代码没有讲解啊
给定 \(k,p\),定义 \(n\) 的一个合法整数分拆为满足如下条件的有序数列 \(a\):
- 数列各项加和为 \(n\)。
- 数列各项值均不超过 \(k\)。
- 数列任意相邻两项不能均大于等于 \(p\)。
给定整数 \(l\),求 \(l\) 不同的合法整数分拆的数量。
两个合法整数分拆彼此不同,当且仅当二者对应数列长度不同或存在一项不同。
\(1\le k,p \le 1000,\ 1\le l\le 10^{18}\)。
一点点来吧……
1. \(1\le k,p \le 50,\ 1\le l\le 10^{3}\)
考虑直接 dp。
设 \(f_{i,j}\) 为当前整数大小为 \(i\),数列最后一项为 \(j\) 的可能情况。
然后容易写出转移:
直接转移做到 \(O(kpl)\)。
答案即为 \(\sum_{i=1}^k f_{l,i}\)。
code
#include <bits/stdc++.h>
using namespace std; using pii = pair<int,int>;
using ll = long long; using ull = unsigned long long; using db = double; using ld = long double; using lll = __int128_t;
mt19937 rnd(chrono::steady_clock::now().time_since_epoch().count());
template<typename T> void get(T & x) {
x = 0; char ch = getchar(); bool f = false; while (ch < '0' or ch > '9') f = f or ch == '-', ch = getchar();
while ('0' <= ch and ch <= '9') x = (x << 1) + (x << 3) + ch - '0', ch = getchar(); f && (x = -x);
} template <typename T, typename ... Args> void get(T & a, Args & ... b) { get(a); get(b...); }
#define rep(i,s,t) for (register int i = (s), i##_ = (t) + 1; i < i##_; ++ i)
#define pre(i,s,t) for (register int i = (s), i##_ = (t) - 1; i > i##_; -- i)
const int N = 5e3 + 10;
// const int B = 300;
const int mod = 20201114;
// const int mod = 469762049, g = 3;
// const int mod = 998244353, g = 3;
// const int mod = 1004535809, g = 3;
// const int mod = 1e9 + 7;
// const int mod = 1e9 + 9;
// const int mod = 1e9 + 3579, bse = 131;
const int inv2 = (mod + 1) >> 1;
template <typename T1, typename T2> T1 add(T1 a, T2 b) { return (a += b) >= mod ? a - mod : a; }
template <typename T1, typename ...Args> T1 add(T1 a, Args ... b) { return add(a, add(b...)); }
struct FastMod { int m; ll b; void init(int _m) { m = _m; b = ((lll)1<<64) / m; } FastMod(int _m) { init(_m); } int operator() (ll a) {ll q = ((lll)a * b) >> 64; a -= q * m; if (a >= m) a -= m; return a; } } Mod(mod);
int mul(int a, int b) { return Mod(1ll * a * b); } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
// int mul(int a, int b) { return 1ll * a * b % mod; } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
template <typename T1, typename T2> T1 qp(T1 a, T2 b) { T1 ret = 1; for (; b; a = mul(a, a), b >>= 1) if (b & 1) ret = mul(ret, a); return ret; }
int k, p, f[N][55], ans;
ll l;
int main() {
get(k, p, l);
f[0][1] = 1;
rep(i,1,l) {
rep(j,1,min(i,p-1)) {
rep(l,1,k) f[i][j] = add(f[i][j], f[i-j][l]);
}
rep(j,p,min(i,k)) {
rep(l,1,p-1) f[i][j] = add(f[i][j], f[i-j][l]);
}
} rep(i,1,min(l, (ll)k)) ans = add(ans, f[l][i]);
cout << ans << endl;
}
然后我们发现似乎可以压缩一下状态。我们发现 \(j\) 这一维实际上只记录了 \(j< p\) 的情况和 \(j\ge p\) 的情况。此决策可以在转移时完成。然后就可以把这两种状态压缩:
设 \(f_{i,0/1}\) 为当前整数大小为 \(i\),数列最后一项是否小于 \(p\) 的方案数。
然后仍然是转移:
答案即为 \(f_{l,0} + f_{l,1}\)。直接转移做到 \(O(kl)\)。
code
#include <bits/stdc++.h>
using namespace std; using pii = pair<int,int>;
using ll = long long; using ull = unsigned long long; using db = double; using ld = long double; using lll = __int128_t;
mt19937 rnd(chrono::steady_clock::now().time_since_epoch().count());
template<typename T> void get(T & x) {
x = 0; char ch = getchar(); bool f = false; while (ch < '0' or ch > '9') f = f or ch == '-', ch = getchar();
while ('0' <= ch and ch <= '9') x = (x << 1) + (x << 3) + ch - '0', ch = getchar(); f && (x = -x);
} template <typename T, typename ... Args> void get(T & a, Args & ... b) { get(a); get(b...); }
#define rep(i,s,t) for (register int i = (s), i##_ = (t) + 1; i < i##_; ++ i)
#define pre(i,s,t) for (register int i = (s), i##_ = (t) - 1; i > i##_; -- i)
const int N = 5e3 + 10;
// const int B = 300;
const int mod = 20201114;
// const int mod = 469762049, g = 3;
// const int mod = 998244353, g = 3;
// const int mod = 1004535809, g = 3;
// const int mod = 1e9 + 7;
// const int mod = 1e9 + 9;
// const int mod = 1e9 + 3579, bse = 131;
const int inv2 = (mod + 1) >> 1;
template <typename T1, typename T2> T1 add(T1 a, T2 b) { return (a += b) >= mod ? a - mod : a; }
template <typename T1, typename ...Args> T1 add(T1 a, Args ... b) { return add(a, add(b...)); }
struct FastMod { int m; ll b; void init(int _m) { m = _m; b = ((lll)1<<64) / m; } FastMod(int _m) { init(_m); } int operator() (ll a) {ll q = ((lll)a * b) >> 64; a -= q * m; if (a >= m) a -= m; return a; } } Mod(mod);
int mul(int a, int b) { return Mod(1ll * a * b); } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
// int mul(int a, int b) { return 1ll * a * b % mod; } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
template <typename T1, typename T2> T1 qp(T1 a, T2 b) { T1 ret = 1; for (; b; a = mul(a, a), b >>= 1) if (b & 1) ret = mul(ret, a); return ret; }
int k, mx, p, sum[N][2], ans;
ll l;
int main() {
get(k, p, l);
sum[0][0] = 1;
rep(i,1,l) {
rep(j,1,min(p - 1, i)) {
sum[i][0] = add(sum[i][0], sum[i - j][0], sum[i - j][1]);
}
rep(j,p,min(k, i)) {
sum[i][1] = add(sum[i][1], sum[i - j][0]);
}
}
cout << add(sum[l][0], sum[l][1]) << endl;
return 0;
}
2. \(1\le k,p \le 50,\ 1\le l\le 10^{18}\)
我们发现,如上 \(O(kl)\) 的转移可以写成矩阵的形式。具体地,我们使用 \(2k\times 2k\) 的矩阵进行转移,初始向量记录 \(1\sim k\) 段内所有 \(f_{i,0}\) 与 \(f_{i,1}\)。
k=5, p=3 的情况
\[\begin{bmatrix} f_{i+1,1}\\ f_{i+1,0}\\ f_{i,1}\\ f_{i,0}\\ f_{i-1,1}\\ f_{i-1,0}\\ f_{i-2,1}\\ f_{i-2,0}\\ f_{i-3,1}\\ f_{i-3,0}\\ \end{bmatrix}= \begin{bmatrix} 0 & 0 & 0 & 0 & 0 &1 & 0 & 1 & 0 & 1\\ 1 & 1 & 1 & 1 & 0 &0 & 0 & 0 & 0 & 0\\ 1 & 0 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0\\ 0 & 1 & 0 & 0 & 0 &0 & 0 & 0 & 0 & 0\\ 0 & 0 & 1 & 0 & 0 &0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 1 & 0 &0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 1 &0 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0 &1 & 0 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0 &0 & 1 & 0 & 0 & 0\\ 0 & 0 & 0 & 0 & 0 &0 & 0 & 1 & 0 & 0\\ \end{bmatrix} \begin{bmatrix} f_{i,1}\\ f_{i,0}\\ f_{i-1,1}\\ f_{i-1,0}\\ f_{i-2,1}\\ f_{i-2,0}\\ f_{i-3,1}\\ f_{i-3,0}\\ f_{i-4,1}\\ f_{i-4,0}\\ \end{bmatrix} \]然后转移即可。总时间复杂度 \(O(8k^3\log l)\)。
code
#include <bits/stdc++.h>
using namespace std; using pii = pair<int,int>;
using ll = long long; using ull = unsigned long long; using db = double; using ld = long double; using lll = __int128_t;
mt19937 rnd(chrono::steady_clock::now().time_since_epoch().count());
template<typename T> void get(T & x) {
x = 0; char ch = getchar(); bool f = false; while (ch < '0' or ch > '9') f = f or ch == '-', ch = getchar();
while ('0' <= ch and ch <= '9') x = (x << 1) + (x << 3) + ch - '0', ch = getchar(); f && (x = -x);
} template <typename T, typename ... Args> void get(T & a, Args & ... b) { get(a); get(b...); }
#define rep(i,s,t) for (register int i = (s), i##_ = (t) + 1; i < i##_; ++ i)
#define pre(i,s,t) for (register int i = (s), i##_ = (t) - 1; i > i##_; -- i)
const int N = 5e3 + 10;
// const int B = 300;
const int mod = 20201114;
// const int mod = 469762049, g = 3;
// const int mod = 998244353, g = 3;
// const int mod = 1004535809, g = 3;
// const int mod = 1e9 + 7;
// const int mod = 1e9 + 9;
// const int mod = 1e9 + 3579, bse = 131;
const int inv2 = (mod + 1) >> 1;
template <typename T1, typename T2> T1 add(T1 a, T2 b) { return (a += b) >= mod ? a - mod : a; }
template <typename T1, typename ...Args> T1 add(T1 a, Args ... b) { return add(a, add(b...)); }
struct FastMod { int m; ll b; void init(int _m) { m = _m; b = ((lll)1<<64) / m; } FastMod(int _m) { init(_m); } int operator() (ll a) {ll q = ((lll)a * b) >> 64; a -= q * m; if (a >= m) a -= m; return a; } } Mod(mod);
int mul(int a, int b) { return Mod(1ll * a * b); } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
// int mul(int a, int b) { return 1ll * a * b % mod; } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
template <typename T1, typename T2> T1 qp(T1 a, T2 b) { T1 ret = 1; for (; b; a = mul(a, a), b >>= 1) if (b & 1) ret = mul(ret, a); return ret; }
int k, mx, p, sum[N][2], ans;
ll l;
struct mat {
int a[405][405];
int* operator [] (const int & b) { return a[b]; }
const int* operator [] (const int & b) const { return a[b]; }
void clear() { memset(a, 0, sizeof a); }
mat operator * (const mat & b) const {
mat ret; ret.clear();
rep(i,0,mx) rep(j,0,mx) rep(k,0,mx) ret[i][j] = add(ret[i][j], mul(a[i][k], b[k][j]));
return ret;
}
} bse, str;
int main() {
get(k, p, l); mx = (k<<1) - 1;
sum[0][0] = 1;
rep(i,1,k) {
rep(j,1,min(p - 1, i)) {
sum[i][0] = add(sum[i][0], sum[i - j][0], sum[i - j][1]);
}
rep(j,p,min(k, i)) {
sum[i][1] = (sum[i][1] + sum[i - j][0]) % mod;
}
} if (l <= k) {
cout << add(sum[l][0], sum[l][1]) << endl;
return 0;
}
rep(i,0,k - 1) {
str[i << 1][0] = sum[k - i][1];
str[i << 1 | 1][0] = sum[k - i][0];
}
for (int i = (k << 1) - 1; i >= (p << 1) - 1; i -= 2) bse[0][i] = 1;
for (int i = 0; i < (p - 1) << 1; i++) bse[1][i] = 1;
for (int i = 2; i < k << 1; i++) bse[i][i - 2] = 1;
l -= k;
while (l) {
if (l & 1) str = bse * str;
bse = bse * bse;
l >>= 1;
}
printf("%d\n", add(str[0][0], str[1][0]));
}
我不知道为什么它要给一个 \(k=200\) 的部分分。反正我数组开小的正解得了这分。大概是兜底?
3. \(1\le k,p \le 1000,\ 1\le l\le 10^{18}\)
你看这个矩阵是不是很像线性递推?
考虑设 \(g_{i} = f_{i,0} + f_{i,1}\)。
我们可以写
前面那部分可以直接扔进线性递推转移。后面那部分……
考虑在最后一步 \(i \ge p\) 时,仅有倒数第二步 \(j < p\) 的状态能够全部转移。同时,考虑这里的贡献在 \(j < p\) 时已经被算了,我们只需要 \(\ge p\) 的 \(j\)。算一下有多少个 \(j\) 满足条件,然后 \(f_i\) 的系数就是它。
这里由于需要让所有贡献都被算完,线性递推的长度是 \(k + p - 1\) 的。
转移时直接 \(O(k^2)\) 模拟多项式乘法即可,因为不想写任意模数 ntt。
模拟的总复杂度为 \(O(k^2 \log l)\)。
code
#include <bits/stdc++.h>
using namespace std; using pii = pair<int,int>;
using ll = long long; using ull = unsigned long long; using db = double; using ld = long double; using lll = __int128_t;
mt19937 rnd(chrono::steady_clock::now().time_since_epoch().count());
template<typename T> void get(T & x) {
x = 0; char ch = getchar(); bool f = false; while (ch < '0' or ch > '9') f = f or ch == '-', ch = getchar();
while ('0' <= ch and ch <= '9') x = (x << 1) + (x << 3) + ch - '0', ch = getchar(); f && (x = -x);
} template <typename T, typename ... Args> void get(T & a, Args & ... b) { get(a); get(b...); }
#define rep(i,s,t) for (register int i = (s), i##_ = (t) + 1; i < i##_; ++ i)
#define pre(i,s,t) for (register int i = (s), i##_ = (t) - 1; i > i##_; -- i)
const int N = 4e3 + 10;
// const int B = 300;
const int mod = 20201114;
// const int mod = 469762049, g = 3;
// const int mod = 998244353, g = 3;
// const int mod = 1004535809, g = 3;
// const int mod = 1e9 + 7;
// const int mod = 1e9 + 9;
// const int mod = 1e9 + 3579, bse = 131;
const int inv2 = (mod + 1) >> 1;
template <typename T1, typename T2> T1 add(T1 a, T2 b) { return (a += b) >= mod ? a - mod : a; }
template <typename T1, typename ...Args> T1 add(T1 a, Args ... b) { return add(a, add(b...)); }
struct FastMod { int m; ll b; void init(int _m) { m = _m; b = ((lll)1<<64) / m; } FastMod(int _m) { init(_m); } int operator() (ll a) {ll q = ((lll)a * b) >> 64; a -= q * m; if (a >= m) a -= m; return a; } } Mod(mod);
int mul(int a, int b) { return Mod(1ll * a * b); } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
// int mul(int a, int b) { return 1ll * a * b % mod; } template <typename ...Args> int mul(int a, Args ...b) { return mul(a, mul(b...)); }
// template <typename T1, typename T2> T1 qp(T1 a, T2 b) { T1 ret = 1; for (; b; a = mul(a, a), b >>= 1) if (b & 1) ret = mul(ret, a); return ret; }
int k, mx, p, sum[N][2], ans;
int f[N], h[N];
ll l;
struct lst {
int a[N];
int & operator [] (const int & p) { return a[p]; }
const int operator [] (const int & p) const { return a[p]; }
lst operator * (const lst & b) const {
lst ret; rep(i,0,mx-1) ret[i] = 0;
rep(i,0,mx-1) rep(j,0,mx-1) ret[i + j] = add(ret[i + j], mul(a[i], b[j]));
for (int i = (mx-1) << 1; i >= mx; ret[i] = 0, -- i) rep(j,1,mx)
ret[i - j] = add(ret[i - j], mul(ret[i], f[j]));
return ret;
}
} res;
lst qp(lst a, ll b) {
lst ret; memset(ret.a, 0, sizeof ret.a); ret[0] = 1;
while (b) {
if (b & 1) ret = ret * a;
a = a * a;
b >>= 1;
} return ret;
}
int main() {
get(k, p, l); mx = k + p - 1; -- l;
sum[0][0] = 1;
rep(i,1,mx) {
rep(j,1,min(p - 1, i)) {
sum[i][0] = add(sum[i][0], sum[i - j][0], sum[i - j][1]);
}
rep(j,p,min(k, i)) {
sum[i][1] = (sum[i][1] + sum[i - j][0]) % mod;
}
}
rep(i,1,p-1) f[i] = 1;
rep(i,p,k+p-1) rep(j,p,k) if (0 < i - j and i - j < p) ++ f[i];
rep(i,0,k+p-1) h[i] = add(sum[i + 1][0], sum[i + 1][1]);
if (l <= mx) {
cout << h[l] << endl;
return 0;
}
res[1] = 1; ans = 0;
res = qp(res, l);
rep(i,0,mx - 1) ans = add(ans, mul(res[i], h[i]));
cout << ans << endl;
}
鰰 NaCly_Fish 实现了 \(O(k \log k \log l)\) 的多项式写法。 方便学习存一下。
by NaCly_Fish
#include<cstdio>
#include<iostream>
#include<cstring>
#include<algorithm>
#include<vector>
#define ll long long
#define N 16387
#define p 20201114
#define i128 __int128_t
using namespace std;
inline int power(int a,int t,const int& m){
int res = 1;
while(t){
if(t&1) res = (ll)res*a%m;
a = (ll)a*a%m;
t >>= 1;
}
return res;
}
int siz;
int rev[N],rt1[N],rt2[N];
const int p1 = 1004535809,p2 = 998244353;
void init(int n){
int lim = 1;
while(lim<=n) lim <<= 1,++siz;
for(int i=0;i!=lim;++i) rev[i] = (rev[i>>1]>>1)|((i&1)<<(siz-1));
int w1 = power(3,(p1-1)>>siz,p1),w2 = power(3,(p2-1)>>siz,p2);
rt1[lim>>1] = rt2[lim>>1] = 1;
for(int i=(lim>>1)+1;i!=lim;++i) rt1[i] = (ll)rt1[i-1]*w1%p1,rt2[i] = (ll)rt2[i-1]*w2%p2;
for(int i=(lim>>1)-1;i;--i) rt1[i] = rt1[i<<1],rt2[i] = rt2[i<<1];
}
inline void dft(int *f,int n,const int *rt,const int& m){
static unsigned long long a[N];
int x,shift = siz-__builtin_ctz(n);
for(int i=0;i!=n;++i) a[rev[i]>>shift] = f[i];
for(int mid=1;mid!=n;mid<<=1)
for(int j=0;j!=n;j+=(mid<<1))
for(int k=0;k!=mid;++k){
x = a[j|k|mid]*rt[mid|k]%m;
a[j|k|mid] = a[j|k]+m-x;
a[j|k] += x;
}
for(int i=0;i!=n;++i) f[i] = a[i]%m;
}
inline void idft(int *f,int n,const int *rt,const int& m){
reverse(f+1,f+n);
dft(f,n,rt,m);
int x = m-((m-1)>>__builtin_ctz(n));
for(int i=0;i!=n;++i) f[i] = (ll)f[i]*x%m;
}
const ll M = (ll)p1*p2,t1 = (ll)power(p2,p1-2,p1)*p2,t2 = (ll)power(p1,p2-2,p2)*p1;
inline int crt(int x,int y){
return ((i128)x*t1+(i128)y*t2)%M%p;
}
inline void multiply(const int *f,const int *g,int n,int m,int *R){
static int f1[N],g1[N],f2[N],g2[N],h1[N],h2[N];
memcpy(f1,f,(n+1)<<2),memcpy(g1,g,(m+1)<<2);
memcpy(f2,f,(n+1)<<2),memcpy(g2,g,(m+1)<<2);
int lim = 1<<(32-__builtin_clz(n+m));
memset(f1+n+1,0,(lim-n)<<2),memset(g1+m+1,0,(lim-m)<<2);
memset(f2+n+1,0,(lim-n)<<2),memset(g2+m+1,0,(lim-m)<<2);
dft(f1,lim,rt1,p1),dft(f2,lim,rt2,p2);
dft(g1,lim,rt1,p1),dft(g2,lim,rt2,p2);
for(int i=0;i!=lim;++i) h1[i] = (ll)f1[i]*g1[i]%p1,h2[i] = (ll)f2[i]*g2[i]%p2;
idft(h1,lim,rt1,p1),idft(h2,lim,rt2,p2);
for(int i=0;i<=n+m;++i) R[i] = crt(h1[i],h2[i]);
}
int solve(const int *P,const int *Q,int k,ll n){
static int f[N],g[N],h[N];
memcpy(f,P,k<<2),memcpy(g,Q,(k+1)<<2);
while(n>k){
for(int i=0;i<=k;++i) h[i] = (i&1)?p-g[i]:g[i];
multiply(f,h,k-1,k,f),multiply(g,h,k,k,g);
for(int i=1;i<=k;++i) g[i] = g[i<<1],g[i<<1] = 0;
int t = n&1;
if(t==1) f[0] = f[1],f[1] = 0;
for(int i=1;i<k;++i) f[i] = f[i<<1|t],f[i<<1|t] = 0;
n >>= 1;
}
h[0] = f[0];
for(int i=1;i<=n;++i){
h[i] = f[i];
for(int j=0;j<i;++j) h[i] = (h[i]-(ll)h[j]*g[i-j])%p;
}
return (h[n]+p)%p;
}
int P[N],Q[N];
int k,m;
ll n;
int main(){
scanf("%d%d%lld",&k,&m,&n);
init(k<<2|1);
if(k==1||m==1){
puts("0");
return 0;
}
P[0] = 1,P[1] = p-1,P[k+1] = p-1,P[m] = 1,Q[0] = 1,Q[1] = -1;
for(int i=1;i<m;++i) Q[i]--,Q[i+1]++,Q[i+k+1]++,Q[i+m]--;
for(int i=0;i<=m+k;++i) Q[i] = Q[i]<0?Q[i]+p:Q[i];
printf("%d",solve(P,Q,m+k,n));
return 0;
}