设 \(P_i=\max(p_1,p_2,...,p_i)\)。
首先转容斥,方便计算。
接下来容斥的点很巧妙:钦定哪些前缀最大值重合(非位置),我们可以把这个前缀最大值放在第一次重合的位置计算。
设 \(f[i]\) 表示考虑了前 \(i\) 个位置,并且 \(i\) 是钦定的前缀最大值的第一个重合位置,带符号的方案数之和。
枚举上一个钦定的位置 \(j\) 来转移。注意当 \(P_i=P_{i-1}\) 时,因为 \(i\) 是第一个重合的位置,所以有 \(q_i=P_i\);当 \(P_i\not =P_{i-1}\) 时,\(P_i\) 在 \(q\) 中可以填在任意位置,容易发现可以填在 \([j+1,i]\) 位置。
- 当 \(P_i=P_{i-1}\) 时
- 当 \(P_i\not =P_{i-1}\) 时
初始值:\(f[0]=1\)
随便分治 NTT 一下即可,时间复杂度 \(O(n\log ^2n)\)
点击查看代码
#pragma GCC diagnostic error "-std=c++11"
#pragma GCC target("avx")
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
#include<bits/stdc++.h>
#define ll int
#define pir pair<ll,ll>
#define mkp make_pair
#define fi first
#define se second
#define mkp make_pair
#define pb push_back
using namespace std;
const ll maxn=2e5+10, mod=998244353;
ll power(ll a,ll b=mod-2){
ll s=1;
while(b){
if(b&1) s=1ll*s*a%mod;
a=1ll*a*a%mod; b>>=1;
} return s;
}
struct POLY{
ll rev[maxn<<2],A[maxn<<2],B[maxn<<2],Inv[maxn<<2],G[maxn<<2],gg[maxn<<2];
ll tr;
inline void Getrev(const ll &n){
if(n==tr) return; tr=n;
for(ll i=1;i<n;i++)
rev[i]=(rev[i>>1]>>1)|(i&1? n>>1:0);
}
inline void ntt(ll *a,const ll &n){
Getrev(n);
for(ll i=1;i<n;i++)
if(i<rev[i]) swap(a[i],a[rev[i]]);
for(ll i=1;i<n;i<<=1){
ll g=(G[i]? G[i]:G[i]=power(3,(mod-1)/(i<<1)));
gg[0]=1;
for(ll j=1;j<i;j++) gg[j]=1ll*gg[j-1]*g%mod;
for(ll j=0;j<n;j+=(i<<1))
for(ll k=0;k<i;k++){
ll x=a[j|k], y=1ll*a[i|j|k]*gg[k]%mod;
a[j|k]=(x+y>=mod? x+y-mod:x+y),
a[i|j|k]=(x<y? x+mod-y:x-y);
}
}
}
inline void mul(const ll *a,const ll *b,ll *c,const ll n,const ll m,const ll k){
for(ll i=0;i<n;i++) A[i]=a[i];
for(ll i=0;i<m;i++) B[i]=b[i];
ll l=1;
while(l<n+m-1) l<<=1;
ntt(A,l), ntt(B,l);
for(ll i=0;i<l;i++) A[i]=1ll*A[i]*B[i]%mod;
ntt(A,l), reverse(A+1,A+l);
ll inv=(Inv[l]? Inv[l]:Inv[l]=power(l));
for(ll i=0;i<l;i++){
if(i<k) c[i]=1ll*A[i]*inv%mod;
A[i]=B[i]=0;
}
for(ll i=l;i<k;i++) c[i]=0;
}
}D;
ll t,n,a[maxn],P[maxn],f[maxn],g[maxn],h[maxn],fac[maxn],ifac[maxn],ff[maxn];
ll w[maxn],cg[maxn],ch[maxn];
inline void cdq(const ll &l,const ll &r){
if(l==r){
if(l){
if(P[l]==P[l-1]) f[l]=mod-1ll*g[l]*ifac[P[l]-l]%mod;
else f[l]=(mod-1ll*l*g[l]%mod+h[l])%mod*ifac[P[l]-l]%mod;
}
ff[l]=1ll*f[l]*l%mod;
return;
} ll mid=l+r>>1;
cdq(l,mid);
for(register ll i=l;i<=mid;i++) cg[i-l]=f[i], ch[i-l]=ff[i];
for(register ll i=mid+1;i<=r;i++) cg[i-l]=ch[i-l]=0;
ll b=P[mid+1]-mid;
for(register ll i=b;i<=P[r]-l;i++) w[i-b]=fac[i-1];
D.mul(cg,w,cg,mid-l+1,P[r]-l-b+1,P[r]-l-b+1);
D.mul(ch,w,ch,mid-l+1,P[r]-l-b+1,P[r]-l-b+1);
for(register ll i=mid+1;i<=r;i++) g[i]=(g[i]+cg[P[i]-l-b]>=mod? g[i]+cg[P[i]-l-b]-mod:
g[i]+cg[P[i]-l-b]), h[i]=(h[i]+ch[P[i]-l-b]>=mod? h[i]+ch[P[i]-l-b]-mod:h[i]+ch[P[i]-l-b]);
if(P[mid]>mid){
ll sumg=0, sumh=0;
for(register ll i=mid;i>=l&&P[i]==P[mid];i--){
sumg=(sumg+1ll*f[i]*fac[P[mid]-i-1])%mod;
sumh=(sumh+1ll*f[i]*i%mod*fac[P[mid]-i-1])%mod;
}
for(register ll i=mid+1;i<=r&&P[i]==P[mid];i++){
g[i]=(g[i]-sumg+mod)%mod, h[i]=(h[i]-sumh+mod)%mod;
}
}
cdq(mid+1,r);
}
void rd(ll &x){
char c;
while(!isdigit(c=getchar())) ;
x=c-'0';
while(isdigit(c=getchar())) x=(x<<1)+(x<<3)+c-'0';
}
int main(){ //freopen("p.in","r",stdin);
fac[0]=1; ll M=2e5;
for(register ll i=1;i<=M;i++) fac[i]=1ll*fac[i-1]*i%mod;
ifac[M]=power(fac[M]);
for(register ll i=M;i;i--) ifac[i-1]=1ll*ifac[i]*i%mod;
rd(t);
while(t--){
rd(n);
for(register ll i=1;i<=n;i++){
rd(a[i]);
P[i]=max(P[i-1],a[i]);
}
f[0]=1;
cdq(0,n-1);
ll ans=0;
for(register ll i=0;i<n;i++)
ans=(ans+1ll*f[i]*fac[n-i])%mod, g[i]=h[i]=f[i]=0;
printf("%d\n",ans);
}
return 0;
}