首先设 \(a_i = \max\limits_{j = 1}^i p_j\),\(b_i = \max\limits_{j = 1}^i q_j\)。
直接容斥,钦定有多少值不同的 \(a_i\) 使得 \(a_i = b_i\)。然后再把钦定的每种值转化成每种值第一次使得 \(a_i = b_i\) 的位置 \(i\)。
也就是说我们现在要钦定一些位置,满足 \(a_i = b_i\),且不存在 \(j \in [1, i - 1]\) 使得 \(a_j = b_j = a_i\)。
考虑 dp,设 \(f_i\) 表示考虑了 \([1, i]\),当前最后一个被钦定的位置是 \(i\),每种方案的容斥系数之和。
转移枚举上一个被钦定的位置 \(j\)(需要满足 \(a_j \ne a_i\))。
- 若 \(a_i = a_{i - 1}\),说明 \(a_{i - 1} \ne b_{i - 1}\),那么 \(p_i = a_i\)。\([j + 1, i - 1]\) 中相当于从 \(a_i - 1 - j\) 个数中依次选 \(i - j - 1\) 个数填进去,有 \(f_i = -\sum\limits_{j = 0}^{i - 1} [a_j \ne a_i] f_j \frac{(a_i - 1 - j)!}{(a_i - i)!}\)。
- 若 \(a_i \ne a_{i - 1}\),我们可以从 \([j + 1, i]\) 中选一个位置 \(k\) 使得 \(p_k = a_i\)(方案数为 \(i - j\)),然后再从 \(a_i - 1 - j\) 个数中依次选 \(i - j - 1\) 个数填进空位,有 \(f_i = -\sum\limits_{j = 0}^{i - 1} [a_j \ne a_i] f_j \frac{(a_i - 1 - j)!}{(a_i - i)!} (i - j)\)。
初值为 \(f_0 = 1\)。答案即为 \(\sum\limits_{i = 0}^{n - 1} f_i (n - i)!\)。
暴力转移是 \(O(n^2)\) 的。观察转移形式很像可以分治 NTT 的样子,但是又不是传统的分治 NTT 形式(因为贡献形式是 \(f(i) = \sum f(j) g(a_i - j)\))。不妨先尝试 cdq,每次计算 \([l, mid] \to [mid + 1, r]\) 的贡献。
首先 \(a_j \ne a_i\) 的限制是好解决的,可以直接把 \(a_j = a_i\) 的情况减掉,就是分治过程中对于 \(i \in [l, mid], a_i = a_{mid}\) 的所有 \(i\) 转移到每个 \(j \in [mid + 1, r], a_j = a_{mid}\) 的每个 \(j\) 贡献相同,直接相加即可。
然后我们现在希望处理出 \(g_i = \sum\limits_{j = 0}^{i - 1} f_j (a_i - 1 - j)!\) 和 \(h_i = \sum\limits_{j = 0}^{i - 1} f_j (a_i - 1 - j)! j\)。两者类似,只考虑如何计算 \(g_i\)。
相当于我们现在要让 \([l, mid]\) 的 \(f\) 卷上 \([a_{mid + 1} - 1 - mid, a_r - 1 - l]\) 的阶乘,贡献到一个数组 \(c\),那么 \(g_i\) 得到的贡献是 \(c_{a_i - 1}\)。
因为 \(a\) 序列单调不降且值域为 \([0, n]\),所以复杂度和普通的分治 NTT 一样,是 \(O(n \log^2 n)\)。
code
// Problem: E. Yet Yet Another Permutation Problem
// Contest: Codeforces - Codeforces Round 930 (Div. 1)
// URL: https://codeforces.com/contest/1936/problem/E
// Memory Limit: 1024 MB
// Time Limit: 5000 ms
//
// Powered by CP Editor (https://cpeditor.org)
#include <bits/stdc++.h>
#define pb emplace_back
#define fst first
#define scd second
#define mkp make_pair
#define mems(a, x) memset((a), (x), sizeof(a))
using namespace std;
typedef long long ll;
typedef double db;
typedef unsigned long long ull;
typedef long double ldb;
typedef pair<ll, ll> pii;
const int maxn = 1000100;
int n, a[maxn];
namespace atcoder {
namespace internal {
// @param m `1 <= m`
// @return x mod m
constexpr long long safe_mod(long long x, long long m) {
x %= m;
if (x < 0) x += m;
return x;
}
// Fast modular multiplication by barrett reduction
// Reference: https://en.wikipedia.org/wiki/Barrett_reduction
// NOTE: reconsider after Ice Lake
// @param n `0 <= n`
// @param m `1 <= m`
// @return `(x ** n) % m`
constexpr long long pow_mod_constexpr(long long x, long long n, int m) {
if (m == 1) return 0;
unsigned int _m = (unsigned int)(m);
unsigned long long r = 1;
unsigned long long y = safe_mod(x, m);
while (n) {
if (n & 1) r = (r * y) % _m;
y = (y * y) % _m;
n >>= 1;
}
return r;
}
// Reference:
// M. Forisek and J. Jancina,
// Fast Primality Testing for Integers That Fit into a Machine Word
// @param n `0 <= n`
constexpr bool is_prime_constexpr(int n) {
if (n <= 1) return false;
if (n == 2 || n == 7 || n == 61) return true;
if (n % 2 == 0) return false;
long long d = n - 1;
while (d % 2 == 0) d /= 2;
constexpr long long bases[3] = {2, 7, 61};
for (long long a : bases) {
long long t = d;
long long y = pow_mod_constexpr(a, t, n);
while (t != n - 1 && y != 1 && y != n - 1) {
y = y * y % n;
t <<= 1;
}
if (y != n - 1 && t % 2 == 0) {
return false;
}
}
return true;
}
template <int n> constexpr bool is_prime = is_prime_constexpr(n);
// @param b `1 <= b`
// @return pair(g, x) s.t. g = gcd(a, b), xa = g (mod b), 0 <= x < b/g
constexpr std::pair<long long, long long> inv_gcd(long long a, long long b) {
a = safe_mod(a, b);
if (a == 0) return {b, 0};
// Contracts:
// [1] s - m0 * a = 0 (mod b)
// [2] t - m1 * a = 0 (mod b)
// [3] s * |m1| + t * |m0| <= b
long long s = b, t = a;
long long m0 = 0, m1 = 1;
while (t) {
long long u = s / t;
s -= t * u;
m0 -= m1 * u; // |m1 * u| <= |m1| * s <= b
// [3]:
// (s - t * u) * |m1| + t * |m0 - m1 * u|
// <= s * |m1| - t * u * |m1| + t * (|m0| + |m1| * u)
// = s * |m1| + t * |m0| <= b
auto tmp = s;
s = t;
t = tmp;
tmp = m0;
m0 = m1;
m1 = tmp;
}
// by [3]: |m0| <= b/g
// by g != b: |m0| < b/g
if (m0 < 0) m0 += b / s;
return {s, m0};
}
// Compile time primitive root
// @param m must be prime
// @return primitive root (and minimum in now)
constexpr int primitive_root_constexpr(int m) {
if (m == 2) return 1;
if (m == 167772161) return 3;
if (m == 469762049) return 3;
if (m == 754974721) return 11;
if (m == 998244353) return 3;
int divs[20] = {};
divs[0] = 2;
int cnt = 1;
int x = (m - 1) / 2;
while (x % 2 == 0) x /= 2;
for (int i = 3; (long long)(i)*i <= x; i += 2) {
if (x % i == 0) {
divs[cnt++] = i;
while (x % i == 0) {
x /= i;
}
}
}
if (x > 1) {
divs[cnt++] = x;
}
for (int g = 2;; g++) {
bool ok = true;
for (int i = 0; i < cnt; i++) {
if (pow_mod_constexpr(g, (m - 1) / divs[i], m) == 1) {
ok = false;
break;
}
}
if (ok) return g;
}
}
template <int m> constexpr int primitive_root = primitive_root_constexpr(m);
} // namespace internal
} // namespace atcoder
#include <cassert>
#include <numeric>
#include <type_traits>
#include <cassert>
#include <numeric>
#include <type_traits>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace atcoder {
namespace internal {
struct modint_base {};
struct static_modint_base : modint_base {};
template <class T> using is_modint = std::is_base_of<modint_base, T>;
template <class T> using is_modint_t = std::enable_if_t<is_modint<T>::value>;
} // namespace internal
template <int m, std::enable_if_t<(1 <= m)>* = nullptr>
struct static_modint : internal::static_modint_base {
using mint = static_modint;
public:
static constexpr int mod() { return m; }
static mint raw(int v) {
mint x;
x._v = v;
return x;
}
static_modint() : _v(0) {}
template <class T, enable_if_t<is_signed<T>::value>* = nullptr>
static_modint(T v) {
long long x = (long long)(v % (long long)(umod()));
if (x < 0) x += umod();
_v = (unsigned int)(x);
}
template <class T, enable_if_t<is_unsigned<T>::value>* = nullptr>
static_modint(T v) {
_v = (unsigned int)(v % umod());
}
static_modint(bool v) { _v = ((unsigned int)(v) % umod()); }
unsigned int val() const { return _v; }
mint& operator++() {
_v++;
if (_v == umod()) _v = 0;
return *this;
}
mint& operator--() {
if (_v == 0) _v = umod();
_v--;
return *this;
}
mint operator++(int) {
mint result = *this;
++*this;
return result;
}
mint operator--(int) {
mint result = *this;
--*this;
return result;
}
mint& operator+=(const mint& rhs) {
_v += rhs._v;
if (_v >= umod()) _v -= umod();
return *this;
}
mint& operator-=(const mint& rhs) {
_v -= rhs._v;
if (_v >= umod()) _v += umod();
return *this;
}
mint& operator*=(const mint& rhs) {
unsigned long long z = _v;
z *= rhs._v;
_v = (unsigned int)(z % umod());
return *this;
}
mint& operator/=(const mint& rhs) { return *this = *this * rhs.inv(); }
mint operator+() const { return *this; }
mint operator-() const { return mint() - *this; }
mint pow(long long n) const {
assert(0 <= n);
mint x = *this, r = 1;
while (n) {
if (n & 1) r *= x;
x *= x;
n >>= 1;
}
return r;
}
mint inv() const {
if (prime) {
assert(_v);
return pow(umod() - 2);
} else {
auto eg = internal::inv_gcd(_v, m);
assert(eg.first == 1);
return eg.second;
}
}
friend mint operator+(const mint& lhs, const mint& rhs) {
return mint(lhs) += rhs;
}
friend mint operator-(const mint& lhs, const mint& rhs) {
return mint(lhs) -= rhs;
}
friend mint operator*(const mint& lhs, const mint& rhs) {
return mint(lhs) *= rhs;
}
friend mint operator/(const mint& lhs, const mint& rhs) {
return mint(lhs) /= rhs;
}
friend bool operator==(const mint& lhs, const mint& rhs) {
return lhs._v == rhs._v;
}
friend bool operator!=(const mint& lhs, const mint& rhs) {
return lhs._v != rhs._v;
}
private:
unsigned int _v;
static constexpr unsigned int umod() { return m; }
static constexpr bool prime = internal::is_prime<m>;
};
using modint998244353 = static_modint<998244353>;
using modint1000000007 = static_modint<1000000007>;
namespace internal {
template <class T>
using is_static_modint = std::is_base_of<internal::static_modint_base, T>;
template <class T>
using is_static_modint_t = std::enable_if_t<is_static_modint<T>::value>;
} // namespace internal
} // namespace atcoder
#include <algorithm>
#include <array>
#ifdef _MSC_VER
#include <intrin.h>
#endif
namespace atcoder {
namespace internal {
// @param n `0 <= n`
// @return minimum non-negative `x` s.t. `n <= 2**x`
int ceil_pow2(int n) {
int x = 0;
while ((1U << x) < (unsigned int)(n)) x++;
return x;
}
// @param n `1 <= n`
// @return minimum non-negative `x` s.t. `(n & (1 << x)) != 0`
int bsf(unsigned int n) {
#ifdef _MSC_VER
unsigned long index;
_BitScanForward(&index, n);
return index;
#else
return __builtin_ctz(n);
#endif
}
} // namespace internal
} // namespace atcoder
#include <cassert>
#include <type_traits>
#include <vector>
namespace atcoder {
namespace internal {
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly(std::vector<mint>& a) {
static constexpr int g = internal::primitive_root<mint::mod()>;
int n = int(a.size());
int h = internal::ceil_pow2(n);
static bool first = true;
static mint sum_e[30]; // sum_e[i] = ies[0] * ... * ies[i - 1] * es[i]
if (first) {
first = false;
mint es[30], ies[30]; // es[i]^(2^(2+i)) == 1
int cnt2 = bsf(mint::mod() - 1);
mint e = mint(g).pow((mint::mod() - 1) >> cnt2), ie = e.inv();
for (int i = cnt2; i >= 2; i--) {
// e^(2^i) == 1
es[i - 2] = e;
ies[i - 2] = ie;
e *= e;
ie *= ie;
}
mint now = 1;
for (int i = 0; i <= cnt2 - 2; i++) {
sum_e[i] = es[i] * now;
now *= ies[i];
}
}
for (int ph = 1; ph <= h; ph++) {
int w = 1 << (ph - 1), p = 1 << (h - ph);
mint now = 1;
for (int s = 0; s < w; s++) {
int offset = s << (h - ph + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p] * now;
a[i + offset] = l + r;
a[i + offset + p] = l - r;
}
now *= sum_e[bsf(~(unsigned int)(s))];
}
}
}
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
void butterfly_inv(std::vector<mint>& a) {
static constexpr int g = internal::primitive_root<mint::mod()>;
int n = int(a.size());
int h = internal::ceil_pow2(n);
static bool first = true;
static mint sum_ie[30]; // sum_ie[i] = es[0] * ... * es[i - 1] * ies[i]
if (first) {
first = false;
mint es[30], ies[30]; // es[i]^(2^(2+i)) == 1
int cnt2 = bsf(mint::mod() - 1);
mint e = mint(g).pow((mint::mod() - 1) >> cnt2), ie = e.inv();
for (int i = cnt2; i >= 2; i--) {
// e^(2^i) == 1
es[i - 2] = e;
ies[i - 2] = ie;
e *= e;
ie *= ie;
}
mint now = 1;
for (int i = 0; i <= cnt2 - 2; i++) {
sum_ie[i] = ies[i] * now;
now *= es[i];
}
}
for (int ph = h; ph >= 1; ph--) {
int w = 1 << (ph - 1), p = 1 << (h - ph);
mint inow = 1;
for (int s = 0; s < w; s++) {
int offset = s << (h - ph + 1);
for (int i = 0; i < p; i++) {
auto l = a[i + offset];
auto r = a[i + offset + p];
a[i + offset] = l + r;
a[i + offset + p] =
(unsigned long long)(mint::mod() + l.val() - r.val()) *
inow.val();
}
inow *= sum_ie[bsf(~(unsigned int)(s))];
}
}
}
} // namespace internal
template <class mint, internal::is_static_modint_t<mint>* = nullptr>
std::vector<mint> convolution(std::vector<mint> a, std::vector<mint> b) {
int n = int(a.size()), m = int(b.size());
if (!n || !m) return {};
if (std::min(n, m) <= 60) {
if (n < m) {
std::swap(n, m);
std::swap(a, b);
}
std::vector<mint> ans(n + m - 1);
for (int i = 0; i < n; i++) {
for (int j = 0; j < m; j++) {
ans[i + j] += a[i] * b[j];
}
}
return ans;
}
int z = 1 << internal::ceil_pow2(n + m - 1);
a.resize(z);
internal::butterfly(a);
b.resize(z);
internal::butterfly(b);
for (int i = 0; i < z; i++) {
a[i] *= b[i];
}
internal::butterfly_inv(a);
a.resize(n + m - 1);
mint iz = mint(z).inv();
for (int i = 0; i < n + m - 1; i++) a[i] *= iz;
return a;
}
template <unsigned int mod = 998244353, class T, enable_if_t<is_integral<T>::value>* = nullptr>
std::vector<T> convolution(const std::vector<T>& a, const std::vector<T>& b) {
int n = int(a.size()), m = int(b.size());
if (!n || !m) return {};
using mint = static_modint<mod>;
std::vector<mint> a2(n), b2(m);
for (int i = 0; i < n; i++) {
a2[i] = mint(a[i]);
}
for (int i = 0; i < m; i++) {
b2[i] = mint(b[i]);
}
auto c2 = convolution(move(a2), move(b2));
std::vector<T> c(n + m - 1);
for (int i = 0; i < n + m - 1; i++) {
c[i] = c2[i].val();
}
return c;
}
std::vector<long long> convolution_ll(const std::vector<long long>& a,
const std::vector<long long>& b) {
int n = int(a.size()), m = int(b.size());
if (!n || !m) return {};
static constexpr unsigned long long MOD1 = 754974721; // 2^24
static constexpr unsigned long long MOD2 = 167772161; // 2^25
static constexpr unsigned long long MOD3 = 469762049; // 2^26
static constexpr unsigned long long M2M3 = MOD2 * MOD3;
static constexpr unsigned long long M1M3 = MOD1 * MOD3;
static constexpr unsigned long long M1M2 = MOD1 * MOD2;
static constexpr unsigned long long M1M2M3 = MOD1 * MOD2 * MOD3;
static constexpr unsigned long long i1 =
internal::inv_gcd(MOD2 * MOD3, MOD1).second;
static constexpr unsigned long long i2 =
internal::inv_gcd(MOD1 * MOD3, MOD2).second;
static constexpr unsigned long long i3 =
internal::inv_gcd(MOD1 * MOD2, MOD3).second;
auto c1 = convolution<MOD1>(a, b);
auto c2 = convolution<MOD2>(a, b);
auto c3 = convolution<MOD3>(a, b);
std::vector<long long> c(n + m - 1);
for (int i = 0; i < n + m - 1; i++) {
unsigned long long x = 0;
x += (c1[i] * i1) % MOD1 * M2M3;
x += (c2[i] * i2) % MOD2 * M1M3;
x += (c3[i] * i3) % MOD3 * M1M2;
// B = 2^63, -B <= x, r(real value) < B
// (x, x - M, x - 2M, or x - 3M) = r (mod 2B)
// r = c1[i] (mod MOD1)
// focus on MOD1
// r = x, x - M', x - 2M', x - 3M' (M' = M % 2^64) (mod 2B)
// r = x,
// x - M' + (0 or 2B),
// x - 2M' + (0, 2B or 4B),
// x - 3M' + (0, 2B, 4B or 6B) (without mod!)
// (r - x) = 0, (0)
// - M' + (0 or 2B), (1)
// -2M' + (0 or 2B or 4B), (2)
// -3M' + (0 or 2B or 4B or 6B) (3) (mod MOD1)
// we checked that
// ((1) mod MOD1) mod 5 = 2
// ((2) mod MOD1) mod 5 = 3
// ((3) mod MOD1) mod 5 = 4
long long diff =
c1[i] - internal::safe_mod((long long)(x), (long long)(MOD1));
if (diff < 0) diff += MOD1;
static constexpr unsigned long long offset[5] = {
0, 0, M1M2M3, 2 * M1M2M3, 3 * M1M2M3};
x -= offset[diff % 5];
c[i] = x;
}
return c;
}
} // namespace atcoder
using atcoder::modint998244353;
using atcoder::convolution;
typedef modint998244353 mint;
typedef vector<mint> poly;
mint fac[maxn], ifac[maxn], f[maxn], g[maxn], h[maxn];
void cdq(int l, int r) {
if (l == r) {
if (l) {
if (a[l] == a[l - 1]) {
f[l] = -g[l] * ifac[a[l] - l];
} else {
f[l] = -g[l] * ifac[a[l] - l] * l + h[l] * ifac[a[l] - l];
}
}
return;
}
int mid = (l + r) >> 1;
cdq(l, mid);
if (a[mid] == a[mid + 1]) {
mint s1 = 0, s2 = 0;
for (int i = mid; i >= l && a[i] == a[mid]; --i) {
s1 += f[i] * fac[a[i] - 1 - i];
s2 += f[i] * fac[a[i] - 1 - i] * i;
}
for (int i = mid + 1; i <= r && a[i] == a[mid]; ++i) {
g[i] -= s1;
h[i] -= s2;
}
}
poly A(mid - l + 1), B(a[r] - l - (a[mid + 1] - mid) + 1);
for (int i = l; i <= mid; ++i) {
A[i - l] = f[i];
}
for (int i = a[mid + 1] - 1 - mid; i <= a[r] - 1 - l; ++i) {
B[i - (a[mid + 1] - 1 - mid)] = fac[i];
}
poly res = convolution(A, B);
for (int i = mid + 1; i <= r; ++i) {
int j = a[i] - 1 - l - (a[mid + 1] - 1 - mid);
assert(j >= 0 && j < (int)res.size());
g[i] += res[j];
}
A = poly(mid - l + 1);
for (int i = l; i <= mid; ++i) {
A[i - l] = f[i] * i;
}
res = convolution(A, B);
for (int i = mid + 1; i <= r; ++i) {
int j = a[i] - 1 - l - (a[mid + 1] - 1 - mid);
assert(j >= 0 && j < (int)res.size());
h[i] += res[j];
}
cdq(mid + 1, r);
}
void solve() {
scanf("%d", &n);
fac[0] = 1;
for (int i = 1; i <= n; ++i) {
scanf("%d", &a[i]);
a[i] = max(a[i - 1], a[i]);
fac[i] = fac[i - 1] * i;
}
ifac[n] = fac[n].inv();
for (int i = n - 1; ~i; --i) {
ifac[i] = ifac[i + 1] * (i + 1);
}
for (int i = 0; i <= n + 3; ++i) {
f[i] = g[i] = h[i] = 0;
}
f[0] = 1;
cdq(0, n - 1);
mint ans = fac[n];
for (int i = 1; i < n; ++i) {
// f[i] = 0;
// for (int j = 0; j < i; ++j) {
// if (a[j] == a[i]) {
// continue;
// }
// f[i] = (f[i] - f[j] * fac[a[i] - 1 - j] % mod * ifac[a[i] - i] % mod * (a[i] == a[i - 1] ? 1 : i - j) % mod + mod) % mod;
// }
ans += f[i] * fac[n - i];
}
printf("%d\n", ans.val());
}
int main() {
int T = 1;
scanf("%d", &T);
while (T--) {
solve();
}
return 0;
}