考虑我们如何判定一个排列是否能成为最终答案。连边 \(i \to p_i\),设环数为 \(k\),那么最少交换次数为 \(n - k\)。那么充要条件是,每个环所有点的 \(c_i\) 相同,并且 \(n - k \le K\) 且 \(2 \mid (K - (n - k))\)。\(K\) 和 \(n - k\) 奇偶性相同是因为,如果交换 \(p_i, p_j\),其中 \(i \ne j\),那么环数恰好改变 \(1\)。这个的证明是平凡的,考虑它们原本在或者不在同一个环内即可。
也就是说现在要求出,每个环所有点的 \(c_i\) 相同,共形成了 \(k\) 个环的 \(p_i\) 的方案数。考虑一个 \(O(n^2)\) 的 dp,\(f_{i, j}\) 表示,考虑了 \(1 \sim i\),共形成了 \(j\) 个环的方案数。转移就是讨论 \(i\) 这个点是自环还是加到之前的环里面。设 \(a_i = \sum\limits_{j = 1}^{i - 1} [c_j = c_i]\),于是有:
\[f_{i, j} = f_{i - 1, j - 1} + f_{i - 1, j} \times a_i \]考虑优化。发现经过一次转移,\(f_{i, j}\) 可能转移到 \(f_{i + 1, j}\) 和 \(f_{i + 1, j + 1}\)。考虑构造多项式 \(g(x) = \prod\limits_{i = 1}^n (a_i + x)\),就是要求 \(g(x)\) 的第 \(1 \sim n\) 项。直接乘起来复杂度不对,考虑一个分治,分治区间 \([l, r]\) 表示计算 \(\prod\limits_{i = l}^r (a_i + x)\)。设 \(mid = \left\lfloor\frac{l + r}{2}\right\rfloor\),把 \([l, mid]\) 和 \([mid + 1, r]\) 的结果乘起来即可。时间复杂度 \(O(n \log^2 n)\)。
code
// Problem: Ex - Rearranging Problem
// Contest: AtCoder - AtCoder Beginner Contest 247
// URL: https://atcoder.jp/contests/abc247/tasks/abc247_h
// Memory Limit: 1024 MB
// Time Limit: 4000 ms
//
// Powered by CP Editor (https://cpeditor.org)
#include <bits/stdc++.h>
#define pb emplace_back
#define fst first
#define scd second
#define mems(a, x) memset((a), (x), sizeof(a))
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef double db;
typedef long double ldb;
typedef pair<ll, ll> pii;
const int maxn = 1000100;
const ll mod = 998244353, G = 3;
inline ll qpow(ll b, ll p) {
ll res = 1;
while (p) {
if (p & 1) {
res = res * b % mod;
}
b = b * b % mod;
p >>= 1;
}
return res;
}
ll n, m, a[maxn], b[maxn], r[maxn];
typedef vector<ll> poly;
inline poly NTT(poly a, int op) {
int n = (int)a.size();
for (int i = 0; i < n; ++i) {
if (i < r[i]) {
swap(a[i], a[r[i]]);
}
}
for (int k = 1; k < n; k <<= 1) {
ll wn = qpow(op == 1 ? G : qpow(G, mod - 2), (mod - 1) / (k << 1));
for (int i = 0; i < n; i += (k << 1)) {
ll w = 1;
for (int j = 0; j < k; ++j, w = w * wn % mod) {
ll x = a[i + j], y = w * a[i + j + k] % mod;
a[i + j] = (x + y) % mod;
a[i + j + k] = (x - y + mod) % mod;
}
}
}
if (op == -1) {
ll inv = qpow(n, mod - 2);
for (int i = 0; i < n; ++i) {
a[i] = a[i] * inv % mod;
}
}
return a;
}
inline poly operator * (poly a, poly b) {
a = NTT(a, 1);
b = NTT(b, 1);
int n = (int)a.size();
for (int i = 0; i < n; ++i) {
a[i] = a[i] * b[i] % mod;
}
a = NTT(a, -1);
return a;
}
poly dfs(int l, int r) {
if (l == r) {
return (poly){a[l], 1};
}
int mid = (l + r) >> 1;
poly L = dfs(l, mid), R = dfs(mid + 1, r);
int k = 0;
while ((1 << k) <= r - l + 1) {
++k;
}
for (int i = 1; i < (1 << k); ++i) {
::r[i] = (::r[i >> 1] >> 1) | ((i & 1) << (k - 1));
}
poly A(1 << k), B(1 << k);
for (int i = 0; i <= mid - l + 1; ++i) {
A[i] = L[i];
}
for (int i = 0; i <= r - mid; ++i) {
B[i] = R[i];
}
poly res = A * B;
res.resize(r - l + 2);
return res;
}
void solve() {
scanf("%lld%lld", &n, &m);
for (int i = 1, x; i <= n; ++i) {
scanf("%d", &x);
a[i] = (b[x]++);
}
poly res = dfs(1, n);
ll ans = 0;
for (int i = 1; i <= n; ++i) {
if (n - i <= m && (m - (n - i)) % 2 == 0) {
ans = (ans + res[i]) % mod;
}
}
printf("%lld\n", ans);
}
int main() {
int T = 1;
// scanf("%d", &T);
while (T--) {
solve();
}
return 0;
}