考虑设 \(f_i\) 为 \(i\) 的答案,那么:
\[f_i = [x_i](1 + x)^A \prod\limits_{j = 2}^{i - 1} (1 + f_j x) \]这个东西其实是可以分治 FFT 的。具体地,设分治区间为 \([l, r]\),要求一个 \(r - l + 1\) 次多项式 \(\prod\limits_{i = l}^r (1 + f_i x)\)。递归的时候打个 tag 表示这个区间要整体乘的一个多项式。也就是对于前面 \([l, mid]\) 的部分要整体乘一个 \((1 + x)^A\),\([mid + 1, r]\) 的部分要整体乘一个 \((1 + x)^A \prod\limits_{j = mid + 1}^r (1 + f_j x)\)。注意这个 tag 的最低次数应该是 \(x^l\),因为 \(< l\) 的不会对 \(i \in [l, r]\) 的 \(f_i\) 产生贡献。然后就可以用一个长度为 \(r - l + 1\) 的 vector 来存 tag 了。
时间复杂度 \(O(n \log^2 n)\)。
code
// Problem: Ex - Alchemy
// Contest: AtCoder - AtCoder Beginner Contest 281
// URL: https://atcoder.jp/contests/abc281/tasks/abc281_h
// Memory Limit: 1024 MB
// Time Limit: 4000 ms
//
// Powered by CP Editor (https://cpeditor.org)
#include <bits/stdc++.h>
#define pb emplace_back
#define fst first
#define scd second
#define mems(a, x) memset((a), (x), sizeof(a))
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
typedef double db;
typedef long double ldb;
typedef pair<ll, ll> pii;
const int maxn = 3000100;
const ll mod = 998244353, G = 3;
inline ll qpow(ll b, ll p) {
ll res = 1;
while (p) {
if (p & 1) {
res = res * b % mod;
}
b = b * b % mod;
p >>= 1;
}
return res;
}
ll n, m, r[maxn], f[maxn], g[maxn];
typedef vector<ll> poly;
inline poly NTT(poly a, int op) {
int n = (int)a.size();
for (int i = 0; i < n; ++i) {
if (i < r[i]) {
swap(a[i], a[r[i]]);
}
}
for (int k = 1; k < n; k <<= 1) {
ll wn = qpow(op == 1 ? G : qpow(G, mod - 2), (mod - 1) / (k << 1));
for (int i = 0; i < n; i += (k << 1)) {
ll w = 1;
for (int j = 0; j < k; ++j, w = w * wn % mod) {
ll x = a[i + j], y = w * a[i + j + k] % mod;
a[i + j] = (x + y) % mod;
a[i + j + k] = (x - y + mod) % mod;
}
}
}
if (op == -1) {
ll inv = qpow(n, mod - 2);
for (int i = 0; i < n; ++i) {
a[i] = a[i] * inv % mod;
}
}
return a;
}
inline poly operator * (poly a, poly b) {
a = NTT(a, 1);
b = NTT(b, 1);
int n = (int)a.size();
for (int i = 0; i < n; ++i) {
a[i] = a[i] * b[i] % mod;
}
a = NTT(a, -1);
return a;
}
poly dfs(int l, int r, poly a) {
if (l == r) {
f[l] = a[0];
return (poly){1, a[0]};
}
int mid = (l + r) >> 1;
poly t = a;
t.resize(mid - l + 1);
poly L = dfs(l, mid, t);
int k = 0;
while ((1 << k) <= (int)a.size() + (int)L.size()) {
++k;
}
for (int i = 1; i < (1 << k); ++i) {
::r[i] = (::r[i >> 1] >> 1) | ((i & 1) << (k - 1));
}
poly A(1 << k), B(1 << k);
for (int i = 0; i < (int)a.size(); ++i) {
A[i] = a[i];
}
for (int i = 0; i < (int)L.size(); ++i) {
B[i] = L[i];
}
poly C = A * B;
t = poly(r - mid);
for (int i = mid + 1 - l; i <= r - l; ++i) {
t[i - (mid + 1 - l)] = C[i];
}
poly R = dfs(mid + 1, r, t);
k = 0;
while ((1 << k) <= r - l + 1) {
++k;
}
for (int i = 1; i < (1 << k); ++i) {
::r[i] = (::r[i >> 1] >> 1) | ((i & 1) << (k - 1));
}
A = B = poly(1 << k);
for (int i = 0; i < (int)L.size(); ++i) {
A[i] = L[i];
}
for (int i = 0; i < (int)R.size(); ++i) {
B[i] = R[i];
}
C = A * B;
poly res(r - l + 2);
for (int i = 0; i <= r - l + 1; ++i) {
res[i] = C[i];
}
return res;
}
void solve() {
scanf("%lld%lld", &n, &m);
if (n == 1) {
printf("%lld\n", m % mod);
return;
}
g[0] = 1;
for (int i = 1; i <= n; ++i) {
g[i] = g[i - 1] * (m - i + 1) % mod * qpow(i, mod - 2) % mod;
}
poly a;
for (int i = 2; i <= n; ++i) {
a.pb(g[i]);
}
dfs(2, n, a);
printf("%lld\n", f[n]);
}
int main() {
int T = 1;
// scanf("%d", &T);
while (T--) {
solve();
}
return 0;
}