test/verify/yosupo-subset-convolution.test.cpp

View this file on GitHub
Last update: 2024-04-24 02:34:04+09:00
Problem: https://judge.yosupo.jp/problem/subset_convolution

Depends on

Code

// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/subset_convolution

#include "../../template/template.hpp"

#include "../../math/fft/subset-convolution.hpp"

#include "../../math/combinatorics/montgomery-mod-int.hpp"

#include "../../other/scanner.hpp"
#include "../../other/printer.hpp"

using mint = modint998244353;

int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  vector< mint > f(1 << N), g(1 << N);
  for(auto &a : f) {
    int x;
    in.read(x);
    a = x;
  }
  for(auto &a : g) {
    int x;
    in.read(x);
    a = x;
  }
  auto h = SubsetConvolution< mint, 20 >::multiply(f, g);
  for(auto &a : h) {
    out.write(a.val());
    out.write(' ');
  }
  out.writeln();
}

#line 1 "test/verify/yosupo-subset-convolution.test.cpp"
// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/subset_convolution

#line 1 "template/template.hpp"
#include<bits/stdc++.h>

using namespace std;

// Shorthand for the 64-bit signed integer type.
using int64 = long long;

// Large constants: infll is 2^62 - 1 and inf is 2^30 - 1.
// NOTE(review): presumably used as "infinity" sentinels; both leave headroom so
// that adding two of them does not overflow their type.
const int64 infll = (1LL << 62) - 1;
const int inf = (1 << 30) - 1;

// Configures the standard streams when the global object `iosetup` is constructed.
struct IoSetup {
  IoSetup() {
    // Detach cin from cout and switch off synchronisation with C stdio.
    cin.tie(nullptr);
    ios::sync_with_stdio(false);

    // Fixed-point notation with 10 digits after the decimal point on cout and cerr.
    cout.setf(ios::fixed, ios::floatfield);
    cout.precision(10);
    cerr.setf(ios::fixed, ios::floatfield);
    cerr.precision(10);
  }
} iosetup;

// Writes a pair as "first second" (single space in between, nothing after).
template< typename T1, typename T2 >
ostream &operator<<(ostream &os, const pair< T1, T2 >& p) {
  const auto &[head, tail] = p;
  os << head;
  os << " ";
  os << tail;
  return os;
}

// Reads the two members of a pair, first then second, from the stream.
template< typename T1, typename T2 >
istream &operator>>(istream &is, pair< T1, T2 > &p) {
  auto &[head, tail] = p;
  is >> head;
  is >> tail;
  return is;
}

// Writes the elements of a vector separated by single spaces, with no trailing
// separator; an empty vector writes nothing.
template< typename T >
ostream &operator<<(ostream &os, const vector< T > &v) {
  const size_t total = v.size();
  size_t written = 0;
  for(const auto &elem : v) {
    os << elem;
    ++written;
    // An empty string is emitted after the last element, a space otherwise.
    os << (written == total ? "" : " ");
  }
  return os;
}

// Reads exactly v.size() values from the stream into the existing elements of v.
template< typename T >
istream &operator>>(istream &is, vector< T > &v) {
  for(auto it = v.begin(); it != v.end(); ++it) {
    T &slot = *it;
    is >> slot;
  }
  return is;
}

// Raises a to b when a < b; returns true exactly when a was changed.
template< typename T1, typename T2 >
inline bool chmax(T1 &a, T2 b) {
  if(a < b) {
    a = b;
    return true;
  }
  return false;
}

// Lowers a to b when a > b; returns true exactly when a was changed.
template< typename T1, typename T2 >
inline bool chmin(T1 &a, T2 b) {
  if(a > b) {
    a = b;
    return true;
  }
  return false;
}

// Base case of make_v: a one-dimensional vector of `a` value-initialised elements.
template< typename T = int64 >
vector< T > make_v(size_t a) {
  vector< T > row(a);
  return row;
}

// Recursive case of make_v: builds the (k-1)-dimensional vector described by
// `ts...` once and replicates it `a` times to form the outer dimension.
template< typename T, typename... Ts >
auto make_v(size_t a, Ts... ts) {
  auto inner = make_v< T >(ts...);
  using Inner = decltype(inner);
  return vector< Inner >(a, inner);
}

// Leaf case: T is not a class type, so the value is assigned directly.
template< typename T, typename V >
enable_if_t< !is_class< T >::value > fill_v(T &t, const V &v) {
  t = v;
}

// Container case: T is a class type; every element is filled recursively, so
// nested containers end up with v in each innermost slot.
template< typename T, typename V >
enable_if_t< is_class< T >::value > fill_v(T &t, const V &v) {
  for(auto it = t.begin(); it != t.end(); ++it) {
    auto &elem = *it;
    fill_v(elem, v);
  }
}

/**
 * @brief Wrapper that lets a callable refer to itself.
 *
 * FixPoint derives from the callable F. Calling the wrapper forwards to
 * F::operator() with the wrapper itself inserted as the first argument, so the
 * wrapped callable can recurse through that first parameter.
 */
template< typename F >
struct FixPoint : F {
  // Builds the wrapper from a temporary callable.
  explicit FixPoint(F &&f) : F(forward< F >(f)) {}

  // Builds the wrapper by copying a named callable.
  explicit FixPoint(const F &f) : F(f) {}

  template< typename... Args >
  decltype(auto) operator()(Args &&... args) const {
    return F::operator()(*this, forward< Args >(args)...);
  }
};

/**
 * @brief Creates a FixPoint around the given callable.
 *
 * The callable type is decayed before it is used as the base class. Without
 * that, passing a named lambda would deduce F as a reference type and the
 * wrapper (which inherits from F) could not be instantiated.
 *
 * @param f callable whose first parameter receives the wrapper itself
 * @return a FixPoint holding a moved (rvalue) or copied (lvalue) callable
 */
template< typename F >
inline decltype(auto) MFP(F &&f) {
  return FixPoint< decay_t< F > >{forward< F >(f)};
}
#line 4 "test/verify/yosupo-subset-convolution.test.cpp"

#line 1 "math/fft/subset-convolution.hpp"
/**
 * @brief Subset Convolution
 *
 * For two sequences f and g indexed by bitmasks 0 .. n-1 (n a power of two),
 * multiply() returns h with h[S] = sum over T subset of S of f[T] * g[S \ T].
 *
 * Each entry is lifted to an array of s+1 coefficients indexed by popcount
 * ("rank"); the ranked arrays are zeta-transformed, multiplied pointwise as
 * truncated polynomials, Moebius-transformed back, and the coefficient at
 * rank popcount(S) is read off.
 *
 * @tparam Mint value type; must support default construction, +=, -= and *
 * @tparam _s   maximum number of bits; every sequence length must be <= 2^_s
 */
template< typename Mint, int _s >
struct SubsetConvolution {
  using fps = array< Mint, _s + 1 >;
  // pop_count[i] is the number of set bits of i, filled lazily by init().
  static array< int, (1 << _s) > pop_count;
  static constexpr int s = _s;

  SubsetConvolution() = default;

  // Fills pop_count on first use. The last entry equals s once filled, so a
  // zero there means the table has not been built yet.
  static void init() {
    if(pop_count.back() == 0) {
      pop_count[0] = 0;
      for(int i = 1; i < (1 << s); i++) {
        // i - (i & -i) clears the lowest set bit of i.
        pop_count[i] = pop_count[i - (i & -i)] + 1;
      }
    }
  }

  // True when n is zero or a power of two that fits into pop_count (n <= 2^s).
  static bool valid_size(size_t n) {
    return (n & (n - 1)) == 0 && n <= (size_t(1) << s);
  }

  // f[i] += g[i] for the ranks 0 .. d-1.
  static inline void add(fps &f, const fps &g, int d) {
    for(int i = 0; i < d; i++) {
      f[i] += g[i];
    }
  }

  // f[i] -= g[i] for the ranks d .. s.
  static inline void sub(fps &f, const fps &g, int d) {
    for(int i = d; i <= s; i++) {
      f[i] -= g[i];
    }
  }

  // In-place ranked zeta transform: every mask accumulates its subsets, but
  // only for ranks strictly below the popcount of the receiving mask.
  static void zeta_transform(vector< fps > &F) {
    assert(valid_size(F.size()));
    const int n = (int) F.size();
    init();
    for(int i = 1; i < n; i <<= 1) {
      for(int j = 0; j < n; j += i << 1) {
        for(int k = 0; k < i; k++) {
          add(F[j + k + i], F[j + k], pop_count[j + k + i]);
        }
      }
    }
  }

  // In-place ranked Moebius transform: undoes the subset accumulation for the
  // ranks at or above the popcount of the receiving mask.
  static void moebius_transform(vector< fps > &F) {
    assert(valid_size(F.size()));
    const int n = (int) F.size();
    init();
    for(int i = 1; i < n; i <<= 1) {
      for(int j = 0; j < n; j += i << 1) {
        for(int k = 0; k < i; k++) {
          sub(F[j + k + i], F[j + k], pop_count[j + k + i]);
        }
      }
    }
  }

  // Places f[i] at rank popcount(i) of an otherwise zero coefficient array.
  static vector< fps > lift(const vector< Mint > &f) {
    // Without this check, masks >= 2^s would index past the end of pop_count.
    assert(valid_size(f.size()));
    const int n = (int) f.size();
    init();
    vector< fps > F(n);
    for(int i = 0; i < n; i++) {
      fill(begin(F[i]), end(F[i]), Mint());
      F[i][pop_count[i]] = f[i];
    }
    return F;
  }

  // Extracts the coefficient at rank popcount(i) for every mask i.
  static vector< Mint > unlift(const vector< fps > &F) {
    assert(valid_size(F.size()));
    const int n = (int) F.size();
    init();
    vector< Mint > f(n);
    for(int i = 0; i < n; i++) {
      f[i] = F[i][pop_count[i]];
    }
    return f;
  }

  // Pointwise product F[i] *= G[i] as polynomials truncated at degree log2(n).
  static void prod(vector< fps > &F, const vector< fps > &G) {
    // G is indexed with F's indices, so both must have the same length.
    assert(F.size() == G.size());
    assert(valid_size(F.size()));
    int n = (int) F.size();
    // Nothing to multiply, and __builtin_ctz(0) has an undefined result.
    if(n == 0) return;
    int d = __builtin_ctz(n);
    for(int i = 0; i < n; i++) {
      fps h{};
      for(int j = 0; j <= d; j++) {
        for(int k = 0; k <= d - j; k++) {
          h[j + k] += F[i][j] * G[i][k];
        }
      }
      F[i] = move(h);
    }
  }

  /**
   * @brief Subset convolution of f and g.
   * @param f first sequence; its length must be 0 or a power of two <= 2^s
   * @param g second sequence; must have the same length as f
   * @return h with h[S] = sum over T subset of S of f[T] * g[S \ T]
   */
  static vector< Mint > multiply(const vector< Mint > &f, const vector< Mint > &g) {
    assert(f.size() == g.size());
    auto F = lift(f), G = lift(g);
    zeta_transform(F);
    zeta_transform(G);
    prod(F, G);
    moebius_transform(F);
    return unlift(F);
  }
};

template< typename Mint, int s >
array< int, (1 << s) > SubsetConvolution< Mint, s >::pop_count;
#line 6 "test/verify/yosupo-subset-convolution.test.cpp"

#line 2 "math/combinatorics/montgomery-mod-int.hpp"

/**
 * @brief Integer modulo mod_ stored in Montgomery form.
 *
 * The internal value x is kept below 2 * mod_ and only normalised when it is
 * compared or converted back with val(). mod_ must be odd and below 2^30.
 *
 * @tparam mod_ the modulus
 * @tparam fast when true, the constructor skips reducing its argument modulo mod_
 */
template< uint32_t mod_, bool fast = false >
struct MontgomeryModInt {
private:
  using mint = MontgomeryModInt;
  using i32 = int32_t;
  using i64 = int64_t;
  using u32 = uint32_t;
  using u64 = uint64_t;

  // Inverse of mod_ modulo 2^32, refined four times; verified by the
  // static_assert on r below.
  static constexpr u32 get_r() {
    u32 inverse = mod_;
    i32 round = 0;
    while (round < 4) {
      inverse *= 2 - mod_ * inverse;
      ++round;
    }
    return inverse;
  }

  static constexpr u32 r = get_r();

  // (2^64 - mod_) % mod_, i.e. 2^64 reduced modulo mod_.
  static constexpr u32 n2 = -u64(mod_) % mod_;

  static_assert(r * mod_ == 1, "invalid, r * mod != 1");
  static_assert(mod_ < (1 << 30), "invalid, mod >= 2 ^ 30");
  static_assert((mod_ & 1) == 1, "invalid, mod % 2 == 0");

  u32 x;

public:
  MontgomeryModInt() : x(0) {}

  // Converts an integer into Montgomery form; unless `fast` is set the
  // argument is first brought into the range (0, 2 * mod_).
  MontgomeryModInt(const i64 &a) {
    const i64 residue = fast ? a : (a % mod() + mod());
    x = reduce(u64(residue) * n2);
  }

  // Montgomery reduction of a 64-bit product.
  static constexpr u32 reduce(const u64 &b) {
    const u32 high = u32(b >> 32);
    const u32 quotient = u32(b) * r;
    const u32 correction = u32((u64(quotient) * mod()) >> 32);
    return high + mod() - correction;
  }

  mint &operator+=(const mint &p) {
    x += p.x - 2 * mod();
    // A wrapped (negative as signed) result is pulled back by 2 * mod_.
    if (i32(x) < 0) x += 2 * mod();
    return *this;
  }

  mint &operator-=(const mint &p) {
    x -= p.x;
    if (i32(x) < 0) x += 2 * mod();
    return *this;
  }

  mint &operator*=(const mint &p) {
    x = reduce(u64(x) * p.x);
    return *this;
  }

  mint &operator/=(const mint &p) {
    return *this *= p.inv();
  }

  mint operator-() const {
    mint negated;
    negated -= *this;
    return negated;
  }

  mint operator+(const mint &p) const {
    mint out(*this);
    out += p;
    return out;
  }

  mint operator-(const mint &p) const {
    mint out(*this);
    out -= p;
    return out;
  }

  mint operator*(const mint &p) const {
    mint out(*this);
    out *= p;
    return out;
  }

  mint operator/(const mint &p) const {
    mint out(*this);
    out /= p;
    return out;
  }

  // Both sides are normalised below mod_ before being compared.
  bool operator==(const mint &p) const {
    const u32 lhs = x >= mod() ? x - mod() : x;
    const u32 rhs = p.x >= mod() ? p.x - mod() : p.x;
    return lhs == rhs;
  }

  bool operator!=(const mint &p) const {
    return !(*this == p);
  }

  // Ordinary representative in [0, mod_).
  u32 val() const {
    const u32 plain = reduce(x);
    if (plain >= mod()) return plain - mod();
    return plain;
  }

  // Exponentiation by squaring.
  mint pow(u64 n) const {
    mint result(1);
    mint base(*this);
    for (; n > 0; n >>= 1) {
      if (n & 1) result *= base;
      base *= base;
    }
    return result;
  }

  // Computed as the (mod_ - 2)-th power.
  mint inv() const {
    return pow(mod() - 2);
  }

  friend ostream &operator<<(ostream &os, const mint &p) {
    return os << p.val();
  }

  friend istream &operator>>(istream &is, mint &a) {
    i64 raw;
    is >> raw;
    a = mint(raw);
    return is;
  }

  static constexpr u32 mod() { return mod_; }
};

// Shorthand for MontgomeryModInt with the second template argument left at its
// default (fast = false).
template< uint32_t mod >
using modint = MontgomeryModInt< mod >;
// Ready-made types for the moduli 998244353 and 1000000007.
using modint998244353 = modint< 998244353 >;
using modint1000000007 = modint< 1000000007 >;
#line 8 "test/verify/yosupo-subset-convolution.test.cpp"

#line 1 "other/scanner.hpp"
/**
 * @brief Scanner (fast input)
 *
 * Buffered reader over a C FILE*. read() accepts any mix of integral
 * variables, strings and vectors of those; tokens are separated by characters
 * <= ' '.
 *
 * End of input is handled gracefully: an integer read at end of input yields
 * 0, and a string read yields whatever was collected so far (possibly empty).
 *
 * The object holds pointers into its own buffer, so it must not be copied.
 */
struct Scanner {
public:

  explicit Scanner(FILE *fp) : fp(fp) {}

  // Reads each argument in order.
  template< typename T, typename... E >
  void read(T &t, E &... e) {
    read_single(t);
    read(e...);
  }

private:
  static constexpr size_t line_size = 1 << 16;
  static constexpr size_t int_digits = 20;
  char line[line_size + 1] = {};
  FILE *fp = nullptr;
  // [st, ed) is the unread part of the buffer; *ed is always a NUL terminator.
  char *st = line;
  char *ed = line;

  // Terminates the variadic recursion of read().
  void read() {}

  static inline bool is_space(char c) {
    return c <= ' ';
  }

  // Moves the unread bytes to the front of the buffer and tops it up from the
  // file. Returns false when no new byte could be read (end of input or error).
  bool reread() {
    ptrdiff_t len = ed - st;
    memmove(line, st, len);
    char *tmp = line + len;
    const size_t got = fread(tmp, 1, line_size - len, fp);
    ed = tmp + got;
    *ed = 0;
    st = line;
    return got > 0;
  }

  // Advances st to the next non-space byte. On return st == ed holds only when
  // the input is exhausted.
  void skip_space() {
    while(true) {
      // Stop instead of refilling forever once the file has nothing left.
      if(st == ed && !reread()) return;
      while(*st && is_space(*st)) ++st;
      if(st != ed) return;
    }
  }

  template< typename T, enable_if_t< is_integral< T >::value, int > = 0 >
  void read_single(T &s) {
    skip_space();
    if(st == ed) {
      // End of input: consuming the terminator would move st past ed.
      s = 0;
      return;
    }
    // Make sure a whole number is buffered before parsing it.
    if(st + int_digits >= ed) reread();
    bool neg = false;
    if(is_signed< T >::value && *st == '-') {
      neg = true;
      ++st;
    }
    typename make_unsigned< T >::type y = *st++ - '0';
    while(*st >= '0') {
      y = 10 * y + *st++ - '0';
    }
    s = (neg ? -y : y);
  }

  template< typename T, enable_if_t< is_same< T, string >::value, int > = 0 >
  void read_single(T &s) {
    s = "";
    skip_space();
    while(true) {
      char *base = st;
      while(*st && !is_space(*st)) ++st;
      s += string(base, st);
      if(st != ed) return;
      // The token reached the end of the buffer: it continues only if more
      // input arrives; otherwise the input ended right after the token.
      if(!reread()) return;
    }
  }

  template< typename T >
  void read_single(vector< T > &s) {
    for(auto &d : s) read(d);
  }
};
#line 1 "other/printer.hpp"
/**
 * @brief Printer (fast output)
 *
 * Buffered writer over a C FILE*. Data is collected in an internal buffer and
 * handed to fwrite() when the buffer is nearly full, on flush(), and when the
 * object is destroyed.
 */
struct Printer {
public:
  explicit Printer(FILE *fp) : fp(fp) {}

  ~Printer() { flush(); }

  // Writes all arguments separated by single spaces. `f` tells whether a
  // separator has to precede the first argument of this call.
  template< bool f = false, typename T, typename... E >
  void write(const T &t, const E &... e) {
    if(f) {
      write_single(' ');
    }
    write_single(t);
    write< true >(e...);
  }

  // Same as write(), followed by a newline character.
  template< typename... T >
  void writeln(const T &...t) {
    write(t...);
    write_single('\n');
  }

  // Hands the buffered bytes to fwrite() and resets the buffer.
  void flush() {
    const size_t pending = st - line;
    fwrite(line, 1, pending, fp);
    st = line;
  }

private:
  FILE *fp = nullptr;
  static constexpr size_t line_size = 1 << 16;
  static constexpr size_t int_digits = 20;
  char line[line_size + 1] = {};
  // Next free position inside line.
  char *st = line;

  // Terminates the variadic recursion of write().
  template< bool f = false >
  void write() {}

  void write_single(const char &t) {
    char *const limit = line + line_size;
    if(st + 1 >= limit) flush();
    *st = t;
    ++st;
  }

  template< typename T, enable_if_t< is_integral< T >::value, int > = 0 >
  void write_single(T s) {
    // Reserve room for the longest integer before formatting in place.
    char *const limit = line + line_size;
    if(st + int_digits >= limit) flush();
    const auto result = to_chars(st, st + int_digits, s);
    st = result.ptr;
  }

  void write_single(const string &s) {
    for(size_t i = 0; i < s.size(); ++i) {
      write_single(s[i]);
    }
  }

  void write_single(const char *s) {
    for(const char *cursor = s; *cursor != 0; ++cursor) {
      write_single(*cursor);
    }
  }

  // Elements are separated by single spaces, with none after the last.
  template< typename T >
  void write_single(const vector< T > &s) {
    bool first = true;
    for(const auto &item : s) {
      if(!first) write_single(' ');
      first = false;
      write_single(item);
    }
  }
};
#line 11 "test/verify/yosupo-subset-convolution.test.cpp"

using mint = modint998244353;

int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  vector< mint > f(1 << N), g(1 << N);
  for(auto &a : f) {
    int x;
    in.read(x);
    a = x;
  }
  for(auto &a : g) {
    int x;
    in.read(x);
    a = x;
  }
  auto h = SubsetConvolution< mint, 20 >::multiply(f, g);
  for(auto &a : h) {
    out.write(a.val());
    out.write(' ');
  }
  out.writeln();
}

Test cases

Env	Name	Status	Elapsed	Memory
g++	example_00	AC	8 ms	8 MB
g++	hack01_00	AC	809 ms	192 MB
g++	max_random_00	AC	813 ms	192 MB
g++	max_random_01	AC	818 ms	192 MB
g++	max_random_02	AC	814 ms	192 MB
g++	random_00	AC	816 ms	192 MB
g++	random_01	AC	9 ms	8 MB
g++	random_02	AC	8 ms	8 MB
g++	small_00	AC	8 ms	8 MB
g++	small_01	AC	8 ms	8 MB
g++	small_02	AC	8 ms	8 MB
clang++	example_00	AC	8 ms	8 MB
clang++	hack01_00	AC	534 ms	192 MB
clang++	max_random_00	AC	553 ms	192 MB
clang++	max_random_01	AC	559 ms	192 MB
clang++	max_random_02	AC	548 ms	192 MB
clang++	random_00	AC	558 ms	192 MB
clang++	random_01	AC	9 ms	8 MB
clang++	random_02	AC	8 ms	8 MB
clang++	small_00	AC	8 ms	8 MB
clang++	small_01	AC	8 ms	8 MB
clang++	small_02	AC	8 ms	8 MB