Luzhiled's Library

This documentation is automatically generated by online-judge-tools/verification-helper

View the Project on GitHub ei1333/library

:heavy_check_mark: test/verify/yosupo-bitwise-and-convolution-3.test.cpp

Depends on

Code

// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/bitwise_and_convolution

#include "../../template/template.hpp"

#include "../../math/fft/superset-zeta-moebius-transform-simd.hpp"

#include "../../other/scanner.hpp"
#include "../../other/printer.hpp"

// Prime modulus used for the convolution.
const int MOD = 998244353;

// Reads N and two sequences of length 2^N, computes their bitwise-AND
// convolution modulo MOD and prints the result on a single line.
int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  const int len = 1 << N;
  // Heap-backed storage: a variable-length array is a compiler extension in
  // C++ and would place both 2^N-element buffers on the stack.
  vector<int> a(len), b(len);
  in.read(a, b);
  bitwise_and_convolution_simd<MOD>(a.data(), b.data(), len);
  // The vector overload prints the elements separated by single spaces.
  out.writeln(a);
}
#line 1 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"
// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/bitwise_and_convolution

#line 1 "template/template.hpp"
#include <bits/stdc++.h>

using namespace std;

// Shorthand for the 64-bit signed integer type.
using int64 = long long;

// Large sentinel values: 2^62 - 1 for int64 and 2^30 - 1 for int.
// Adding two such sentinels still fits in the respective type.
const int64 infll = (1LL << 62) - 1;
const int inf = (1 << 30) - 1;

// Configures the standard streams once, when the global `iosetup` object is
// constructed: unties cin from cout, disables C stdio synchronisation and
// selects fixed notation with 10 fractional digits on cout and cerr.
struct IoSetup {
  IoSetup() {
    cin.tie(nullptr);
    ios::sync_with_stdio(false);
    cout.setf(ios::fixed, ios::floatfield);
    cout.precision(10);
    cerr.setf(ios::fixed, ios::floatfield);
    cerr.precision(10);
  }
} iosetup;

// Writes a pair as "first second" (single space, no trailing newline).
template <typename T1, typename T2>
ostream &operator<<(ostream &os, const pair<T1, T2> &p) {
  os << p.first;
  os << " ";
  os << p.second;
  return os;
}

// Reads a pair by extracting `first` and then `second` from the stream.
template <typename T1, typename T2>
istream &operator>>(istream &is, pair<T1, T2> &p) {
  is >> p.first;
  is >> p.second;
  return is;
}

// Writes the elements of a vector separated by single spaces, with no
// trailing separator. An empty vector writes nothing.
template <typename T>
ostream &operator<<(ostream &os, const vector<T> &v) {
  // Index with size_t so that the comparisons against v.size() neither mix
  // signed and unsigned operands nor truncate the size to int.
  for (size_t i = 0; i < v.size(); i++) {
    os << v[i] << (i + 1 != v.size() ? " " : "");
  }
  return os;
}

// Fills every existing element of the vector from the stream, front to back.
// The vector is not resized: exactly v.size() extractions are attempted.
template <typename T>
istream &operator>>(istream &is, vector<T> &v) {
  for (auto it = v.begin(); it != v.end(); ++it) {
    is >> *it;
  }
  return is;
}

// Raises `a` to `b` when `a < b`. Returns true if `a` was updated.
template <typename T1, typename T2>
inline bool chmax(T1 &a, T2 b) {
  if (a < b) {
    a = b;
    return true;
  }
  return false;
}

// Lowers `a` to `b` when `a > b`. Returns true if `a` was updated.
template <typename T1, typename T2>
inline bool chmin(T1 &a, T2 b) {
  if (a > b) {
    a = b;
    return true;
  }
  return false;
}

// Builds a one-dimensional vector of `a` value-initialised elements.
template <typename T = int64>
vector<T> make_v(size_t a) {
  vector<T> row(a);
  return row;
}

// Builds a nested vector: `a` copies of the vector produced by the remaining
// dimensions `ts...`. The element type T must be given explicitly here.
template <typename T, typename... Ts>
auto make_v(size_t a, Ts... ts) {
  auto inner = make_v<T>(ts...);
  return vector<decltype(inner)>(a, inner);
}

// Leaf case (T is not a class type): assigns `v` to the element itself.
template <typename T, typename V>
enable_if_t<!is_class<T>::value> fill_v(T &t, const V &v) {
  t = v;
}

// Container case (T is a class type): recurses into every element so that
// all leaves of an arbitrarily nested container end up equal to `v`.
template <typename T, typename V>
enable_if_t<is_class<T>::value> fill_v(T &t, const V &v) {
  for (auto &elem : t) {
    fill_v(elem, v);
  }
}

// Wraps a callable F so that it can call itself recursively: invoking the
// wrapper passes the wrapper itself as the first argument of F.
template <typename F>
struct FixPoint : F {
  // Takes ownership of a temporary callable.
  explicit FixPoint(F &&f) : F(forward<F>(f)) {}

  // Copies a named (lvalue) callable, so wrapping does not require a move.
  explicit FixPoint(const F &f) : F(f) {}

  template <typename... Args>
  decltype(auto) operator()(Args &&...args) const {
    return F::operator()(*this, forward<Args>(args)...);
  }
};

// Creates a FixPoint around `f`. The callable type is decayed so that both
// temporaries and named lambdas are accepted; without the decay an lvalue
// argument would try to instantiate FixPoint with a reference type as base.
template <typename F>
inline decltype(auto) MFP(F &&f) {
  return FixPoint<decay_t<F>>{forward<F>(f)};
}
#line 4 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"

#line 1 "math/fft/superset-zeta-moebius-transform-simd.hpp"
#include <immintrin.h>

/**
 * @brief Superset Zeta/Moebius Transform SIMD
 */

// In-place superset zeta transform of buf[0..n) modulo `mod`.
//
// For every bit i (1, 2, 4, ...) and every index x with that bit clear it
// performs buf[x] = (buf[x] + buf[x | i]) mod `mod`, so afterwards buf[s] is
// the sum, modulo `mod`, of the original values buf[t] over all t that
// contain s as a subset of bits.
//
// n must be a power of two (asserted; n == 0 is a no-op). The reduction is a
// single conditional subtraction on signed 32-bit lanes, so the entries are
// expected to lie in [0, mod) on entry and 2 * mod - 2 must fit in an int.
__attribute__((target("avx2"))) void superset_zeta_transform_simd(int *buf,
                                                                  int mod,
                                                                  int n) {
  assert((n & (n - 1)) == 0);
  // Broadcast constants: 8 lanes for the 256-bit path, 4 for the 128-bit one.
  const auto m_mod_one = _mm256_set1_epi32(mod - 1);
  const auto m_mod = _mm256_set1_epi32(mod);
  const auto m_mod_one2 = _mm_set1_epi32(mod - 1);
  const auto m_mod2 = _mm_set1_epi32(mod);
  for (int i = 1; i < n; i <<= 1) {
    for (int j = 0; j < n; j += i << 1) {
      if (i <= 2) {
        // Half-blocks of 1 or 2 elements are too short for a vector lane.
        for (int k = 0; k < i; k++) {
          buf[j + k] += buf[j + k + i];
          if (buf[j + k] >= mod) buf[j + k] -= mod;
        }
      } else if (i == 4) {
        // Exactly four elements per half-block: one 128-bit operation.
        for (int k = 0; k < i; k += 4) {
          auto a = _mm_loadu_si128(reinterpret_cast<__m128i *>(buf + j + k));
          auto b =
              _mm_loadu_si128(reinterpret_cast<__m128i *>(buf + j + k + i));
          a = _mm_add_epi32(a, b);
          // Subtract mod from the lanes where a > mod - 1, i.e. a >= mod.
          a = _mm_sub_epi32(
              a, _mm_and_si128(_mm_cmpgt_epi32(a, m_mod_one2), m_mod2));
          _mm_storeu_si128(reinterpret_cast<__m128i *>(buf + j + k), a);
        }
      } else {
        // Half-blocks of 8 or more elements: eight lanes at a time.
        for (int k = 0; k < i; k += 8) {
          auto a =
              _mm256_loadu_si256(reinterpret_cast<__m256i *>(buf + j + k));
          auto b = _mm256_loadu_si256(
              reinterpret_cast<__m256i *>(buf + j + k + i));
          a = _mm256_add_epi32(a, b);
          a = _mm256_sub_epi32(
              a, _mm256_and_si256(_mm256_cmpgt_epi32(a, m_mod_one), m_mod));
          _mm256_storeu_si256(reinterpret_cast<__m256i *>(buf + j + k), a);
        }
      }
    }
  }
}

// In-place superset Moebius transform of buf[0..n) modulo `mod`.
//
// For every bit and every index x with that bit clear it performs
// buf[x] = (buf[x] - buf[x | bit]) mod `mod`, which undoes the per-bit
// accumulation of the superset zeta transform.
//
// n must be a power of two (asserted). The reduction is a single conditional
// correction on signed 32-bit lanes, so entries are expected in [0, mod).
__attribute__((target("avx2"))) void superset_moebius_transform_simd(int *buf,
                                                                     int mod,
                                                                     int n) {
  assert((n & (n - 1)) == 0);
  const __m256i zero8 = _mm256_setzero_si256();
  const __m256i mod8 = _mm256_set1_epi32(mod);
  const __m128i zero4 = _mm_setzero_si128();
  const __m128i mod4 = _mm_set1_epi32(mod);
  for (int half = 1; half < n; half <<= 1) {
    const int stride = half << 1;
    if (half <= 2) {
      // Half-blocks of 1 or 2 elements: plain scalar arithmetic.
      for (int base = 0; base < n; base += stride) {
        for (int k = 0; k < half; k++) {
          int &lo = buf[base + k];
          lo += mod - buf[base + k + half];
          if (lo >= mod) lo -= mod;
        }
      }
    } else if (half == 4) {
      // One 128-bit operation covers the whole four-element half-block.
      for (int base = 0; base < n; base += stride) {
        __m128i *lo_ptr = reinterpret_cast<__m128i *>(buf + base);
        __m128i *hi_ptr = reinterpret_cast<__m128i *>(buf + base + half);
        __m128i diff =
            _mm_sub_epi32(_mm_loadu_si128(lo_ptr), _mm_loadu_si128(hi_ptr));
        // Add mod back to the lanes that went negative.
        const __m128i fix = _mm_and_si128(_mm_cmpgt_epi32(zero4, diff), mod4);
        _mm_storeu_si128(lo_ptr, _mm_add_epi32(diff, fix));
      }
    } else {
      // Half-blocks of 8 or more elements: eight lanes at a time.
      for (int base = 0; base < n; base += stride) {
        for (int k = 0; k < half; k += 8) {
          __m256i *lo_ptr = reinterpret_cast<__m256i *>(buf + base + k);
          __m256i *hi_ptr =
              reinterpret_cast<__m256i *>(buf + base + k + half);
          __m256i diff = _mm256_sub_epi32(_mm256_loadu_si256(lo_ptr),
                                          _mm256_loadu_si256(hi_ptr));
          const __m256i fix =
              _mm256_and_si256(_mm256_cmpgt_epi32(zero8, diff), mod8);
          _mm256_storeu_si256(lo_ptr, _mm256_add_epi32(diff, fix));
        }
      }
    }
  }
}

// Bitwise-AND convolution of f and g (both of length n) modulo `mod`.
//
// Both inputs are transformed in place with the superset zeta transform,
// multiplied element-wise into f, and f is transformed back with the
// superset Moebius transform. The result is stored in f, whose pointer is
// returned; g is left holding its zeta transform. n must be a power of two.
template <int mod>
int *bitwise_and_convolution_simd(int *f, int *g, int n) {
  assert((n & (n - 1)) == 0);
  superset_zeta_transform_simd(f, mod, n);
  superset_zeta_transform_simd(g, mod, n);
  for (int idx = 0; idx < n; ++idx) {
    // Widen to 64 bits before multiplying so the product cannot overflow.
    const unsigned long long product =
        static_cast<unsigned long long>(f[idx]) * g[idx];
    f[idx] = product % mod;
  }
  superset_moebius_transform_simd(f, mod, n);
  return f;
}
#line 6 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"

#line 1 "other/scanner.hpp"
/**
 * @brief Scanner (fast input)
 */
// Buffered token reader on top of a C FILE*. Supports integral types,
// std::string and vectors of those. Reading after the input is exhausted
// yields 0 for integers and an empty string instead of blocking forever.
struct Scanner {
 public:
  explicit Scanner(FILE *fp) : fp(fp) {}

  // Reads one value into each argument, left to right.
  template <typename T, typename... E>
  void read(T &t, E &...e) {
    read_single(t);
    read(e...);
  }

 private:
  static constexpr size_t line_size = 1 << 16;
  // Integer tokens are assumed to be at most this many characters long.
  static constexpr size_t int_digits = 20;
  // One extra byte so that a NUL terminator always fits after the data.
  char line[line_size + 1] = {};
  FILE *fp = nullptr;
  // [st, ed) is the unread part of the buffer; *ed is always NUL.
  char *st = line;
  char *ed = line;

  // Terminates the variadic recursion of read(t, e...).
  void read() {}

  // Every char value not greater than ' ' counts as a separator.
  static inline bool is_space(char c) { return c <= ' '; }

  // Moves the unread bytes to the front of the buffer and tops it up from
  // the file. When nothing more can be read, ed simply does not advance.
  void reread() {
    ptrdiff_t len = ed - st;
    memmove(line, st, len);
    char *tmp = line + len;
    ed = tmp + fread(tmp, 1, line_size - len, fp);
    *ed = 0;
    st = line;
  }

  // Advances st to the first byte of the next token.
  // Returns false when the input ends before another token is found.
  bool skip_space() {
    while (true) {
      if (st == ed) {
        reread();
        if (st == ed) return false;  // nothing left to read
      }
      while (*st && is_space(*st)) ++st;
      if (st != ed) return true;
    }
  }

  template <typename T, enable_if_t<is_integral<T>::value, int> = 0>
  void read_single(T &s) {
    if (!skip_space()) {
      s = 0;
      return;
    }
    // Make sure the whole token is inside the buffer before parsing it.
    if (st + int_digits >= ed) reread();
    bool neg = false;
    if (is_signed<T>::value && *st == '-') {
      neg = true;
      ++st;
    }
    // Accumulate in the unsigned counterpart so that negation cannot overflow.
    typename make_unsigned<T>::type y = *st++ - '0';
    // No upper-bound check on the digit: the input is assumed well-formed.
    while (*st >= '0') {
      y = 10 * y + *st++ - '0';
    }
    s = (neg ? -y : y);
  }

  template <typename T, enable_if_t<is_same<T, string>::value, int> = 0>
  void read_single(T &s) {
    s = "";
    if (!skip_space()) return;
    while (true) {
      char *base = st;
      while (*st && !is_space(*st)) ++st;
      s.append(base, st);
      // A separator was found inside the buffer: the token is complete.
      if (st != ed) return;
      // The token may continue in the next chunk of the file.
      reread();
      // No more data: the token ended exactly at the end of the input.
      if (st == ed) return;
    }
  }

  // Reads one value into every existing element of the vector.
  template <typename T>
  void read_single(vector<T> &s) {
    for (auto &d : s) read(d);
  }
};
#line 1 "other/printer.hpp"
/**
 * @brief Printer (fast output)
 */
// Buffered writer on top of a C FILE*. Values are formatted into an internal
// buffer that is handed to fwrite when it fills up, on flush() and on
// destruction.
struct Printer {
 public:
  explicit Printer(FILE *fp) : fp(fp) {}

  ~Printer() { flush(); }

  // Writes all arguments separated by single spaces. The template flag `f`
  // requests a leading space and is set for every argument after the first.
  template <bool f = false, typename T, typename... E>
  void write(const T &t, const E &...e) {
    if constexpr (f) write_single(' ');
    write_single(t);
    write<true>(e...);
  }

  // Same as write(), followed by a newline character.
  template <typename... T>
  void writeln(const T &...t) {
    write(t...);
    write_single('\n');
  }

  // Hands the buffered bytes to fwrite and empties the buffer.
  void flush() {
    const size_t pending = static_cast<size_t>(cur - buf);
    fwrite(buf, 1, pending, fp);
    cur = buf;
  }

 private:
  FILE *fp = nullptr;
  static constexpr size_t line_size = 1 << 16;
  // Room reserved in the buffer before formatting one integer.
  static constexpr size_t int_digits = 20;
  char buf[line_size + 1] = {};
  // Next free position inside buf.
  char *cur = buf;

  // Terminates the variadic recursion of write(t, e...).
  template <bool f = false>
  void write() {}

  void write_single(const char &t) {
    const bool nearly_full = cur + 1 >= buf + line_size;
    if (nearly_full) flush();
    *cur = t;
    ++cur;
  }

  template <typename T, enable_if_t<is_integral<T>::value, int> = 0>
  void write_single(T s) {
    const bool nearly_full = cur + int_digits >= buf + line_size;
    if (nearly_full) flush();
    cur = to_chars(cur, cur + int_digits, s).ptr;
  }

  void write_single(const string &s) {
    for (size_t pos = 0; pos < s.size(); ++pos) write_single(s[pos]);
  }

  void write_single(const char *s) {
    for (; *s != 0; ++s) write_single(*s);
  }

  // Elements are separated by single spaces, with no trailing separator.
  template <typename T>
  void write_single(const vector<T> &s) {
    bool first = true;
    for (const auto &item : s) {
      if (!first) write_single(' ');
      first = false;
      write_single(item);
    }
  }
};
#line 9 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"

// Prime modulus used for the convolution.
const int MOD = 998244353;

// Reads N and two sequences of length 2^N, computes their bitwise-AND
// convolution modulo MOD and prints the result on a single line.
int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  const int len = 1 << N;
  // Heap-backed storage: a variable-length array is a compiler extension in
  // C++ and would place both 2^N-element buffers on the stack.
  vector<int> a(len), b(len);
  in.read(a, b);
  bitwise_and_convolution_simd<MOD>(a.data(), b.data(), len);
  // The vector overload prints the elements separated by single spaces.
  out.writeln(a);
}

Test cases

Env Name Status Elapsed Memory
g++ example_00 :heavy_check_mark: AC 7 ms 4 MB
g++ max_random_00 :heavy_check_mark: AC 58 ms 12 MB
g++ max_random_01 :heavy_check_mark: AC 60 ms 12 MB
g++ max_random_02 :heavy_check_mark: AC 59 ms 12 MB
g++ random_00 :heavy_check_mark: AC 9 ms 4 MB
g++ random_01 :heavy_check_mark: AC 10 ms 4 MB
g++ random_02 :heavy_check_mark: AC 19 ms 6 MB
g++ small_00 :heavy_check_mark: AC 7 ms 4 MB
g++ small_01 :heavy_check_mark: AC 6 ms 4 MB
g++ small_02 :heavy_check_mark: AC 6 ms 4 MB
g++ tiny_00 :heavy_check_mark: AC 6 ms 4 MB
g++ tiny_01 :heavy_check_mark: AC 6 ms 4 MB
g++ tiny_02 :heavy_check_mark: AC 6 ms 4 MB
clang++ example_00 :heavy_check_mark: AC 7 ms 4 MB
clang++ max_random_00 :heavy_check_mark: AC 59 ms 12 MB
clang++ max_random_01 :heavy_check_mark: AC 60 ms 12 MB
clang++ max_random_02 :heavy_check_mark: AC 59 ms 12 MB
clang++ random_00 :heavy_check_mark: AC 9 ms 4 MB
clang++ random_01 :heavy_check_mark: AC 9 ms 4 MB
clang++ random_02 :heavy_check_mark: AC 19 ms 6 MB
clang++ small_00 :heavy_check_mark: AC 7 ms 4 MB
clang++ small_01 :heavy_check_mark: AC 6 ms 4 MB
clang++ small_02 :heavy_check_mark: AC 6 ms 4 MB
clang++ tiny_00 :heavy_check_mark: AC 6 ms 4 MB
clang++ tiny_01 :heavy_check_mark: AC 6 ms 4 MB
clang++ tiny_02 :heavy_check_mark: AC 6 ms 4 MB
Back to top page