This documentation is automatically generated by online-judge-tools/verification-helper
// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/bitwise_and_convolution
#include "../../template/template.hpp"
#include "../../math/fft/superset-zeta-moebius-transform-simd.hpp"
#include "../../other/scanner.hpp"
#include "../../other/printer.hpp"
const int MOD = 998244353;

/**
 * Solves the bitwise AND convolution problem: reads N followed by two
 * sequences of 2^N integers, convolves them modulo MOD and prints the
 * resulting sequence separated by single spaces on one line.
 */
int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  const int size = 1 << N;
  // Heap-backed storage instead of variable-length arrays: VLAs are not
  // standard C++, and two arrays of 2^N ints on the stack can exceed the
  // stack limit for large N.
  vector<int> a(size), b(size);
  for (int &x : a) in.read(x);
  for (int &x : b) in.read(x);
  // The convolution works in place and leaves the result in `a`.
  bitwise_and_convolution_simd< MOD >(a.data(), b.data(), size);
  for (int i = 0; i < size; i++) {
    if (i) out.write(' ');
    out.write(a[i]);
  }
  out.writeln();
}
#line 1 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"
// competitive-verifier: PROBLEM https://judge.yosupo.jp/problem/bitwise_and_convolution
#line 1 "template/template.hpp"
#include <bits/stdc++.h>
using namespace std;
// Shorthand for the signed 64-bit integer type.
using int64 = long long;
// "Infinity" sentinel for 64-bit values: 2^62 - 1.
const int64 infll = (1LL << 62) - 1;
// "Infinity" sentinel for 32-bit values: 2^30 - 1.
const int inf = (1 << 30) - 1;
// Configures the standard streams once, when the global `iosetup` object is
// constructed: cin is untied from cout, C stdio synchronisation is disabled,
// and cout/cerr print floating-point values in fixed notation with 10 digits
// after the decimal point.
struct IoSetup {
  IoSetup() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    ostream *const streams[] = {&cout, &cerr};
    for (ostream *stream : streams) {
      stream->setf(ios::fixed, ios::floatfield);
      stream->precision(10);
    }
  }
} iosetup;
// Writes a pair as "first second" (single space in between).
template <typename T1, typename T2>
ostream &operator<<(ostream &os, const pair<T1, T2> &p) {
  const auto &head = p.first;
  const auto &tail = p.second;
  os << head << " " << tail;
  return os;
}
// Reads a pair by extracting `first` and then `second` from the stream.
template <typename T1, typename T2>
istream &operator>>(istream &is, pair<T1, T2> &p) {
  auto &head = p.first;
  auto &tail = p.second;
  is >> head >> tail;
  return is;
}
// Writes the elements of a vector separated by single spaces, with no
// trailing space after the last element.
template <typename T>
ostream &operator<<(ostream &os, const vector<T> &v) {
  const int count = static_cast<int>(v.size());
  int idx = 0;
  while (idx < count) {
    const bool is_last = (idx + 1 == count);
    // The empty string is still inserted after the last element so that the
    // stream sees exactly the same sequence of insertions as before.
    os << v[idx] << (is_last ? "" : " ");
    ++idx;
  }
  return os;
}
// Fills every existing element of the vector from the stream, in order.
// The vector is not resized.
template <typename T>
istream &operator>>(istream &is, vector<T> &v) {
  for (auto it = v.begin(); it != v.end(); ++it) {
    T &slot = *it;
    is >> slot;
  }
  return is;
}
// Raises `a` to `b` when `a < b`. Returns true iff `a` was updated.
template <typename T1, typename T2>
inline bool chmax(T1 &a, T2 b) {
  if (a < b) {
    a = b;
    return true;
  }
  return false;
}
// Lowers `a` to `b` when `a > b`. Returns true iff `a` was updated.
template <typename T1, typename T2>
inline bool chmin(T1 &a, T2 b) {
  if (a > b) {
    a = b;
    return true;
  }
  return false;
}
// Builds a value-initialised vector of `a` elements (innermost dimension).
template <typename T = int64>
vector<T> make_v(size_t a) {
  vector<T> row(a);
  return row;
}
// Builds a nested vector whose outermost dimension is `a` and whose remaining
// dimensions are given by `ts...`; every leaf element is value-initialised.
template <typename T, typename... Ts>
auto make_v(size_t a, Ts... ts) {
  auto inner = make_v<T>(ts...);
  using Inner = decltype(inner);
  return vector<Inner>(a, inner);
}
// Leaf case: `t` is not of class type, so it is simply assigned `v`.
template <typename T, typename V>
enable_if_t<!is_class<T>::value> fill_v(T &t, const V &v) {
  t = v;
}
// Container case: `t` is of class type; recurse into each of its elements so
// that every leaf of a nested container ends up equal to `v`.
template <typename T, typename V>
enable_if_t<is_class<T>::value> fill_v(T &t, const V &v) {
  for (auto &child : t) {
    fill_v(child, v);
  }
}
// Fixed-point combinator: wraps a callable `F` whose first parameter is a
// reference to "itself", so that lambdas can recurse without naming their
// own type. Calling the wrapper forwards the arguments to `F`, passing the
// wrapper as the leading argument.
template <typename F>
struct FixPoint : F {
  // Takes over a temporary callable.
  explicit FixPoint(F &&f) : F(forward<F>(f)) {}
  // Copies a named (lvalue) callable.
  explicit FixPoint(const F &f) : F(f) {}
  template <typename... Args>
  decltype(auto) operator()(Args &&...args) const {
    return F::operator()(*this, forward<Args>(args)...);
  }
};
// Builds a FixPoint from any callable. The callable type is decayed so that
// passing an lvalue does not try to derive FixPoint from a reference type;
// lvalues are copied, rvalues are moved.
template <typename F>
inline decltype(auto) MFP(F &&f) {
  return FixPoint<decay_t<F>>{forward<F>(f)};
}
#line 4 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"
#line 1 "math/fft/superset-zeta-moebius-transform-simd.hpp"
#include <immintrin.h>
/**
* @brief Superset Zeta/Moebius Transform SIMD (上位集合のゼータ/メビウス変換,
* SIMD)
*/
/**
 * In-place superset zeta transform modulo `mod`, using SSE/AVX2 integer lanes.
 *
 * For each power of two `half` below `n`, every element whose index has that
 * bit clear receives the element `half` positions further on, followed by a
 * single conditional subtraction of `mod`.
 *
 * @param buf array of `n` values, updated in place. Because only one
 *            conditional subtraction is applied per addition, the entries are
 *            expected to lie in [0, mod) on entry.
 * @param mod modulus. Sums of two entries are formed in signed 32-bit
 *            arithmetic, so 2 * (mod - 1) must not overflow `int`.
 * @param n   number of elements; must satisfy (n & (n - 1)) == 0.
 */
__attribute__((target("avx2"))) void superset_zeta_transform_simd(int *buf,
                                                                  int mod,
                                                                  int n) {
  assert((n & (n - 1)) == 0);
  // Broadcast constants; "x > mod - 1" is how "x >= mod" is expressed with the
  // greater-than comparison that the instruction set provides.
  const __m256i mod_minus_one8 = _mm256_set1_epi32(mod - 1);
  const __m256i mod8 = _mm256_set1_epi32(mod);
  const __m128i mod_minus_one4 = _mm_set1_epi32(mod - 1);
  const __m128i mod4 = _mm_set1_epi32(mod);
  for (int half = 1; half < n; half <<= 1) {
    for (int base = 0; base < n; base += half << 1) {
      int *lo = buf + base;        // indices with the current bit clear
      const int *hi = lo + half;   // matching indices with the bit set
      if (half <= 2) {
        // Too narrow for a vector register: plain scalar update.
        for (int k = 0; k < half; k++) {
          lo[k] += hi[k];
          if (lo[k] >= mod) lo[k] -= mod;
        }
      } else if (half == 4) {
        // Exactly four lanes: one 128-bit operation covers the half-block.
        __m128i *dst = reinterpret_cast<__m128i *>(lo);
        const __m128i *src = reinterpret_cast<const __m128i *>(hi);
        __m128i sum = _mm_add_epi32(_mm_loadu_si128(dst), _mm_loadu_si128(src));
        const __m128i overflow = _mm_cmpgt_epi32(sum, mod_minus_one4);
        sum = _mm_sub_epi32(sum, _mm_and_si128(overflow, mod4));
        _mm_storeu_si128(dst, sum);
      } else {
        // half is a multiple of 8 here: process eight lanes per iteration.
        for (int k = 0; k < half; k += 8) {
          __m256i *dst = reinterpret_cast<__m256i *>(lo + k);
          const __m256i *src = reinterpret_cast<const __m256i *>(hi + k);
          __m256i sum =
              _mm256_add_epi32(_mm256_loadu_si256(dst), _mm256_loadu_si256(src));
          const __m256i overflow = _mm256_cmpgt_epi32(sum, mod_minus_one8);
          sum = _mm256_sub_epi32(sum, _mm256_and_si256(overflow, mod8));
          _mm256_storeu_si256(dst, sum);
        }
      }
    }
  }
}
/**
 * In-place superset Moebius transform modulo `mod`, using SSE/AVX2 integer
 * lanes.
 *
 * For each power of two `half` below `n`, every element whose index has that
 * bit clear has the element `half` positions further on subtracted from it,
 * with one conditional correction by `mod`.
 *
 * @param buf array of `n` values, updated in place; entries are presumably
 *            expected to lie in [0, mod), since only one correction step is
 *            applied per subtraction.
 * @param mod modulus.
 * @param n   number of elements; must satisfy (n & (n - 1)) == 0.
 */
__attribute__((target("avx2"))) void superset_moebius_transform_simd(int *buf,
                                                                     int mod,
                                                                     int n) {
  assert((n & (n - 1)) == 0);
  const __m256i zero8 = _mm256_set1_epi32(0);
  const __m256i mod8 = _mm256_set1_epi32(mod);
  const __m128i zero4 = _mm_set1_epi32(0);
  const __m128i mod4 = _mm_set1_epi32(mod);
  for (int half = 1; half < n; half <<= 1) {
    for (int base = 0; base < n; base += half << 1) {
      int *lo = buf + base;   // indices with the current bit clear
      int *hi = lo + half;    // matching indices with the bit set
      if (half <= 2) {
        // Scalar path: add the negation (mod - x), then reduce once.
        for (int k = 0; k < half; k++) {
          lo[k] += mod - hi[k];
          if (lo[k] >= mod) lo[k] -= mod;
        }
      } else if (half == 4) {
        // One 128-bit operation covers the four-element half-block.
        __m128i diff = _mm_sub_epi32(_mm_loadu_si128((__m128i *)lo),
                                     _mm_loadu_si128((__m128i *)hi));
        // Lanes that went negative get `mod` added back.
        const __m128i negative = _mm_cmpgt_epi32(zero4, diff);
        diff = _mm_add_epi32(diff, _mm_and_si128(negative, mod4));
        _mm_storeu_si128((__m128i *)lo, diff);
      } else {
        // half is a multiple of 8 here: eight lanes per iteration.
        for (int k = 0; k < half; k += 8) {
          __m256i diff = _mm256_sub_epi32(_mm256_loadu_si256((__m256i *)(lo + k)),
                                          _mm256_loadu_si256((__m256i *)(hi + k)));
          const __m256i negative = _mm256_cmpgt_epi32(zero8, diff);
          diff = _mm256_add_epi32(diff, _mm256_and_si256(negative, mod8));
          _mm256_storeu_si256((__m256i *)(lo + k), diff);
        }
      }
    }
  }
}
/**
 * Bitwise AND convolution of `f` and `g` modulo `mod`, computed in place.
 *
 * Both inputs are superset-zeta-transformed, multiplied element by element,
 * and the product is transformed back with the superset Moebius transform.
 *
 * @param f first sequence of length `n`; overwritten with the result
 * @param g second sequence of length `n`; overwritten with its zeta transform
 * @param n length of both sequences; must satisfy (n & (n - 1)) == 0
 * @return `f`
 */
template <int mod>
int *bitwise_and_convolution_simd(int *f, int *g, int n) {
  assert((n & (n - 1)) == 0);
  superset_zeta_transform_simd(f, mod, n);
  superset_zeta_transform_simd(g, mod, n);
  for (int idx = 0; idx < n; ++idx) {
    // Widen to 64 bits before multiplying so the product cannot overflow.
    const unsigned long long product =
        static_cast<unsigned long long>(f[idx]) * g[idx];
    f[idx] = product % mod;
  }
  superset_moebius_transform_simd(f, mod, n);
  return f;
}
#line 6 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"
#line 1 "other/scanner.hpp"
/**
* @brief Scanner(高速入力)
*/
/**
 * Buffered token reader on top of a C `FILE *`.
 *
 * Input is pulled in chunks of up to `line_size` bytes and parsed by hand.
 * Supported targets are integral types, `string`, and `vector`s of supported
 * types (each existing element is read in order).
 *
 * Once the input is exhausted, reading an integer yields 0 and reading a
 * string yields an empty string instead of blocking forever.
 */
struct Scanner {
 public:
  explicit Scanner(FILE *fp) : fp(fp) {}

  /** Reads one value per argument, left to right. */
  template <typename T, typename... E>
  void read(T &t, E &...e) {
    read_single(t);
    read(e...);
  }

 private:
  static constexpr size_t line_size = 1 << 16;
  // Upper bound on the characters of one integer token; used to decide when
  // the buffer must be topped up before parsing a number.
  static constexpr size_t int_digits = 20;
  char line[line_size + 1] = {};  // +1 for the terminating NUL kept at *ed
  FILE *fp = nullptr;
  char *st = line;  // next unread character
  char *ed = line;  // one past the last buffered character

  // Terminates the variadic recursion of the public read().
  void read() {}

  // Everything up to and including ' ' counts as a separator.
  static inline bool is_space(char c) { return c <= ' '; }

  // Moves the unread tail to the front of the buffer and refills the rest.
  // Returns true iff at least one new byte was obtained from the file.
  bool reread() {
    const ptrdiff_t len = ed - st;
    memmove(line, st, len);
    char *tail = line + len;
    const size_t got = fread(tail, 1, line_size - len, fp);
    ed = tail + got;
    *ed = 0;
    st = line;
    return got != 0;
  }

  // Advances to the next non-separator character, refilling as needed.
  // Returns false when the input ends before such a character is found.
  bool skip_space() {
    while (true) {
      if (st == ed && !reread()) return false;
      while (*st && is_space(*st)) ++st;
      if (st != ed) return true;
    }
  }

  // Integer token: optional '-' (signed types only) followed by digits.
  template <typename T, enable_if_t<is_integral<T>::value, int> = 0>
  void read_single(T &s) {
    if (!skip_space()) {
      s = 0;
      return;
    }
    // Make sure a whole number is buffered. Comparing distances avoids
    // forming a pointer beyond the end of `line`.
    if (static_cast<size_t>(ed - st) <= int_digits) reread();
    bool neg = false;
    if (is_signed<T>::value && *st == '-') {
      neg = true;
      ++st;
    }
    typename make_unsigned<T>::type y = *st++ - '0';
    while (*st >= '0') {
      y = 10 * y + *st++ - '0';
    }
    s = (neg ? -y : y);
  }

  // String token: the maximal run of non-separator characters, which may
  // span several buffer refills.
  template <typename T, enable_if_t<is_same<T, string>::value, int> = 0>
  void read_single(T &s) {
    s = "";
    if (!skip_space()) return;
    while (true) {
      char *base = st;
      while (*st && !is_space(*st)) ++st;
      s.append(base, st);
      if (st != ed) return;
      // The token reached the end of the buffer: continue only if more input
      // actually arrives, otherwise the token ended together with the file.
      if (!reread()) return;
    }
  }

  // Reads every existing element of the vector; the size is not changed.
  template <typename T>
  void read_single(vector<T> &s) {
    for (auto &d : s) read(d);
  }
};
#line 1 "other/printer.hpp"
/**
* @brief Printer(高速出力)
*/
/**
 * Buffered writer on top of a C `FILE *`.
 *
 * Output is collected in an internal buffer of `line_size` bytes and handed
 * to `fwrite` when the buffer is about to fill up, when flush() is called,
 * or when the Printer is destroyed.
 */
struct Printer {
 public:
  explicit Printer(FILE *fp) : fp(fp) {}
  ~Printer() { flush(); }

  /**
   * Writes all arguments separated by single spaces. The template flag `f`
   * requests a leading space and is set for every argument after the first.
   */
  template <bool f = false, typename T, typename... E>
  void write(const T &t, const E &...e) {
    if constexpr (f) {
      write_single(' ');
    }
    write_single(t);
    write<true>(e...);
  }

  /** Like write(), followed by a newline character. */
  template <typename... T>
  void writeln(const T &...t) {
    write(t...);
    write_single('\n');
  }

  /** Hands the buffered bytes to fwrite and empties the buffer. */
  void flush() {
    const size_t pending = static_cast<size_t>(st - line);
    fwrite(line, 1, pending, fp);
    st = line;
  }

 private:
  FILE *fp = nullptr;
  static constexpr size_t line_size = 1 << 16;
  // Room reserved in the buffer before formatting one integer.
  static constexpr size_t int_digits = 20;
  char line[line_size + 1] = {};
  char *st = line;  // next free position in `line`

  // Terminates the variadic recursion of the public write().
  template <bool f = false>
  void write() {}

  // Single character.
  void write_single(const char &t) {
    const char *limit = line + line_size;
    if (st + 1 >= limit) flush();
    *st = t;
    ++st;
  }

  // Integral value, formatted in decimal by to_chars.
  template <typename T, enable_if_t<is_integral<T>::value, int> = 0>
  void write_single(T s) {
    const char *limit = line + line_size;
    if (st + int_digits >= limit) flush();
    st = to_chars(st, st + int_digits, s).ptr;
  }

  // std::string, character by character.
  void write_single(const string &s) {
    for (const char &ch : s) {
      write_single(ch);
    }
  }

  // NUL-terminated C string, character by character.
  void write_single(const char *s) {
    for (; *s != 0; ++s) {
      write_single(*s);
    }
  }

  // Vector: elements separated by single spaces.
  template <typename T>
  void write_single(const vector<T> &s) {
    const size_t count = s.size();
    for (size_t idx = 0; idx < count; ++idx) {
      if (idx != 0) write_single(' ');
      write_single(s[idx]);
    }
  }
};
#line 9 "test/verify/yosupo-bitwise-and-convolution-3.test.cpp"
const int MOD = 998244353;

/**
 * Solves the bitwise AND convolution problem: reads N followed by two
 * sequences of 2^N integers, convolves them modulo MOD and prints the
 * resulting sequence separated by single spaces on one line.
 */
int main() {
  Scanner in(stdin);
  Printer out(stdout);
  int N;
  in.read(N);
  const int size = 1 << N;
  // Heap-backed storage instead of variable-length arrays: VLAs are not
  // standard C++, and two arrays of 2^N ints on the stack can exceed the
  // stack limit for large N.
  vector<int> a(size), b(size);
  for (int &x : a) in.read(x);
  for (int &x : b) in.read(x);
  // The convolution works in place and leaves the result in `a`.
  bitwise_and_convolution_simd< MOD >(a.data(), b.data(), size);
  for (int i = 0; i < size; i++) {
    if (i) out.write(' ');
    out.write(a[i]);
  }
  out.writeln();
}
Env | Name | Status | Elapsed | Memory |
---|---|---|---|---|
g++ | example_00 | AC | 7 ms | 4 MB |
g++ | max_random_00 | AC | 58 ms | 12 MB |
g++ | max_random_01 | AC | 60 ms | 12 MB |
g++ | max_random_02 | AC | 59 ms | 12 MB |
g++ | random_00 | AC | 9 ms | 4 MB |
g++ | random_01 | AC | 10 ms | 4 MB |
g++ | random_02 | AC | 19 ms | 6 MB |
g++ | small_00 | AC | 7 ms | 4 MB |
g++ | small_01 | AC | 6 ms | 4 MB |
g++ | small_02 | AC | 6 ms | 4 MB |
g++ | tiny_00 | AC | 6 ms | 4 MB |
g++ | tiny_01 | AC | 6 ms | 4 MB |
g++ | tiny_02 | AC | 6 ms | 4 MB |
clang++ | example_00 | AC | 7 ms | 4 MB |
clang++ | max_random_00 | AC | 59 ms | 12 MB |
clang++ | max_random_01 | AC | 60 ms | 12 MB |
clang++ | max_random_02 | AC | 59 ms | 12 MB |
clang++ | random_00 | AC | 9 ms | 4 MB |
clang++ | random_01 | AC | 9 ms | 4 MB |
clang++ | random_02 | AC | 19 ms | 6 MB |
clang++ | small_00 | AC | 7 ms | 4 MB |
clang++ | small_01 | AC | 6 ms | 4 MB |
clang++ | small_02 | AC | 6 ms | 4 MB |
clang++ | tiny_00 | AC | 6 ms | 4 MB |
clang++ | tiny_01 | AC | 6 ms | 4 MB |
clang++ | tiny_02 | AC | 6 ms | 4 MB |