-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.cpp
More file actions
102 lines (96 loc) · 4.07 KB
/
main.cpp
File metadata and controls
102 lines (96 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include <array>
#include <complex>
#include <ducc0/nufft/nufft.h>
#include <fftw3.h>
#include <finufft.h>
#include <fmt/core.h>
#include <iostream>
#include <nanobench.h>
using namespace std;
using namespace std::complex_literals;
int main() {
// Set up data
size_t M = 5e5; // number of nonuniform points
vector<double> x(M), y(M), z(M);
vector<complex<double>> c(M);
vector<complex<float>> c_float(M);
for (size_t j = 0; j < M; ++j) {
x[j] = M_PI * (2 * ((double)rand() / RAND_MAX) - 1);
y[j] = M_PI * (2 * ((double)rand() / RAND_MAX) - 1);
z[j] = M_PI * (2 * ((double)rand() / RAND_MAX) - 1);
c[j] = 2 * ((double)rand() / RAND_MAX) - 1 +
1i * (2 * ((double)rand() / RAND_MAX) - 1);
c_float[j] = 2 * ((float)rand() / RAND_MAX) - 1 +
1if * (2 * ((float)rand() / RAND_MAX) - 1);
}
size_t N1 = 128, N2 = 128, N3 = 128;
vector<complex<double>> F(N1 * N2 * N3);
vector<complex<float>> F_float(N1 * N2 * N3);
// Bench FINUFFT
auto bench = ankerl::nanobench::Bench().warmup(1).minEpochIterations(5);
bench.run("FINUFFT 3d", [&] {
int ier = finufft3d1(M, &x[0], &y[0], &z[0], &c[0], +1, 1e-3, N1, N2, N3,
&F[0], NULL);
ankerl::nanobench::doNotOptimizeAway(ier);
ankerl::nanobench::doNotOptimizeAway(F[0]);
});
std::array<int64_t, 3> nmodes{128, 128, 128};
finufft_plan plan;
finufft_opts options;
finufft_default_opts(&options);
int res = finufft_makeplan(1, 3, nmodes.data(), 1, 1, 1e-6, &plan, &options);
bench.run("FINUFFT With plan", [&] {
finufft_setpts(plan, M, x.data(), y.data(), z.data(), 0, nullptr, nullptr,
nullptr);
finufft_execute(plan, c.data(), F.data());
});
options.fftw = FFTW_MEASURE;
int res2 = finufft_makeplan(1, 3, nmodes.data(), 1, 1, 1e-6, &plan, &options);
bench.run("FINUFFT With plan & FFTW_MEASURE", [&] {
finufft_setpts(plan, M, x.data(), y.data(), z.data(), 0, nullptr, nullptr,
nullptr);
finufft_execute(plan, c.data(), F.data());
});
// Set up same data ducc0
std::vector<double> coords(M * 3);
for (size_t i = 0; i < M; i++) {
coords[3 * i] = x[i] + M_PI;
coords[3 * i + 1] = y[i] + M_PI;
coords[3 * i + 2] = z[i] + M_PI;
}
auto coords_cm =
ducc0::cmav<double, 2>(coords.data(), std::array<size_t, 2>{M, 3l});
auto points_cm = ducc0::cmav<complex<double>, 1>(c.data(), {M});
auto points_float = ducc0::cmav<complex<float>, 1>(c_float.data(), {M});
auto grid_cm = ducc0::vfmav<complex<double>>(
F.data(), std::vector<size_t>{128ul, 128ul, 128ul});
auto grid_cm_float = ducc0::vfmav<complex<float>>(
F_float.data(), std::vector<size_t>{128ul, 128ul, 128ul});
// Bench ducc0
for (int nthreads : {32, 16, 8, 6, 4}) {
bench.run(fmt::format("Ducco 1e-4 {} threads", nthreads), [&] {
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-4, nthreads,
grid_cm, 0, 1.1, 2.6, 2 * M_PI, false);
});
bench.run(fmt::format("Ducco 1e-4 {} threads float", nthreads), [&] {
ducc0::nu2u<float, float>(coords_cm, points_float, true, 1e-4, nthreads,
grid_cm_float, 0, 1.1, 2.6, 2 * M_PI, false);
});
}
bench.run("Ducco 1e-4 8 fourier output", [&] {
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-4, 8, grid_cm, 0,
1.1, 2.6, 2 * M_PI, true);
});
std::cout << "32 threads verbose\n";
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-6, 32, grid_cm, 1,
1.1, 2.6, 2 * M_PI, false);
std::cout << "16 threads verbose\n";
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-6, 16, grid_cm, 1,
1.1, 2.6, 2 * M_PI, false);
std::cout << "8 threads verbose\n";
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-6, 8, grid_cm, 1,
1.1, 2.6, 2 * M_PI, false);
std::cout << "4 threads verbose \n";
ducc0::nu2u<double, double>(coords_cm, points_cm, true, 1e-6, 4, grid_cm, 1,
1.1, 2.6, 2 * M_PI, false);
}