-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
161 lines (147 loc) · 6.59 KB
/
Copy pathCMakeLists.txt
File metadata and controls
161 lines (147 loc) · 6.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# Build numpycpp Python test/benchmark module.
#
# Two build modes (select with -DNUMPYCPP_STD_ONLY):
#
# OFF (default) — bit-exact mode:
# All 981 tests verify IEEE 754 bit-identical results vs numpy.
# Requires: dlsym, numpy .so loaded.
# cmake -S tests -B tests/build
#
# ON — std / performance-first mode:
# Transcendental + linalg tests use atol/rtol tolerance (0–2 ULP allowed).
# No external dependencies — pure C++17 std only.
# cmake -S tests -B tests/build -DNUMPYCPP_STD_ONLY=ON
#
# Build: cmake --build tests/build
# Test: cd tests && python3 -m pytest test_all.py -q
cmake_minimum_required(VERSION 3.16)
project(numpycpp_tests LANGUAGES CXX)

# Build-mode switch.
#   OFF (default): bit-exact build.
#   ON:            standard-library-only, performance-first build.
option(
  NUMPYCPP_STD_ONLY
  "Use only C++ standard library (no SVML/BLAS/dlopen). Performance-first, not bit-exact."
  OFF
)

# Project-wide language level: strict ISO C++17.
# Extensions are disabled so the compiler is asked for -std=c++17 rather than
# the GNU dialect, and the standard is mandatory (no silent decay to an older one).
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)
# ---- Find dependencies -------------------------------------------------------
# Python3 interpreter and development files (the extension module is built
# against them).
find_package(Python3 REQUIRED COMPONENTS Interpreter Development)

# pybind11 — prefer the copy installed alongside the Python found above.
# `python -m pybind11 <flag>` is asked for the directory holding pybind11's
# CMake package files. Two spellings of the flag are tried in order
# (--cmakedir first, then --cmake); the first one that exits 0 and prints an
# existing path wins. After the loop, PYBIND11_CMAKE_DIR and
# _pybind11_cmake_result hold the values from the last attempt that was made.
set(_pybind11_dir_found FALSE)
foreach(_pybind11_query_flag IN ITEMS --cmakedir --cmake)
  execute_process(
    COMMAND "${Python3_EXECUTABLE}" -m pybind11 ${_pybind11_query_flag}
    RESULT_VARIABLE _pybind11_cmake_result
    OUTPUT_VARIABLE PYBIND11_CMAKE_DIR
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
  )
  if(_pybind11_cmake_result EQUAL 0 AND EXISTS "${PYBIND11_CMAKE_DIR}")
    set(_pybind11_dir_found TRUE)
    break()
  endif()
endforeach()

if(NOT _pybind11_dir_found)
  # Neither query produced a usable directory: fall back to CMake's normal
  # package search.
  find_package(pybind11 CONFIG REQUIRED)
else()
  # Search only the directory reported by the Python package, so a different
  # system-wide pybind11 cannot be picked up by accident.
  find_package(pybind11 CONFIG REQUIRED
    PATHS "${PYBIND11_CMAKE_DIR}"
    NO_DEFAULT_PATH
  )
endif()
# Optional dependencies: neither lookup is REQUIRED, so configuration continues
# when a package is missing.
#   Eigen3 — used for norm/std helpers in module.cpp; looked up in config mode,
#            silently.
#   OpenMP — einsum uses #pragma omp only when _OPENMP is defined.
find_package(Eigen3 QUIET CONFIG)
find_package(OpenMP)
# AVX-512 runtime probe — compile AND run a tiny binary that executes a ZMM
# instruction. check_cxx_source_runs sets NUMPYCPP_AVX512_WORKS to TRUE only if
# the binary builds and exits 0, so it distinguishes machines where CPUID says
# avx512f but the hypervisor traps ZMM execution (e.g. some Azure VM SKUs used
# by GitHub Actions).
#
# Two details keep the probe trustworthy:
#   * The input is read through a `volatile` and the exit code depends on the
#     computed result, so an optimising compile (e.g. -O2 in CMAKE_CXX_FLAGS)
#     cannot constant-fold the square root and drop the ZMM instruction, which
#     would make the probe pass without executing anything.
#   * CMAKE_REQUIRED_FLAGS is changed inside a push/pop pair, so a value set by
#     an enclosing scope is restored afterwards instead of being unset.
#
# The result is cached: delete NUMPYCPP_AVX512_WORKS from the cache (or use a
# fresh build directory) to re-run the probe.
include(CheckCXXSourceRuns)
include(CMakePushCheckState)

cmake_push_check_state()
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mfma")
check_cxx_source_runs("
#include <immintrin.h>
int main() {
  volatile double seed = 2.0;
  __m512d a = _mm512_set1_pd(seed);
  __m512d b = _mm512_sqrt_pd(a);
  double out[8];
  _mm512_storeu_pd(out, b);
  return (out[0] > 1.0 && out[7] < 2.0) ? 0 : 1;
}
" NUMPYCPP_AVX512_WORKS)
cmake_pop_check_state()
# ---- Python extension module -------------------------------------------------
# Build module.cpp into the `numpycpp` Python extension via pybind11's helper.
pybind11_add_module(numpycpp MODULE module.cpp)

# The numpycpp header-only library lives one directory above this file; expose
# it to this target only.
target_include_directories(numpycpp PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)
# Eigen3 is optional; wire it in only when the package lookup succeeded.
# Prefer the imported target exported by Eigen's CMake package: it carries the
# include directory itself and does not depend on the legacy
# EIGEN3_INCLUDE_DIR variable. The variable is kept as a fallback for
# installations that do not define the target.
if(Eigen3_FOUND)
  if(TARGET Eigen3::Eigen)
    target_link_libraries(numpycpp PRIVATE Eigen3::Eigen)
  else()
    target_include_directories(numpycpp PRIVATE ${EIGEN3_INCLUDE_DIR})
  endif()
endif()
# OpenMP is optional and used the same way in both build modes, so it is linked
# once here rather than separately in each branch.
if(OpenMP_CXX_FOUND)
  target_link_libraries(numpycpp PRIVATE OpenMP::OpenMP_CXX)
endif()

if(NUMPYCPP_STD_ONLY)
  # -- std / performance-first build -------------------------------------------
  # No bit-exact requirement: let the compiler optimise freely.
  # FMA contraction, full auto-vectorisation, and native SIMD are all allowed.
  target_compile_definitions(numpycpp PRIVATE NUMPYCPP_STD_ONLY)
  target_compile_options(numpycpp PRIVATE
    -O3
    -march=native  # auto-vectorise with available SIMD (AVX-512 etc.)
    # No -ffp-contract=off: FMA fusion allowed for performance
    # No -fno-builtin-*: std::exp etc. can use GCC builtins (fast path)
  )
  # No dynamic-loader library is linked: the std backend does not use dlsym.
  message(STATUS "Test build: std mode (-O3 -march=native, no dlsym, no SVML)")
else()
  # -- bit-exact build (default) -----------------------------------------------
  # Flags determined empirically: each flag's removal was tested against all
  # 981 tests. Only flags whose removal caused failures are marked REQUIRED.
  target_compile_options(numpycpp PRIVATE
    -O2
    -ffp-contract=off  # REQUIRED — verified: 36 einsum tests fail without
    -msse4.1           # REQUIRED — _mm_insert_epi32 in linalg.h
    -mfma              # REQUIRED — _mm512_fmadd_* in AVX-512 loops
    # Disable GCC builtin replacements — defensive: no test currently fails
    # without these, but they prevent future GCC versions from silently
    # substituting npy_* call sites with builtins.
    -fno-builtin-exp -fno-builtin-log -fno-builtin-sin
    -fno-builtin-cos -fno-builtin-tan -fno-builtin-pow
    -fno-builtin-sqrt -fno-builtin-atan2 -fno-builtin-log2
    -fno-builtin-log10 -fno-builtin-asin -fno-builtin-acos
    -fno-builtin-atan -fno-builtin-exp2
    -fno-builtin-cbrt -fno-builtin-expm1 -fno-builtin-log1p
  )

  # Enable the AVX-512 code path only when the configure-time probe actually
  # executed a ZMM instruction successfully.
  if(NUMPYCPP_AVX512_WORKS)
    target_compile_options(numpycpp PRIVATE
      -mavx512f
      -mprefer-vector-width=256
    )
    message(STATUS "AVX-512: usable — SVML + wide-loop path enabled")
  else()
    message(STATUS "AVX-512: not usable (missing or hypervisor-trapped) — scalar npy_* path (still bit-exact)")
  endif()

  # dlsym support. CMAKE_DL_LIBS names the library that provides dlopen/dlsym
  # on the current platform ("dl" on Linux) and is empty where those symbols
  # live in the C library, so nothing bogus is passed to the linker there.
  if(CMAKE_DL_LIBS)
    target_link_libraries(numpycpp PRIVATE ${CMAKE_DL_LIBS})
  endif()
  message(STATUS "Test build: bit-exact mode (-O2 -ffp-contract=off -msse4.1 -mfma, dlsym, SVML)")
endif()
# Emit the built module into this source directory, next to the test scripts,
# so they can import it directly. The file name is forced to have no prefix
# and a plain ".so" suffix.
set_property(TARGET numpycpp PROPERTY LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}")
set_property(TARGET numpycpp PROPERTY PREFIX "")
set_property(TARGET numpycpp PROPERTY SUFFIX ".so")
# ---- Test target -------------------------------------------------------------
# `cmake --build <build_dir> --target test` rebuilds the numpycpp module if
# needed and then runs test_all.py with pytest from this source directory.
#
#   VERBATIM       — arguments reach pytest exactly as written, independent of
#                    the platform's shell quoting rules.
#   USES_TERMINAL  — the command gets direct terminal access, so pytest output
#                    is shown as it is produced instead of being buffered.
#
# NOTE(review): the target is named `test`, a name CTest also uses; confirm
# before ever adding enable_testing()/include(CTest) to this file.
add_custom_target(test
  COMMAND "${Python3_EXECUTABLE}" -m pytest
          "${CMAKE_CURRENT_SOURCE_DIR}/test_all.py" -q --tb=short --no-header
  DEPENDS numpycpp
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  COMMENT "Running bit-exact alignment tests"
  USES_TERMINAL
  VERBATIM
)
# Configure-time summary: which Python and pybind11 were picked up, followed by
# the commands for building the module and running the tests.
set(_numpycpp_python_summary "${Python3_VERSION} (${Python3_EXECUTABLE})")
message(STATUS "numpycpp test module")
message(STATUS " Python: ${_numpycpp_python_summary}")
message(STATUS " pybind11: ${pybind11_VERSION}")
message(STATUS " Build: cmake --build <build_dir>")
message(STATUS " Test: cmake --build <build_dir> --target test")
unset(_numpycpp_python_summary)