Skip to content

Commit 749559a

Browse files
author
peng.li24
committed
feat: add reciprocal (1/x) element-wise — bit-exact to numpy.reciprocal
1 parent 7b1ad4f commit 749559a

5 files changed

Lines changed: 55 additions & 8 deletions

File tree

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
55
[![C++17](https://img.shields.io/badge/C%2B%2B-17-blue.svg)](https://en.cppreference.com/w/cpp/17)
66
[![CMake](https://img.shields.io/badge/CMake-%3E%3D3.16-green.svg)](https://cmake.org/)
7-
[![Tests](https://img.shields.io/badge/tests-961%20bit--exact-brightgreen.svg)](tests/test_all.py)
7+
[![Tests](https://img.shields.io/badge/tests-970%20bit--exact-brightgreen.svg)](tests/test_all.py)
88
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
99

1010
## Background
@@ -17,7 +17,7 @@ We created `numpycpp` to keep NumPy's familiar usage patterns while letting C++
1717

1818
`numpycpp` is a **header-only C++ library** implementing numpy's core API (`numpy.*`, `numpy.linalg.*`, `numpy.einsum`) with **bit-level precision alignment**. Raw pointer + size interface. Zero external dependencies — pure C++17 standard library.
1919

20-
All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (961 tests, float64 + float32, including NaN passthrough, signed-zero, ±∞, domain-error cases, and advanced indexing).
20+
All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (970 tests, float64 + float32, including NaN passthrough, signed-zero, ±∞, domain-error cases, and advanced indexing).
2121

2222
**Bit-exact math** is achieved by resolving numpy's own math functions from `_multiarray_umath.so` at runtime. The SVML bridge auto-detects your CPU and selects the same path numpy uses: AVX‑512 SVML (`__svml_exp8`) when available, or scalar `npy_exp`/`npy_log`/etc. otherwise. AVX‑512 intrinsics are isolated behind `__attribute__((target))` — the binary is safe on any x86_64 CPU (no SIGILL). Every transcendental function produces the exact same IEEE 754 bits as numpy on **all architectures**.
2323

@@ -117,7 +117,7 @@ Add `-Ipath/to/numpycpp` to your compiler flags and include the headers directly
117117

118118
The test suite verifies **bit-level precision alignment** between every C++ function and Python numpy.
119119
No tolerance, no `atol`/`rtol` — raw IEEE 754 bits must match exactly.
120-
961 tests: float64 + float32, including NaN passthrough, signed-zero, ±∞, domain errors, advanced indexing, and AVX-512 boundary sizes.
120+
970 tests: float64 + float32, including NaN passthrough, signed-zero, ±∞, domain errors, advanced indexing, and AVX-512 boundary sizes.
121121

122122
```bash
123123
# build
@@ -155,7 +155,7 @@ cmake -DNUMPYCPP_STD_ONLY=ON .. # std / performance-first backend
155155
#### Compiler flags — bitexact backend (`NUMPYCPP_STD_ONLY=OFF`)
156156

157157
The minimum set was determined empirically: each flag was removed in isolation
158-
and the full 961-test suite was re-run. Only flags whose removal caused at
158+
and the full 970-test suite was re-run. Only flags whose removal caused at
159159
least one test failure are marked **required**.
160160

161161
```cmake
@@ -279,7 +279,7 @@ numpycpp/
279279
│ └── bench_numpy.py # pure-numpy baseline
280280
├── tests/ # bit-level precision tests + test module
281281
│ ├── module.cpp # pybind11 module for testing
282-
│ ├── test_all.py # single entry — all APIs, 961 tests, float64+float32
282+
│ ├── test_all.py # single entry — all APIs, 970 tests, float64+float32
283283
│ ├── conftest.py # silent-mode output suppression
284284
│ ├── make_csv.py # ULP precision CSV generator
285285
│ ├── diagnose_numpy.py # numpy internal diagnostic tool

numpycpp/elementwise.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// numpy.expm1 numpy.log1p numpy.power numpy.clip
1010
// numpy.log10 numpy.log2 numpy.arcsin numpy.arccos
1111
// numpy.arctan numpy.round numpy.floor numpy.ceil
12-
// numpy.degrees numpy.radians numpy.sign
12+
// numpy.degrees numpy.radians numpy.sign numpy.reciprocal
1313
//
1414
// Binary element-wise:
1515
// numpy.hypot numpy.arctan2 numpy.maximum numpy.minimum
@@ -204,6 +204,14 @@ inline void sign(const T* src, T* dst, size_t n) {
204204
: T((src[i] > T(0)) - (src[i] < T(0))));
205205
}
206206

207+
/// numpy.reciprocal(x, /, out=None, *, where=True, ...)
208+
/// Returns 1/x element-wise. IEEE 754 division handles edge cases:
209+
/// 1/0 → ±∞, 1/±∞ → ±0, 1/NaN → NaN, -0 preservation.
210+
template<typename T>
211+
inline void reciprocal(const T* src, T* dst, size_t n) {
212+
NUMPY_UNROLL4(i, dst[i] = T(1) / src[i]);
213+
}
214+
207215
// ============================================================================
208216
// Binary element-wise — 2 arrays T in → T out
209217
// ============================================================================
@@ -366,7 +374,7 @@ inline void truncate_to_float32(const double* src, double* dst, size_t n) {
366374
//
367375
// Unary math — delegate to detail:: (SVML-bridge or std, same accuracy):
368376
// sqrt abs exp log sin cos tan cbrt expm1 log1p
369-
// log10 log2 arcsin arccos arctan round floor ceil degrees radians sign
377+
// log10 log2 arcsin arccos arctan round floor ceil degrees radians sign reciprocal
370378
//
371379
// Binary — two scalars in, one scalar out:
372380
// power(x,e) hypot(x,y) arctan2(y,x) maximum(a,b) minimum(a,b)
@@ -397,6 +405,7 @@ template<typename T> inline T ceil (T x) { ceil (&x, &x, 1); return x; }
397405
template<typename T> inline T degrees(T x) { degrees(&x, &x, 1); return x; }
398406
template<typename T> inline T radians(T x) { radians(&x, &x, 1); return x; }
399407
template<typename T> inline T sign (T x) { sign (&x, &x, 1); return x; }
408+
template<typename T> inline T reciprocal(T x) { reciprocal(&x, &x, 1); return x; }
400409

401410
// ── Binary ─────────────────────────────────────────────────────────────────
402411

numpycpp/elementwise_py.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// Pybind11 wrappers: element-wise operations and type conversion.
44
// Unary: sqrt abs exp log sin cos tan cbrt expm1 log1p log10 log2
55
// arcsin arccos arctan round floor ceil degrees radians sign
6-
// power clip
6+
// reciprocal power clip
77
// Binary: hypot arctan2 maximum minimum
88
// Comparison: greater less equal greater_equal less_equal not_equal
99
// Logical: logical_and logical_or logical_not logical_xor
@@ -57,6 +57,7 @@ DEF_ELEMWISE(ceil)
5757
DEF_ELEMWISE(degrees)
5858
DEF_ELEMWISE(radians)
5959
DEF_ELEMWISE(sign)
60+
DEF_ELEMWISE(reciprocal)
6061
#undef DEF_ELEMWISE
6162

6263
/// numpy.power(x1, x2) — scalar exponent

tests/module.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ PYBIND11_MODULE(numpycpp, m) {
110110
BIND_F1(log10); BIND_F1(log2); BIND_F1(arcsin); BIND_F1(arccos); BIND_F1(arctan);
111111
BIND_F1(round); BIND_F1(floor); BIND_F1(ceil);
112112
BIND_F1(degrees); BIND_F1(radians); BIND_F1(sign);
113+
BIND_F1(reciprocal);
113114
m.def("power", static_cast<py::array_t<float>(*)(const py::array_t<float>&, float)>(&numpy::power));
114115
m.def("power", static_cast<py::array_t<double>(*)(const py::array_t<double>&, double)>(&numpy::power));
115116
m.def("clip", static_cast<py::array_t<double>(*)(const py::array_t<double>&, double, double)>(&numpy::clip));

tests/test_all.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,42 @@ def test_sign_zero_signs(cpp):
16511651
assert_bit_aligned(cpp.sign(a), np.sign(a), f"sign(±0) {dt.__name__}")
16521652

16531653

1654+
# --- reciprocal (1/x) ---
1655+
1656+
def test_reciprocal_basic(cpp, dtype):
1657+
"""reciprocal(1/x) — basic values bit-exact vs numpy."""
1658+
a = random_array((128,), dtype=dtype)
1659+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal {dtype.__name__}")
1660+
1661+
def test_reciprocal_zero_f32(cpp):
1662+
"""1/0 → +inf, 1/−0 → −inf, 1/inf → 0, 1/−inf → −0."""
1663+
a = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float32)
1664+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal edges f32")
1665+
1666+
def test_reciprocal_zero_f64(cpp):
1667+
"""1/0 → +inf, 1/−0 → −inf, 1/inf → 0, 1/−inf → −0."""
1668+
a = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float64)
1669+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal edges f64")
1670+
1671+
def test_reciprocal_nan(cpp):
1672+
"""1/NaN → NaN."""
1673+
a = np.array([np.nan], dtype=np.float32)
1674+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal nan f32")
1675+
1676+
def test_reciprocal_special_values(cpp, dtype):
1677+
"""reciprocal preserves special values: ±inf, ±0, NaN passthrough."""
1678+
if dtype == np.float64:
1679+
a = np.array([0.0, -0.0, 1.0, -1.0, 2.0, np.inf, -np.inf, np.nan], dtype=dtype)
1680+
else:
1681+
a = np.array([0.0, -0.0, 1.0, -1.0, 2.0, np.inf, -np.inf, np.nan], dtype=dtype)
1682+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal special {dtype.__name__}")
1683+
1684+
def test_reciprocal_random(cpp, dtype):
1685+
"""reciprocal large random arrays — bit-identical to numpy."""
1686+
a = random_array((1024,), dtype=dtype)
1687+
assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal large {dtype.__name__}")
1688+
1689+
16541690
# --- unwrap NaN propagation ---
16551691

16561692
def test_unwrap_nan_propagation(cpp):

0 commit comments

Comments
 (0)