feat: add reciprocal (1/x) element-wise — bit-exact to numpy.reciprocal

peng.li24 · peng.li24 · commit 749559a38ab6 · 2026-06-11T15:56:27.000+08:00
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 [![C++17](https://img.shields.io/badge/C%2B%2B-17-blue.svg)](https://en.cppreference.com/w/cpp/17)
 [![CMake](https://img.shields.io/badge/CMake-%3E%3D3.16-green.svg)](https://cmake.org/)
-[![Tests](https://img.shields.io/badge/tests-961%20bit--exact-brightgreen.svg)](tests/test_all.py)
+[![Tests](https://img.shields.io/badge/tests-970%20bit--exact-brightgreen.svg)](tests/test_all.py)
 [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
 
 ## Background
@@ -17,7 +17,7 @@ We created `numpycpp` to keep NumPy's familiar usage patterns while letting C++
 
 `numpycpp` is a **header-only C++ library** implementing numpy's core API (`numpy.*`, `numpy.linalg.*`, `numpy.einsum`) with **bit-level precision alignment**. Raw pointer + size interface. Zero external dependencies — pure C++17 standard library.
 
-All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (961 tests, float64 + float32, including NaN passthrough, signed-zero, ±∞, domain-error cases, and advanced indexing).
+All APIs are tested against Python numpy under strict bit-level comparison: every IEEE 754 float bit must match exactly (970 tests, float64 + float32, including NaN passthrough, signed-zero, ±∞, domain-error cases, and advanced indexing).
 
 **Bit-exact math** is achieved by resolving numpy's own math functions from `_multiarray_umath.so` at runtime. The SVML bridge auto-detects your CPU and selects the same path numpy uses: AVX‑512 SVML (`__svml_exp8`) when available, or scalar `npy_exp`/`npy_log`/etc. otherwise. AVX‑512 intrinsics are isolated behind `__attribute__((target))` — the binary is safe on any x86_64 CPU (no SIGILL). Every transcendental function produces the exact same IEEE 754 bits as numpy on **all architectures**.
 
@@ -117,7 +117,7 @@ Add `-Ipath/to/numpycpp` to your compiler flags and include the headers directly
 
 The test suite verifies **bit-level precision alignment** between every C++ function and Python numpy.
 No tolerance, no `atol`/`rtol` — raw IEEE 754 bits must match exactly.
-961 tests: float64 + float32, including NaN passthrough, signed-zero, ±∞, domain errors, advanced indexing, and AVX-512 boundary sizes.
+970 tests: float64 + float32, including NaN passthrough, signed-zero, ±∞, domain errors, advanced indexing, and AVX-512 boundary sizes.
 
 ```bash
 # build
@@ -155,7 +155,7 @@ cmake -DNUMPYCPP_STD_ONLY=ON  ..   # std / performance-first backend
 #### Compiler flags — bitexact backend (`NUMPYCPP_STD_ONLY=OFF`)
 
 The minimum set was determined empirically: each flag was removed in isolation
-and the full 961-test suite was re-run. Only flags whose removal caused at
+and the full 970-test suite was re-run. Only flags whose removal caused at
 least one test failure are marked **required**.
 
 ```cmake
@@ -279,7 +279,7 @@ numpycpp/
 │   └── bench_numpy.py          # pure-numpy baseline
 ├── tests/                      # bit-level precision tests + test module
 │   ├── module.cpp              # pybind11 module for testing
-│   ├── test_all.py             # single entry — all APIs, 961 tests, float64+float32
+│   ├── test_all.py             # single entry — all APIs, 970 tests, float64+float32
 │   ├── conftest.py             # silent-mode output suppression
 │   ├── make_csv.py             # ULP precision CSV generator
 │   ├── diagnose_numpy.py       # numpy internal diagnostic tool
diff --git a/numpycpp/elementwise.h b/numpycpp/elementwise.h
@@ -9,7 +9,7 @@
 //      numpy.expm1   numpy.log1p   numpy.power   numpy.clip
 //      numpy.log10   numpy.log2    numpy.arcsin  numpy.arccos
 //      numpy.arctan  numpy.round   numpy.floor   numpy.ceil
-//      numpy.degrees numpy.radians numpy.sign
+//      numpy.degrees numpy.radians numpy.sign    numpy.reciprocal
 //
 //  Binary element-wise:
 //      numpy.hypot   numpy.arctan2  numpy.maximum  numpy.minimum
@@ -204,6 +204,14 @@ inline void sign(const T* src, T* dst, size_t n) {
                                                   : T((src[i] > T(0)) - (src[i] < T(0))));
 }
 
+/// numpy.reciprocal(x, /, out=None, *, where=True, ...)
+/// Element-wise dst[i] = T(1) / src[i].  With IEEE 754 floating-point T the division
+/// itself yields the edge cases: 1/±0 → ±∞, 1/±∞ → ±0 (sign kept), 1/NaN → NaN.
+template<typename T>
+inline void reciprocal(const T* src, T* dst, size_t n) {
+    NUMPY_UNROLL4(i, dst[i] = T(1) / src[i]);
+}
+
 // ============================================================================
 // Binary element-wise — 2 arrays T in → T out
 // ============================================================================
@@ -366,7 +374,7 @@ inline void truncate_to_float32(const double* src, double* dst, size_t n) {
 //
 // Unary math — delegate to detail:: (SVML-bridge or std, same accuracy):
 //   sqrt  abs   exp   log   sin   cos   tan   cbrt  expm1 log1p
-//   log10 log2  arcsin arccos arctan round floor ceil degrees radians sign
+//   log10 log2  arcsin arccos arctan round floor ceil degrees radians sign reciprocal
 //
 // Binary — two scalars in, one scalar out:
 //   power(x,e)  hypot(x,y)  arctan2(y,x)  maximum(a,b)  minimum(a,b)
@@ -397,6 +405,7 @@ template<typename T> inline T ceil   (T x) { ceil   (&x, &x, 1); return x; }
 template<typename T> inline T degrees(T x) { degrees(&x, &x, 1); return x; }
 template<typename T> inline T radians(T x) { radians(&x, &x, 1); return x; }
 template<typename T> inline T sign   (T x) { sign   (&x, &x, 1); return x; }
+template<typename T> inline T reciprocal(T x) { reciprocal(&x, &x, 1); return x; }  // scalar form: runs the array overload in place on x with n = 1
 
 // ── Binary ─────────────────────────────────────────────────────────────────
 
diff --git a/numpycpp/elementwise_py.h b/numpycpp/elementwise_py.h
@@ -3,7 +3,7 @@
 //  Pybind11 wrappers: element-wise operations and type conversion.
 //      Unary: sqrt abs exp log sin cos tan cbrt expm1 log1p log10 log2
 //             arcsin arccos arctan round floor ceil degrees radians sign
-//             power clip
+//             reciprocal power clip
 //      Binary: hypot arctan2 maximum minimum
 //      Comparison: greater less equal greater_equal less_equal not_equal
 //      Logical: logical_and logical_or logical_not logical_xor
@@ -57,6 +57,7 @@ DEF_ELEMWISE(ceil)
 DEF_ELEMWISE(degrees)
 DEF_ELEMWISE(radians)
 DEF_ELEMWISE(sign)
+DEF_ELEMWISE(reciprocal)
 #undef DEF_ELEMWISE
 
 /// numpy.power(x1, x2) — scalar exponent
diff --git a/tests/module.cpp b/tests/module.cpp
@@ -110,6 +110,7 @@ PYBIND11_MODULE(numpycpp, m) {
     BIND_F1(log10); BIND_F1(log2); BIND_F1(arcsin); BIND_F1(arccos); BIND_F1(arctan);
     BIND_F1(round); BIND_F1(floor); BIND_F1(ceil);
     BIND_F1(degrees); BIND_F1(radians); BIND_F1(sign);
+    BIND_F1(reciprocal);
     m.def("power", static_cast<py::array_t<float>(*)(const py::array_t<float>&, float)>(&numpy::power));
     m.def("power", static_cast<py::array_t<double>(*)(const py::array_t<double>&, double)>(&numpy::power));
     m.def("clip", static_cast<py::array_t<double>(*)(const py::array_t<double>&, double, double)>(&numpy::clip));
diff --git a/tests/test_all.py b/tests/test_all.py
@@ -1651,6 +1651,42 @@ def test_sign_zero_signs(cpp):
         assert_bit_aligned(cpp.sign(a), np.sign(a), f"sign(±0) {dt.__name__}")
 
 
+# --- reciprocal (1/x) ---
+
+def test_reciprocal_basic(cpp, dtype):
+    """reciprocal(1/x) — basic values bit-exact vs numpy."""
+    a = random_array((128,), dtype=dtype)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal {dtype.__name__}")
+
+def test_reciprocal_zero_f32(cpp):
+    """1/0 → +inf, 1/−0 → −inf, 1/inf → 0, 1/−inf → −0."""
+    a = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float32)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal edges f32")
+
+def test_reciprocal_zero_f64(cpp):
+    """1/0 → +inf, 1/−0 → −inf, 1/inf → 0, 1/−inf → −0."""
+    a = np.array([0.0, -0.0, np.inf, -np.inf], dtype=np.float64)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal edges f64")
+
+def test_reciprocal_nan(cpp):
+    """1/NaN → NaN."""
+    a = np.array([np.nan], dtype=np.float32)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), "reciprocal nan f32")
+
+def test_reciprocal_special_values(cpp, dtype):
+    """reciprocal preserves special values: ±inf, ±0, NaN passthrough."""
+    if dtype == np.float64:
+        a = np.array([0.0, -0.0, 1.0, -1.0, 2.0, np.inf, -np.inf, np.nan], dtype=dtype)
+    else:
+        a = np.array([0.0, -0.0, 1.0, -1.0, 2.0, np.inf, -np.inf, np.nan], dtype=dtype)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal special {dtype.__name__}")
+
+def test_reciprocal_random(cpp, dtype):
+    """reciprocal large random arrays — bit-identical to numpy."""
+    a = random_array((1024,), dtype=dtype)
+    assert_bit_aligned(cpp.reciprocal(a), np.reciprocal(a), f"reciprocal large {dtype.__name__}")
+
+
 # --- unwrap NaN propagation ---
 
 def test_unwrap_nan_propagation(cpp):