Project-Navi · Navi Bot (project-navi-bot) · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026 · Jun 20, 2026
@@ -189,8 +189,8 @@ jobs:
         run: cargo test --features test-utils
       - name: cargo test (no default features)
         run: cargo test --no-default-features
-      - name: cargo build --release --example bench_rank
-        run: cargo build --release --example bench_rank
+      - name: cargo build --release --features bench-utils --example bench_rank
+        run: cargo build --release --features bench-utils --example bench_rank
 
   # ----------------------------------------------------------------------
   # Prove the declared MSRV (1.89.0) actually builds and tests. There is
@@ -445,6 +445,7 @@ jobs:
           set -euo pipefail
           cargo test
           cargo test --features experimental
+          cargo test --features bench-utils
 
   # ----------------------------------------------------------------------
   # WASM: the bitmap/sign popcount kernels have a `simd128` path
@@ -542,4 +543,4 @@ jobs:
           toolchain: stable
       - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
       - name: run bench_rank (scaled, seeded synthetic corpus)
-        run: cargo run --release --example bench_rank -- --n 10000 --queries 100
+        run: cargo run --release --features bench-utils --example bench_rank -- --n 10000 --queries 100
@@ -259,6 +259,7 @@ jobs:
           set -euo pipefail
           cargo test
           cargo test --features experimental
+          cargo test --features bench-utils
 
   notes:
     name: release notes (git-cliff) + draft Release

@@ -81,11 +81,19 @@ rand_chacha = "0.10"
 # (research scaffold), kept off the stable surface.
 experimental = []
 serde = ["dep:serde"]
+# `bench-utils` exposes benchmark-only reference paths used by examples and
+# parity tests. These helpers are not part of the default public API.
+bench-utils = []
 # `test-utils` exposes internal dispatch probes used by the crate's own integration
 # tests (e.g. the allocation-free guarantee check). Gated off the default surface
 # because these helpers are not part of the public API and carry no semver guarantee.
 test-utils = []
 
+[[example]]
+name = "bench_rank"
+path = "examples/bench_rank.rs"
+required-features = ["bench-utils"]
+
 [profile.release]
 lto = true
 codegen-units = 1

@@ -437,7 +437,7 @@ the **quality numbers (R@10, candidate-recall, bytes/vec) are deterministic**
 and regenerable from a clean checkout with no external corpus file:
 
 ```sh
-cargo run --release --example bench_rank
+cargo run --release --features bench-utils --example bench_rank
 ```
 
 A few operating points from the committed run

@@ -4,7 +4,7 @@
 # external corpus file required — the corpus is generated in-process from
 # a fixed seed):
 #
-#     cargo run --release --example bench_rank
+#     cargo run --release --features bench-utils --example bench_rank
 #
 # No system dependencies are required — ordvec links no BLAS.
 #
@@ -47,7 +47,7 @@
 # To benchmark a real public corpus instead (e.g. GloVe / OpenAI
 # text-embedding-3 dumps), pass 2-D little-endian float32 .npy files (C
 # order); not required for the default run:
-#     cargo run --release --example bench_rank -- \
+#     cargo run --release --features bench-utils --example bench_rank -- \
 #         --corpus-npy /path/to/corpus.npy --queries-npy /path/to/queries.npy
 # ===========================================================================
 

@@ -20,7 +20,7 @@ with a single command, no external data and no system dependencies
 (ordvec links no BLAS):
 
 ```bash
-cargo run --release --example bench_rank
+cargo run --release --features bench-utils --example bench_rank
 ```
 
 That runs the head-to-head on a structured synthetic corpus (D=256,
@@ -201,7 +201,7 @@ This is the clean-checkout stress test — regenerated by the default
 `bench_rank` run, no external data required:
 
 ```bash
-cargo run --release --example bench_rank
+cargo run --release --features bench-utils --example bench_rank
 ```
 
 Setup: D=256, N=30,000 documents, 200 queries, k=10. Low-rank
@@ -276,7 +276,7 @@ table](#synthetic-stress-test-numbers). The default
 `bench_rank` run uses these parameters; the explicit form is:
 
 ```bash
-cargo run --release --example bench_rank -- \
+cargo run --release --features bench-utils --example bench_rank -- \
   --dim 256 --n 30000 --queries 200 --clusters 200 --latent 64
 ```
 
@@ -371,7 +371,7 @@ facts qualify this:
   overfitting top-k order at near-tolerance boundaries.
 
 The byte-LUT scorer remains in the codebase as a labelled reference
-path (`ordvec::search_asymmetric_byte_lut`,
+path behind the `bench-utils` feature (`ordvec::search_asymmetric_byte_lut`,
 benched as the `RankQuant b=… asym byte-LUT` rows) but is not the
 production scoring route — streaming SIMD math beats query-LUT cache
 traffic on the hardware tested.
@@ -406,7 +406,7 @@ To check the modes on real embeddings, point the same bench at your own
 `.npy` arrays:
 
 ```bash
-cargo run --release --example bench_rank -- \
+cargo run --release --features bench-utils --example bench_rank -- \
   --corpus-npy  /path/to/embeddings.npy \
   --queries-npy /path/to/queries.npy \
   --queries 200 --k 10
@@ -525,10 +525,10 @@ cargo test --features experimental                   # + MultiBucket tests
 
 # Headline benchmark (synthetic clustered corpus — no external data,
 # no BLAS).
-cargo run --release --example bench_rank
+cargo run --release --features bench-utils --example bench_rank
 
 # Same bench against your own real-embedding arrays.
-cargo run --release --example bench_rank -- \
+cargo run --release --features bench-utils --example bench_rank -- \
     --corpus-npy  /path/to/embeddings.npy \
     --queries-npy /path/to/queries.npy \
     --queries 200 --k 10

@@ -65,9 +65,9 @@ The `experimental` feature is a default-off research surface. Today it exposes
 normal pre-1.0 compatibility policy above. Its direct `.ovfs`
 `RankQuantFastscan::{write,load}` path is supported, but in v0.5.0 `.ovfs` is
 not yet part of the primitive persisted-format, `probe_index_metadata()`, or
-`ordvec-manifest` v1 contract. `#[doc(hidden)]` exports such as
+`ordvec-manifest` v1 contract. Feature-gated `#[doc(hidden)]` exports such as
 `search_asymmetric_byte_lut` are reachable for internal benchmarks and parity
-tests, but are not part of the stable default API.
+tests only when their Cargo feature (e.g. `bench-utils`) is explicitly enabled, and are not part of the stable default API.
 
 New feature flags must declare their stability class before merging:
 

@@ -7,7 +7,7 @@
 //! synthetic corpus in-process, so the headline numbers are regenerable
 //! from a clean checkout with a single command:
 //!
-//!     cargo run --release --example bench_rank
+//!     cargo run --release --features bench-utils --example bench_rank
 //!
 //! No system dependencies are required — ordvec links no BLAS.
 //!
@@ -31,11 +31,11 @@
 //! `benchmarks/rank_modes_results.txt`.
 //!
 //! Larger sweeps / real public corpora:
-//!     cargo run --release --example bench_rank -- --dim 1024 --n 100000 --queries 200
+//!     cargo run --release --features bench-utils --example bench_rank -- --dim 1024 --n 100000 --queries 200
 //!     # Point at a real public embedding corpus (no file required for
 //!     # the default run). Both must be 2-D little-endian float32 .npy
 //!     # (C order). For GloVe or OpenAI text-embedding-3 dumps:
-//!     cargo run --release --example bench_rank -- \
+//!     cargo run --release --features bench-utils --example bench_rank -- \
 //!         --corpus-npy /path/to/corpus.npy --queries-npy /path/to/queries.npy
 //!
 //! Output is a human-readable table followed by a JSON line for

@@ -14,6 +14,9 @@ publish = false
 name = "_ordvec"
 crate-type = ["cdylib"]
 
+[features]
+bench-utils = ["ordvec_core/bench-utils"]
+
 [dependencies]
 # Alias the core crate as `ordvec_core` so binding code is unambiguous and never
 # mixes `ordvec::` with the Python-facing `ordvec` package name.

@@ -10,11 +10,10 @@
 ``rank_to_bucket``, ``bucket_ranks``, ``pack_buckets``, ``unpack_buckets``,
 ``rankquant_bytes_per_vec``, ``bucket_centre``, ``rank_norm``,
 ``rankquant_norm``), the eval-only arbitrary-width scorer
-``rankquant_eval_search``, the byte-LUT scoring helper
-``search_asymmetric_byte_lut``, and the loader limit constants (``MAX_DIM``,
+``rankquant_eval_search``, and the loader limit constants (``MAX_DIM``,
 ``MAX_SIGN_BITMAP_DIM``, ``MAX_VECTORS``). Together with the four classes'
-methods this mirrors the headline Rust retrieval API. Rust-only metadata
-probing and manifest-verification helpers remain available through the Rust
+methods this mirrors the headline Rust retrieval API. Rust-only metadata,
+benchmark, and manifest-verification helpers remain available through the Rust
 crates and the ``ordvec-manifest`` CLI; the low-level ``rank_io`` read/write
 functions are reached through the classes' ``write()`` / ``load()`` methods
 rather than exposed as standalone free functions. The specialized
@@ -77,7 +76,6 @@
     rankquant_eval_search,
     rankquant_bytes_per_vec,
     rankquant_norm,
-    search_asymmetric_byte_lut,
     unpack_buckets,
 )
 
@@ -106,7 +104,6 @@
     "rank_norm",
     "rankquant_norm",
     "rankquant_eval_search",
-    "search_asymmetric_byte_lut",
     # loader limit constants
     "MAX_DIM",
     "MAX_SIGN_BITMAP_DIM",

@@ -2,8 +2,8 @@
 
 Hand-written to mirror the PyO3 surface in ``ordvec-python/src/lib.rs`` exactly
 — the four index classes (``Rank``, ``RankQuant``, ``Bitmap``, ``SignBitmap``),
-the module-level rank-math primitives, the byte-LUT / eval scorers, and the
-``MAX_*`` loader limit constants. abi3 wheels carry no embedded type
+the module-level rank-math primitives, the eval scorer, and the ``MAX_*``
+loader limit constants. abi3 wheels carry no embedded type
 information, so without this stub (and the ``py.typed`` marker) editors and
 ``mypy`` see ``Any`` for the whole package.
 
@@ -171,7 +171,7 @@ class SignBitmap:
 
 # ---------------------------------------------------------------------------
 # Module-level rank-math primitives (parity with ``ordvec::rank::*``) and the
-# byte-LUT / eval scoring helpers.
+# eval scoring helper.
 # ---------------------------------------------------------------------------
 
 def rank_transform(v: NDArray[Any]) -> NDArray[np.uint16]: ...
@@ -183,9 +183,6 @@ def rankquant_bytes_per_vec(d: int, bits: int) -> int: ...
 def bucket_centre(bucket: int, bits: int) -> float: ...
 def rank_norm(d: int) -> float: ...
 def rankquant_norm(d: int, bits: int) -> float: ...
-def search_asymmetric_byte_lut(
-    index: RankQuant, queries: NDArray[Any], k: int
-) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...
 def rankquant_eval_search(
     corpus: NDArray[Any], queries: NDArray[Any], bits: int, k: int
 ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ...
@@ -1483,9 +1483,8 @@ impl SignBitmap {
 // The four classes above give object-level parity with the Rust API; these
 // free functions expose the `ordvec::rank` math primitives (the data-oblivious
 // kernels the OrdVec/RankQuant paper's Python pipeline verifies against numpy)
-// and the byte-LUT scoring path, so the crate's `pub` surface is fully
-// reachable from Python. Each mirrors the core's argument asserts as a typed
-// `ValueError` instead of letting them surface as a `PanicException`.
+// and the eval-only scoring path. Each mirrors the core's argument asserts as a
+// typed `ValueError` instead of letting them surface as a `PanicException`.
 // =====================================================================
 
 /// Dimension-wise rank transform: `out[k]` = rank of `v[k]` among `v` (ties
@@ -1680,6 +1679,7 @@ fn rankquant_norm(d: usize, bits: u8) -> PyResult<f32> {
 /// Asymmetric search via the byte-LUT scoring path (a benchmark/parity helper;
 /// requires `bits ∈ {2, 4}`). Returns `(scores, indices)` matching
 /// `RankQuant.search_asymmetric`.
+#[cfg(feature = "bench-utils")]
 #[pyfunction]
 fn search_asymmetric_byte_lut<'py>(
     py: Python<'py>,
@@ -1773,8 +1773,7 @@ fn _ordvec(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<Bitmap>()?;
     m.add_class::<SignBitmap>()?;
 
-    // Module-level rank-math primitives (parity with `ordvec::rank::*` and the
-    // crate-root `search_asymmetric_byte_lut`).
+    // Module-level rank-math primitives (parity with `ordvec::rank::*`).
     m.add_function(wrap_pyfunction!(rank_transform, m)?)?;
     m.add_function(wrap_pyfunction!(rank_to_bucket, m)?)?;
     m.add_function(wrap_pyfunction!(bucket_ranks, m)?)?;
@@ -1784,6 +1783,7 @@ fn _ordvec(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(bucket_centre, m)?)?;
     m.add_function(wrap_pyfunction!(rank_norm, m)?)?;
     m.add_function(wrap_pyfunction!(rankquant_norm, m)?)?;
+    #[cfg(feature = "bench-utils")]
     m.add_function(wrap_pyfunction!(search_asymmetric_byte_lut, m)?)?;
     m.add_function(wrap_pyfunction!(rankquant_eval_search, m)?)?;
 

@@ -1,11 +1,9 @@
 """Tests for the module-level rank-math primitives and limit constants.
 
-These free functions mirror ``ordvec::rank::*``, the crate-root
-``search_asymmetric_byte_lut``, and the ``ordvec::rank_io`` limit constants,
-giving the Python package 1:1 parity with the Rust public surface. Algorithmic
-correctness is proven in the crate's Rust tests; these cover the FFI boundary,
-the numpy round-trips, and the argument guards (bad input → typed exception,
-never a PanicException).
+These free functions mirror ``ordvec::rank::*`` and the ``ordvec::rank_io``
+limit constants. Algorithmic correctness is proven in the crate's Rust tests;
+these cover the FFI boundary, the numpy round-trips, and the argument guards
+(bad input → typed exception, never a PanicException).
 """
 from __future__ import annotations
 
@@ -17,7 +15,6 @@
     MAX_DIM,
     MAX_SIGN_BITMAP_DIM,
     MAX_VECTORS,
-    RankQuant,
     bucket_centre,
     bucket_ranks,
     pack_buckets,
@@ -26,7 +23,6 @@
     rank_transform,
     rankquant_bytes_per_vec,
     rankquant_norm,
-    search_asymmetric_byte_lut,
     unpack_buckets,
 )
 
@@ -149,32 +145,6 @@ def test_primitive_bits_guards():
         rank_to_bucket(0, 1024, 8)
 
 
-def test_search_asymmetric_byte_lut_self_retrieves_top1():
-    rng = np.random.default_rng(0)
-    vectors = rng.standard_normal((40, 128)).astype(np.float32)
-    vectors /= np.linalg.norm(vectors, axis=1, keepdims=True) + 1e-9
-    idx = RankQuant(dim=128, bits=2)
-    idx.add(vectors)
-    queries = vectors[:3]
-    s_lut, i_lut = search_asymmetric_byte_lut(idx, queries, k=10)
-    _, i_ref = idx.search_asymmetric(queries, k=10)
-    assert s_lut.shape == (3, 10)
-    # Both the byte-LUT and the production kernel are the asymmetric path, so a
-    # self-query must self-rank at top-1 in both.
-    for bi in range(3):
-        assert int(i_lut[bi][0]) == bi
-        assert int(i_ref[bi][0]) == bi
-
-
-def test_search_asymmetric_byte_lut_rejects_b1():
-    rng = np.random.default_rng(0)
-    vectors = rng.standard_normal((10, 128)).astype(np.float32)
-    idx = RankQuant(dim=128, bits=1)
-    idx.add(vectors)
-    with pytest.raises(ValueError, match="benchmark-only"):
-        search_asymmetric_byte_lut(idx, vectors[:2], k=5)
-
-
 def test_constants_exposed():
     assert MAX_DIM == 65535
     assert MAX_SIGN_BITMAP_DIM == (1 << 24)

@@ -20,8 +20,7 @@
   files and a forged-huge-dim DoS-allocation header;
 * exotic dtypes (bool / float16 / object / complex / int families) and NaN bit
   patterns (signaling + quiet) across every f32 entry point;
-* type confusion on the ``search_asymmetric_byte_lut`` ``PyRef<RankQuant>`` arg
-  and on every ``None`` / list / str argument;
+* type confusion on every ``None`` / list / str argument;
 * the documented PyO3 borrow-flag reentrancy contract (a ``__index__`` callback
   that re-enters a ``&mut self`` method on the object a ``&self`` method already
   borrowed → clean ``Already borrowed`` ``RuntimeError``, never a data race).
@@ -61,7 +60,6 @@
     rank_transform,
     rankquant_bytes_per_vec,
     rankquant_norm,
-    search_asymmetric_byte_lut,
     unpack_buckets,
 )
 
@@ -527,26 +525,10 @@ def test_signbitmap_batched_fortran_order_raises_value_error():
 
 # =====================================================================
 # Type confusion on non-array params: None / list / str must be a clean
-# TypeError everywhere, including the search_asymmetric_byte_lut PyRef arg.
+# TypeError everywhere.
 # =====================================================================
 
 
-@pytest.mark.parametrize("bad_first", [None, [1, 2, 3], "rq", 42])
-def test_byte_lut_wrong_index_type_raises_type_error(bad_first):
-    q = unit_vectors(2, 64)
-    with pytest.raises(TypeError):
-        search_asymmetric_byte_lut(bad_first, q, k=3)
-
-
-def test_byte_lut_rank_instead_of_rankquant_raises_type_error():
-    # A Rank (wrong index type) where RankQuant is required → TypeError, not a
-    # mis-cast that reads RankQuant fields off a Rank.
-    rk = Rank(dim=64)
-    rk.add(unit_vectors(10, 64))
-    with pytest.raises(TypeError):
-        search_asymmetric_byte_lut(rk, unit_vectors(2, 64), k=3)
-
-
 @pytest.mark.parametrize("bad", [None, [[1.0] * 64] * 4, "hello"])
 def test_rank_add_non_array_raises_type_error(bad):
     with pytest.raises(TypeError):