diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cee4d1e8..e22d96c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -189,8 +189,8 @@ jobs: run: cargo test --features test-utils - name: cargo test (no default features) run: cargo test --no-default-features - - name: cargo build --release --example bench_rank - run: cargo build --release --example bench_rank + - name: cargo build --release --features bench-utils --example bench_rank + run: cargo build --release --features bench-utils --example bench_rank # ---------------------------------------------------------------------- # Prove the declared MSRV (1.89.0) actually builds and tests. There is @@ -445,6 +445,7 @@ jobs: set -euo pipefail cargo test cargo test --features experimental + cargo test --features bench-utils # ---------------------------------------------------------------------- # WASM: the bitmap/sign popcount kernels have a `simd128` path @@ -542,4 +543,4 @@ jobs: toolchain: stable - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 - name: run bench_rank (scaled, seeded synthetic corpus) - run: cargo run --release --example bench_rank -- --n 10000 --queries 100 + run: cargo run --release --features bench-utils --example bench_rank -- --n 10000 --queries 100 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bbe46149..fac626d9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -259,6 +259,7 @@ jobs: set -euo pipefail cargo test cargo test --features experimental + cargo test --features bench-utils notes: name: release notes (git-cliff) + draft Release diff --git a/Cargo.toml b/Cargo.toml index 5cb07d96..065d8e2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,11 +81,19 @@ rand_chacha = "0.10" # (research scaffold), kept off the stable surface. experimental = [] serde = ["dep:serde"] +# `bench-utils` exposes benchmark-only reference paths used by examples and +# parity tests. 
These helpers are not part of the default public API. +bench-utils = [] # `test-utils` exposes internal dispatch probes used by the crate's own integration # tests (e.g. the allocation-free guarantee check). Gated off the default surface # because these helpers are not part of the public API and carry no semver guarantee. test-utils = [] +[[example]] +name = "bench_rank" +path = "examples/bench_rank.rs" +required-features = ["bench-utils"] + [profile.release] lto = true codegen-units = 1 diff --git a/README.md b/README.md index c7f4a768..23dc7095 100644 --- a/README.md +++ b/README.md @@ -437,7 +437,7 @@ the **quality numbers (R@10, candidate-recall, bytes/vec) are deterministic** and regenerable from a clean checkout with no external corpus file: ```sh -cargo run --release --example bench_rank +cargo run --release --features bench-utils --example bench_rank ``` A few operating points from the committed run diff --git a/benchmarks/rank_modes_results.txt b/benchmarks/rank_modes_results.txt index a94702da..da96d2d5 100644 --- a/benchmarks/rank_modes_results.txt +++ b/benchmarks/rank_modes_results.txt @@ -4,7 +4,7 @@ # external corpus file required — the corpus is generated in-process from # a fixed seed): # -# cargo run --release --example bench_rank +# cargo run --release --features bench-utils --example bench_rank # # No system dependencies are required — ordvec links no BLAS. # @@ -47,7 +47,7 @@ # To benchmark a real public corpus instead (e.g. 
GloVe / OpenAI # text-embedding-3 dumps), pass 2-D little-endian float32 .npy files (C # order); not required for the default run: -# cargo run --release --example bench_rank -- \ +# cargo run --release --features bench-utils --example bench_rank -- \ # --corpus-npy /path/to/corpus.npy --queries-npy /path/to/queries.npy # =========================================================================== diff --git a/docs/RANK_MODES.md b/docs/RANK_MODES.md index 8409bd1e..65a092d7 100644 --- a/docs/RANK_MODES.md +++ b/docs/RANK_MODES.md @@ -20,7 +20,7 @@ with a single command, no external data and no system dependencies (ordvec links no BLAS): ```bash -cargo run --release --example bench_rank +cargo run --release --features bench-utils --example bench_rank ``` That runs the head-to-head on a structured synthetic corpus (D=256, @@ -201,7 +201,7 @@ This is the clean-checkout stress test — regenerated by the default `bench_rank` run, no external data required: ```bash -cargo run --release --example bench_rank +cargo run --release --features bench-utils --example bench_rank ``` Setup: D=256, N=30,000 documents, 200 queries, k=10. Low-rank @@ -276,7 +276,7 @@ table](#synthetic-stress-test-numbers). The default `bench_rank` run uses these parameters; the explicit form is: ```bash -cargo run --release --example bench_rank -- \ +cargo run --release --features bench-utils --example bench_rank -- \ --dim 256 --n 30000 --queries 200 --clusters 200 --latent 64 ``` @@ -371,7 +371,7 @@ facts qualify this: overfitting top-k order at near-tolerance boundaries. The byte-LUT scorer remains in the codebase as a labelled reference -path (`ordvec::search_asymmetric_byte_lut`, +path behind the `bench-utils` feature (`ordvec::search_asymmetric_byte_lut`, benched as the `RankQuant b=… asym byte-LUT` rows) but is not the production scoring route — streaming SIMD math beats query-LUT cache traffic on the hardware tested. 
@@ -406,7 +406,7 @@ To check the modes on real embeddings, point the same bench at your own `.npy` arrays: ```bash -cargo run --release --example bench_rank -- \ +cargo run --release --features bench-utils --example bench_rank -- \ --corpus-npy /path/to/embeddings.npy \ --queries-npy /path/to/queries.npy \ --queries 200 --k 10 @@ -525,10 +525,10 @@ cargo test --features experimental # + MultiBucket tests # Headline benchmark (synthetic clustered corpus — no external data, # no BLAS). -cargo run --release --example bench_rank +cargo run --release --features bench-utils --example bench_rank # Same bench against your own real-embedding arrays. -cargo run --release --example bench_rank -- \ +cargo run --release --features bench-utils --example bench_rank -- \ --corpus-npy /path/to/embeddings.npy \ --queries-npy /path/to/queries.npy \ --queries 200 --k 10 diff --git a/docs/compatibility-policy.md b/docs/compatibility-policy.md index 4efede6f..a471b51c 100644 --- a/docs/compatibility-policy.md +++ b/docs/compatibility-policy.md @@ -65,9 +65,9 @@ The `experimental` feature is a default-off research surface. Today it exposes normal pre-1.0 compatibility policy above. Its direct `.ovfs` `RankQuantFastscan::{write,load}` path is supported, but in v0.5.0 `.ovfs` is not yet part of the primitive persisted-format, `probe_index_metadata()`, or -`ordvec-manifest` v1 contract. `#[doc(hidden)]` exports such as +`ordvec-manifest` v1 contract. Feature-gated `#[doc(hidden)]` exports such as `search_asymmetric_byte_lut` are reachable for internal benchmarks and parity -tests, but are not part of the stable default API. +tests only when explicitly enabled, and are not part of the stable default API. New feature flags must declare their stability class before merging: diff --git a/examples/bench_rank.rs b/examples/bench_rank.rs index ff6e1026..7adb5d5b 100644 --- a/examples/bench_rank.rs +++ b/examples/bench_rank.rs @@ -7,7 +7,7 @@ //! 
synthetic corpus in-process, so the headline numbers are regenerable //! from a clean checkout with a single command: //! -//! cargo run --release --example bench_rank +//! cargo run --release --features bench-utils --example bench_rank //! //! No system dependencies are required — ordvec links no BLAS. //! @@ -31,11 +31,11 @@ //! `benchmarks/rank_modes_results.txt`. //! //! Larger sweeps / real public corpora: -//! cargo run --release --example bench_rank -- --dim 1024 --n 100000 --queries 200 +//! cargo run --release --features bench-utils --example bench_rank -- --dim 1024 --n 100000 --queries 200 //! # Point at a real public embedding corpus (no file required for //! # the default run). Both must be 2-D little-endian float32 .npy //! # (C order). For GloVe or OpenAI text-embedding-3 dumps: -//! cargo run --release --example bench_rank -- \ +//! cargo run --release --features bench-utils --example bench_rank -- \ //! --corpus-npy /path/to/corpus.npy --queries-npy /path/to/queries.npy //! //! Output is a human-readable table followed by a JSON line for diff --git a/ordvec-python/Cargo.toml b/ordvec-python/Cargo.toml index 44ab6768..174fe13f 100644 --- a/ordvec-python/Cargo.toml +++ b/ordvec-python/Cargo.toml @@ -14,6 +14,9 @@ publish = false name = "_ordvec" crate-type = ["cdylib"] +[features] +bench-utils = ["ordvec_core/bench-utils"] + [dependencies] # Alias the core crate as `ordvec_core` so binding code is unambiguous and never # mixes `ordvec::` with the Python-facing `ordvec` package name. 
diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index 596c6572..4726b895 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -10,11 +10,10 @@ ``rank_to_bucket``, ``bucket_ranks``, ``pack_buckets``, ``unpack_buckets``, ``rankquant_bytes_per_vec``, ``bucket_centre``, ``rank_norm``, ``rankquant_norm``), the eval-only arbitrary-width scorer -``rankquant_eval_search``, the byte-LUT scoring helper -``search_asymmetric_byte_lut``, and the loader limit constants (``MAX_DIM``, +``rankquant_eval_search``, and the loader limit constants (``MAX_DIM``, ``MAX_SIGN_BITMAP_DIM``, ``MAX_VECTORS``). Together with the four classes' -methods this mirrors the headline Rust retrieval API. Rust-only metadata -probing and manifest-verification helpers remain available through the Rust +methods this mirrors the headline Rust retrieval API. Rust-only metadata, +benchmark, and manifest-verification helpers remain available through the Rust crates and the ``ordvec-manifest`` CLI; the low-level ``rank_io`` read/write functions are reached through the classes' ``write()`` / ``load()`` methods rather than exposed as standalone free functions. 
The specialized @@ -77,7 +76,6 @@ rankquant_eval_search, rankquant_bytes_per_vec, rankquant_norm, - search_asymmetric_byte_lut, unpack_buckets, ) @@ -106,7 +104,6 @@ "rank_norm", "rankquant_norm", "rankquant_eval_search", - "search_asymmetric_byte_lut", # loader limit constants "MAX_DIM", "MAX_SIGN_BITMAP_DIM", diff --git a/ordvec-python/python/ordvec/_ordvec.pyi b/ordvec-python/python/ordvec/_ordvec.pyi index de4dc7f0..0bc7e9dc 100644 --- a/ordvec-python/python/ordvec/_ordvec.pyi +++ b/ordvec-python/python/ordvec/_ordvec.pyi @@ -2,8 +2,8 @@ Hand-written to mirror the PyO3 surface in ``ordvec-python/src/lib.rs`` exactly — the four index classes (``Rank``, ``RankQuant``, ``Bitmap``, ``SignBitmap``), -the module-level rank-math primitives, the byte-LUT / eval scorers, and the -``MAX_*`` loader limit constants. abi3 wheels carry no embedded type +the module-level rank-math primitives, the eval scorer, and the ``MAX_*`` +loader limit constants. abi3 wheels carry no embedded type information, so without this stub (and the ``py.typed`` marker) editors and ``mypy`` see ``Any`` for the whole package. @@ -171,7 +171,7 @@ class SignBitmap: # --------------------------------------------------------------------------- # Module-level rank-math primitives (parity with ``ordvec::rank::*``) and the -# byte-LUT / eval scoring helpers. +# eval scoring helper. # --------------------------------------------------------------------------- def rank_transform(v: NDArray[Any]) -> NDArray[np.uint16]: ... @@ -183,9 +183,6 @@ def rankquant_bytes_per_vec(d: int, bits: int) -> int: ... def bucket_centre(bucket: int, bits: int) -> float: ... def rank_norm(d: int) -> float: ... def rankquant_norm(d: int, bits: int) -> float: ... -def search_asymmetric_byte_lut( - index: RankQuant, queries: NDArray[Any], k: int -) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ... 
def rankquant_eval_search( corpus: NDArray[Any], queries: NDArray[Any], bits: int, k: int ) -> tuple[NDArray[np.float32], NDArray[np.int64]]: ... diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index be269151..3056e77f 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -1483,9 +1483,8 @@ impl SignBitmap { // The four classes above give object-level parity with the Rust API; these // free functions expose the `ordvec::rank` math primitives (the data-oblivious // kernels the OrdVec/RankQuant paper's Python pipeline verifies against numpy) -// and the byte-LUT scoring path, so the crate's `pub` surface is fully -// reachable from Python. Each mirrors the core's argument asserts as a typed -// `ValueError` instead of letting them surface as a `PanicException`. +// and the eval-only scoring path. Each mirrors the core's argument asserts as a +// typed `ValueError` instead of letting them surface as a `PanicException`. // ===================================================================== /// Dimension-wise rank transform: `out[k]` = rank of `v[k]` among `v` (ties @@ -1680,6 +1679,7 @@ fn rankquant_norm(d: usize, bits: u8) -> PyResult { /// Asymmetric search via the byte-LUT scoring path (a benchmark/parity helper; /// requires `bits ∈ {2, 4}`). Returns `(scores, indices)` matching /// `RankQuant.search_asymmetric`. +#[cfg(feature = "bench-utils")] #[pyfunction] fn search_asymmetric_byte_lut<'py>( py: Python<'py>, @@ -1773,8 +1773,7 @@ fn _ordvec(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; - // Module-level rank-math primitives (parity with `ordvec::rank::*` and the - // crate-root `search_asymmetric_byte_lut`). + // Module-level rank-math primitives (parity with `ordvec::rank::*`). 
m.add_function(wrap_pyfunction!(rank_transform, m)?)?; m.add_function(wrap_pyfunction!(rank_to_bucket, m)?)?; m.add_function(wrap_pyfunction!(bucket_ranks, m)?)?; @@ -1784,6 +1783,7 @@ fn _ordvec(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(bucket_centre, m)?)?; m.add_function(wrap_pyfunction!(rank_norm, m)?)?; m.add_function(wrap_pyfunction!(rankquant_norm, m)?)?; + #[cfg(feature = "bench-utils")] m.add_function(wrap_pyfunction!(search_asymmetric_byte_lut, m)?)?; m.add_function(wrap_pyfunction!(rankquant_eval_search, m)?)?; diff --git a/ordvec-python/tests/test_primitives.py b/ordvec-python/tests/test_primitives.py index ab93682a..9cb5a12f 100644 --- a/ordvec-python/tests/test_primitives.py +++ b/ordvec-python/tests/test_primitives.py @@ -1,11 +1,9 @@ """Tests for the module-level rank-math primitives and limit constants. -These free functions mirror ``ordvec::rank::*``, the crate-root -``search_asymmetric_byte_lut``, and the ``ordvec::rank_io`` limit constants, -giving the Python package 1:1 parity with the Rust public surface. Algorithmic -correctness is proven in the crate's Rust tests; these cover the FFI boundary, -the numpy round-trips, and the argument guards (bad input → typed exception, -never a PanicException). +These free functions mirror ``ordvec::rank::*`` and the ``ordvec::rank_io`` +limit constants. Algorithmic correctness is proven in the crate's Rust tests; +these cover the FFI boundary, the numpy round-trips, and the argument guards +(bad input → typed exception, never a PanicException). 
""" from __future__ import annotations @@ -17,7 +15,6 @@ MAX_DIM, MAX_SIGN_BITMAP_DIM, MAX_VECTORS, - RankQuant, bucket_centre, bucket_ranks, pack_buckets, @@ -26,7 +23,6 @@ rank_transform, rankquant_bytes_per_vec, rankquant_norm, - search_asymmetric_byte_lut, unpack_buckets, ) @@ -149,32 +145,6 @@ def test_primitive_bits_guards(): rank_to_bucket(0, 1024, 8) -def test_search_asymmetric_byte_lut_self_retrieves_top1(): - rng = np.random.default_rng(0) - vectors = rng.standard_normal((40, 128)).astype(np.float32) - vectors /= np.linalg.norm(vectors, axis=1, keepdims=True) + 1e-9 - idx = RankQuant(dim=128, bits=2) - idx.add(vectors) - queries = vectors[:3] - s_lut, i_lut = search_asymmetric_byte_lut(idx, queries, k=10) - _, i_ref = idx.search_asymmetric(queries, k=10) - assert s_lut.shape == (3, 10) - # Both the byte-LUT and the production kernel are the asymmetric path, so a - # self-query must self-rank at top-1 in both. - for bi in range(3): - assert int(i_lut[bi][0]) == bi - assert int(i_ref[bi][0]) == bi - - -def test_search_asymmetric_byte_lut_rejects_b1(): - rng = np.random.default_rng(0) - vectors = rng.standard_normal((10, 128)).astype(np.float32) - idx = RankQuant(dim=128, bits=1) - idx.add(vectors) - with pytest.raises(ValueError, match="benchmark-only"): - search_asymmetric_byte_lut(idx, vectors[:2], k=5) - - def test_constants_exposed(): assert MAX_DIM == 65535 assert MAX_SIGN_BITMAP_DIM == (1 << 24) diff --git a/ordvec-python/tests/test_redteam_fuzz.py b/ordvec-python/tests/test_redteam_fuzz.py index 0d2b168c..a47ee66e 100644 --- a/ordvec-python/tests/test_redteam_fuzz.py +++ b/ordvec-python/tests/test_redteam_fuzz.py @@ -20,8 +20,7 @@ files and a forged-huge-dim DoS-allocation header; * exotic dtypes (bool / float16 / object / complex / int families) and NaN bit patterns (signaling + quiet) across every f32 entry point; -* type confusion on the ``search_asymmetric_byte_lut`` ``PyRef`` arg - and on every ``None`` / list / str argument; +* type confusion 
on every ``None`` / list / str argument; * the documented PyO3 borrow-flag reentrancy contract (a ``__index__`` callback that re-enters a ``&mut self`` method on the object a ``&self`` method already borrowed → clean ``Already borrowed`` ``RuntimeError``, never a data race). @@ -61,7 +60,6 @@ rank_transform, rankquant_bytes_per_vec, rankquant_norm, - search_asymmetric_byte_lut, unpack_buckets, ) @@ -527,26 +525,10 @@ def test_signbitmap_batched_fortran_order_raises_value_error(): # ===================================================================== # Type confusion on non-array params: None / list / str must be a clean -# TypeError everywhere, including the search_asymmetric_byte_lut PyRef arg. +# TypeError everywhere. # ===================================================================== -@pytest.mark.parametrize("bad_first", [None, [1, 2, 3], "rq", 42]) -def test_byte_lut_wrong_index_type_raises_type_error(bad_first): - q = unit_vectors(2, 64) - with pytest.raises(TypeError): - search_asymmetric_byte_lut(bad_first, q, k=3) - - -def test_byte_lut_rank_instead_of_rankquant_raises_type_error(): - # A Rank (wrong index type) where RankQuant is required → TypeError, not a - # mis-cast that reads RankQuant fields off a Rank. - rk = Rank(dim=64) - rk.add(unit_vectors(10, 64)) - with pytest.raises(TypeError): - search_asymmetric_byte_lut(rk, unit_vectors(2, 64), k=3) - - @pytest.mark.parametrize("bad", [None, [[1.0] * 64] * 4, "hello"]) def test_rank_add_non_array_raises_type_error(bad): with pytest.raises(TypeError): diff --git a/src/bitmap.rs b/src/bitmap.rs index 46e527b4..4869d24c 100644 --- a/src/bitmap.rs +++ b/src/bitmap.rs @@ -136,12 +136,12 @@ impl Bitmap { /// loader's `n_vectors` ceiling. (Bounds the count, not the byte payload — /// see the loaders' separate `MAX_PAYLOAD` cap.) Also panics if the /// resulting row-major buffer length would overflow `usize` (reachable only - /// on 32-bit targets — see `util::checked_new_len`). 
+ /// on 32-bit targets — see `util::checked_new_count`). pub fn add(&mut self, vectors: &[f32]) { let n = vectors.len() / self.dim; assert_eq!(vectors.len(), n * self.dim); assert_all_finite(vectors); - let new_n = crate::util::checked_new_len(self.n_vectors, n, self.qwords_per_vec); + let new_n = crate::util::checked_new_count(self.n_vectors, n, self.qwords_per_vec); let qpv = self.qwords_per_vec; let cutoff = (self.dim - self.n_top) as u16; let start = self.bitmaps.len(); diff --git a/src/bucket_code.rs b/src/bucket_code.rs index 46afd985..00a41eed 100644 --- a/src/bucket_code.rs +++ b/src/bucket_code.rs @@ -14,8 +14,8 @@ //! exactly `dim / buckets` coordinates. It owns the code-validation rules: //! length, range, and per-bucket occupancy. //! - [`RankQuantSpec`] — the RankQuant-shaped specialisation: `buckets` -//! derived as `1 << bits` for `bits ∈ {1, 2, 4}`, matching the crate's -//! [`crate::RankQuant`] bit-width domain. +//! derived as `1 << bits` for `bits ∈ {1, 2, 4, 8}` when the fixed-composition +//! invariant exists. //! - [`BucketCode`] — a single validated code vector against a //! [`CompositionSpec`], built from raw codes, from a rank permutation //! ([`BucketCode::from_ranks`]), or directly from a float vector @@ -43,8 +43,8 @@ //! delegates to the crate's shared [`crate::rank`] primitives, so callers //! no longer need to fork rank or bucket semantics. //! -//! Two intentional constraints to note: `bits = 8` is rejected (it lands as a -//! capability-gated width in the separate b=8 work, #221), and +//! Two intentional constraints to note: `bits = 8` requires `dim % 256 == 0` +//! because this surface validates fixed-composition codes, and //! [`CompositionSpec::new`] rejects `buckets > 256` (codes are `u8`). use std::error::Error; @@ -177,9 +177,13 @@ impl CompositionSpec { /// RankQuant-shaped fixed-composition code parameters. 
/// /// Specialises [`CompositionSpec`] to the crate's RankQuant bit-width domain: -/// the bucket count is `1 << bits` for `bits ∈ {1, 2, 4}`, and `dim` is capped -/// at `u16::MAX` to mirror the crate-wide rank invariant (a rank vector is a -/// permutation of `[0, dim)` stored as `u16`). +/// the bucket count is `1 << bits` for `bits ∈ {1, 2, 4, 8}`. Because this type +/// models fixed-composition codes, `bits = 8` is valid only when +/// `dim % 256 == 0`; arbitrary-dimension asymmetric-only b=8 remains a +/// [`crate::RankQuant`] index capability, not a composition spec. +/// +/// `dim` is capped at `u16::MAX` to mirror the crate-wide rank invariant +/// (a rank vector is a permutation of `[0, dim)` stored as `u16`). #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct RankQuantSpec { bits: u8, @@ -190,14 +194,11 @@ impl RankQuantSpec { /// Build a RankQuant spec for `dim` coordinates at `bits` bits/coordinate. /// /// # Errors - /// - [`CompositionViolation::InvalidBits`] if `bits ∉ {1, 2, 4}`. This is - /// the crate's [`crate::RankQuant`] bit-width domain — the reference - /// prototype also accepted `8`, but ordvec's packed format and analytical - /// norm are defined only for `{1, 2, 4}`, so 8-bit is rejected here. + /// - [`CompositionViolation::InvalidBits`] if `bits ∉ {1, 2, 4, 8}`. /// - [`CompositionViolation::DimTooLarge`] if `dim > u16::MAX`. /// - the [`CompositionSpec::new`] errors (non-divisible `dim`). pub fn new(dim: usize, bits: u8) -> Result { - if !matches!(bits, 1 | 2 | 4) { + if !matches!(bits, 1 | 2 | 4 | 8) { return Err(CompositionViolation::InvalidBits { bits }); } if dim > u16::MAX as usize { @@ -213,7 +214,7 @@ impl RankQuantSpec { }) } - /// Bits per coordinate (`1`, `2`, or `4`). + /// Bits per coordinate (`1`, `2`, `4`, or fixed-composition `8`). pub fn bits(&self) -> u8 { self.bits } @@ -317,8 +318,8 @@ impl BucketCode { /// panicking inside the rank primitives. 
/// /// # Errors - /// - the [`RankQuantSpec::new`] errors (`bits ∉ {1, 2, 4}`, `dim` too large - /// or non-divisible). + /// - the [`RankQuantSpec::new`] errors (`bits ∉ {1, 2, 4, 8}`, `dim` too + /// large or non-divisible). /// - [`CompositionViolation::WrongLength`] if `vector.len() != dim`. /// - [`CompositionViolation::NonFiniteValue`] on the first non-finite /// coordinate. @@ -383,7 +384,7 @@ impl BucketCode { pub enum CompositionViolation { /// A structural spec parameter was invalid (`dim == 0`, `buckets < 2`). InvalidSpec(&'static str), - /// `bits` was outside the supported RankQuant set `{1, 2, 4}`. + /// `bits` was outside the supported RankQuant set `{1, 2, 4, 8}`. InvalidBits { /// The rejected bit width. bits: u8, @@ -453,7 +454,7 @@ impl fmt::Display for CompositionViolation { match self { Self::InvalidSpec(message) => write!(f, "{message}"), Self::InvalidBits { bits } => { - write!(f, "bits {bits} is invalid; expected one of 1, 2, 4") + write!(f, "bits {bits} is invalid; expected one of 1, 2, 4, 8") } Self::DimTooLarge { dim, max } => write!(f, "dim {dim} exceeds maximum {max}"), Self::NonUniformSpec { dim, buckets } => { @@ -555,19 +556,29 @@ mod tests { ); } - // Pin the b=8 decision: the reference prototype accepted bits=8 but ordvec - // rejects it. These tests ensure that boundary cannot change silently. #[test] - fn rankquant_spec_rejects_bits_8() { - assert_eq!( - RankQuantSpec::new(8, 8).unwrap_err(), - CompositionViolation::InvalidBits { bits: 8 } - ); - // `from_vector` takes the same path: bits=8 is rejected at the spec level. 
- let v: Vec = (0..8).map(|i| i as f32).collect(); + fn rankquant_spec_accepts_fixed_composition_bits_8() { + let spec = RankQuantSpec::new(512, 8).unwrap(); + assert_eq!(spec.bits(), 8); + assert_eq!(spec.composition().buckets(), 256); + assert_eq!(spec.composition().expected_per_bucket(), 2); + + let v: Vec = (0..512).map(|i| i as f32).collect(); + let code = BucketCode::from_vector(512, 8, &v).unwrap(); + assert_eq!(code.spec().buckets(), 256); + assert_eq!(code.spec().expected_per_bucket(), 2); + assert_eq!(code.codes()[0], 0); + assert_eq!(code.codes()[511], 255); + } + + #[test] + fn rankquant_spec_rejects_non_fixed_composition_bits_8() { assert_eq!( - BucketCode::from_vector(8, 8, &v).unwrap_err(), - CompositionViolation::InvalidBits { bits: 8 } + RankQuantSpec::new(384, 8).unwrap_err(), + CompositionViolation::NonUniformSpec { + dim: 384, + buckets: 256, + } ); } diff --git a/src/lib.rs b/src/lib.rs index 44c419fb..754b6a4d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -108,13 +108,10 @@ pub use rank_io::{probe_index_metadata, IndexKind, IndexMetadata, IndexParams}; pub use sign_bitmap::CandidateBatch; pub use sign_bitmap::SignBitmap; -// `search_asymmetric_byte_lut` is a bench-only scoring reference: it -// panics on b=1 and exists so `examples/bench_rank` can compare the -// byte-LUT path against the production AVX kernels on the same data. -// Re-exported `#[doc(hidden)]` — reachable for the example and the -// red-team parity tests, but not part of the headline API. Production -// callers use `RankQuant::search_asymmetric`, whose dispatch routes -// every supported bit width to a non-panicking kernel. +// Bench-only scoring reference for `examples/bench_rank` and parity tests. +// Gated off the default public API surface; production callers use +// `RankQuant::search_asymmetric`. 
+#[cfg(feature = "bench-utils")] #[doc(hidden)] pub use quant::search_asymmetric_byte_lut; diff --git a/src/multi_bucket.rs b/src/multi_bucket.rs index a375a363..1d32af06 100644 --- a/src/multi_bucket.rs +++ b/src/multi_bucket.rs @@ -409,10 +409,9 @@ impl MultiBucketBitmap { /// forces the **portable scalar** diagonal kernel, bypassing the runtime /// AVX-512 dispatch. It exists so `examples/bench_contingency` can time the /// scalar and SIMD diagonal paths against each other on the same index - /// (mirroring the `#[doc(hidden)]` `search_asymmetric_byte_lut` bench - /// reference at the crate root). Not part of the stable API — production - /// callers use [`Self::diagonal_overlap_row`], which dispatches to the - /// fastest available kernel. + /// (mirroring the feature-gated byte-LUT bench reference). Not part of the + /// stable API — production callers use [`Self::diagonal_overlap_row`], which + /// dispatches to the fastest available kernel. /// /// # Panics /// Panics if `doc_idx >= len()` or `q_bitmaps.len() != nb * qpb`. diff --git a/src/quant.rs b/src/quant.rs index 831f54bc..1c8ce627 100644 --- a/src/quant.rs +++ b/src/quant.rs @@ -18,10 +18,9 @@ //! gather against the `dim * 256` LUT: an AVX-512 `vgatherdps` kernel when //! available (`avx512f` + `avx512bw` + `dim % 16 == 0`), else the portable scalar LUT. //! -//! The byte-LUT path ([`search_asymmetric_byte_lut`]) is re-exported -//! `#[doc(hidden)]` (reachable as `ordvec::search_asymmetric_byte_lut`) -//! so `examples/bench_rank.rs` can compare it against the production -//! AVX path on the same data. +//! The byte-LUT reference path is available only with the non-default +//! `bench-utils` feature so `examples/bench_rank.rs` can compare it against +//! the production AVX path on the same data. use rayon::prelude::*; @@ -584,7 +583,7 @@ impl RankQuant { /// loader's `n_vectors` ceiling. (Bounds the count, not the byte payload — /// see the loaders' separate `MAX_PAYLOAD` cap.) 
Also panics if the /// resulting row-major buffer length would overflow `usize` (reachable only - /// on 32-bit targets — see `util::checked_new_len`). + /// on 32-bit targets — see `util::checked_new_count`). pub fn add(&mut self, vectors: &[f32]) { let n = vectors.len() / self.dim; assert_eq!( @@ -594,7 +593,7 @@ impl RankQuant { ); assert_all_finite(vectors); let bytes_per_vec = rankquant_bytes_per_vec(self.dim, self.bits); - let new_n = crate::util::checked_new_len(self.n_vectors, n, bytes_per_vec); + let new_n = crate::util::checked_new_count(self.n_vectors, n, bytes_per_vec); let start = self.packed.len(); self.packed.resize(start + n * bytes_per_vec, 0); let dim = self.dim; @@ -1624,14 +1623,15 @@ pub fn rankquant_eval_search( // B=2: 256 groups × 256 entries × 4 B = 256 KiB per query (fits L2) // B=4: 512 groups × 256 entries × 4 B = 512 KiB per query (spills L2 a little) // -// Re-exported `#[doc(hidden)]` for benchmarking. Production callers should reach -// for [`RankQuant::search_asymmetric`] which dispatches to the -// fastest implementation for the current CPU. +// Available behind `bench-utils` for benchmarking. Production callers should +// reach for `RankQuant::search_asymmetric`, which dispatches to the fastest +// implementation for the current CPU. // ------------------------------------------------------------------- /// Build the byte-LUT for B=2 asymmetric: `lut[g * 256 + byte]` is the /// f32 contribution of `doc[g] == byte` to the score, summed across /// the 4 coordinates packed into that byte. +#[cfg(feature = "bench-utils")] fn build_byte_lut_b2(q_unit: &[f32]) -> Vec { let dim = q_unit.len(); debug_assert_eq!(dim % 4, 0); @@ -1654,6 +1654,7 @@ fn build_byte_lut_b2(q_unit: &[f32]) -> Vec { } /// Build the byte-LUT for B=4 asymmetric. 
+#[cfg(feature = "bench-utils")] fn build_byte_lut_b4(q_unit: &[f32]) -> Vec { let dim = q_unit.len(); debug_assert_eq!(dim % 2, 0); @@ -1672,6 +1673,7 @@ fn build_byte_lut_b4(q_unit: &[f32]) -> Vec { } /// Scalar byte-LUT scan for B=2 asymmetric. One add per doc byte. +#[cfg(feature = "bench-utils")] fn scan_b2_asym_byte_lut( packed: &[u8], n: usize, @@ -1693,6 +1695,7 @@ fn scan_b2_asym_byte_lut( } /// Scalar byte-LUT scan for B=4 asymmetric. +#[cfg(feature = "bench-utils")] fn scan_b4_asym_byte_lut( packed: &[u8], n: usize, @@ -1728,6 +1731,7 @@ fn scan_b4_asym_byte_lut( /// /// Returns the raw `Vec` of doc indices per query, length /// `queries.len() / dim * k`. +#[cfg(feature = "bench-utils")] pub fn search_asymmetric_byte_lut(index: &RankQuant, queries: &[f32], k: usize) -> SearchResults { let dim = index.dim; let bits = index.bits; diff --git a/src/rank.rs b/src/rank.rs index 5c511edf..5a2bcf1d 100644 --- a/src/rank.rs +++ b/src/rank.rs @@ -87,31 +87,10 @@ pub fn rank_transform_into(v: &[f32], out: &mut [u16]) { /// hot path. #[inline] pub fn rank_to_bucket(rank: u16, d: usize, bits: u8) -> u8 { - // `bits` is a `u8`, so a caller could pass e.g. 9 or 255. `1u32 << bits` - // overflows for `bits >= 32` (in release that silently wraps and yields a - // wrong bucket; in debug it panics inconsistently), and the result must - // also fit in the returned `u8`, so cap at 8 — the widest RankQuant width - // (b=8 yields one bucket per code value in `[0, 256)`, which still fits a - // `u8`). `d == 0` would divide by zero. Guard both up front so the failure - // is loud in every build. assert!(bits <= 8, "bits too large"); assert!(d > 0, "d must be positive"); - // A valid rank is a position in `[0, d)`. 
Reject `rank >= d` loudly instead - // of silently clamping the quotient back into range: the rest of the public - // bucket API ([`pack_buckets`] / [`bucket_centre`]) fails loud on an - // out-of-domain argument, so a direct caller that miscomputes a rank should - // hear about it rather than receive a plausible-but-wrong bucket. assert!((rank as usize) < d, "rank ({rank}) must be < d ({d})"); let n_buckets = 1u32 << bits; - // u64 math: `d` is a `usize` and reaches this from the Python binding as a - // free argument, so `d as u32` could truncate a `d >= 2^32` (e.g. to 0, - // which would divide by zero and panic). rank ≤ u16::MAX and n_buckets ≤ - // 128, so the product fits u64 comfortably; over the realistic d ≤ u16::MAX - // domain this is bit-identical to the previous u32 form. - // - // With `rank < d` guaranteed above, `rank * n_buckets / d < n_buckets` - // (integer division floors), so the quotient already lands in - // `[0, n_buckets)` and fits the returned `u8` without a clamp. ((rank as u64 * n_buckets as u64) / d as u64) as u8 } @@ -123,10 +102,6 @@ pub fn rank_to_bucket(rank: u16, d: usize, bits: u8) -> u8 { /// valid rank vector is a permutation of `[0, ranks.len())`, so well-formed /// input never trips the per-entry guard. pub fn bucket_ranks(ranks: &[u16], bits: u8) -> Vec<u8> { - // Validate `bits` up front so an invalid width fails loud even for empty - // input — an empty `ranks` skips the per-entry `rank_to_bucket` check and - // would otherwise silently return an empty vec. Mirrors the Python binding, - // which checks `bits` before its empty short-circuit. 
assert!(bits <= 8, "bits too large"); let d = ranks.len(); ranks.iter().map(|&r| rank_to_bucket(r, d, bits)).collect() @@ -166,14 +141,6 @@ pub fn pack_buckets(buckets: &[u8], bits: u8) -> Vec<u8> { let n_bytes = d / codes_per_byte; let mut out = vec![0u8; n_bytes]; let bits_u = bits as usize; - // Pack in a single pass, failing loud on an out-of-range code rather than - // silently masking it (`code & mask` would turn e.g. 7 at bits=2 into 3, - // packing a different vector). Checking inside the loop keeps the - // fail-loud guarantee without a second O(d) pass over `buckets`; the - // branch is loop-invariant-predictable for the always-valid internal - // callers. Asserting `b <= mask` makes the trailing `& mask` redundant. - // At `b=8`, `codes_per_byte == 1`, so `shift == 0` and each byte holds one - // code verbatim. for (i, &b) in buckets.iter().enumerate() { assert!( b <= mask, @@ -366,7 +333,7 @@ impl Rank { /// loader's `n_vectors` ceiling. (Bounds the count, not the byte payload — /// see the loaders' separate `MAX_PAYLOAD` cap.) Also panics if the /// resulting row-major buffer length would overflow `usize` (reachable only - /// on 32-bit targets — see `util::checked_new_len`). + /// on 32-bit targets — see `util::checked_new_count`). 
pub fn add(&mut self, vectors: &[f32]) { let n = vectors.len() / self.dim; assert_eq!( @@ -375,7 +342,7 @@ impl Rank { "vectors length must be a multiple of dim", ); assert_all_finite(vectors); - let new_n = crate::util::checked_new_len(self.n_vectors, n, self.dim); + let new_n = crate::util::checked_new_count(self.n_vectors, n, self.dim); let start = self.ranks.len(); self.ranks.resize(start + n * self.dim, 0); let dim = self.dim; @@ -886,10 +853,10 @@ mod tests { } #[test] - #[should_panic(expected = "bits must be 1,2,4")] + #[should_panic(expected = "bits must be 1,2,4,8")] fn rankquant_norm_rejects_invalid_bits() { - // 3-bit packing has no RankQuant scheme; the norm must refuse it - // rather than return a value for a non-existent layout. + // Only byte-dividing RankQuant widths are valid; unsupported widths + // must fail loud instead of returning a norm for a non-existent layout. let _ = rankquant_norm(64, 3); } } diff --git a/src/sign_bitmap.rs b/src/sign_bitmap.rs index 7d6bbcc6..66f971ab 100644 --- a/src/sign_bitmap.rs +++ b/src/sign_bitmap.rs @@ -175,12 +175,12 @@ impl SignBitmap { /// loader's `n_vectors` ceiling. (Bounds the count, not the byte payload — /// see the loaders' separate `MAX_PAYLOAD` cap.) Also panics if the /// resulting row-major buffer length would overflow `usize` (reachable only - /// on 32-bit targets — see `util::checked_new_len`). + /// on 32-bit targets — see `util::checked_new_count`). pub fn add(&mut self, vectors: &[f32]) { crate::util::assert_all_finite(vectors); let n = vectors.len() / self.dim; assert_eq!(vectors.len(), n * self.dim); - let new_n = crate::util::checked_new_len(self.n_vectors, n, self.qwords_per_vec); + let new_n = crate::util::checked_new_count(self.n_vectors, n, self.qwords_per_vec); let qpv = self.qwords_per_vec; let dim = self.dim; let start = self.bitmaps.len(); diff --git a/src/util.rs b/src/util.rs index 8e229f59..5f9eb1dd 100644 --- a/src/util.rs +++ b/src/util.rs @@ -4,7 +4,7 @@ //! 
path (full ranks, bucketed ranks, bitmap overlap). //! - [`l2_normalise`] returns the unit-norm copy of a query vector for //! the asymmetric scoring path. -//! - The checked-allocation guards (`result_buffer_len`, `checked_new_len`), +//! - The checked-allocation guards (`result_buffer_len`, `checked_new_count`), //! the finite-input assert (`assert_all_finite`), and the portable AND/XOR //! popcount reductions (`and_popcount` / `xor_popcount`) round out the //! shared helpers. @@ -47,7 +47,7 @@ pub(crate) fn result_buffer_len(nq: usize, k: usize) -> usize { /// Validate that an `add` would not grow an index past /// `rank_io::MAX_VECTORS`, **and** that the resulting row-major buffer of -/// `new_n * elems_per_vec` elements still fits `usize`. Returns the new length. +/// `new_n * elems_per_vec` elements still fits `usize`. Returns the new count. /// /// The on-disk loaders cap `n_vectors` at `MAX_VECTORS` (64 Mi); the four /// in-memory growth paths (`Rank` / `RankQuant` / `Bitmap` / `SignBitmap` @@ -66,7 +66,7 @@ pub(crate) fn result_buffer_len(nq: usize, k: usize) -> usize { /// buffer (issue #25). The *count* cap is the `u32` / round-trip contract; the /// byte payload is bounded separately by the loaders' `MAX_PAYLOAD` cap. #[inline] -pub(crate) fn checked_new_len(current: usize, adding: usize, elems_per_vec: usize) -> usize { +pub(crate) fn checked_new_count(current: usize, adding: usize, elems_per_vec: usize) -> usize { let new_n = current .checked_add(adding) .expect("ordvec: n_vectors overflows usize"); @@ -81,21 +81,18 @@ pub(crate) fn checked_new_len(current: usize, adding: usize, elems_per_vec: usiz new_n } +const L2_NORMALISE_EPSILON: f32 = 1e-12; + /// Unit-L2 copy of `v`, used by the asymmetric scoring path. /// /// **Degenerate queries are intentional, not errors.** A query with L2 norm -/// `≤ 1e-12` (the all-zero vector, or one numerically indistinguishable from -/// it) has no direction, so its unit copy is the zero vector. 
The asymmetric -/// score is then `0` for every document: they all tie, and the returned top-k -/// is an arbitrary — though deterministic, via the `(score, doc_id)` -/// tie-break — prefix of the corpus. This is the correct outcome for a -/// retrieval substrate (a directionless query has no nearest neighbour), and -/// it is deliberately *silent*: the input is finite and valid, so it is not -/// rejected the way NaN/±Inf are by [`assert_all_finite`]. Callers that treat -/// an all-zero query as an upstream bug should check `‖q‖` before searching. +/// `≤ L2_NORMALISE_EPSILON` (the all-zero vector, or one numerically +/// indistinguishable from it) has no direction, so its unit copy is the zero +/// vector. Callers that treat this as an upstream bug should check `‖q‖` +/// before searching. pub(crate) fn l2_normalise(v: &[f32]) -> Vec<f32> { let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt(); - if norm <= 1e-12 { + if norm <= L2_NORMALISE_EPSILON { vec![0.0; v.len()] } else { let inv = 1.0 / norm; @@ -109,7 +106,7 @@ pub(crate) fn l2_normalise(v: &[f32]) -> Vec<f32> { pub(crate) fn l2_normalise_into(out: &mut Vec<f32>, v: &[f32]) { out.clear(); let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt(); - if norm <= 1e-12 { + if norm <= L2_NORMALISE_EPSILON { out.resize(v.len(), 0.0); } else { let inv = 1.0 / norm; @@ -600,7 +597,10 @@ impl TopK { #[cfg(test)] mod tests { - use super::{and_popcount, checked_new_len, xor_popcount, TopK}; + use super::{ + and_popcount, checked_new_count, l2_normalise, l2_normalise_into, xor_popcount, TopK, + L2_NORMALISE_EPSILON, + }; use rand::{RngExt, SeedableRng}; use rand_chacha::ChaCha8Rng; @@ -676,47 +676,47 @@ mod tests { } #[test] - fn checked_new_len_accepts_up_to_max() { + fn checked_new_count_accepts_up_to_max() { use crate::rank_io::MAX_VECTORS; // Exactly MAX_VECTORS is allowed — the loaders accept the same ceiling, // so a freshly grown index stays write/load round-trippable. 
(elems=1 // isolates the count cap from the buffer-size check.) - assert_eq!(checked_new_len(0, MAX_VECTORS, 1), MAX_VECTORS); - assert_eq!(checked_new_len(MAX_VECTORS - 1, 1, 1), MAX_VECTORS); + assert_eq!(checked_new_count(0, MAX_VECTORS, 1), MAX_VECTORS); + assert_eq!(checked_new_count(MAX_VECTORS - 1, 1, 1), MAX_VECTORS); // An empty add never trips the guard. - assert_eq!(checked_new_len(MAX_VECTORS, 0, 1), MAX_VECTORS); + assert_eq!(checked_new_count(MAX_VECTORS, 0, 1), MAX_VECTORS); // MAX_VECTORS * 4096 = 2^38 fits usize on 64-bit; on 32-bit it overflows, // which the guard correctly panics on (see - // `checked_new_len_rejects_buffer_overflow`). Gate the success assertion + // `checked_new_count_rejects_buffer_overflow`). Gate the success assertion // to 64-bit so the suite stays portable (wasm32 / armv7). #[cfg(target_pointer_width = "64")] { - assert_eq!(checked_new_len(0, MAX_VECTORS, 4096), MAX_VECTORS); + assert_eq!(checked_new_count(0, MAX_VECTORS, 4096), MAX_VECTORS); } } #[test] #[should_panic(expected = "MAX_VECTORS")] - fn checked_new_len_rejects_one_past_max() { + fn checked_new_count_rejects_one_past_max() { use crate::rank_io::MAX_VECTORS; // One past the loader ceiling must fail loud rather than build an index // that write/load would refuse to round-trip. - let _ = checked_new_len(MAX_VECTORS, 1, 1); + let _ = checked_new_count(MAX_VECTORS, 1, 1); } #[test] #[should_panic(expected = "n_vectors overflows usize")] - fn checked_new_len_rejects_usize_overflow() { + fn checked_new_count_rejects_usize_overflow() { // The running count itself must not wrap before the cap is checked. 
- let _ = checked_new_len(usize::MAX, 1, 1); + let _ = checked_new_count(usize::MAX, 1, 1); } #[test] #[should_panic(expected = "buffer length")] - fn checked_new_len_rejects_buffer_overflow() { + fn checked_new_count_rejects_buffer_overflow() { // Count is within MAX_VECTORS, but new_n * elems_per_vec overflows // usize — the 32-bit (wasm32) hazard the `resize` in `add` would hit. - let _ = checked_new_len(0, 2, usize::MAX); + let _ = checked_new_count(0, 2, usize::MAX); } #[test] @@ -767,7 +767,6 @@ mod tests { #[test] fn l2_normalise_into_matches_l2_normalise_and_reuses_capacity() { - use super::{l2_normalise, l2_normalise_into}; let v = vec![3.0f32, 0.0, 4.0, 0.0]; // norm 5 let expected = l2_normalise(&v); let mut out: Vec<f32> = Vec::new(); @@ -785,4 +784,22 @@ mod tests { l2_normalise_into(&mut out, &v); assert_eq!(out.capacity(), cap, "l2_normalise_into must reuse capacity"); } + + #[test] + fn l2_normalise_threshold_edges_are_pinned() { + let below = vec![L2_NORMALISE_EPSILON * 0.5, 0.0]; + assert_eq!(l2_normalise(&below), vec![0.0, 0.0]); + + let at = vec![L2_NORMALISE_EPSILON, 0.0]; + assert_eq!(l2_normalise(&at), vec![0.0, 0.0]); + + let above = vec![L2_NORMALISE_EPSILON * 2.0, 0.0]; + assert_eq!(l2_normalise(&above), vec![1.0, 0.0]); + + let mut out = Vec::new(); + l2_normalise_into(&mut out, &below); + assert_eq!(out, vec![0.0, 0.0]); + l2_normalise_into(&mut out, &above); + assert_eq!(out, vec![1.0, 0.0]); + } } diff --git a/tests/determinism_contract.rs b/tests/determinism_contract.rs index 56c8f486..daceed3e 100644 --- a/tests/determinism_contract.rs +++ b/tests/determinism_contract.rs @@ -1,4 +1,4 @@ -use ordvec::{search_asymmetric_byte_lut, Bitmap, Rank, RankQuant, SignBitmap}; +use ordvec::{Bitmap, Rank, RankQuant, SignBitmap}; fn repeated_docs(n: usize, dim: usize, value: f32) -> Vec<f32> { vec![value; n * dim] @@ -49,7 +49,10 @@ fn full_search_ties_return_lowest_row_ids() { } #[test] +#[cfg(feature = "bench-utils")] fn 
rankquant_dispatch_matches_scalar_reference_on_ordered_ties() { + use ordvec::search_asymmetric_byte_lut; + for &dim in &[20usize, 64] { let docs = repeated_docs(8, dim, 1.0); let query = vec![0.0; dim]; diff --git a/tests/index/finite.rs b/tests/index/finite.rs index a2dfbef5..701a04a5 100644 --- a/tests/index/finite.rs +++ b/tests/index/finite.rs @@ -65,6 +65,7 @@ fn rank_transform_rejects_nan() { } #[test] +#[cfg(feature = "bench-utils")] #[should_panic(expected = "non-finite")] fn search_asymmetric_byte_lut_rejects_inf() { let mut idx = RankQuant::new(D, 2); diff --git a/tests/index/two_stage.rs b/tests/index/two_stage.rs index 1b10c3b4..85855d48 100644 --- a/tests/index/two_stage.rs +++ b/tests/index/two_stage.rs @@ -2,7 +2,9 @@ use ordvec::{ validate_candidate_ids, validate_flat_vectors_len, Bitmap, OrdvecError, RankQuant, SignBitmap, TwoStageCandidatePolicy, }; +#[cfg(feature = "bench-utils")] use rand::{RngExt, SeedableRng}; +#[cfg(feature = "bench-utils")] use rand_chacha::ChaCha8Rng; use crate::{make_corpus, D, N}; @@ -697,6 +699,7 @@ fn batched_serial_wrapper_matches_into_and_full_set_matches_search_asymmetric() /// Scores compared within the existing kernel parity tolerance, NOT byte-identical /// across tiers. (Same convention as redteam_beta + determinism_contract.) 
#[test] +#[cfg(feature = "bench-utils")] fn batched_subset_rerank_matches_scalar_reference_across_tiers() { use ordvec::search_asymmetric_byte_lut; for dim in [64usize, 80, 128] { diff --git a/tests/redteam_beta.rs b/tests/redteam_beta.rs index 12d0e663..58d55e8e 100644 --- a/tests/redteam_beta.rs +++ b/tests/redteam_beta.rs @@ -27,8 +27,9 @@ use rand::{RngExt, SeedableRng}; use rand_chacha::ChaCha8Rng; use ordvec::rank::{bucket_centre, bucket_ranks, rank_transform, rankquant_norm}; -use ordvec::search_asymmetric_byte_lut; -use ordvec::{Rank, RankQuant, SearchResults, SignBitmap}; +#[cfg(feature = "bench-utils")] +use ordvec::SearchResults; +use ordvec::{Rank, RankQuant, SignBitmap}; fn make_corpus(seed: u64, n: usize, dim: usize) -> Vec { let mut rng = ChaCha8Rng::seed_from_u64(seed); @@ -71,7 +72,10 @@ fn ref_rankquant_asymmetric(query: &[f32], doc: &[f32], bits: u8) -> f32 { // - 768 b4: production-scale AVX-512 happy path // ------------------------------------------------------------------- +#[cfg(feature = "bench-utils")] fn assert_asym_matches_byte_lut(dim: usize, bits: u8, seed: u64) { + use ordvec::search_asymmetric_byte_lut; + let n = 64; let corpus = make_corpus(seed, n, dim); let mut idx = RankQuant::new(dim, bits); @@ -115,36 +119,43 @@ fn assert_asym_matches_byte_lut(dim: usize, bits: u8, seed: u64) { } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b2_dim48_matches_scalar() { assert_asym_matches_byte_lut(48, 2, 101); } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b4_dim80_matches_scalar() { assert_asym_matches_byte_lut(80, 4, 102); } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b2_dim20_matches_scalar() { assert_asym_matches_byte_lut(20, 2, 103); } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b2_dim4_matches_scalar() { assert_asym_matches_byte_lut(4, 2, 104); } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b2_dim64_happy_path_matches_scalar() { assert_asym_matches_byte_lut(64, 2, 105); } #[test] +#[cfg(feature = 
"bench-utils")] fn rt2_asym_b4_dim128_happy_path_matches_scalar() { assert_asym_matches_byte_lut(128, 4, 106); } #[test] +#[cfg(feature = "bench-utils")] fn rt2_asym_b4_dim768_happy_path_matches_scalar() { assert_asym_matches_byte_lut(768, 4, 107); } @@ -288,7 +299,10 @@ fn sign_bitmap_top_m_huge_m_clamps() { // ------------------------------------------------------------------- #[test] +#[cfg(feature = "bench-utils")] fn byte_lut_huge_k_clamps_no_overflow() { + use ordvec::search_asymmetric_byte_lut; + let dim = 64; let n = 16; let corpus = make_corpus(501, n, dim); @@ -312,7 +326,10 @@ fn byte_lut_huge_k_clamps_no_overflow() { } #[test] +#[cfg(feature = "bench-utils")] fn byte_lut_huge_k_multi_query_clamps_no_overflow() { + use ordvec::search_asymmetric_byte_lut; + // Multi-query exercises the `nq * k` result-buffer axis (Finding 1): // with the raw `usize::MAX` the product `nq * k` overflows usize and // would silently wrap to a too-small Vec; `result_buffer_len` turns diff --git a/tests/redteam_delta.rs b/tests/redteam_delta.rs index f891ad6a..7187692a 100644 --- a/tests/redteam_delta.rs +++ b/tests/redteam_delta.rs @@ -49,7 +49,7 @@ use rand::{RngExt, SeedableRng}; use rand_chacha::ChaCha8Rng; use ordvec::rank::rank_norm; -use ordvec::{search_asymmetric_byte_lut, Bitmap, Rank, RankQuant, SignBitmap}; +use ordvec::{Bitmap, Rank, RankQuant, SignBitmap}; /// `MAX_VECTORS` from `rank_io` — the on-disk document-count ceiling. /// Re-declared here (not imported) to keep the test independent of @@ -707,8 +707,11 @@ fn delta_d4_large_nq_small_k() { /// `b = 1` to the scalar LUT and is unaffected — covered by the `beta` /// suite). This is an intentional, documented contract, not a bug. 
#[test] +#[cfg(feature = "bench-utils")] #[should_panic(expected = "byte-LUT path only supports bits")] fn delta_e1_byte_lut_panics_on_b1_index() { + use ordvec::search_asymmetric_byte_lut; + let dim = 64; let mut idx = RankQuant::new(dim, 1); idx.add(&make_corpus(8901, 8, dim)); diff --git a/tests/redteam_gamma.rs b/tests/redteam_gamma.rs index a95c0968..2ff14dfc 100644 --- a/tests/redteam_gamma.rs +++ b/tests/redteam_gamma.rs @@ -39,7 +39,7 @@ fn rank_to_bucket_zero_d_panics() { // --------------------------------------------------------------------------- #[test] -#[should_panic(expected = "bits must be 1,2,4")] +#[should_panic(expected = "bits must be 1,2,4,8")] fn rankquant_bytes_per_vec_zero_bits_panics() { let _ = rankquant_bytes_per_vec(64, 0); }