From a0e8103d725da83f4597d59578d630b7fa52cfc7 Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Fri, 19 Jun 2026 20:13:04 -0500 Subject: [PATCH 1/2] Tighten v0.5.0 pre-tag release docs --- CHANGELOG.md | 41 +++++++-------- README.md | 12 +++-- ordvec-python/README.md | 11 +++-- ordvec-python/python/ordvec/__init__.py | 19 ++++--- ordvec-python/src/lib.rs | 6 ++- src/lib.rs | 27 +++++----- tests/release_publish_invariants.py | 66 +++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 54 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ba7e1bf..b75da59d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +_No unreleased changes._ + +## 0.5.0 - 2026-06-19 + ### Security - Hardened the Python binding's GIL-released search, candidate, scoring, and @@ -24,23 +28,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 and verifier/create keyword arguments. - Aligned `.ovfs` / `OVFS` security and provenance docs with the now-public `RankQuantFastscan` persistence loader and fuzz target. -- Updated formalization links and release invariants after the companion - `ordvec-formalization` repository moved under `Project-Navi`. - -### Fixed - -- Added a persisted-format registry that drives probe, manifest-coverage, and - C-ABI load decisions from one table; `.ovfs` now remains explicitly - known-but-not-probeable/not-manifest-covered, and the C ABI reports it as an - unsupported format rather than a corrupt index. -- Hid the `SubsetScratch::capacities_for_test` helper behind the non-default - `test-utils` feature and cleaned stale release-doc comments around FastScan - and b=8 bucket rustdoc. - -## 0.5.0 - 2026-06-19 - -### Security - - **Hardened `.ovfs` FastScan loading before the format's first stable release.** `RankQuantFastscan` now rejects invalid FastScan payload bytes (`byte & 0xf0 != 0`), rows that violate b=2 constant composition, and @@ -114,6 +101,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed +- Updated formalization links and release invariants after the companion + `ordvec-formalization` repository moved under `Project-Navi`. - **Clarified BEIR benchmark release claims.** The committed README figures use the default method set and do not yet include the newer `sign-rq2-threaded` probe row; the docs and plot generator now distinguish @@ -151,6 +140,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Added a persisted-format registry that drives probe, manifest-coverage, and + C-ABI load decisions from one table; `.ovfs` now remains explicitly + known-but-not-probeable/not-manifest-covered, and the C ABI reports it as an + unsupported format rather than a corrupt index. +- Hid the `SubsetScratch::capacities_for_test` helper behind the non-default + `test-utils` feature and cleaned stale release-doc comments around FastScan + and b=8 bucket rustdoc. - **Made Intel SDE AVX-512 coverage fail closed for release publishes.** Pull requests and main pushes may emit a visible warning and skip SDE-dependent steps during an Intel mirror outage, but the tag-triggered release workflow @@ -334,11 +330,12 @@ these aliases in a future release. ## [0.1.0] - 2026-05-22 Initial release. `ordvec` is the training-free ordinal & sign quantization -substrate for vector retrieval, developed within the -[turbovec](https://github.com/RyanCodrai/turbovec) project (MIT, by Ryan -Codrai) and factored out here as a standalone crate. It is data-oblivious (no -training, rotation, or codebook), uses analytical norms, and carries **no -system dependencies** — no BLAS, no `ndarray`, no `faer`. +substrate for vector retrieval. It was developed using the early +[turbovec](https://github.com/RyanCodrai/turbovec) project context as a +rapid-development scaffold, but ordvec's implementation history lives in this +repository. It is data-oblivious (no training, rotation, or codebook), uses +analytical norms, and carries **no system dependencies** — no BLAS, no +`ndarray`, no `faer`. ### Added diff --git a/README.md b/README.md index 85b10c33..c7f4a768 100644 --- a/README.md +++ b/README.md @@ -496,11 +496,13 @@ in the full repository. ## Provenance -ordvec was developed within turbovec, factored out into this standalone, -zero-system-dependency crate. -[turbovec](https://github.com/RyanCodrai/turbovec) (MIT, by Ryan Codrai) -is credited as the project it grew within, with thanks; ordvec's -development history is in this repository's git log. +ordvec's active upstream, implementation history, issues, releases, and +governance live in `Project-Navi/ordvec`. + +Courtesy note: ordvec was developed using the early +[turbovec](https://github.com/RyanCodrai/turbovec) project context as a +rapid-development scaffold, with thanks to that lineage. It is not a source +fork of turbovec. The ordvec project is jointly maintained by [@Project-Navi](https://github.com/Project-Navi) and diff --git a/ordvec-python/README.md b/ordvec-python/README.md index d6ee5c98..8375d287 100644 --- a/ordvec-python/README.md +++ b/ordvec-python/README.md @@ -86,10 +86,13 @@ native module is otherwise opaque to static analysis. ## Provenance & license -The `ordvec` Python bindings were developed within turbovec, factored out -into this standalone package. turbovec -([MIT](https://github.com/RyanCodrai/turbovec), by Ryan Codrai) is credited as -the origin project. +The `ordvec` Python package's active upstream, implementation history, issues, +releases, and governance live in `Project-Navi/ordvec`. + +Courtesy note: ordvec was developed using the early +[turbovec](https://github.com/RyanCodrai/turbovec) project context as a +rapid-development scaffold, with thanks to that lineage. It is not a source +fork of turbovec. Dual-licensed under either of [MIT](https://github.com/Project-Navi/ordvec/blob/main/LICENSE-MIT) or diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index df608def..596c6572 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -1,7 +1,9 @@ """ordvec — training-free ordinal & sign vector quantization (Python bindings). -Developed within the turbovec project -(MIT, by Ryan Codrai) and factored out. Dual-licensed MIT OR Apache-2.0. +ordvec was developed using the early turbovec project context as a +rapid-development scaffold, with thanks to that lineage. ordvec's implementation +history, active development, issues, releases, and governance live in +Project-Navi/ordvec. Dual-licensed MIT OR Apache-2.0. Public API: the four index classes ``Rank``, ``RankQuant``, ``Bitmap``, ``SignBitmap``, plus the module-level rank-math primitives (``rank_transform``, @@ -48,11 +50,14 @@ candidate generator methods release the GIL during the heavy Rust scan, so other Python threads run concurrently. ``add`` also releases the GIL while mutating an index, but mutable index operations must be treated as exclusive. -The input arrays are *read in place* (not copied) for that window — do not -mutate an array from another thread while a call that received it is in -progress, including subset candidate arrays, or the scan races the write and -may return inconsistent results. This is the standard contract for -GIL-releasing numeric extensions (NumPy itself behaves this way). +GIL-released search, candidate-generation, scoring, and ``add`` methods copy +NumPy inputs into Rust-owned buffers before detaching, so ordinary Python +in-place NumPy mutation in another thread cannot race detached Rust reads. This +intentionally trades zero-copy detached reads for race-free snapshots; large +calls may temporarily require an additional input-sized buffer. Callers still +own object-level scheduling: do not overlap mutable index operations such as +``add`` with searches on the same index unless the binding method explicitly +documents that pattern. """ from ._ordvec import ( diff --git a/ordvec-python/src/lib.rs b/ordvec-python/src/lib.rs index c7ba3b8c..be269151 100644 --- a/ordvec-python/src/lib.rs +++ b/ordvec-python/src/lib.rs @@ -8,8 +8,10 @@ //! The core crate is aliased as `ordvec_core` throughout, so the Rust namespace //! never collides with the `ordvec` Python package name. //! -//! Provenance: developed within turbovec -//! (MIT, by Ryan Codrai), factored out. Dual-licensed MIT OR Apache-2.0. +//! Provenance: developed using the early turbovec project context as a +//! rapid-development scaffold, with thanks to that lineage. ordvec's +//! implementation history and active upstream live in Project-Navi/ordvec. +//! Dual-licensed MIT OR Apache-2.0. //! //! Every FFI entry point validates its inputs at the boundary so the core's //! `assert!`/`assert_all_finite` panics surface as typed Python exceptions, not diff --git a/src/lib.rs b/src/lib.rs index 1220dadf..44c419fb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,12 @@ //! Training-free ordinal & sign quantization for vector retrieval. //! -//! `ordvec` is a training-free ordinal/sign retrieval -//! substrate, developed within the -//! [turbovec](https://github.com/RyanCodrai/turbovec) project (MIT, by -//! Ryan Codrai) and factored out here as a standalone crate. It carries -//! no system dependencies — no BLAS, no `ndarray`, no `faer` — and needs -//! no training, rotation, or codebook. Norms are analytical. +//! `ordvec` is a training-free ordinal/sign retrieval substrate. It was +//! developed using the early +//! [turbovec](https://github.com/RyanCodrai/turbovec) project context as a +//! rapid-development scaffold, with thanks to that lineage; ordvec's +//! implementation history and active upstream live in this repository. It +//! carries no system dependencies — no BLAS, no `ndarray`, no `faer` — and +//! needs no training, rotation, or codebook. Norms are analytical. //! //! Four substrate families, all data-oblivious: //! @@ -140,19 +141,17 @@ pub use quant::subset_rerank_uses_simd; #[cfg(feature = "experimental")] pub use multi_bucket::MultiBucketBitmap; -// `Contingency` / `Projection` are the **stable** stateless dense-code -// contingency-table surface added in this release (issue #219): the full +// `Contingency` / `Projection` are intended-to-stabilize stateless dense-code +// contingency-table analysis APIs added in this release (issue #219): the full // `nb × nb` bucket-overlap table for two `&[u8]` code slices, plus its named // projections (diagonal agreement, band agreement, top-bucket overlap, L1 // distance, etc.). This is a research/analysis primitive — it is *not* a // retrieval index and is never wired into any search path. // -// Although `Contingency` and `Projection` are gated behind the same -// `experimental` feature as `MultiBucketBitmap` (they complement the bilinear -// decomposition that surface exposes), they are the **stable** side of the -// `experimental` gate: the stateless dense API is the intended long-term -// surface and is covered by semver guarantees from this release forward. -// `MultiBucketBitmap` is the unstable counterpart — see the note above. +// They remain behind the same non-default `experimental` feature as +// `MultiBucketBitmap`, so they are not yet part of the patch-stable default +// Rust surface. The stateless dense API is the intended long-term surface, but +// graduating it to a stable feature is a later compatibility decision. #[cfg(feature = "experimental")] pub use contingency::{Contingency, Projection}; diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py index 022d6ea4..26800457 100644 --- a/tests/release_publish_invariants.py +++ b/tests/release_publish_invariants.py @@ -383,6 +383,39 @@ def semver_minor_requirement(version: str) -> str: return f"{match.group(1)}.{match.group(2)}" +def changelog_section_after_heading(changelog: str, heading: str) -> str: + match = re.search(rf"^## {re.escape(heading)}\s*$", changelog, re.MULTILINE) + if match is None: + fail(f"CHANGELOG.md must contain a {heading} section") + following = changelog[match.end() :] + next_heading = re.search(r"^## ", following, re.MULTILINE) + if next_heading is not None: + return following[: next_heading.start()] + return following + + +def check_unreleased_section_empty_for_dated_version(changelog: str, version: str) -> None: + has_dated_current_version = re.search( + rf"^## \[?{re.escape(version)}\]? - \d{{4}}-\d{{2}}-\d{{2}}$", + changelog, + re.MULTILINE, + ) + if has_dated_current_version is None: + return + + unreleased = changelog_section_after_heading(changelog, "[Unreleased]") + meaningful_lines = [ + line.strip() + for line in unreleased.splitlines() + if line.strip() and line.strip() != "_No unreleased changes._" + ] + if meaningful_lines: + fail( + "CHANGELOG.md [Unreleased] must be empty once the current package " + f"version {version} has a dated release section" + ) + + def check_release_version_sync() -> None: core_version = package_version("Cargo.toml") expected = { @@ -424,6 +457,7 @@ def check_release_version_sync() -> None: changelog = read_text("CHANGELOG.md") if not re.search(rf"^## \[?{re.escape(core_version)}\]? - \d{{4}}-\d{{2}}-\d{{2}}$", changelog, re.MULTILINE): fail(f"CHANGELOG.md must contain a dated section for {core_version}") + check_unreleased_section_empty_for_dated_version(changelog, core_version) threat_model = read_text("THREAT_MODEL.md") if not re.search( @@ -441,6 +475,37 @@ def check_release_version_sync() -> None: fail(f"fuzz/Cargo.lock must lock the path dependency ordvec at {core_version}") +def check_python_binding_safety_docs_sync() -> None: + package_doc = read_text("ordvec-python/python/ordvec/__init__.py") + safety_doc = read_text("docs/bindings-safety.md") + package_doc_normalized = " ".join(package_doc.split()).lower() + safety_doc_normalized = " ".join(safety_doc.split()).lower() + + required_fragments = ( + "copy NumPy inputs into Rust-owned buffers before detaching", + "Large calls may temporarily require an additional input-sized buffer", + ) + for fragment in required_fragments: + if fragment.lower() not in package_doc_normalized: + fail(f"ordvec-python/python/ordvec/__init__.py must document: {fragment}") + + safety_fragment = "copies NumPy input arrays into Rust-owned buffers" + if safety_fragment.lower() not in safety_doc_normalized: + fail("docs/bindings-safety.md must document Python copy-before-detach") + + forbidden_fragments = ( + "read in place", + "not copied", + "do not mutate an array from another thread", + ) + for fragment in forbidden_fragments: + if fragment in package_doc: + fail( + "ordvec-python/python/ordvec/__init__.py still contains stale " + f"zero-copy threading wording: {fragment!r}" + ) + + def check_release_compatibility_sync() -> None: core_version = package_version("Cargo.toml") core_msrv = package_rust_version("Cargo.toml") @@ -1972,6 +2037,7 @@ def main() -> None: ci_workflow = load_workflow(CI_WORKFLOW_PATH) check_release_version_sync() check_release_compatibility_sync() + check_python_binding_safety_docs_sync() check_registry_metadata_parity() check_manifest_cli_defaults() check_publication_model() From 7a6a12f5e418ec8876d7b70a569fca3368fc9944 Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Fri, 19 Jun 2026 20:18:33 -0500 Subject: [PATCH 2/2] Harden pre-tag invariant checks --- tests/release_publish_invariants.py | 40 ++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py index 26800457..e4d39a83 100644 --- a/tests/release_publish_invariants.py +++ b/tests/release_publish_invariants.py @@ -3,6 +3,7 @@ from __future__ import annotations +import ast import json import os import posixpath @@ -24,6 +25,10 @@ CI_WORKFLOW_PATH = os.environ.get("CI_WORKFLOW_PATH", ".github/workflows/ci.yml") PYTHON_WORKFLOW_PATH = os.environ.get("PYTHON_WORKFLOW_PATH", ".github/workflows/python.yml") STRICT_STABLE_TAG_PATTERN = r"^v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)$" +CHANGELOG_RELEASE_HEADING_RE = re.compile( + r"^## (?:\[Unreleased\]|\[?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\]? - " + r"\d{4}-\d{2}-\d{2})\s*$" +) COVERAGE_WORKFLOW_PATH = os.environ.get("COVERAGE_WORKFLOW_PATH", ".github/workflows/coverage.yml") SDE_ACTION_PATH = os.environ.get( "SDE_ACTION_PATH", ".github/actions/setup-intel-sde/action.yml" @@ -383,15 +388,32 @@ def semver_minor_requirement(version: str) -> str: return f"{match.group(1)}.{match.group(2)}" +def normalized_text(value: str) -> str: + return " ".join(value.split()).lower() + + +def python_module_docstring(path: str) -> str: + text = read_text(path) + try: + module = ast.parse(text, filename=path) + except SyntaxError as exc: + fail(f"{path}: could not parse Python source for module docstring: {exc}") + docstring = ast.get_docstring(module) + if not docstring: + fail(f"{path}: must contain a module docstring") + return docstring + + def changelog_section_after_heading(changelog: str, heading: str) -> str: match = re.search(rf"^## {re.escape(heading)}\s*$", changelog, re.MULTILINE) if match is None: fail(f"CHANGELOG.md must contain a {heading} section") - following = changelog[match.end() :] - next_heading = re.search(r"^## ", following, re.MULTILINE) - if next_heading is not None: - return following[: next_heading.start()] - return following + section_lines: list[str] = [] + for line in changelog[match.end() :].splitlines(keepends=True): + if CHANGELOG_RELEASE_HEADING_RE.match(line): + break + section_lines.append(line) + return "".join(section_lines) def check_unreleased_section_empty_for_dated_version(changelog: str, version: str) -> None: @@ -476,10 +498,10 @@ def check_release_version_sync() -> None: def check_python_binding_safety_docs_sync() -> None: - package_doc = read_text("ordvec-python/python/ordvec/__init__.py") + package_doc = python_module_docstring("ordvec-python/python/ordvec/__init__.py") safety_doc = read_text("docs/bindings-safety.md") - package_doc_normalized = " ".join(package_doc.split()).lower() - safety_doc_normalized = " ".join(safety_doc.split()).lower() + package_doc_normalized = normalized_text(package_doc) + safety_doc_normalized = normalized_text(safety_doc) required_fragments = ( "copy NumPy inputs into Rust-owned buffers before detaching", @@ -499,7 +521,7 @@ def check_python_binding_safety_docs_sync() -> None: "do not mutate an array from another thread", ) for fragment in forbidden_fragments: - if fragment in package_doc: + if fragment in package_doc_normalized: fail( "ordvec-python/python/ordvec/__init__.py still contains stale " f"zero-copy threading wording: {fragment!r}"