Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
name: fuzz

# Bounded cargo-fuzz smoke. The seven targets in fuzz/ are normally exercised
# in manual campaigns; this adds CI cadence so that a regression which
# reintroduces a loader panic / OOM, breaks the write->load round-trip, or
# destabilises the FastScan kernel is caught in CI rather than only at the next
# manual run (THREAT-FUZZ-002 in THREAT_MODEL.md).
#
# * pull_request / push(main): a SHORT smoke (60s/target) over the
# highest-value targets — fast enough to run on every change.
# * schedule (weekly) / workflow_dispatch: a LONGER sweep (300s/target)
# across ALL seven targets.
#
# This runs UNATTENDED on a cron schedule, so every third-party action is
# SHA-pinned and cargo-fuzz is version-pinned — a fuzz smoke must not itself
# become a supply-chain hole. Read-only token; the only `run:` interpolation is
# the matrix target name, passed through `env:` (never inlined into the shell)
# so there is no template-injection surface (THREAT-CICD-001).

on:
pull_request:
push:
branches: [main]
schedule:
- cron: "0 5 * * 4" # 05:00 UTC every Thursday (clear of audit/scorecard Mon + codeql Wed)
workflow_dispatch:

permissions:
contents: read

# Key the group on the triggering event as well as the ref. Scheduled and
# manually dispatched runs execute against refs/heads/main, the same ref a
# push to main uses; without the event name in the key, any push to main that
# lands during the weekly sweep would cancel the sweep. With it, only runs of
# the same kind on the same ref supersede each other.
concurrency:
  group: fuzz-${{ github.workflow }}-${{ github.event_name }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
# Short per-change smoke over the highest-value targets: two loaders plus the
# FastScan b=2 kernel (the one unsafe-heavy scan path the loader targets do
# not reach).
smoke:
name: fuzz smoke (60s)
if: github.event_name == 'pull_request' || github.event_name == 'push'
runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
fail-fast: false
matrix:
target: [load_rank, load_rankquant, fastscan_b2]
steps:
- uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
with:
egress-policy: audit
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # nightly; channel via toolchain: below
with:
toolchain: nightly
- name: Install cargo-fuzz (version-pinned)
# NB: no `--locked` — cargo-fuzz 0.13.1's bundled Cargo.lock pins an old
# rustix (0.36.x) that no longer compiles on current nightly. The tool
# itself stays version-pinned; its build deps resolve to compatible
# versions.
run: cargo install cargo-fuzz --version 0.13.1
- name: Smoke
env:
TARGET: ${{ matrix.target }}
run: cargo +nightly fuzz run "$TARGET" -- -max_total_time=60 -rss_limit_mb=4096

# Weekly full sweep over all seven targets at a larger time budget.
weekly:
name: fuzz weekly (300s)
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
timeout-minutes: 45
strategy:
fail-fast: false
matrix:
target:
- load_rank
- load_rankquant
- load_bitmap
- load_sign_bitmap
- roundtrip_rankquant
- search_rankquant
- fastscan_b2
steps:
- uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4
with:
egress-policy: audit
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
persist-credentials: false
- uses: dtolnay/rust-toolchain@29eef336d9b2848a0b548edc03f92a220660cdb8 # nightly; channel via toolchain: below
with:
toolchain: nightly
- name: Install cargo-fuzz (version-pinned)
# NB: no `--locked` — cargo-fuzz 0.13.1's bundled Cargo.lock pins an old
# rustix (0.36.x) that no longer compiles on current nightly. The tool
# itself stays version-pinned; its build deps resolve to compatible
# versions.
run: cargo install cargo-fuzz --version 0.13.1
- name: Fuzz
env:
TARGET: ${{ matrix.target }}
run: cargo +nightly fuzz run "$TARGET" -- -max_total_time=300 -rss_limit_mb=4096
18 changes: 18 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- **CI fuzz smoke** (`.github/workflows/fuzz.yml`): a bounded cargo-fuzz run on
every pull request / push to `main` (60s each over `load_rank`,
`load_rankquant`, and `fastscan_b2`) plus a weekly full sweep over all seven
targets, so a loader, write→load round-trip, or FastScan-kernel regression
surfaces in CI between manual campaigns (THREAT-FUZZ-002). cargo-fuzz is
version-pinned and the actions are SHA-pinned.

### Changed

- **`#![deny(unsafe_op_in_unsafe_fn)]` is now enforced crate-wide** (previously
only in `fastscan.rs`): every unsafe operation in the `bitmap`, `sign_bitmap`,
`quant_kernels`, and `util` (NEON) SIMD kernels now sits in an explicit
`unsafe {}` block, keeping the unsafe surface visible to future edits
(THREAT-SIMD-001).
- **`rank::rank_to_bucket` rejects `rank >= d`** — it now panics (and the Python
binding raises `ValueError`) instead of silently clamping the result into
range, matching the fail-loud contract of `pack_buckets` / `bucket_centre`.
Valid rank vectors (a permutation of `[0, d)`) are unaffected.
- **Python bindings (`ordvec-python`):** raised the floor to **Python 3.10** and
**numpy 2.0**; the abi3 wheel target moves to `abi3-py310`. Python 3.9 reached
end-of-life (October 2025) and pytest's CVE-2025-71176 fix dropped 3.9 support.
Expand Down
14 changes: 8 additions & 6 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,15 @@ Trusted Publishing step.
> These two settings are the supply-chain backstop the workflow code cannot
> express on its own (THREAT-SUPPLY-001 in [THREAT_MODEL.md](THREAT_MODEL.md)).

### Recommended (open)
### Tag and branch protection

- A **`v*` tag-protection ruleset** (block update + deletion) and a basic
`main` ruleset, so a release tag cannot be force-moved and `main` cannot be
force-pushed/deleted (THREAT-SUPPLY-002). Registries are already immutable
(crates.io is yank-only; PyPI burns a version on delete), so this closes the
remaining GitHub-side mutability surface.
- **Immutable releases** is enabled, so a published release's `v*` tag cannot be
force-moved or deleted and its assets cannot be replaced after publication.
This closes the GitHub-side mutability surface the registries already close on
their end (crates.io is yank-only; PyPI burns a version on delete).
- **`main` is a protected branch** — pull-request review is required and
force-pushes and deletions are blocked, so the branch a release dispatches
from cannot be rewritten (THREAT-SUPPLY-002).

## Checklist

Expand Down
84 changes: 47 additions & 37 deletions THREAT_MODEL.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ absence of a second maintainer is itself a tracked supply-chain residual
| **Compute kernels** | `fastscan.rs`, `quant_kernels.rs`, `bitmap.rs`, `sign_bitmap.rs` | Trust established after format validation |
| **Index API** | `rank.rs`, `quant.rs`, `bitmap.rs`, `sign_bitmap.rs` | Caller-controlled query embeddings |
| **Python FFI** | `ordvec-python` (PyO3 / maturin) | Python ↔ Rust boundary; NumPy buffers |
| **CI / supply chain** | 12 GitHub Actions workflows; `Cargo.lock`; crates.io + PyPI | GitHub OIDC, crates.io, PyPI trust chains |
| **CI / supply chain** | 13 GitHub Actions workflows; `Cargo.lock`; crates.io + PyPI | GitHub OIDC, crates.io, PyPI trust chains |

The `fuzz/` directory holds **seven** cargo-fuzz targets: `load_rank`,
`load_rankquant`, `load_bitmap`, `load_sign_bitmap` (deserialization);
Expand Down Expand Up @@ -161,7 +161,7 @@ to this kernel.

### 3.2 Risks

**THREAT-SIMD-001 (P1, mitigated this cycle; crate-wide rollout tracked):
**THREAT-SIMD-001 (P1, mitigated this cycle):
Unsafe-kernel invariant preservation under future refactors.**
`scan_b2_fastscan_avx512` safety depends on caller-established invariants —
`packed_fs.len() == n_blocks * pairs * 32` (formed via `checked_mul`, overflow
Expand All @@ -172,11 +172,13 @@ by construction. A future refactor calling the inner function directly could
bypass the asserts. *Mitigations:* the runtime asserts + the type wrapper are
the primary boundary; the scalar-vs-SIMD equivalence test
(`fastscan_b2_top10_matches_avx512_kernel`) guards behavior; and
**`#![deny(unsafe_op_in_unsafe_fn)]` is now enforced in `fastscan.rs`**, so
every unsafe operation in the kernel sits in an explicit `unsafe {}` block and
stays visible to future edits. *Open:* roll the lint out crate-wide to the
other SIMD modules (`bitmap.rs`, `sign_bitmap.rs`, `quant_kernels.rs`,
`util.rs` NEON) — tracked as a follow-up.
**`#![deny(unsafe_op_in_unsafe_fn)]` is now enforced crate-wide** (at the crate
root in `lib.rs`), so every unsafe operation in every SIMD kernel —
`fastscan.rs`, `bitmap.rs`, `sign_bitmap.rs`, `quant_kernels.rs`, and the
`util.rs` NEON popcount — sits in an explicit `unsafe {}` block and stays
visible to future edits. (The lone exception, `horizontal_sum_avx2`, is
register-only with no memory access, so its intrinsics are safe under the
`#[target_feature]` gate and an explicit block would be `unused_unsafe`.)

**THREAT-SIMD-002 (P4, deployment note): Microarchitectural side channels in
co-tenancy.** `ordvec` does not claim protection against microarchitectural
Expand Down Expand Up @@ -237,7 +239,7 @@ applications must validate paths before calling").

### 5.1 Existing controls (verified)

**Workflow code (all 12 workflows):** third-party actions pinned by commit
**Workflow code (all 13 workflows):** third-party actions pinned by commit
SHA; `persist-credentials: false` on every checkout; `permissions: contents:
read` default. **Release workflows** (`release-crate.yml`, `release-python.yml`)
are `workflow_dispatch`-only (no tag/push trigger), run a `require-ci-green`
Expand Down Expand Up @@ -270,17 +272,22 @@ passkeys on the maintainer account; recruiting a **second owner/maintainer**
deployment **wait timer** worthwhile (a second party able to cancel a bad
release during the window). See [`RELEASING.md`](RELEASING.md).

**THREAT-SUPPLY-002 (P3): Release immutability and tag integrity.** Published
artifacts are **immutable by registry design** — crates.io is yank-only (a
published version's bytes can never be overwritten) and PyPI burns a version on
delete (no different artifact may be re-uploaded under the same version). So
post-publish "silent replacement" of a version is not possible on either
registry, and consumers can verify artifacts against the SLSA / PEP 740
provenance above. *Residual (GitHub-side):* `changelog.yml` cuts tagged GitHub
Releases, but the repo currently has **no tag-protection ruleset and no `main`
ruleset**, so a tag could be force-moved or a release asset replaced.
*Mitigation:* add a `v*` **tag ruleset** (block update + deletion) and a basic
`main` ruleset; optionally enable GitHub immutable releases.
**THREAT-SUPPLY-002 (mitigated): Release immutability and tag integrity.**
Published artifacts are **immutable by registry design** — crates.io is
yank-only (a published version's bytes can never be overwritten) and PyPI burns
a version on delete (no different artifact may be re-uploaded under the same
version). So post-publish "silent replacement" of a version is not possible on
either registry, and consumers can verify artifacts against the SLSA / PEP 740
provenance above. The GitHub-side mutability surface is now closed too:
`changelog.yml` cuts tagged GitHub Releases, and **GitHub immutable releases is
enabled**, so a published release's `v*` tag cannot be force-moved or deleted
and its assets cannot be replaced after publication; the **`main` branch is
protected** (pull-request review required, force-pushes and deletions blocked)
and is the **only deployment branch** permitted for the `pypi` / `crates-io`
release environments. *Residual:* draft / non-release tags are not covered by
release immutability, and — as with the registries — these GitHub controls
ultimately trust the single maintainer account; that residual folds into
THREAT-SUPPLY-001.

**THREAT-SUPPLY-003 (P3): Typosquatting adjacent names.** Namespace-adjacent
crate/package names (`ord-vec`, `ordvecs`, `order-vec`) could be registered to
Expand Down Expand Up @@ -358,11 +365,15 @@ single-rate compute path, and (new) the FastScan kernel.
non-AVX-512 CI runners it exercises the scalar reference kernel; under Intel SDE
it exercises the AVX-512 kernel.

**THREAT-FUZZ-002 (P3): No CI-bound fuzzing for continuous regression.** Fuzzing
is run manually; there is no CI gate. A bounded weekly smoke job (e.g.
`-runs=50000` on `load_rank`, `load_rankquant`, and `fastscan_b2`) would catch
regressions between manual runs. (Low overhead; weighed against maintenance
budget.)
**THREAT-FUZZ-002 (mitigated this cycle): CI-bound fuzzing for continuous
regression.** A `fuzz.yml` workflow now runs a bounded smoke on every pull
request and push to `main` (`-max_total_time=60` over `load_rank`,
`load_rankquant`, and `fastscan_b2`) plus a weekly full sweep
(`-max_total_time=300` over all seven targets), so that a regression which
reintroduces a loader panic / OOM, breaks the write→load round-trip, or
destabilises the FastScan kernel is caught in CI rather than only at the next
manual campaign. cargo-fuzz is version-pinned and the actions are SHA-pinned,
matching the repo's scheduled-workflow hardening.

*Note on `load_sign_bitmap`:* all bit patterns are structurally valid for sign
bitmaps (no per-row invariant), so that target is correctly scoped to parser
Expand All @@ -386,16 +397,16 @@ blast radius of a compromised dependency separately.

| ID | Category | Owner | Description | Likelihood | Impact | Status / priority |
|---|---|---|---|---|---|---|
| THREAT-SIMD-001 | Memory safety | Library | Unsafe-kernel invariant bypass on refactor | Medium | High | **P1** — lint enforced in `fastscan.rs`; crate-wide rollout tracked |
| THREAT-SIMD-001 | Memory safety | Library | Unsafe-kernel invariant bypass on refactor | Medium | High | **Mitigated** — `unsafe_op_in_unsafe_fn` denied crate-wide + type wrapper + equivalence test |
| THREAT-FFI-001 | FFI | Binding | Concurrent input mutation during released-GIL call | Medium | Medium | **P2** — documented contract |
| THREAT-FFI-002 | FFI | Binding | Unsanitized path forwarding | Medium | Medium | **P2** — documented contract |
| THREAT-SUPPLY-001 | Supply chain | Config | Release config / single-owner | Low | Critical | **Mitigated** (reviewer + main-only); residual = account compromise / 2nd owner |
| THREAT-SUPPLY-002 | Supply chain | Config | Release immutability / tag integrity | Low | High | **P3** — registries immutable; add tag ruleset |
| THREAT-SUPPLY-002 | Supply chain | Config | Release immutability / tag integrity | Low | High | **Mitigated** — registries immutable; GitHub immutable releases on + `main` protected |
| THREAT-SUPPLY-003 | Supply chain | Config | Typosquatting adjacent names | Medium | Medium | P3 |
| THREAT-QUERY-001 | Resource | Deployment | Batch / `k` exhaustion in serving | Medium | Medium | **P2** — deployment docs |
| THREAT-QUERY-002 | Resource | Deployment | Panic on contract violation (Rust servers) | Low | Medium | P3 |
| THREAT-FUZZ-001 | Fuzzing | Library | FastScan path unfuzzed | Medium | High | **Closed** (`fastscan_b2` added) |
| THREAT-FUZZ-002 | Fuzzing | Library | No CI-bound fuzzing | Medium | Medium | P3 |
| THREAT-FUZZ-002 | Fuzzing | Library | No CI-bound fuzzing | Medium | Medium | **Mitigated** — `fuzz.yml` PR smoke + weekly sweep |
| THREAT-DESER-001 | Deserialization | Library | TOCTOU on shared mounts | Very Low | Low | P4 |
| THREAT-DESER-002 | Provenance | Deployment | Malicious-but-valid index | Medium | High | P3 (docs — `INDEX_PROVENANCE.md`) |
| THREAT-CICD-001 | CI/CD | Library | Workflow injection via PR metadata | Low | High | P3 — mitigated by `zizmor` |
Expand All @@ -409,19 +420,18 @@ blast radius of a compromised dependency separately.

## 11. Open mitigations

**Done this cycle:** `#![deny(unsafe_op_in_unsafe_fn)]` in `fastscan.rs`
(SIMD-001); `fastscan_b2` fuzz target (FUZZ-001); release-environment reviewers
+ main-only deployment (SUPPLY-001); [`docs/INDEX_PROVENANCE.md`](docs/INDEX_PROVENANCE.md)
(DESER-002); [`RELEASING.md`](RELEASING.md) (SUPPLY-001).
**Done this cycle:** `#![deny(unsafe_op_in_unsafe_fn)]` enforced **crate-wide**
across all SIMD modules (SIMD-001); the `fastscan_b2` fuzz target (FUZZ-001)
plus a CI `fuzz.yml` — PR smoke + weekly sweep (FUZZ-002); the `rank_to_bucket`
primitive made fail-loud (`rank < d`) to match the rest of the bucket API, with
matching binding guards; release-environment reviewers + main-only deployment
(SUPPLY-001); **GitHub immutable releases enabled + `main` branch protection**
(SUPPLY-002); [`docs/INDEX_PROVENANCE.md`](docs/INDEX_PROVENANCE.md) (DESER-002);
[`RELEASING.md`](RELEASING.md) (SUPPLY-001).

**Open, low cost:**

1. Add a `v*` tag-protection ruleset (+ basic `main` ruleset) and optionally
enable GitHub immutable releases (THREAT-SUPPLY-002).
2. Roll `#![deny(unsafe_op_in_unsafe_fn)]` out crate-wide across the remaining
SIMD modules (THREAT-SIMD-001).
3. Add a bounded weekly CI fuzz smoke job (THREAT-FUZZ-002).
4. Document recommended `nq` / `k` / corpus bounds for single-process serving
1. Document recommended `nq` / `k` / corpus bounds for single-process serving
in the Rust and Python API docs (THREAT-QUERY-001).

**Later (not release blockers):** a second maintainer/owner (then a release
Expand Down
27 changes: 24 additions & 3 deletions ordvec-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1084,6 +1084,14 @@ fn rank_to_bucket(rank: u16, d: usize, bits: u8) -> PyResult<u8> {
if d == 0 {
return Err(pyo3::exceptions::PyValueError::new_err("d must be > 0"));
}
// The core `rank_to_bucket` now asserts `rank < d` (fail-loud, matching the
// other bucket primitives); surface that as a clean `ValueError` rather
// than letting the assert escape as a `PanicException`.
if rank as usize >= d {
return Err(pyo3::exceptions::PyValueError::new_err(format!(
"rank ({rank}) must be < d ({d})"
)));
}
Ok(ordvec_core::rank::rank_to_bucket(rank, d, bits))
}

Expand All @@ -1110,6 +1118,17 @@ fn bucket_ranks<'py>(
if slice.is_empty() {
return Ok(Vec::<u8>::new().into_pyarray(py));
}
// `bucket_ranks` treats the input as a rank vector: each entry indexes into
// `[0, len)`, and the core `rank_to_bucket` now asserts `rank < len`. Reject
// an out-of-range entry here with a clean `ValueError` rather than letting
// that assert surface as a `PanicException`. A valid rank vector (a
// permutation of `[0, len)`) never trips this.
let d = slice.len();
if let Some(&bad) = slice.iter().find(|&&r| r as usize >= d) {
return Err(pyo3::exceptions::PyValueError::new_err(format!(
"rank ({bad}) must be < d ({d})"
)));
}
Ok(ordvec_core::rank::bucket_ranks(slice, bits).into_pyarray(py))
}

Expand All @@ -1135,9 +1154,11 @@ fn pack_buckets<'py>(
slice.len()
)));
}
// Reject out-of-range bucket codes rather than silently masking them: the
// core packs `b & ((1 << bits) - 1)`, so a value with high bits set would be
// truncated to a different bucket. The bucket alphabet is [0, 1 << bits).
// Reject out-of-range bucket codes here so the caller gets a clean
// `ValueError`: the core `pack_buckets` now *asserts* every code is in
// `[0, 1 << bits)` (it fails loud rather than masking), so an unchecked
// out-of-range value would otherwise escape as a `PanicException`. The
// bucket alphabet is [0, 1 << bits).
let max_code = (1u16 << bits) - 1;
if let Some(&bad) = slice.iter().find(|&&b| b as u16 > max_code) {
return Err(pyo3::exceptions::PyValueError::new_err(format!(
Expand Down
Loading
Loading