Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Security

- **Cleared OSV / OpenSSF-Scorecard advisories on the dev-only BEIR benchmark
tooling** (introduced with the benchmark harness; none reach the published
`ordvec` crate or the `ordvec` PyPI wheel). The `benchmarks/beir/requirements.txt`
deps were unpinned, so OSV flagged each against its full historical CVE list;
they are now lower-bound-pinned at the first patched release (`requests>=2.32.4`,
`hnswlib>=0.8.0`, `numpy>=1.26`, plus safe floors for the rest). `bincode` 1.x
(RUSTSEC-2025-0141, *unmaintained* — not a vulnerability) enters only
transitively via `hnsw_rs` in `benchmarks/beir-bench` and is absent from
`cargo tree -p ordvec`; it is triaged with a documented `deny.toml` ignore.

### Added

- **Reproducible BEIR benchmark harness** (`make benchmark-beir`; dev-only,
Expand Down
34 changes: 20 additions & 14 deletions benchmarks/beir/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,36 @@
# sdist, which fails to build on modern gcc. The BEIR dataset loader is vendored
# in beir_prepare.py and evaluation uses the prebuilt `pytrec-eval-terrier`
# wheel instead.
#
# Versions are LOWER-BOUND-PINNED at the first patched release for every package
# with a known advisory, so this dev-only harness stays clean under OSV /
# OpenSSF-Scorecard scanning (an UNPINNED dep is flagged against the package's
# entire historical CVE list). A `>=` floor still lets pip resolve to the newest
# compatible wheel (including builds for recent CPython releases) while excluding
# the releases affected by those advisories.

# --- core ---
numpy
scipy
requests
tqdm
pandas
tabulate
numpy>=1.26.0
scipy>=1.11.0
requests>=2.32.4 # GHSA-9hjg-9r4m-mvj7 (.netrc leak) + all older requests CVEs
tqdm>=4.66.3 # CVE-2024-34062
pandas>=2.2.0
tabulate>=0.9.0

# --- model download for the canonical llamacpp lane ---
huggingface-hub
huggingface-hub>=0.24.0

# --- retrieval baselines (comparison references, NOT ground truth) ---
faiss-cpu
hnswlib
faiss-cpu>=1.8.0
hnswlib>=0.8.0 # GHSA-xwc8-rf6m-xr86 (double free)

# --- evaluation: trec_eval bindings (prebuilt wheel, no C compile) ---
pytrec-eval-terrier
pytrec-eval-terrier>=0.5.6

# --- README benchmark graphics ---
matplotlib
matplotlib>=3.8.0

# --- optional: sentence-transformers lane (`--provider st`) ---
# Heavy (pulls torch). Uncomment to enable the fp32 ST encoder lane:
# sentence-transformers
# torch
# transformers
# sentence-transformers>=3.0.0
# torch>=2.2.0
# transformers>=4.44.0
12 changes: 9 additions & 3 deletions deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,15 @@ all-features = true

[advisories]
# Default behaviour: error on any RUSTSEC advisory (vulnerability) or
# unmaintained crate in the tree. No advisories are currently ignored; add
# entries with an explicit `reason` only when triaged.
ignore = []
# unmaintained crate in the tree. Each triaged ignore is justified in the comment below.
#
# RUSTSEC-2025-0141 — bincode 1.x is UNMAINTAINED (an informational advisory,
# NOT a vulnerability). It enters the graph only transitively via `hnsw_rs`,
# itself a dependency of the dev-only `benchmarks/beir-bench` harness. It is NOT
# in the published `ordvec` crate (`cargo tree -p ordvec` is clean of bincode),
# so it does not reach any shipped artifact or crate consumer. Revisit if a
# maintained HNSW crate that does not pull bincode 1.x is adopted.
ignore = ["RUSTSEC-2025-0141"]

[licenses]
# Allow-list only. cargo-deny denies any license not listed here.
Expand Down
Loading