From f5324d69ed13e4146f464e11e45c3f753e87308a Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Sat, 30 May 2026 17:12:49 -0500 Subject: [PATCH 1/2] docs/tests: pin concurrency contract across Python, C, and Go Signed-off-by: Nelson Spence --- README.md | 15 ++++++ docs/c-api.md | 12 +++-- ordvec-go/doc.go | 9 ++++ ordvec-go/ordvec_test.go | 64 +++++++++++++++++++++++++ ordvec-python/python/ordvec/__init__.py | 16 ++++--- ordvec-python/tests/test_rank_quant.py | 21 ++++++++ 6 files changed, 126 insertions(+), 11 deletions(-) create mode 100644 ordvec-go/doc.go diff --git a/README.md b/README.md index bf1ff1ef..b85e2bd6 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,21 @@ Wheels target CPython 3.10+ (abi3); to build from source instead, see [`ordvec-python/`](https://github.com/Fieldnote-Echo/ordvec/tree/main/ordvec-python). The runtime dependency floor is `numpy>=2.2`. +### Threading / concurrency + +`ordvec` supports concurrent read-only/search use. Mutation is exclusive. + +Python search, candidate-generation, and scoring methods release the GIL and +read NumPy inputs in place. Callers must not mutate query, corpus, candidate, +or scoring input arrays passed to those methods until the call returns. + +The C ABI allows concurrent search and info calls on one loaded handle. +`ordvec_index_free` must not race with any other call on the same handle. + +The Go wrapper serializes `Close` against `Search` and `Info`; after `Close`, +`Search` and `Info` return `ErrClosed`. Callers must not mutate query or +candidate slices passed to `Search` until the call returns. + ## Documentation - **Design deep-dive & reproducible benchmark tables:** diff --git a/docs/c-api.md b/docs/c-api.md index ac26b043..568f0d45 100644 --- a/docs/c-api.md +++ b/docs/c-api.md @@ -167,10 +167,14 @@ the candidate entry count, including duplicates. `prepare_ns`, `score_ns`, ## Threading -Concurrent searches and info calls on one loaded handle are allowed. -`ordvec_index_free` must not race with any other call on the same handle. -`ordvec_index_free(NULL)` is a no-op. Use after free and double free are -undefined behavior. +Concurrent `ordvec_index_search` and `ordvec_index_info` calls on one loaded +handle are allowed. Search borrows caller-owned query and candidate buffers in +place for the duration of the call; callers must not mutate those buffers until +the call returns. + +Mutation is exclusive. `ordvec_index_free` must not race with any other call on +the same handle. `ordvec_index_free(NULL)` is a no-op. Use after free and +double free are undefined behavior. ## V1 Exclusions diff --git a/ordvec-go/doc.go b/ordvec-go/doc.go new file mode 100644 index 00000000..fb79eff3 --- /dev/null +++ b/ordvec-go/doc.go @@ -0,0 +1,9 @@ +// Package ordvec provides a thin cgo wrapper over the ordvec C ABI. +// +// Index values support concurrent Search and Info calls. Close is serialized +// against Search and Info; after Close, both methods return ErrClosed. +// +// Search pins and passes caller-owned query and candidate slices to the C ABI +// without copying them. Callers must not mutate those slices until Search +// returns. +package ordvec diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go index 9c70d8c7..57e0cf99 100644 --- a/ordvec-go/ordvec_test.go +++ b/ordvec-go/ordvec_test.go @@ -3,10 +3,12 @@ package ordvec import ( "encoding/binary" "errors" + "fmt" "math" "os" "path/filepath" "strings" + "sync" "testing" ) @@ -214,3 +216,65 @@ func TestCloseIsIdempotentAndErrClosed(t *testing.T) { t.Fatalf("Search after Close should return ErrClosed, got %v", err) } } + +func TestConcurrentSearchInfoAndClose(t *testing.T) { + idx, err := Load(writeRankQuantFixture(t)) + if err != nil { + t.Fatal(err) + } + + const workers = 8 + const iterations = 64 + + start := make(chan struct{}) + errCh := make(chan error, workers*iterations) + var wg sync.WaitGroup + + run := func(name string, fn func() error) { + defer wg.Done() + defer func() { + if r := recover(); r != nil { + errCh <- fmt.Errorf("%s panic: %v", name, r) + } + }() + <-start + for i := 0; i < iterations; i++ { + err := fn() + if err != nil && !errors.Is(err, ErrClosed) { + errCh <- fmt.Errorf("%s returned unexpected error: %w", name, err) + } + } + } + + query := query16() + for i := 0; i < workers/2; i++ { + wg.Add(1) + go run("Search", func() error { + _, _, err := idx.Search(query, 2, nil) + return err + }) + wg.Add(1) + go run("Info", func() error { + _, err := idx.Info() + return err + }) + } + + close(start) + if err := idx.Close(); err != nil { + t.Errorf("Close returned unexpected error: %v", err) + } + wg.Wait() + close(errCh) + + for err := range errCh { + t.Error(err) + } + + if _, err := idx.Info(); !errors.Is(err, ErrClosed) { + t.Fatalf("Info after Close should return ErrClosed, got %v", err) + } + if _, _, err := idx.Search(query16(), 1, nil); !errors.Is(err, ErrClosed) { + t.Fatalf("Search after Close should return ErrClosed, got %v", err) + } +} diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index bc9adb95..9d96982f 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -33,13 +33,15 @@ caller-supplied data, validate or sandbox the path first, exactly as you would before a bare ``open()``. -Threading: ``search`` / ``search_asymmetric`` / ``add`` and the dense scoring / -candidate generator methods release the GIL during the heavy Rust scan, so other -Python threads run concurrently. The input arrays are *read in place* (not -copied) for that window — do not mutate an array from another thread while a -call that received it is in progress, or the scan races the write and may return -inconsistent results. This is the standard contract for GIL-releasing numeric -extensions (NumPy itself behaves this way). +Threading: the contract is read-concurrent, mutation-exclusive. ``search`` / +``search_asymmetric`` and the dense scoring / candidate generator methods +release the GIL during the heavy Rust scan, so other Python threads run +concurrently. ``add`` also releases the GIL while mutating an index, but mutable +index operations must be treated as exclusive. The input arrays are *read in +place* (not copied) for that window — do not mutate an array from another thread +while a call that received it is in progress, or the scan races the write and +may return inconsistent results. This is the standard contract for +GIL-releasing numeric extensions (NumPy itself behaves this way). """ from ._ordvec import ( diff --git a/ordvec-python/tests/test_rank_quant.py b/ordvec-python/tests/test_rank_quant.py index 24d0bd96..21bf8305 100644 --- a/ordvec-python/tests/test_rank_quant.py +++ b/ordvec-python/tests/test_rank_quant.py @@ -9,6 +9,8 @@ """ from __future__ import annotations +from concurrent.futures import ThreadPoolExecutor + import numpy as np import pytest @@ -113,6 +115,25 @@ def test_search_asymmetric_shape(bits): assert indices.shape == (3, 10) +def test_search_asymmetric_read_concurrent_results_match_baseline(): + corpus = np.ascontiguousarray(unit_vectors(64, 128, seed=101)) + queries = np.ascontiguousarray(unit_vectors(5, 128, seed=202)) + idx = RankQuant(dim=128, bits=2) + idx.add(corpus) + + baseline_scores, baseline_indices = idx.search_asymmetric(queries, k=8) + + def run_search(): + return idx.search_asymmetric(queries, k=8) + + with ThreadPoolExecutor(max_workers=4) as pool: + results = list(pool.map(lambda _: run_search(), range(40))) + + for scores, indices in results: + np.testing.assert_array_equal(indices, baseline_indices) + np.testing.assert_allclose(scores, baseline_scores, rtol=0, atol=0) + + @pytest.mark.parametrize("bits", [1, 2, 4]) def test_rankquant_eval_search_matches_rankquant_search(bits): vectors = unit_vectors(45, 128, seed=31 + bits) From 8cc9ae22ecd200187d8968396d7040dc681ee017 Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Wed, 3 Jun 2026 19:30:13 -0500 Subject: [PATCH 2/2] Address concurrency contract review findings Signed-off-by: Nelson Spence --- ordvec-go/ordvec_test.go | 35 ++++++++++++++++++------- ordvec-python/python/ordvec/__init__.py | 15 ++++++----- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go index 57e0cf99..4cdf2ee6 100644 --- a/ordvec-go/ordvec_test.go +++ b/ordvec-go/ordvec_test.go @@ -227,10 +227,14 @@ func TestConcurrentSearchInfoAndClose(t *testing.T) { const iterations = 64 start := make(chan struct{}) + searchReady := make(chan struct{}) + infoReady := make(chan struct{}) errCh := make(chan error, workers*iterations) + var searchReadyOnce sync.Once + var infoReadyOnce sync.Once var wg sync.WaitGroup - run := func(name string, fn func() error) { + run := func(name string, fn func() error, markReady func()) { defer wg.Done() defer func() { if r := recover(); r != nil { @@ -240,6 +244,9 @@ func TestConcurrentSearchInfoAndClose(t *testing.T) { <-start for i := 0; i < iterations; i++ { err := fn() + if err == nil { + markReady() + } if err != nil && !errors.Is(err, ErrClosed) { errCh <- fmt.Errorf("%s returned unexpected error: %w", name, err) } @@ -249,18 +256,28 @@ func TestConcurrentSearchInfoAndClose(t *testing.T) { query := query16() for i := 0; i < workers/2; i++ { wg.Add(1) - go run("Search", func() error { - _, _, err := idx.Search(query, 2, nil) - return err - }) + go run( + "Search", + func() error { + _, _, err := idx.Search(query, 2, nil) + return err + }, + func() { searchReadyOnce.Do(func() { close(searchReady) }) }, + ) wg.Add(1) - go run("Info", func() error { - _, err := idx.Info() - return err - }) + go run( + "Info", + func() error { + _, err := idx.Info() + return err + }, + func() { infoReadyOnce.Do(func() { close(infoReady) }) }, + ) } close(start) + <-searchReady + <-infoReady if err := idx.Close(); err != nil { t.Errorf("Close returned unexpected error: %v", err) } diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index 9d96982f..a29a6550 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -33,13 +33,14 @@ caller-supplied data, validate or sandbox the path first, exactly as you would before a bare ``open()``. -Threading: the contract is read-concurrent, mutation-exclusive. ``search`` / -``search_asymmetric`` and the dense scoring / candidate generator methods -release the GIL during the heavy Rust scan, so other Python threads run -concurrently. ``add`` also releases the GIL while mutating an index, but mutable -index operations must be treated as exclusive. The input arrays are *read in -place* (not copied) for that window — do not mutate an array from another thread -while a call that received it is in progress, or the scan races the write and +Threading: the contract is read-concurrent, mutation-exclusive. ``search``, +``search_asymmetric``, ``search_asymmetric_subset``, and the dense scoring / +candidate generator methods release the GIL during the heavy Rust scan, so +other Python threads run concurrently. ``add`` also releases the GIL while +mutating an index, but mutable index operations must be treated as exclusive. +The input arrays are *read in place* (not copied) for that window — do not +mutate an array from another thread while a call that received it is in +progress, including subset candidate arrays, or the scan races the write and may return inconsistent results. This is the standard contract for GIL-releasing numeric extensions (NumPy itself behaves this way). """