4 changes: 2 additions & 2 deletions docs/modules/gigamap/pages/indexing/jvector/advanced.adoc
@@ -102,9 +102,9 @@ The FusedPQ implementation requires exactly `maxDegree=32`. This is a fixed cons

When PQ compression is active, searches use a **two-phase approach** to balance speed and accuracy:

**Phase 1 -- Approximate candidate retrieval:** The HNSW graph is traversed using the FusedPQ-compressed vectors for fast approximate distance computation. This phase fetches `2 * k` candidates (twice the requested result count) to ensure the true top-k results are captured despite the approximation error introduced by quantization.
**Phase 1 -- Approximate candidate retrieval:** The HNSW graph is traversed using the FusedPQ-compressed vectors for fast approximate distance computation. This phase fetches `max(2 * k, minSearchBeamWidth)` candidates — at least twice the requested result count to absorb quantization error, and never less than the configured search beam width (see xref:indexing/jvector/configuration.adoc#_basic_hnsw_parameters[`minSearchBeamWidth`]) so the top-k stays stable across different `k` values. The per-query `search(query, k, searchBeamWidth)` overload raises this floor for a single call when higher recall is needed.

**Phase 2 -- Exact reranking:** The `2 * k` approximate candidates are then re-scored using the full-precision inline vectors stored in the graph file. The exact scores are sorted and the best _k_ results are returned.
**Phase 2 -- Exact reranking:** The approximate candidates are then re-scored using the full-precision inline vectors stored in the graph file. The exact scores are sorted and the best _k_ results are returned.

This two-phase approach achieves nearly the same recall as an uncompressed search while benefiting from the speed and memory advantages of PQ during graph traversal.
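The two-phase flow above can be sketched in isolation. This is a minimal standalone sketch, not jvector's actual API: `candidateCount` mirrors the documented `max(2 * k, minSearchBeamWidth)` rule, and `rerank` stands in for Phase 2, where hypothetical exact scores replace the approximate PQ scores before the best _k_ are kept.

```java
import java.util.Arrays;

// Sketch of the two-phase PQ search. The score arrays are hypothetical
// stand-ins: Phase 1 would produce them from FusedPQ distances, Phase 2
// re-scores with the full-precision inline vectors.
public class TwoPhaseSketch
{
    // Phase 1 floor: at least twice k, never below the configured beam width.
    static int candidateCount(final int k, final int minSearchBeamWidth)
    {
        return Math.max(2 * k, minSearchBeamWidth);
    }

    // Phase 2: sort candidates by their exact scores (descending), keep the best k.
    static int[] rerank(final int[] candidates, final double[] exactScores, final int k)
    {
        final Integer[] order = new Integer[candidates.length];
        for(int i = 0; i < order.length; i++) order[i] = i;
        Arrays.sort(order, (a, b) -> Double.compare(exactScores[b], exactScores[a]));
        final int[] topK = new int[Math.min(k, candidates.length)];
        for(int i = 0; i < topK.length; i++) topK[i] = candidates[order[i]];
        return topK;
    }

    public static void main(final String[] args)
    {
        System.out.println(candidateCount(10, 100)); // floor wins: 100
        System.out.println(candidateCount(80, 100)); // 2*k wins: 160
    }
}
```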

@@ -95,7 +95,11 @@ NOTE: The `jdk.incubator.vector` module is an incubator feature in Java 17-21. S

|`beamWidth`
|100
|Search beam width during index construction. Higher values improve recall during construction.
|Beam width during index construction (HNSW _efConstruction_). Higher values improve graph quality but slow down construction. Has no effect at query time.

|`minSearchBeamWidth`
|100
|Minimum beam width during search (HNSW _efSearch_ floor). The effective beam width is `max(k, minSearchBeamWidth)`. Keeps the top-k stable across different `k` values; set to `1` to disable the floor and make the beam width equal to the requested `k`. Independent of `beamWidth`. Can be overridden per query via `search(query, k, searchBeamWidth)`.

|`neighborOverflow`
|1.2
13 changes: 13 additions & 0 deletions docs/modules/gigamap/pages/indexing/jvector/index.adoc
@@ -187,6 +187,19 @@ List<Document> topDocs = result.stream()
.toList();
----

=== Tuning search effort per query

Every call uses a minimum beam width (HNSW _efSearch_) configured via `minSearchBeamWidth` (default 100). The effective beam width is `max(k, minSearchBeamWidth)` and keeps the top-k stable regardless of the requested `k`. A per-query overload lets you override this floor for a single call — useful to widen exploration for higher recall, or narrow it for lower latency when reproducibility across different `k` values is not required.

[source, java]
----
// Widen exploration for this query (higher recall, higher latency)
VectorSearchResult<Document> highRecall = index.search(queryVector, 10, 500);

// Narrow exploration for this query (lower latency, may differ from higher-k results)
VectorSearchResult<Document> fast = index.search(queryVector, 10, 10);
----
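To see why the floor keeps results stable across `k` values, the effective-beam rule can be computed in isolation. This is a sketch of the documented `max(k, minSearchBeamWidth)` formula, not the library's internals:

```java
// Sketch of the effective beam width rule: max(k, minSearchBeamWidth).
// With the default floor of 100, queries for k=5 and k=50 traverse the same
// candidate pool, so the top-5 of the k=50 result matches the k=5 result.
public class EffectiveBeamWidth
{
    static int effective(final int k, final int minSearchBeamWidth)
    {
        return Math.max(k, minSearchBeamWidth);
    }

    public static void main(final String[] args)
    {
        System.out.println(effective(5, 100));   // 100: floor applies
        System.out.println(effective(50, 100));  // 100: same pool as k=5
        System.out.println(effective(200, 100)); // 200: k exceeds the floor
        System.out.println(effective(5, 1));     // 5: floor disabled, beam == k
    }
}
```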

== Similarity Functions

The following similarity functions are available:
@@ -84,6 +84,7 @@ interface PQCompressionManager
*
* @param query the query vector
* @param k the number of results to return
* @param rerankK minimum beam width (search effort) for the HNSW search
* @param searcher the graph searcher to use
* @param ravv random access vector values for exact reranking
* @param similarityFunction the similarity function to use
@@ -92,6 +93,7 @@ interface PQCompressionManager
public SearchResult searchWithRerank(
VectorFloat<?> query ,
int k ,
int rerankK ,
GraphSearcher searcher ,
RandomAccessVectorValues ravv ,
VectorSimilarityFunction similarityFunction
@@ -238,13 +240,14 @@ private void trainPQ()
public SearchResult searchWithRerank(
final VectorFloat<?> query ,
final int k ,
final int rerankK ,
final GraphSearcher searcher ,
final RandomAccessVectorValues ravv ,
final VectorSimilarityFunction similarityFunction
)
{
// Search with PQ for approximate results (fetch more candidates for reranking)
final int candidateCount = k * PQ_RERANK_MULTIPLIER;
final int candidateCount = Math.max(k * PQ_RERANK_MULTIPLIER, rerankK);

// Build the score provider for the graph search; the candidates it yields are reranked below with exact vectors
final SearchScoreProvider ssp = DefaultSearchScoreProvider.exact(
@@ -253,7 +256,7 @@ public SearchResult searchWithRerank(
ravv
);

final SearchResult result = searcher.search(ssp, candidateCount, Bits.ALL);
final SearchResult result = searcher.search(ssp, candidateCount, candidateCount, 0f, 0f, Bits.ALL);

// Rerank with exact vectors to get the best k
final List<NodeScoreEntry> reranked = new ArrayList<>();
@@ -46,6 +46,8 @@
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.IntStream;

import static org.eclipse.serializer.math.XMath.positive;

/**
* A vector index that enables k-nearest-neighbor (k-NN) similarity search on entities.
* <p>
@@ -366,6 +368,31 @@ public default boolean isSuitableAsUniqueConstraint()
*/
public VectorSearchResult<E> search(float[] queryVector, int k);

/**
* Searches for the k nearest neighbors with an explicit per-query search beam width
* (HNSW <i>efSearch</i>).
* <p>
* This overload overrides the configured floor from
* {@link VectorIndexConfiguration#minSearchBeamWidth()} for a single call. The effective
* beam width is {@code max(k, searchBeamWidth)} because jvector requires the beam width
* to be at least as large as the requested {@code k}.
* <p>
* Use this to widen exploration (e.g. {@code searchBeamWidth=500} for higher recall) or
* to narrow it (e.g. {@code searchBeamWidth=k} for minimum latency when reproducibility
* across different {@code k} values is not required).
*
* @param queryVector the query vector; must have exactly
* {@link VectorIndexConfiguration#dimension()} elements
* @param k the number of nearest neighbors to return; must be positive
* @param searchBeamWidth the beam width to use for this query; must be positive
* @return the search result
* @throws IllegalArgumentException if queryVector is null, has wrong dimension, or
* {@code k} / {@code searchBeamWidth} are not positive
* @see #search(float[], int)
* @see VectorIndexConfiguration#minSearchBeamWidth()
*/
public VectorSearchResult<E> search(float[] queryVector, int k, int searchBeamWidth);

/**
* Searches for the k nearest neighbors to the given entity's vector.
* <p>
@@ -405,6 +432,21 @@ public default VectorSearchResult<E> search(final E queryEntity, final int k)
return this.search(this.vectorizer().vectorize(queryEntity), k);
}

/**
* Searches for the k nearest neighbors to the given entity's vector with an explicit
* per-query search beam width.
*
* @param queryEntity the query entity whose vector will be extracted via the vectorizer
* @param k the number of nearest neighbors to return; must be positive
* @param searchBeamWidth the beam width to use for this query; must be positive
* @return the search result
* @see #search(float[], int, int)
*/
public default VectorSearchResult<E> search(final E queryEntity, final int k, final int searchBeamWidth)
{
return this.search(this.vectorizer().vectorize(queryEntity), k, searchBeamWidth);
}

/**
* Performs cleanup and optimization of the index graph structure.
* <p>
@@ -1520,6 +1562,17 @@ public float[] getVector(final long entityId)

@Override
public VectorSearchResult<E> search(final float[] queryVector, final int k)
{
return this.doSearch(queryVector, k, this.computeRerankK(k));
}

@Override
public VectorSearchResult<E> search(final float[] queryVector, final int k, final int searchBeamWidth)
{
return this.doSearch(queryVector, k, Math.max(k, positive(searchBeamWidth)));
}

private VectorSearchResult<E> doSearch(final float[] queryVector, final int k, final int rerankK)
{
this.validateDimension(queryVector);

@@ -1538,15 +1591,15 @@ public VectorSearchResult<E> search(final float[] queryVector, final int k)
final SearchResult result;
if (this.incrementalMode)
{
result = this.searchIncremental(query, k);
result = this.searchIncremental(query, k, rerankK);
}
else if (this.diskManager != null && this.diskManager.isLoaded() && this.diskManager.getDiskIndex() != null)
{
result = this.searchDiskIndex(query, k);
result = this.searchDiskIndex(query, k, rerankK);
}
else
{
result = this.searchInMemoryIndex(query, k);
result = this.searchInMemoryIndex(query, k, rerankK);
}

return this.convertSearchResult(result);
@@ -1557,10 +1610,20 @@ else if (this.diskManager != null && this.diskManager.isLoaded() && this.diskMan
}
}

/**
* Computes the search beam width (rerankK), ensuring a minimum exploration effort
* regardless of how small k is. This prevents the HNSW search from returning
* different top-k results depending on the requested k value.
*/
private int computeRerankK(final int k)
{
return Math.max(k, this.configuration.minSearchBeamWidth());
}

/**
* Searches the in-memory index using a pooled GraphSearcher.
*/
private SearchResult searchInMemoryIndex(final VectorFloat<?> query, final int k)
private SearchResult searchInMemoryIndex(final VectorFloat<?> query, final int k, final int rerankK)
{
final SearchScoreProvider scoreProvider = DefaultSearchScoreProvider.exact(
query,
@@ -1578,13 +1641,13 @@ private SearchResult searchInMemoryIndex(final VectorFloat<?> query, final int k
searcher.setView(view);
}
final Bits acceptBits = view != null ? view.liveNodes() : Bits.ALL;
return searcher.search(scoreProvider, k, acceptBits);
return searcher.search(scoreProvider, k, rerankK, 0f, 0f, acceptBits);
}

/**
* Searches the on-disk index using a pooled GraphSearcher, with optional PQ-based approximate search and reranking.
*/
private SearchResult searchDiskIndex(final VectorFloat<?> query, final int k)
private SearchResult searchDiskIndex(final VectorFloat<?> query, final int k, final int rerankK)
{
// If PQ is available, use compressed scoring with reranking
if(this.pqManager != null && this.pqManager.isTrained() && this.pqManager.getCompressedVectors() != null)
@@ -1593,6 +1656,7 @@ private SearchResult searchDiskIndex(final VectorFloat<?> query, final int k)
return this.pqManager.searchWithRerank(
query,
k,
rerankK,
searcher,
this.createCachingVectorValues(),
this.jvectorSimilarityFunction()
@@ -1607,14 +1671,14 @@ private SearchResult searchDiskIndex(final VectorFloat<?> query, final int k)
);

final GraphSearcher searcher = this.inMemorySearcherPool.get();
return searcher.search(scoreProvider, k, Bits.ALL);
return searcher.search(scoreProvider, k, rerankK, 0f, 0f, Bits.ALL);
}

/**
* Searches in incremental mode: queries both the disk graph (for existing data)
* and the in-memory builder graph (for new mutations), then merges results.
*/
private SearchResult searchIncremental(final VectorFloat<?> query, final int k)
private SearchResult searchIncremental(final VectorFloat<?> query, final int k, final int rerankK)
{
final SearchScoreProvider scoreProvider = DefaultSearchScoreProvider.exact(
query,
@@ -1623,36 +1687,41 @@ private SearchResult searchIncremental(final VectorFloat<?> query, final int k)
);

// 1. Search disk graph (excluding deleted/updated ordinals)
// Use rerankK as topK to give the merge a richer candidate pool
SearchResult diskResult = null;
if(this.diskSearcherPool != null)
{
final GraphSearcher diskSearcher = this.diskSearcherPool.get();
final Bits acceptBits = this.createDiskAcceptBits();
diskResult = diskSearcher.search(scoreProvider, k, acceptBits);
diskResult = diskSearcher.search(scoreProvider, rerankK, rerankK, 0f, 0f, acceptBits);
}

// 2. Search in-memory graph (new mutations only)
SearchResult memResult = null;
if(this.inMemorySearcherPool != null && this.index != null && this.index.size(0) > 0)
{
final GraphSearcher memSearcher = this.inMemorySearcherPool.get();
// Refresh view so the searcher sees nodes added since pool initialization
memSearcher.setView(this.index.getView());
memResult = memSearcher.search(scoreProvider, k, this.index.getView().liveNodes());
// Capture the view once so setView(...) and liveNodes() agree on the same
// snapshot (ConcurrentGraphIndexView uses snapshot isolation — two separate
// getView() calls could return different snapshots).
final var view = this.index.getView();
memSearcher.setView(view);
memResult = memSearcher.search(scoreProvider, rerankK, rerankK, 0f, 0f, view.liveNodes());
}

// 3. Merge results
// 3. Merge results — truncate single-source results to k since sub-graphs
// over-fetch to provide the merge with a richer candidate pool
if(diskResult == null && memResult == null)
{
return new SearchResult(new SearchResult.NodeScore[0], 0, 0, 0, 0, 0f);
}
if(diskResult == null)
{
return memResult;
return this.truncateResult(memResult, k);
}
if(memResult == null)
{
return diskResult;
return this.truncateResult(diskResult, k);
}

return this.mergeSearchResults(diskResult, memResult, k);
@@ -1707,6 +1776,23 @@ private Bits createDiskAcceptBits()
return i -> i < 0 || i >= deletedMask.length || !deletedMask[i];
}

/**
* Truncates a SearchResult to at most k entries. Used when a single sub-graph
* provided all results and the over-fetched candidate pool needs trimming.
*/
private SearchResult truncateResult(final SearchResult result, final int k)
{
final SearchResult.NodeScore[] nodes = result.getNodes();
if(nodes.length <= k)
{
return result;
}
return new SearchResult(
Arrays.copyOf(nodes, k),
result.getVisitedCount(), 0, 0, 0, 0f
);
}

/**
* Merges two SearchResults: combines nodes, deduplicates by ordinal
* (keeping higher score), sorts by score descending, and takes top-k.
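The merge just described can be sketched independently of jvector's types. This is an illustrative stand-in: plain `long[]` ordinal and `double[]` score arrays replace `SearchResult.NodeScore` entries, which is an assumption for the sketch, not the actual signature.

```java
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

// Sketch of the merge: deduplicate by ordinal keeping the higher score,
// sort by score descending, take the top k.
public class MergeSketch
{
    static long[] merge(final long[] ordA, final double[] scoreA,
                        final long[] ordB, final double[] scoreB, final int k)
    {
        // Deduplicate: for an ordinal present in both results, keep the higher score.
        final Map<Long, Double> best = new HashMap<>();
        for(int i = 0; i < ordA.length; i++) best.merge(ordA[i], scoreA[i], Math::max);
        for(int i = 0; i < ordB.length; i++) best.merge(ordB[i], scoreB[i], Math::max);

        // Sort by score descending and keep the top k ordinals.
        return best.entrySet().stream()
            .sorted((a, b) -> Double.compare(b.getValue(), a.getValue()))
            .limit(k)
            .mapToLong(Map.Entry::getKey)
            .toArray();
    }

    public static void main(final String[] args)
    {
        // Disk graph found ordinals 1 and 2; memory graph found 2 (better score) and 3.
        final long[] top = merge(new long[]{1, 2}, new double[]{0.9, 0.5},
                                 new long[]{2, 3}, new double[]{0.8, 0.7}, 2);
        System.out.println(Arrays.toString(top)); // [1, 2]
    }
}
```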