Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
75ceb6f
IVF-SQ
viclafargue Feb 27, 2026
cf19a86
add IVF-SQ bench constraints
viclafargue Mar 2, 2026
6a95e8a
Update default IVF-SQ benchmark config
viclafargue Mar 2, 2026
2d78609
Merge branch 'main' into ivf-sq
viclafargue Mar 11, 2026
83b8c63
Update postprocess_neighbors signature
viclafargue Mar 12, 2026
1050deb
update testing
viclafargue Mar 12, 2026
3a911d8
documentation
viclafargue Mar 13, 2026
b124628
memset in index constructor
viclafargue Mar 13, 2026
641c6ca
random sampling
viclafargue Mar 13, 2026
70ca00a
inplace residuals
viclafargue Mar 13, 2026
e7d660c
improved kernel layout for residuals computation
viclafargue Mar 13, 2026
96b28db
raft::device_vector
viclafargue Mar 13, 2026
206cb2e
drop adaptative_centers feature
viclafargue Mar 13, 2026
e34bdd8
Add IVF-SQ FAISS benchmark
viclafargue Mar 16, 2026
dcb8a59
Merge branch 'main' into ivf-sq
viclafargue Mar 16, 2026
9bd7bc0
Addressing review
viclafargue Mar 19, 2026
0ce1641
Addressing review
viclafargue Mar 20, 2026
3694e43
Merge branch 'main' into ivf-sq
cjnolet Mar 25, 2026
cbe2a7e
Merge branch 'main' into ivf-sq
viclafargue Apr 2, 2026
77c4a79
Fix issue with host data + half testing
viclafargue Apr 2, 2026
b46ea79
Update metric in doc
viclafargue Apr 2, 2026
44c5f0a
Fix manage_local_topk / Capacity mismatch in IVF-SQ search
viclafargue Apr 2, 2026
ef957f7
Add large-k tests for IVF-SQ materialized fallback path
viclafargue Apr 2, 2026
56ebfc9
Improve shared memory synchronization in IVF-SQ scan kernel
viclafargue Apr 2, 2026
15b2f15
IVF-SQ scan: reduce L2 global reads and refine fused top-k capacity s…
viclafargue Apr 2, 2026
3a3427f
Addressing review (tests updates)
viclafargue Apr 7, 2026
7a238d3
Merge branch 'main' into ivf-sq
viclafargue Apr 7, 2026
1b182d7
Swap IdxT for CodeT
viclafargue Apr 20, 2026
8c44557
addressing review
viclafargue Apr 20, 2026
e087e19
Merge branch 'main' into ivf-sq
viclafargue Apr 20, 2026
d8ada75
Merge branch 'main' into ivf-sq
viclafargue Apr 22, 2026
80a55fd
account for RAFT update
viclafargue Apr 22, 2026
ac8ea4e
IVF-SQ JIT-LTO
viclafargue Apr 27, 2026
3ba5e70
Merge branch 'main' into ivf-sq
viclafargue Apr 27, 2026
55a91bf
doc fix + build assert addition
viclafargue Apr 30, 2026
6d5ec72
Switching to raft::TxN_t
viclafargue May 4, 2026
1e638e5
Merge branch 'main' into ivf-sq
viclafargue May 4, 2026
6889624
Dropping the MetricTag template parameter
viclafargue May 5, 2026
df55c51
Inner product trick
viclafargue May 5, 2026
c5948a2
Fix + minor cleanups
viclafargue May 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,86 @@ if(NOT BUILD_CPU_ONLY)
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_flat/post_process"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# ---------------------------------------------------------------------------
# IVF-SQ JIT-LTO kernel fragments.
#
# Every generate_jit_lto_kernels() call below passes `jit_lto_files` as its
# first argument and pairs a matrix JSON file with a `.cu.in` kernel template;
# the `@placeholder@` tokens in NAME_FORMAT / FRAGMENT_TAG_FORMAT name the
# matrix dimension each fragment varies over.
# NOTE(review): generate_jit_lto_kernels() is defined outside this hunk;
# presumably it expands one kernel per matrix entry and appends the generated
# sources to `jit_lto_files` -- confirm against the helper's definition.
# ---------------------------------------------------------------------------

# Namespace prefix reused by the fragment tags of the IVF-SQ calls below.
set(ivf_sq_ns "cuvs::neighbors::ivf_sq::detail")
# Scan kernel fragments, varied over `@capacity@`.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_scan_capacity_@capacity@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/scan_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/scan_kernel.cu.in"
FRAGMENT_TAG_FORMAT "${ivf_sq_ns}::fragment_tag_ivf_sq_scan<@capacity@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
"<cuvs/detail/jit_lto/common_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/scan"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# "setup_invariant_smem" fragments, varied over `@metric_name@`.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_setup_invariant_smem_metric_@metric_name@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/setup_invariant_smem_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/setup_invariant_smem_kernel.cu.in"
FRAGMENT_TAG_FORMAT
"${ivf_sq_ns}::fragment_tag_setup_invariant_smem<${ivf_sq_ns}::tag_metric_@metric_name@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/setup_invariant_smem"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# "setup_per_probe_smem" fragments, varied over `@metric_name@`.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_setup_per_probe_smem_metric_@metric_name@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/setup_per_probe_smem_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/setup_per_probe_smem_kernel.cu.in"
FRAGMENT_TAG_FORMAT
"${ivf_sq_ns}::fragment_tag_setup_per_probe_smem<${ivf_sq_ns}::tag_metric_@metric_name@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/setup_per_probe_smem"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# "accumulate_distance" fragments, varied over `@metric_name@`.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_accumulate_distance_metric_@metric_name@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/accumulate_distance_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/accumulate_distance_kernel.cu.in"
FRAGMENT_TAG_FORMAT
"${ivf_sq_ns}::fragment_tag_accumulate_distance<${ivf_sq_ns}::tag_metric_@metric_name@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/accumulate_distance"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# "finalize_distance" fragments, varied over `@metric_name@`.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_finalize_distance_metric_@metric_name@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/finalize_distance_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/finalize_distance_kernel.cu.in"
FRAGMENT_TAG_FORMAT
"${ivf_sq_ns}::fragment_tag_finalize_distance<${ivf_sq_ns}::tag_metric_@metric_name@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/finalize_distance"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
# Filter fragments, varied over `@filter_name@`.
# NOTE(review): unlike the calls above, the inner tag here is qualified with
# `${neighbors_ns}`, which is not set anywhere in this hunk -- presumably it is
# defined earlier in this CMakeLists.txt; confirm it is in scope at this point.
generate_jit_lto_kernels(
jit_lto_files
NAME_FORMAT "ivf_sq_filter_@filter_name@"
MATRIX_JSON_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/filter_matrix.json"
KERNEL_INPUT_FILE
"${CMAKE_CURRENT_SOURCE_DIR}/src/neighbors/ivf_sq/detail/jit_lto_kernels/filter_kernel.cu.in"
FRAGMENT_TAG_FORMAT
"${ivf_sq_ns}::fragment_tag_ivf_sq_filter<${neighbors_ns}::tag_filter_@filter_name@>"
FRAGMENT_TAG_HEADER_FILES "<cuvs/detail/jit_lto/ivf_sq/scan_fragments.hpp>"
"<cuvs/detail/jit_lto/common_fragments.hpp>"
OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/generated_kernels/ivf_sq/filter"
KERNEL_LINK_LIBRARIES jit_lto_kernel_usage_requirements
)
set(ivf_pq_ns "cuvs::neighbors::ivf_pq::detail")
generate_jit_lto_kernels(
jit_lto_files
Expand Down Expand Up @@ -897,6 +977,11 @@ if(NOT BUILD_CPU_ONLY)
src/neighbors/ivf_pq/detail/ivf_pq_process_and_fill_codes.cu
${ivf_pq_search_inst_files}
${ivf_pq_transform_inst_files}
src/neighbors/ivf_sq_index.cpp
src/neighbors/ivf_sq/ivf_sq_build_extend_float_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_build_extend_half_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_search_uint8_t_int64_t.cu
src/neighbors/ivf_sq/ivf_sq_serialize_uint8_t.cu
src/neighbors/knn_merge_parts.cu
src/neighbors/nn_descent.cu
${nn_descent_inst_files}
Expand Down
24 changes: 24 additions & 0 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ list(APPEND CMAKE_MODULE_PATH "${CUVS_SOURCE_DIR}")
option(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_SQ "Include faiss' ivf sq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA "Include faiss' cagra algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_GPU_CAGRA_HNSW
"Include faiss' cagra algorithm for build and hnsw for search in benchmark" ON
Expand All @@ -22,8 +23,10 @@ option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algori
ON
)
option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_SQ "Include faiss' cpu ivf sq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT "Include faiss' hnsw algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT "Include cuVS ivf flat algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ "Include cuVS ivf sq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ "Include cuVS ivf pq algorithm in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_CAGRA "Include cuVS CAGRA in benchmark" ON)
option(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE "Include cuVS brute force knn in benchmark" ON)
Expand Down Expand Up @@ -80,6 +83,7 @@ set(CUVS_USE_FAISS_STATIC ON)
if(BUILD_CPU_ONLY)
set(CUVS_FAISS_ENABLE_GPU OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ OFF)
set(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ OFF)
set(CUVS_ANN_BENCH_USE_CUVS_CAGRA OFF)
set(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE OFF)
Expand All @@ -97,6 +101,7 @@ set(CUVS_ANN_BENCH_USE_CUVS OFF)
if(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
OR CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE
OR CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT
OR CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA
OR CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB
OR CUVS_KNN_BENCH_USE_CUVS_BRUTE_FORCE
Expand Down Expand Up @@ -244,6 +249,12 @@ if(CUVS_ANN_BENCH_USE_CUVS_IVF_FLAT)
)
endif()

# cuVS IVF-SQ benchmark: built only when CUVS_ANN_BENCH_USE_CUVS_IVF_SQ is on.
# Combines the shared cuVS benchmark entry point with the IVF-SQ wrapper source.
if(CUVS_ANN_BENCH_USE_CUVS_IVF_SQ)
  ConfigureAnnBench(
    NAME
    CUVS_IVF_SQ
    PATH
    src/cuvs/cuvs_benchmark.cu
    src/cuvs/cuvs_ivf_sq.cu
    LINKS
    cuvs
  )
endif()

if(CUVS_ANN_BENCH_USE_CUVS_BRUTE_FORCE)
ConfigureAnnBench(NAME CUVS_BRUTE_FORCE PATH src/cuvs/cuvs_benchmark.cu LINKS cuvs)
endif()
Expand Down Expand Up @@ -309,6 +320,12 @@ if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_PQ)
)
endif()

# FAISS CPU IVF-SQ benchmark: built only when
# CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_SQ is on; uses the FAISS CPU benchmark source.
if(CUVS_ANN_BENCH_USE_FAISS_CPU_IVF_SQ)
  ConfigureAnnBench(
    NAME
    FAISS_CPU_IVF_SQ
    PATH
    src/faiss/faiss_cpu_benchmark.cpp
    LINKS
    ${CUVS_FAISS_TARGETS}
  )
endif()

if(CUVS_ANN_BENCH_USE_FAISS_CPU_HNSW_FLAT)
ConfigureAnnBench(
NAME FAISS_CPU_HNSW_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS ${CUVS_FAISS_TARGETS}
Expand All @@ -329,6 +346,13 @@ if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND CUVS_FAISS_ENABLE_GPU)
)
endif()

# FAISS GPU IVF-SQ benchmark: requires both the per-algorithm option and
# CUVS_FAISS_ENABLE_GPU; links raft::raft in addition to the FAISS targets.
if(CUVS_ANN_BENCH_USE_FAISS_GPU_IVF_SQ AND CUVS_FAISS_ENABLE_GPU)
  ConfigureAnnBench(
    NAME
    FAISS_GPU_IVF_SQ
    PATH
    src/faiss/faiss_gpu_benchmark.cu
    LINKS
    ${CUVS_FAISS_TARGETS}
    raft::raft
  )
endif()

if(CUVS_ANN_BENCH_USE_FAISS_GPU_FLAT AND CUVS_FAISS_ENABLE_GPU)
ConfigureAnnBench(
NAME FAISS_GPU_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${CUVS_FAISS_TARGETS}
Expand Down
25 changes: 25 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ extern template class cuvs::bench::cuvs_cagra<uint8_t, uint32_t>;
extern template class cuvs::bench::cuvs_cagra<int8_t, uint32_t>;
#endif

// IVF-SQ benchmark support: only compiled in when the IVF-SQ benchmark is enabled.
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
#include "cuvs_ivf_sq_wrapper.h"
// Explicit instantiation declarations: suppress implicit instantiation of the
// float and half wrappers in every translation unit that includes this header.
// NOTE(review): the matching definitions are expected in a separate source file
// (presumably cuvs_ivf_sq.cu) -- confirm it is part of the benchmark target.
extern template class cuvs::bench::cuvs_ivf_sq<float>;
extern template class cuvs::bench::cuvs_ivf_sq<half>;
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_MG
#include "cuvs_ivf_flat_wrapper.h"
#include "cuvs_mg_ivf_flat_wrapper.h"
Expand Down Expand Up @@ -86,6 +91,26 @@ void parse_search_param(const nlohmann::json& conf,
}
#endif

#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
/**
 * @brief Fill the IVF-SQ build parameters from a JSON benchmark configuration.
 *
 * Recognised keys:
 *  - "nlist" (mandatory): copied into `param.n_lists`.
 *  - "niter" (optional):  copied into `param.kmeans_n_iters`.
 *  - "ratio" (optional):  its reciprocal is stored in
 *                         `param.kmeans_trainset_fraction`.
 *
 * Fields whose optional key is absent are left untouched. The value of
 * "ratio" is not validated here (a ratio of 0 is not rejected).
 *
 * @tparam T     element type of the benchmarked index wrapper
 * @param conf   JSON object holding the build configuration
 * @param param  build parameters to update in place
 */
template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename cuvs::bench::cuvs_ivf_sq<T>::build_param& param)
{
  // Mandatory key: nlohmann::json::at() reports a missing key by throwing.
  param.n_lists = conf.at("nlist");

  if (const auto niter_it = conf.find("niter"); niter_it != conf.end()) {
    param.kmeans_n_iters = *niter_it;
  }

  if (const auto ratio_it = conf.find("ratio"); ratio_it != conf.end()) {
    // The config gives the ratio; the parameter stores its inverse as a fraction.
    const auto ratio               = static_cast<double>(*ratio_it);
    param.kmeans_trainset_fraction = 1.0 / ratio;
  }
}

/**
 * @brief Fill the IVF-SQ search parameters from a JSON benchmark configuration.
 *
 * The only key read is the mandatory "nprobe", which is stored in
 * `param.ivf_sq_params.n_probes`.
 *
 * @tparam T     element type of the benchmarked index wrapper
 * @param conf   JSON object holding the search configuration
 * @param param  search parameters to update in place
 */
template <typename T>
void parse_search_param(const nlohmann::json& conf,
                        typename cuvs::bench::cuvs_ivf_sq<T>::search_param& param)
{
  // Mandatory key: nlohmann::json::at() reports a missing key by throwing.
  const auto& nprobe_entry      = conf.at("nprobe");
  param.ivf_sq_params.n_probes = nprobe_entry;
}
#endif

#if defined(CUVS_ANN_BENCH_USE_CUVS_IVF_PQ) || defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA) || \
defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_HNSWLIB) || defined(CUVS_ANN_BENCH_USE_CUVS_MG) || \
defined(CUVS_ANN_BENCH_USE_CUVS_CAGRA_DISKANN)
Expand Down
20 changes: 19 additions & 1 deletion cpp/bench/ann/src/cuvs/cuvs_benchmark.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -84,6 +84,15 @@ auto create_algo(const std::string& algo_name,
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
if (algo_name == "cuvs_ivf_sq") {
typename cuvs::bench::cuvs_ivf_sq<T>::build_param param;
parse_build_param<T>(conf, param);
a = std::make_unique<cuvs::bench::cuvs_ivf_sq<T>>(metric, dim, param);
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::build_param param;
Expand Down Expand Up @@ -151,6 +160,15 @@ auto create_search_param(const std::string& algo_name, const nlohmann::json& con
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_SQ
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, half>) {
if (algo_name == "cuvs_ivf_sq") {
auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_sq<T>::search_param>();
parse_search_param<T>(conf, *param);
return param;
}
}
#endif
#ifdef CUVS_ANN_BENCH_USE_CUVS_IVF_PQ
if (algo_name == "raft_ivf_pq" || algo_name == "cuvs_ivf_pq") {
auto param = std::make_unique<typename cuvs::bench::cuvs_ivf_pq<T, int64_t>::search_param>();
Expand Down
10 changes: 10 additions & 0 deletions cpp/bench/ann/src/cuvs/cuvs_ivf_sq.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/
#include "cuvs_ivf_sq_wrapper.h"

namespace cuvs::bench {
template class cuvs_ivf_sq<float>;
template class cuvs_ivf_sq<half>;
} // namespace cuvs::bench
Loading
Loading