Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
2accbba
first commit
tarang-jain Feb 3, 2026
4c6182c
update vpq_dataset
tarang-jain Feb 3, 2026
f18e00c
clean pimpl separation
tarang-jain Feb 3, 2026
fa70a01
fix vpq_build
tarang-jain Feb 3, 2026
bf763e3
revert changes to quantizer struct
tarang-jain Feb 3, 2026
ac85ece
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 4, 2026
2728273
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 5, 2026
a0f6c76
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 10, 2026
1620486
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 13, 2026
b0aaa05
make user class pure pimpl
tarang-jain Feb 13, 2026
b479c34
Merge branch 'release/26.04' of https://github.com/rapidsai/cuvs into…
tarang-jain Mar 13, 2026
04be0a0
fixes
tarang-jain Mar 13, 2026
f80280e
style
tarang-jain Mar 16, 2026
51e8209
fix tests
tarang-jain Mar 16, 2026
c558964
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 16, 2026
ebfc7d2
move vpq_dataset class
tarang-jain Mar 19, 2026
b949c2c
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 19, 2026
a8b3ce4
fix style
tarang-jain Mar 19, 2026
4e9565f
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Mar 19, 2026
f8432b5
fix the signature
tarang-jain Mar 20, 2026
a15e054
addtogroup
tarang-jain Mar 20, 2026
12a872a
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 20, 2026
34ce8ff
sync stream after vamana build
tarang-jain Mar 25, 2026
be681d3
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Mar 30, 2026
65437ec
Merge branch 'release/26.04' of https://github.com/rapidsai/cuvs into…
tarang-jain Mar 30, 2026
fc30857
Merge branch 'release/26.04' into view-pq-quantizer
tfeher Apr 1, 2026
faa46f9
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Apr 1, 2026
2c1aa71
Update cpp/include/cuvs/preprocessing/quantize/pq.hpp
tarang-jain Apr 3, 2026
d6a8364
Update cpp/src/preprocessing/quantize/detail/pq.cuh
tarang-jain Apr 3, 2026
963f16e
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Apr 3, 2026
cbb5d75
merge upstream; resolve merge conflicts
tarang-jain Apr 3, 2026
68f016d
update namespace
tarang-jain Apr 3, 2026
0070cda
fix compilation
tarang-jain Apr 3, 2026
819eef8
create vpq_codebooks
tarang-jain Apr 3, 2026
19fe976
reduce diff
tarang-jain Apr 3, 2026
77bd557
fix compilation
tarang-jain Apr 3, 2026
1c85f16
pre-commit
tarang-jain Apr 3, 2026
4708434
revert bm change
tarang-jain Apr 3, 2026
55db0a4
rm unnecessary commits
tarang-jain Apr 3, 2026
6408cbb
fix error message and copyright
tarang-jain Apr 3, 2026
c4250f5
fix condition check
tarang-jain Apr 6, 2026
8c1f792
change trailing return type
tarang-jain Apr 6, 2026
f5ac6ba
Merge branch 'main' of https://github.com/rapidsai/cuvs into view-pq-…
tarang-jain Apr 6, 2026
4b7015f
add non const getters
tarang-jain Apr 8, 2026
cc2a900
Merge branch 'main' into view-pq-quantizer
tarang-jain Apr 8, 2026
70b5d04
Merge branch 'main' into view-pq-quantizer
tarang-jain Apr 21, 2026
7247688
update to use the view api
tarang-jain Apr 30, 2026
be4cb4a
make vq codebook optional
tarang-jain Apr 30, 2026
12cd162
resolve merge conflicts
tarang-jain Apr 30, 2026
ee0170e
style
tarang-jain Apr 30, 2026
04888f9
input validation for vq codebooks
tarang-jain Apr 30, 2026
fd1eb38
add warning
tarang-jain Apr 30, 2026
075c1ba
check vq_labels
tarang-jain Apr 30, 2026
740b78e
fix compilation errors
tarang-jain May 1, 2026
901c8bb
Merge branch 'main' into view-pq-quantizer
tarang-jain May 1, 2026
c5e231c
add instantiations
tarang-jain May 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions c/src/preprocessing/quantize/pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ extern "C" cuvsError_t cuvsProductQuantizerGetPqCodebook(cuvsProductQuantizer_t
if (quantizer->dtype.code == kDLFloat && quantizer->dtype.bits == 32) {
auto pq_mdspan =
(reinterpret_cast<cuvs::preprocessing::quantize::pq::quantizer<float>*>(quant_addr))
->vpq_codebooks.pq_code_book.view();
->codebooks.pq_code_book();
cuvs::core::to_dlpack(pq_mdspan, pq_codebook);
} else {
RAFT_FAIL("Unsupported quantizer dtype: %d and bits: %d",
Expand All @@ -264,10 +264,12 @@ extern "C" cuvsError_t cuvsProductQuantizerGetVqCodebook(cuvsProductQuantizer_t
if (quantizer != nullptr) {
auto quant_addr = quantizer->addr;
if (quantizer->dtype.code == kDLFloat && quantizer->dtype.bits == 32) {
auto pq_mdspan =
auto vq_opt =
(reinterpret_cast<cuvs::preprocessing::quantize::pq::quantizer<float>*>(quant_addr))
->vpq_codebooks.vq_code_book.view();
cuvs::core::to_dlpack(pq_mdspan, vq_codebook);
->codebooks.vq_code_book();
RAFT_EXPECTS(vq_opt.has_value(),
"quantizer has no VQ codebook (build with use_vq=true to enable)");
cuvs::core::to_dlpack(vq_opt.value(), vq_codebook);
} else {
RAFT_FAIL("Unsupported quantizer dtype: %d and bits: %d",
quantizer->dtype.code,
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ function(ConfigureAnnBench)
add_dependencies(${BENCH_NAME} ANN_BENCH)
else()
add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH})
target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN>)
target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN)
target_link_libraries(
${BENCH_NAME} PRIVATE benchmark::benchmark $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3>
)
Expand Down
14 changes: 10 additions & 4 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <cuvs/neighbors/dynamic_batching.hpp>
#include <cuvs/neighbors/ivf_pq.hpp>
#include <cuvs/neighbors/nn_descent.hpp>
#include <cuvs/preprocessing/quantize/pq.hpp>
#include <cuvs/preprocessing/quantize/vpq_dataset.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/logger.hpp>
Expand Down Expand Up @@ -357,8 +359,10 @@ void cuvs_cagra<T, IdxT>::set_search_dataset(const T* dataset, size_t nrow)
} else {
using ds_idx_type = decltype(index_->data().n_rows());
bool is_vpq =
dynamic_cast<const cuvs::neighbors::vpq_dataset<half, ds_idx_type>*>(&index_->data()) ||
dynamic_cast<const cuvs::neighbors::vpq_dataset<float, ds_idx_type>*>(&index_->data());
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<half, ds_idx_type>*>(
&index_->data()) ||
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<float, ds_idx_type>*>(
&index_->data());
// It can happen that we are re-using a previous algo object which already has
// the dataset set. Check if we need update.
if (static_cast<size_t>(input_dataset_v_->extent(0)) != nrow ||
Expand All @@ -385,8 +389,10 @@ void cuvs_cagra<T, IdxT>::save(const std::string& file) const
} else {
using ds_idx_type = decltype(index_->data().n_rows());
bool is_vpq =
dynamic_cast<const cuvs::neighbors::vpq_dataset<half, ds_idx_type>*>(&index_->data()) ||
dynamic_cast<const cuvs::neighbors::vpq_dataset<float, ds_idx_type>*>(&index_->data());
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<half, ds_idx_type>*>(
&index_->data()) ||
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<float, ds_idx_type>*>(
&index_->data());
cuvs::neighbors::cagra::serialize(handle_, file, *index_, is_vpq);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/diskann/diskann_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void parse_build_param(const nlohmann::json& conf,
{
param.R = conf.at("R");
param.L_build = conf.at("L_build");
if (conf.contains("alpha")) { param.num_threads = conf.at("alpha"); }
if (conf.contains("alpha")) { param.alpha = conf.at("alpha"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

Expand Down
94 changes: 0 additions & 94 deletions cpp/include/cuvs/neighbors/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,100 +391,6 @@ auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_b
raft::round_up_safe<size_t>(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize;
return make_strided_dataset(res, std::forward<SrcT>(src), required_stride);
}
/**
 * @brief VPQ compressed dataset.
 *
 * The dataset is compressed using two-level quantization:
 *
 * 1. Vector Quantization (VQ) — each vector is assigned to a coarse cluster center.
 * 2. Product Quantization (PQ) of the residuals relative to that center.
 *
 * @tparam MathT the type of elements in the codebooks
 * @tparam IdxT type of the vector indices (represent dataset.extent(0))
 *
 */
template <typename MathT, typename IdxT>
struct vpq_dataset : public dataset<IdxT> {
Comment thread
tarang-jain marked this conversation as resolved.
using index_type = IdxT;
using math_type = MathT;
/** Vector Quantization codebook - "coarse cluster centers". */
raft::device_matrix<math_type, uint32_t, raft::row_major> vq_code_book;
/** Product Quantization codebook - "fine cluster centers". */
raft::device_matrix<math_type, uint32_t, raft::row_major> pq_code_book;
/** Compressed dataset. */
raft::device_matrix<uint8_t, index_type, raft::row_major> data;

/** Construct by taking ownership of the two codebooks and the encoded data (all moved in). */
vpq_dataset(raft::device_matrix<math_type, uint32_t, raft::row_major>&& vq_code_book,
raft::device_matrix<math_type, uint32_t, raft::row_major>&& pq_code_book,
raft::device_matrix<uint8_t, index_type, raft::row_major>&& data)
: vq_code_book{std::move(vq_code_book)},
pq_code_book{std::move(pq_code_book)},
data{std::move(data)}
{
}

/** Number of encoded vectors, i.e. the row count of `data`. */
[[nodiscard]] auto n_rows() const noexcept -> index_type final { return data.extent(0); }
/** Original (uncompressed) dimensionality, derived from the VQ codebook width. */
[[nodiscard]] auto dim() const noexcept -> uint32_t final { return vq_code_book.extent(1); }
/** This dataset variant always owns its storage. */
[[nodiscard]] auto is_owning() const noexcept -> bool final { return true; }

/** Row length of the encoded data in bytes. */
[[nodiscard]] constexpr inline auto encoded_row_length() const noexcept -> uint32_t
{
return data.extent(1);
}
/** The number of "coarse cluster centers" */
[[nodiscard]] constexpr inline auto vq_n_centers() const noexcept -> uint32_t
{
return vq_code_book.extent(0);
}
/** The bit length of an encoded vector element after compression by PQ. */
[[nodiscard]] constexpr inline auto pq_bits() const noexcept -> uint32_t
{
/*
NOTE: pq_bits and the book size

Normally, we'd store `pq_bits` as a part of the index.
However, we know there's an invariant `pq_n_centers = 1 << pq_bits`, i.e. the codebook size is
the same as the number of possible code values. Hence, we don't store the pq_bits and derive it
from the array dimensions instead.
*/
auto pq_width = pq_n_centers();
#ifdef __cpp_lib_bitops
return std::countr_zero(pq_width);
#else
// Fallback: count trailing zeros manually when <bit> operations are unavailable.
uint32_t pq_bits = 0;
while (pq_width > 1) {
pq_bits++;
pq_width >>= 1;
}
return pq_bits;
#endif
}
/** The dimensionality of an encoded vector after compression by PQ. */
[[nodiscard]] constexpr inline auto pq_dim() const noexcept -> uint32_t
{
// Rounds up so that a `dim` not divisible by `pq_len` is still fully covered.
return raft::div_rounding_up_unsafe(dim(), pq_len());
}
/** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */
[[nodiscard]] constexpr inline auto pq_len() const noexcept -> uint32_t
{
return pq_code_book.extent(1);
}
/** The number of vectors in a PQ codebook (`1 << pq_bits`). */
[[nodiscard]] constexpr inline auto pq_n_centers() const noexcept -> uint32_t
{
return pq_code_book.extent(0);
}
};

/** Metafunction that is `std::true_type` iff `DatasetT` is an instantiation of `vpq_dataset`. */
template <typename DatasetT>
struct is_vpq_dataset : std::false_type {};

template <typename MathT, typename IdxT>
struct is_vpq_dataset<vpq_dataset<MathT, IdxT>> : std::true_type {};

/** Convenience variable template: `is_vpq_dataset<DatasetT>::value`. */
template <typename DatasetT>
inline constexpr bool is_vpq_dataset_v = is_vpq_dataset<DatasetT>::value;

namespace filtering {

Expand Down
55 changes: 50 additions & 5 deletions cpp/include/cuvs/preprocessing/quantize/pq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <cuvs/cluster/kmeans.hpp>
#include <cuvs/neighbors/common.hpp>
#include <cuvs/preprocessing/quantize/vpq_dataset.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/host_mdspan.hpp>
Expand Down Expand Up @@ -135,19 +136,21 @@ struct params {
/**
* @brief Defines and stores VPQ codebooks upon training
*
* @tparam T data element type
* The quantizer holds a vpq_codebooks instance, which can either own the codebooks
* or be non-owning (referencing external codebooks).
Comment on lines 137 to +140
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Fix the quantizer Doxygen to describe vpq_codebooks, not vpq_dataset.

This comment no longer matches the type below, so the generated public API docs now describe the wrong ownership model.

As per coding guidelines, public C++ API headers must include complete Doxygen documentation for public functions/classes.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/include/cuvs/preprocessing/quantize/pq.hpp` around lines 81 - 84, Update
the Doxygen on the quantizer declaration to describe vpq_codebooks (not
vpq_dataset): explain that the quantizer holds a vpq_codebooks instance and
whether it owns the codebooks or holds a non-owning reference, and clarify
ownership semantics for public API consumers. Edit the comment above the
quantizer class/struct (symbol: quantizer) to mention vpq_codebooks, state if
ownership is owning vs non-owning (referencing external codebooks), and ensure
the wording follows existing Doxygen style used elsewhere in the header.

*
* @tparam T data element type
*/
template <typename T>
struct quantizer {
/** Parameters used to build this quantizer. */
params params_quantizer;
/** VPQ codebooks produced during training. */
cuvs::neighbors::vpq_dataset<T, int64_t> vpq_codebooks;
/** VPQ codebooks (owning or view). */
cuvs::preprocessing::quantize::pq::vpq_codebooks<T> codebooks;
};
Comment on lines 145 to 150
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

This public member rename needs a deprecation or migration path.

quantizer<T> is part of the public header, so replacing the old codebook field with codebooks is a source-breaking change for existing callers. I don't see a compatibility shim or any deprecation marker here.

As per coding guidelines, breaking changes require deprecation warnings and migration guide updates.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/include/cuvs/preprocessing/quantize/pq.hpp` around lines 89 - 94, The
public struct quantizer<T> changed its field name to codebooks and needs a
compat shim: add a deprecated alias/forwarding member for the old field name
(e.g., the previous "codebook" identifier) that forwards to the new codebooks
member, mark it with your deprecation attribute/ macro (or C++ [[deprecated]]),
and keep params_quantizer and codebooks unchanged; update public header comments
and add a short note to the migration guide mentioning the rename and removal
timeline so consumers can switch to quantizer::codebooks before the deprecated
alias is removed.


/**
* @brief Initializes a product quantizer to be used later for quantizing the dataset.
* @brief Initializes a product quantizer by training on the dataset (owning).
*
* The use of a pool memory resource is recommended for more consistent training performance.
*
Expand All @@ -161,7 +164,7 @@ struct quantizer {
* @endcode
*
* @param[in] res raft resource
* @param[in] params configure product quantizer, e.g. quantile
* @param[in] params configure product quantizer, e.g. pq_bits, pq_dim
* @param[in] dataset a row-major matrix view on device or host
*
* @return quantizer
Expand All @@ -175,6 +178,48 @@ quantizer<float> build(raft::resources const& res,
const params params,
raft::host_matrix_view<const float, int64_t> dataset);

/**
 * @brief Creates a product quantizer from pre-computed codebooks.
 *
 * This function creates a non-owning quantizer that references the provided codebooks;
 * the referenced codebook memory must therefore outlive the returned quantizer.
 *
 * Usage example:
 * @code{.cpp}
 * raft::handle_t handle;
 * // Assume pq_centers and vq_centers are pre-computed on device
 * cuvs::preprocessing::quantize::pq::params params;
 * params.pq_bits = 8;
 * params.pq_dim = 32;
 * params.use_vq = true;
 * params.use_subspaces = true;
 * // With VQ centers:
 * auto quant_view = cuvs::preprocessing::quantize::pq::build(
 *   handle, params, pq_centers_view,
 *   std::make_optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>>(
 *     vq_centers_view));
 * // Without VQ (PQ only):
 * auto quant_pq_only = cuvs::preprocessing::quantize::pq::build(handle, params, pq_centers_view);
 * @endcode
 *
 * @param[in] res raft resource
 * @param[in] params configure product quantizer parameters. Must be fully specified
 * (pq_bits, pq_dim must be set; use_subspaces and use_vq must match the codebook shapes).
 * @param[in] pq_centers PQ codebook on device memory:
 * - For use_subspaces=true: [pq_dim * pq_n_centers, pq_len]
 * - For use_subspaces=false: [pq_n_centers, pq_len]
 * where pq_n_centers = (1 << pq_bits), pq_len = dim / pq_dim
 * @param[in] vq_centers Optional VQ codebook on device memory [vq_n_centers, dim].
 * Required when use_vq=true. Defaults to std::nullopt (no VQ).
 *
 * @return A view-type quantizer that references the provided data
 */
quantizer<float> build(
raft::resources const& res,
const params params,
raft::device_matrix_view<const float, uint32_t, raft::row_major> pq_centers,
std::optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>> vq_centers =
std::nullopt);

/**
* @brief Applies quantization transform to given dataset
*
Expand Down
Loading
Loading