Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
2accbba
first commit
tarang-jain Feb 3, 2026
4c6182c
update vpq_dataset
tarang-jain Feb 3, 2026
f18e00c
clean pimpl separation
tarang-jain Feb 3, 2026
fa70a01
fix vpq_build
tarang-jain Feb 3, 2026
bf763e3
revert changes to quantizer struct
tarang-jain Feb 3, 2026
ac85ece
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 4, 2026
2728273
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 5, 2026
a0f6c76
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 10, 2026
1620486
Merge branch 'main' into view-pq-quantizer
tarang-jain Feb 13, 2026
b0aaa05
make user class pure pimpl
tarang-jain Feb 13, 2026
b479c34
Merge branch 'release/26.04' of https://github.com/rapidsai/cuvs into…
tarang-jain Mar 13, 2026
04be0a0
fixes
tarang-jain Mar 13, 2026
f80280e
style
tarang-jain Mar 16, 2026
51e8209
fix tests
tarang-jain Mar 16, 2026
c558964
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 16, 2026
ebfc7d2
move vpq_dataset class
tarang-jain Mar 19, 2026
b949c2c
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 19, 2026
a8b3ce4
fix style
tarang-jain Mar 19, 2026
4e9565f
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Mar 19, 2026
f8432b5
fix the signature
tarang-jain Mar 20, 2026
a15e054
addtogroup
tarang-jain Mar 20, 2026
12a872a
Merge branch 'release/26.04' into view-pq-quantizer
tarang-jain Mar 20, 2026
34ce8ff
sync stream after vamana build
tarang-jain Mar 25, 2026
be681d3
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Mar 30, 2026
65437ec
Merge branch 'release/26.04' of https://github.com/rapidsai/cuvs into…
tarang-jain Mar 30, 2026
fc30857
Merge branch 'release/26.04' into view-pq-quantizer
tfeher Apr 1, 2026
faa46f9
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Apr 1, 2026
2c1aa71
Update cpp/include/cuvs/preprocessing/quantize/pq.hpp
tarang-jain Apr 3, 2026
d6a8364
Update cpp/src/preprocessing/quantize/detail/pq.cuh
tarang-jain Apr 3, 2026
963f16e
Merge branch 'view-pq-quantizer' of https://github.com/tarang-jain/cu…
tarang-jain Apr 3, 2026
cbb5d75
merge upstream; resolve merge conflicts
tarang-jain Apr 3, 2026
68f016d
update namespace
tarang-jain Apr 3, 2026
0070cda
fix compilation
tarang-jain Apr 3, 2026
819eef8
create vpq_codebooks
tarang-jain Apr 3, 2026
19fe976
reduce diff
tarang-jain Apr 3, 2026
77bd557
fix compilation
tarang-jain Apr 3, 2026
1c85f16
pre-commit
tarang-jain Apr 3, 2026
4708434
revert bm change
tarang-jain Apr 3, 2026
55db0a4
rm unnecessary commits
tarang-jain Apr 3, 2026
6408cbb
fix error message and copyright
tarang-jain Apr 3, 2026
c4250f5
fix condition check
tarang-jain Apr 6, 2026
8c1f792
change trailing return type
tarang-jain Apr 6, 2026
f5ac6ba
Merge branch 'main' of https://github.com/rapidsai/cuvs into view-pq-…
tarang-jain Apr 6, 2026
4b7015f
add non const getters
tarang-jain Apr 8, 2026
cc2a900
Merge branch 'main' into view-pq-quantizer
tarang-jain Apr 8, 2026
70b5d04
Merge branch 'main' into view-pq-quantizer
tarang-jain Apr 21, 2026
7247688
update to use the view api
tarang-jain Apr 30, 2026
be4cb4a
make vq codebook optional
tarang-jain Apr 30, 2026
12cd162
resolve merge conflicts
tarang-jain Apr 30, 2026
ee0170e
style
tarang-jain Apr 30, 2026
04888f9
input validation for vq codebooks
tarang-jain Apr 30, 2026
fd1eb38
add warning
tarang-jain Apr 30, 2026
075c1ba
check vq_labels
tarang-jain Apr 30, 2026
740b78e
fix compilation errors
tarang-jain May 1, 2026
901c8bb
Merge branch 'main' into view-pq-quantizer
tarang-jain May 1, 2026
c5e231c
add instantiations
tarang-jain May 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions c/src/preprocessing/quantize/pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ extern "C" cuvsError_t cuvsProductQuantizerGetPqCodebook(cuvsProductQuantizer_t
if (quantizer->dtype.code == kDLFloat && quantizer->dtype.bits == 32) {
auto pq_mdspan =
(reinterpret_cast<cuvs::preprocessing::quantize::pq::quantizer<float>*>(quant_addr))
->vpq_codebooks.pq_code_book.view();
->codebooks.pq_code_book();
cuvs::core::to_dlpack(pq_mdspan, pq_codebook);
} else {
RAFT_FAIL("Unsupported quantizer dtype: %d and bits: %d",
Expand All @@ -264,10 +264,12 @@ extern "C" cuvsError_t cuvsProductQuantizerGetVqCodebook(cuvsProductQuantizer_t
if (quantizer != nullptr) {
auto quant_addr = quantizer->addr;
if (quantizer->dtype.code == kDLFloat && quantizer->dtype.bits == 32) {
auto pq_mdspan =
auto vq_opt =
(reinterpret_cast<cuvs::preprocessing::quantize::pq::quantizer<float>*>(quant_addr))
->vpq_codebooks.vq_code_book.view();
cuvs::core::to_dlpack(pq_mdspan, vq_codebook);
->codebooks.vq_code_book();
RAFT_EXPECTS(vq_opt.has_value(),
"quantizer has no VQ codebook (build with use_vq=true to enable)");
cuvs::core::to_dlpack(vq_opt.value(), vq_codebook);
} else {
RAFT_FAIL("Unsupported quantizer dtype: %d and bits: %d",
quantizer->dtype.code,
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ function(ConfigureAnnBench)
add_dependencies(${BENCH_NAME} ANN_BENCH)
else()
add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH})
target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN>)
target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN)
target_link_libraries(
${BENCH_NAME} PRIVATE benchmark::benchmark $<$<TARGET_EXISTS:CUDA::nvtx3>:CUDA::nvtx3>
)
Expand Down
14 changes: 10 additions & 4 deletions cpp/bench/ann/src/cuvs/cuvs_cagra_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <cuvs/neighbors/dynamic_batching.hpp>
#include <cuvs/neighbors/ivf_pq.hpp>
#include <cuvs/neighbors/nn_descent.hpp>
#include <cuvs/preprocessing/quantize/pq.hpp>
#include <cuvs/preprocessing/quantize/vpq_dataset.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/device_resources.hpp>
#include <raft/core/logger.hpp>
Expand Down Expand Up @@ -357,8 +359,10 @@ void cuvs_cagra<T, IdxT>::set_search_dataset(const T* dataset, size_t nrow)
} else {
using ds_idx_type = decltype(index_->data().n_rows());
bool is_vpq =
dynamic_cast<const cuvs::neighbors::vpq_dataset<half, ds_idx_type>*>(&index_->data()) ||
dynamic_cast<const cuvs::neighbors::vpq_dataset<float, ds_idx_type>*>(&index_->data());
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<half, ds_idx_type>*>(
&index_->data()) ||
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<float, ds_idx_type>*>(
&index_->data());
// It can happen that we are re-using a previous algo object which already has
// the dataset set. Check if we need update.
if (static_cast<size_t>(input_dataset_v_->extent(0)) != nrow ||
Expand All @@ -385,8 +389,10 @@ void cuvs_cagra<T, IdxT>::save(const std::string& file) const
} else {
using ds_idx_type = decltype(index_->data().n_rows());
bool is_vpq =
dynamic_cast<const cuvs::neighbors::vpq_dataset<half, ds_idx_type>*>(&index_->data()) ||
dynamic_cast<const cuvs::neighbors::vpq_dataset<float, ds_idx_type>*>(&index_->data());
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<half, ds_idx_type>*>(
&index_->data()) ||
dynamic_cast<const cuvs::preprocessing::quantize::pq::vpq_dataset<float, ds_idx_type>*>(
&index_->data());
cuvs::neighbors::cagra::serialize(handle_, file, *index_, is_vpq);
}
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/diskann/diskann_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void parse_build_param(const nlohmann::json& conf,
{
param.R = conf.at("R");
param.L_build = conf.at("L_build");
if (conf.contains("alpha")) { param.num_threads = conf.at("alpha"); }
if (conf.contains("alpha")) { param.alpha = conf.at("alpha"); }
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); }
}

Expand Down
94 changes: 0 additions & 94 deletions cpp/include/cuvs/neighbors/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,100 +391,6 @@ auto make_aligned_dataset(const raft::resources& res, SrcT src, uint32_t align_b
raft::round_up_safe<size_t>(src.extent(1) * kSize, std::lcm(align_bytes, kSize)) / kSize;
return make_strided_dataset(res, std::forward<SrcT>(src), required_stride);
}
/**
 * @brief VPQ compressed dataset.
 *
 * The dataset is compressed using two-level quantization:
 *
 * 1. Vector Quantization (VQ) — each vector is assigned to a coarse cluster center.
 * 2. Product Quantization (PQ) of the residuals relative to that center.
 *
 * @tparam MathT the type of elements in the codebooks
 * @tparam IdxT type of the vector indices (represent dataset.extent(0))
 *
 */
template <typename MathT, typename IdxT>
struct vpq_dataset : public dataset<IdxT> {
Comment thread
tarang-jain marked this conversation as resolved.
using index_type = IdxT;
using math_type = MathT;
/** Vector Quantization codebook - "coarse cluster centers". */
raft::device_matrix<math_type, uint32_t, raft::row_major> vq_code_book;
/** Product Quantization codebook - "fine cluster centers". */
raft::device_matrix<math_type, uint32_t, raft::row_major> pq_code_book;
/** Compressed dataset. */
raft::device_matrix<uint8_t, index_type, raft::row_major> data;

/** Construct by taking ownership of the two codebooks and the encoded data (all moved in). */
vpq_dataset(raft::device_matrix<math_type, uint32_t, raft::row_major>&& vq_code_book,
raft::device_matrix<math_type, uint32_t, raft::row_major>&& pq_code_book,
raft::device_matrix<uint8_t, index_type, raft::row_major>&& data)
: vq_code_book{std::move(vq_code_book)},
pq_code_book{std::move(pq_code_book)},
data{std::move(data)}
{
}

/** Number of encoded vectors, i.e. the row count of `data`. */
[[nodiscard]] auto n_rows() const noexcept -> index_type final { return data.extent(0); }
/** Original (uncompressed) dimensionality, derived from the VQ codebook width. */
[[nodiscard]] auto dim() const noexcept -> uint32_t final { return vq_code_book.extent(1); }
/** This dataset variant always owns its storage. */
[[nodiscard]] auto is_owning() const noexcept -> bool final { return true; }

/** Row length of the encoded data in bytes. */
[[nodiscard]] constexpr inline auto encoded_row_length() const noexcept -> uint32_t
{
return data.extent(1);
}
/** The number of "coarse cluster centers" */
[[nodiscard]] constexpr inline auto vq_n_centers() const noexcept -> uint32_t
{
return vq_code_book.extent(0);
}
/** The bit length of an encoded vector element after compression by PQ. */
[[nodiscard]] constexpr inline auto pq_bits() const noexcept -> uint32_t
{
/*
NOTE: pq_bits and the book size

Normally, we'd store `pq_bits` as a part of the index.
However, we know there's an invariant `pq_n_centers = 1 << pq_bits`, i.e. the codebook size is
the same as the number of possible code values. Hence, we don't store the pq_bits and derive it
from the array dimensions instead.
*/
auto pq_width = pq_n_centers();
#ifdef __cpp_lib_bitops
return std::countr_zero(pq_width);
#else
// Fallback: count trailing zeros manually when <bit> operations are unavailable.
uint32_t pq_bits = 0;
while (pq_width > 1) {
pq_bits++;
pq_width >>= 1;
}
return pq_bits;
#endif
}
/** The dimensionality of an encoded vector after compression by PQ. */
[[nodiscard]] constexpr inline auto pq_dim() const noexcept -> uint32_t
{
// Rounds up so that a `dim` not divisible by `pq_len` is still fully covered.
return raft::div_rounding_up_unsafe(dim(), pq_len());
}
/** Dimensionality of a subspaces, i.e. the number of vector components mapped to a subspace */
[[nodiscard]] constexpr inline auto pq_len() const noexcept -> uint32_t
{
return pq_code_book.extent(1);
}
/** The number of vectors in a PQ codebook (`1 << pq_bits`). */
[[nodiscard]] constexpr inline auto pq_n_centers() const noexcept -> uint32_t
{
return pq_code_book.extent(0);
}
};

/** Metafunction that is `std::true_type` iff `DatasetT` is an instantiation of `vpq_dataset`. */
template <typename DatasetT>
struct is_vpq_dataset : std::false_type {};

template <typename MathT, typename IdxT>
struct is_vpq_dataset<vpq_dataset<MathT, IdxT>> : std::true_type {};

/** Convenience variable template: `is_vpq_dataset<DatasetT>::value`. */
template <typename DatasetT>
inline constexpr bool is_vpq_dataset_v = is_vpq_dataset<DatasetT>::value;

namespace filtering {

Expand Down
55 changes: 50 additions & 5 deletions cpp/include/cuvs/preprocessing/quantize/pq.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <cuvs/cluster/kmeans.hpp>
#include <cuvs/neighbors/common.hpp>
#include <cuvs/preprocessing/quantize/vpq_dataset.hpp>
#include <raft/core/device_mdspan.hpp>
#include <raft/core/handle.hpp>
#include <raft/core/host_mdspan.hpp>
Expand Down Expand Up @@ -135,19 +136,21 @@ struct params {
/**
* @brief Defines and stores VPQ codebooks upon training
*
* @tparam T data element type
* The quantizer holds a vpq_codebooks instance, which can either own the codebooks
* or be non-owning (referencing external codebooks).
Comment on lines 137 to +140
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Fix the quantizer Doxygen to describe vpq_codebooks, not vpq_dataset.

This comment no longer matches the type below, so the generated public API docs now describe the wrong ownership model.

As per coding guidelines, public C++ API headers must include complete Doxygen documentation for public functions/classes.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/include/cuvs/preprocessing/quantize/pq.hpp` around lines 81 - 84, Update
the Doxygen on the quantizer declaration to describe vpq_codebooks (not
vpq_dataset): explain that the quantizer holds a vpq_codebooks instance and
whether it owns the codebooks or holds a non-owning reference, and clarify
ownership semantics for public API consumers. Edit the comment above the
quantizer class/struct (symbol: quantizer) to mention vpq_codebooks, state if
ownership is owning vs non-owning (referencing external codebooks), and ensure
the wording follows existing Doxygen style used elsewhere in the header.

*
* @tparam T data element type
*/
template <typename T>
struct quantizer {
/** Parameters used to build this quantizer. */
params params_quantizer;
/** VPQ codebooks produced during training. */
cuvs::neighbors::vpq_dataset<T, int64_t> vpq_codebooks;
/** VPQ codebooks (owning or view). */
cuvs::preprocessing::quantize::pq::vpq_codebooks<T> codebooks;
};
Comment on lines 145 to 150
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

This public member rename needs a deprecation or migration path.

quantizer<T> is part of the public header, so replacing the old codebook field with codebooks is a source-breaking change for existing callers. I don't see a compatibility shim or any deprecation marker here.

As per coding guidelines, breaking changes require deprecation warnings and migration guide updates.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/include/cuvs/preprocessing/quantize/pq.hpp` around lines 89 - 94, The
public struct quantizer<T> changed its field name to codebooks and needs a
compat shim: add a deprecated alias/forwarding member for the old field name
(e.g., the previous "codebook" identifier) that forwards to the new codebooks
member, mark it with your deprecation attribute/ macro (or C++ [[deprecated]]),
and keep params_quantizer and codebooks unchanged; update public header comments
and add a short note to the migration guide mentioning the rename and removal
timeline so consumers can switch to quantizer::codebooks before the deprecated
alias is removed.


/**
* @brief Initializes a product quantizer to be used later for quantizing the dataset.
* @brief Initializes a product quantizer by training on the dataset (owning).
*
* The use of a pool memory resource is recommended for more consistent training performance.
*
Expand All @@ -161,7 +164,7 @@ struct quantizer {
* @endcode
*
* @param[in] res raft resource
* @param[in] params configure product quantizer, e.g. quantile
* @param[in] params configure product quantizer, e.g. pq_bits, pq_dim
* @param[in] dataset a row-major matrix view on device or host
*
* @return quantizer
Expand All @@ -175,6 +178,48 @@ quantizer<float> build(raft::resources const& res,
const params params,
raft::host_matrix_view<const float, int64_t> dataset);

/**
 * @brief Creates a product quantizer from pre-computed codebooks.
 *
 * This function creates a non-owning quantizer that references the provided codebooks;
 * the referenced codebook memory must therefore outlive the returned quantizer.
 *
 * Usage example:
 * @code{.cpp}
 * raft::handle_t handle;
 * // Assume pq_centers and vq_centers are pre-computed on device
 * cuvs::preprocessing::quantize::pq::params params;
 * params.pq_bits = 8;
 * params.pq_dim = 32;
 * params.use_vq = true;
 * params.use_subspaces = true;
 * // With VQ centers:
 * auto quant_view = cuvs::preprocessing::quantize::pq::build(
 *   handle, params, pq_centers_view,
 *   std::make_optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>>(
 *     vq_centers_view));
 * // Without VQ (PQ only):
 * auto quant_pq_only = cuvs::preprocessing::quantize::pq::build(handle, params, pq_centers_view);
 * @endcode
 *
 * @param[in] res raft resource
 * @param[in] params configure product quantizer parameters. Must be fully specified
 * (pq_bits, pq_dim must be set; use_subspaces and use_vq must match the codebook shapes).
 * @param[in] pq_centers PQ codebook on device memory:
 * - For use_subspaces=true: [pq_dim * pq_n_centers, pq_len]
 * - For use_subspaces=false: [pq_n_centers, pq_len]
 * where pq_n_centers = (1 << pq_bits), pq_len = dim / pq_dim
 * @param[in] vq_centers Optional VQ codebook on device memory [vq_n_centers, dim].
 * Required when use_vq=true. Defaults to std::nullopt (no VQ).
 *
 * @return A view-type quantizer that references the provided data
 */
quantizer<float> build(
raft::resources const& res,
const params params,
raft::device_matrix_view<const float, uint32_t, raft::row_major> pq_centers,
std::optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>> vq_centers =
std::nullopt);

/**
* @brief Applies quantization transform to given dataset
*
Expand Down
Loading
Loading