-
Notifications
You must be signed in to change notification settings - Fork 184
[FEA] View Type PQ Preprocessor #1764
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
2accbba
4c6182c
f18e00c
fa70a01
bf763e3
ac85ece
2728273
a0f6c76
1620486
b0aaa05
b479c34
04be0a0
f80280e
51e8209
c558964
ebfc7d2
b949c2c
a8b3ce4
4e9565f
f8432b5
a15e054
12a872a
34ce8ff
be681d3
65437ec
fc30857
faa46f9
2c1aa71
d6a8364
963f16e
cbb5d75
68f016d
0070cda
819eef8
19fe976
77bd557
1c85f16
4708434
55db0a4
6408cbb
c4250f5
8c1f792
f5ac6ba
4b7015f
cc2a900
70b5d04
7247688
be4cb4a
12cd162
ee0170e
04888f9
fd1eb38
075c1ba
740b78e
901c8bb
c5e231c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,7 @@ | |
|
|
||
| #include <cuvs/cluster/kmeans.hpp> | ||
| #include <cuvs/neighbors/common.hpp> | ||
| #include <cuvs/preprocessing/quantize/vpq_dataset.hpp> | ||
| #include <raft/core/device_mdspan.hpp> | ||
| #include <raft/core/handle.hpp> | ||
| #include <raft/core/host_mdspan.hpp> | ||
|
|
@@ -135,19 +136,21 @@ struct params { | |
| /** | ||
| * @brief Defines and stores VPQ codebooks upon training | ||
| * | ||
| * @tparam T data element type | ||
| * The quantizer holds a vpq_dataset, which can either own the codebooks | |
| * or be non-owning (referencing external codebooks). | |
|
Comment on lines
137
to
+140
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fix the documentation comment: it no longer matches the type below, so the generated public API docs now describe the wrong ownership model. As per coding guidelines, public C++ API headers must include complete Doxygen documentation for public functions/classes. 🤖 Prompt for AI Agents |
||
| * | ||
| * @tparam T data element type | ||
| */ | ||
| template <typename T> | ||
| struct quantizer { | ||
| /** Parameters used to build this quantizer. */ | ||
| params params_quantizer; | ||
| /** VPQ codebooks produced during training. */ | ||
| cuvs::neighbors::vpq_dataset<T, int64_t> vpq_codebooks; | ||
| /** VPQ codebooks (owning or view). */ | ||
| cuvs::preprocessing::quantize::pq::vpq_codebooks<T> codebooks; | ||
| }; | ||
|
Comment on lines
145
to
150
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This public member rename needs a deprecation or migration path.
As per coding guidelines, breaking changes require deprecation warnings and migration guide updates. 🤖 Prompt for AI Agents |
||
|
|
||
| /** | ||
| * @brief Initializes a product quantizer to be used later for quantizing the dataset. | ||
| * @brief Initializes a product quantizer by training on the dataset (owning). | ||
| * | ||
| * The use of a pool memory resource is recommended for more consistent training performance. | ||
| * | ||
|
|
@@ -161,7 +164,7 @@ struct quantizer { | |
| * @endcode | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] params configure product quantizer, e.g. quantile | ||
| * @param[in] params configure product quantizer, e.g. pq_bits, pq_dim | ||
| * @param[in] dataset a row-major matrix view on device or host | ||
| * | ||
| * @return quantizer | ||
|
|
@@ -175,6 +178,48 @@ quantizer<float> build(raft::resources const& res, | |
| const params params, | ||
| raft::host_matrix_view<const float, int64_t> dataset); | ||
|
|
||
| /** | ||
| * @brief Creates a product quantizer from pre-computed codebooks. | ||
| * | ||
| * This function creates a non-owning quantizer that references the provided codebooks. | ||
| * | ||
| * Usage example: | ||
| * @code{.cpp} | ||
| * raft::handle_t handle; | ||
| * // Assume pq_centers and vq_centers are pre-computed on device | ||
| * cuvs::preprocessing::quantize::pq::params params; | ||
| * params.pq_bits = 8; | ||
| * params.pq_dim = 32; | ||
| * params.use_vq = true; | ||
| * params.use_subspaces = true; | ||
| * // With VQ centers: | ||
| * auto quant_view = cuvs::preprocessing::quantize::pq::build(handle, params, | ||
| * pq_centers_view, | ||
| * std::make_optional<raft::device_matrix_view<const | ||
| * float, uint32_t, raft::row_major>>(vq_centers_view)); | ||
| * // Without VQ (PQ only): | ||
| * auto quant_pq_only = cuvs::preprocessing::quantize::pq::build(handle, params, pq_centers_view); | ||
| * @endcode | ||
| * | ||
| * @param[in] res raft resource | ||
| * @param[in] params configure product quantizer parameters. Must be fully specified | ||
| * (pq_bits, pq_dim must be set; use_subspaces and use_vq must match the codebook shapes). | ||
| * @param[in] pq_centers PQ codebook on device memory: | ||
| * - For use_subspaces=true: [pq_dim * pq_n_centers, pq_len] | ||
| * - For use_subspaces=false: [pq_n_centers, pq_len] | ||
| * where pq_n_centers = (1 << pq_bits), pq_len = dim / pq_dim | ||
| * @param[in] vq_centers Optional VQ codebook on device memory [vq_n_centers, dim]. | ||
| * Required when use_vq=true. Defaults to std::nullopt (no VQ). | ||
| * | ||
| * @return A view-type quantizer that references the provided data | ||
| */ | ||
| quantizer<float> build( | ||
| raft::resources const& res, | ||
| const params params, | ||
| raft::device_matrix_view<const float, uint32_t, raft::row_major> pq_centers, | ||
| std::optional<raft::device_matrix_view<const float, uint32_t, raft::row_major>> vq_centers = | ||
| std::nullopt); | ||
|
|
||
| /** | ||
| * @brief Applies quantization transform to given dataset | ||
| * | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.