From ae5534eeed9a1dfd09e1b59159fae81fc0681a57 Mon Sep 17 00:00:00 2001 From: Julian Miller Date: Fri, 24 Apr 2026 14:46:48 +0200 Subject: [PATCH] Use to be exposed RAFT numpy_serialize APIs --- c/src/neighbors/brute_force.cpp | 5 +++-- c/src/neighbors/cagra.cpp | 3 ++- c/src/neighbors/ivf_flat.cpp | 5 +++-- c/src/neighbors/mg_cagra.cpp | 7 ++++--- c/src/neighbors/mg_ivf_flat.cpp | 7 ++++--- c/src/neighbors/mg_ivf_pq.cpp | 5 +++-- cpp/cmake/thirdparty/get_raft.cmake | 6 +++--- cpp/include/cuvs/neighbors/cagra.hpp | 7 ++++--- cpp/include/cuvs/util/file_io.hpp | 9 +++++---- cpp/src/neighbors/brute_force_serialize.cu | 5 +++-- cpp/src/neighbors/detail/cagra/cagra_build.cuh | 5 +++-- cpp/src/neighbors/detail/cagra/cagra_serialize.cuh | 3 ++- cpp/src/neighbors/detail/hnsw.hpp | 8 ++++---- cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh | 4 ++-- cpp/src/neighbors/mg/snmg.cuh | 3 ++- 15 files changed, 47 insertions(+), 35 deletions(-) diff --git a/c/src/neighbors/brute_force.cpp b/c/src/neighbors/brute_force.cpp index 097f6b0f07..af4c6e27d3 100644 --- a/c/src/neighbors/brute_force.cpp +++ b/c/src/neighbors/brute_force.cpp @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -240,7 +241,7 @@ extern "C" cuvsError_t cuvsBruteForceDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); index->dtype.bits = dtype.itemsize * 8; if (dtype.kind == 'f' && dtype.itemsize == 4) { diff --git a/c/src/neighbors/cagra.cpp b/c/src/neighbors/cagra.cpp index 081179ca46..be983e3ecf 100644 --- a/c/src/neighbors/cagra.cpp +++ b/c/src/neighbors/cagra.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -875,7 +876,7 @@ extern "C" cuvsError_t cuvsCagraDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); index->dtype.bits = dtype.itemsize * 8; if (dtype.kind == 'f' && dtype.itemsize == 4) { diff --git a/c/src/neighbors/ivf_flat.cpp b/c/src/neighbors/ivf_flat.cpp index 56a3088e89..d36a2f3c33 100644 --- a/c/src/neighbors/ivf_flat.cpp +++ b/c/src/neighbors/ivf_flat.cpp @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -301,7 +302,7 @@ extern "C" cuvsError_t cuvsIvfFlatDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); index->dtype.bits = dtype.itemsize * 8; if (dtype.kind == 'f' && dtype.itemsize == 4) { diff --git a/c/src/neighbors/mg_cagra.cpp b/c/src/neighbors/mg_cagra.cpp index 99e2db32cb..ce509cd879 100644 --- a/c/src/neighbors/mg_cagra.cpp +++ b/c/src/neighbors/mg_cagra.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "../core/exceptions.hpp" @@ -401,7 +402,7 @@ extern "C" cuvsError_t cuvsMultiGpuCagraDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); is.close(); index->dtype.bits = dtype.itemsize * 8; @@ -432,7 +433,7 @@ extern "C" cuvsError_t cuvsMultiGpuCagraDistribute(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); is.close(); index->dtype.bits = dtype.itemsize * 8; diff --git a/c/src/neighbors/mg_ivf_flat.cpp b/c/src/neighbors/mg_ivf_flat.cpp index c9e3b0447a..2921ac8747 100644 --- a/c/src/neighbors/mg_ivf_flat.cpp +++ b/c/src/neighbors/mg_ivf_flat.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "../core/exceptions.hpp" @@ -398,7 +399,7 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); is.close(); index->dtype.bits = dtype.itemsize * 8; @@ -429,7 +430,7 @@ extern "C" cuvsError_t cuvsMultiGpuIvfFlatDistribute(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); is.close(); index->dtype.bits = dtype.itemsize * 8; diff --git a/c/src/neighbors/mg_ivf_pq.cpp b/c/src/neighbors/mg_ivf_pq.cpp index 5b85659fc2..7e563cb227 100644 --- a/c/src/neighbors/mg_ivf_pq.cpp +++ b/c/src/neighbors/mg_ivf_pq.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "../core/exceptions.hpp" @@ -390,7 +391,7 @@ extern "C" cuvsError_t cuvsMultiGpuIvfPqDeserialize(cuvsResources_t res, if (!is) { RAFT_FAIL("Cannot open file %s", filename); } char dtype_string[4]; is.read(dtype_string, 4); - auto dtype = raft::detail::numpy_serializer::parse_descr(std::string(dtype_string, 4)); + auto dtype = raft::numpy_serializer::parse_descr(std::string(dtype_string, 4)); is.close(); index->dtype.bits = dtype.itemsize * 8; diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 8ecf3686be..6c79a44abc 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,13 +1,13 @@ # ============================================================================= # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on # Use RAPIDS_VERSION_MAJOR_MINOR from rapids_config.cmake set(RAFT_VERSION "${RAPIDS_VERSION_MAJOR_MINOR}") -set(RAFT_FORK "rapidsai") -set(RAFT_PINNED_TAG "${rapids-cmake-checkout-tag}") +set(RAFT_FORK "julianmi") +set(RAFT_PINNED_TAG "expose-public-npy-helpers") function(find_and_configure_raft) set(oneValueArgs VERSION FORK PINNED_TAG BUILD_STATIC_DEPS ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES CLONE_ON_PIN) diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index a7e1249677..701935fffe 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -764,7 +765,7 @@ struct index : cuvs::neighbors::index { if (lseek(fd.get(), 0, SEEK_SET) == -1) { RAFT_FAIL("Failed to seek to beginning of dataset file"); } - auto header = raft::detail::numpy_serializer::read_header(stream); + auto header = raft::numpy_serializer::read_header(stream); RAFT_EXPECTS(header.shape.size() == 2, "Dataset file should be 2D, got %zu dimensions", header.shape.size()); @@ -799,7 +800,7 @@ struct index : cuvs::neighbors::index { if (lseek(fd.get(), 0, SEEK_SET) == -1) { RAFT_FAIL("Failed to seek to beginning of graph file"); } - auto header = raft::detail::numpy_serializer::read_header(stream); + auto header = raft::numpy_serializer::read_header(stream); RAFT_EXPECTS( header.shape.size() == 2, "Graph file should be 2D, got %zu dimensions", header.shape.size()); @@ -840,7 +841,7 @@ struct index : cuvs::neighbors::index { if (lseek(fd.get(), 0, SEEK_SET) == -1) { RAFT_FAIL("Failed to seek to beginning of mapping file"); } - auto header = raft::detail::numpy_serializer::read_header(stream); + auto header = raft::numpy_serializer::read_header(stream); RAFT_EXPECTS(header.shape.size() == 1, "Mapping file should be 1D, got %zu dimensions", header.shape.size()); diff --git a/cpp/include/cuvs/util/file_io.hpp b/cpp/include/cuvs/util/file_io.hpp index f6053e69f8..bd2c018e1f 100644 --- a/cpp/include/cuvs/util/file_io.hpp +++ b/cpp/include/cuvs/util/file_io.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include @@ -187,12 +188,12 @@ std::pair create_numpy_file(const std::string& path, file_descriptor fd(path, O_CREAT | O_RDWR | O_TRUNC, 0644); // Build header - const auto dtype = raft::detail::numpy_serializer::get_numpy_dtype(); - const bool fortran_order = false; - const raft::detail::numpy_serializer::header_t header = {dtype, fortran_order, shape}; + const auto dtype = raft::numpy_serializer::get_numpy_dtype(); + const bool fortran_order = false; + const raft::numpy_serializer::header_t header = {dtype, fortran_order, shape}; std::stringstream ss; - raft::detail::numpy_serializer::write_header(ss, header); + raft::numpy_serializer::write_header(ss, header); std::string header_str = ss.str(); size_t header_size = header_str.size(); diff --git a/cpp/src/neighbors/brute_force_serialize.cu b/cpp/src/neighbors/brute_force_serialize.cu index dd1078e33f..f63438baa4 100644 --- a/cpp/src/neighbors/brute_force_serialize.cu +++ b/cpp/src/neighbors/brute_force_serialize.cu @@ -1,11 +1,12 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #include #include #include +#include #include #include @@ -24,7 +25,7 @@ void serialize(raft::resources const& handle, RAFT_LOG_DEBUG( "Saving brute force index, size %zu, dim %u", static_cast(index.size()), index.dim()); - auto dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + auto dtype_string = raft::numpy_serializer::get_numpy_dtype().to_string(); dtype_string.resize(4); os << dtype_string; diff --git a/cpp/src/neighbors/detail/cagra/cagra_build.cuh b/cpp/src/neighbors/detail/cagra/cagra_build.cuh index dd2042bd12..e115dec621 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_build.cuh @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -726,14 +727,14 @@ void ace_load_partition_dataset_from_disk( std::ifstream is(reordered_dataset_path, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", reordered_dataset_path.c_str()); } auto start_pos = is.tellg(); - raft::detail::numpy_serializer::read_header(is); + raft::numpy_serializer::read_header(is); core_header_size = static_cast(is.tellg() - start_pos); } { std::ifstream is(augmented_dataset_path, std::ios::in | std::ios::binary); if (!is) { RAFT_FAIL("Cannot open file %s", augmented_dataset_path.c_str()); } auto start_pos = is.tellg(); - raft::detail::numpy_serializer::read_header(is); + raft::numpy_serializer::read_header(is); augmented_header_size = static_cast(is.tellg() - start_pos); } diff --git a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh index 323184e757..37f91e091b 100644 --- a/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/src/neighbors/detail/cagra/cagra_serialize.cuh @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -54,7 +55,7 @@ void serialize(raft::resources const& res, RAFT_LOG_DEBUG( "Saving CAGRA index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); - std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + std::string dtype_string = raft::numpy_serializer::get_numpy_dtype().to_string(); dtype_string.resize(4); os << dtype_string; diff --git a/cpp/src/neighbors/detail/hnsw.hpp b/cpp/src/neighbors/detail/hnsw.hpp index 4914a0fa1b..8dabf7b62f 100644 --- a/cpp/src/neighbors/detail/hnsw.hpp +++ b/cpp/src/neighbors/detail/hnsw.hpp @@ -14,9 +14,9 @@ #include #include -#include #include #include +#include #include #include @@ -399,7 +399,7 @@ void serialize_to_hnswlib_from_disk(raft::resources const& res, std::ifstream graph_stream(graph_path, std::ios::binary); RAFT_EXPECTS(graph_stream.good(), "Failed to open graph file: %s", graph_path.c_str()); - auto header = raft::detail::numpy_serializer::read_header(graph_stream); + auto header = raft::numpy_serializer::read_header(graph_stream); graph_header_size = static_cast(graph_stream.tellg()); RAFT_EXPECTS( header.shape.size() == 2, "Graph file should be 2D, got %zu dimensions", header.shape.size()); @@ -419,7 +419,7 @@ void serialize_to_hnswlib_from_disk(raft::resources const& res, std::ifstream dataset_stream(dataset_path, std::ios::binary); RAFT_EXPECTS(dataset_stream.good(), "Failed to open dataset file: %s", dataset_path.c_str()); - auto header = raft::detail::numpy_serializer::read_header(dataset_stream); + auto header = raft::numpy_serializer::read_header(dataset_stream); dataset_header_size = static_cast(dataset_stream.tellg()); RAFT_EXPECTS(header.shape.size() == 2, "Dataset file should be 2D, got %zu dimensions", @@ -439,7 +439,7 @@ void serialize_to_hnswlib_from_disk(raft::resources const& res, std::ifstream mapping_stream(mapping_path, std::ios::binary); RAFT_EXPECTS(mapping_stream.good(), "Failed to open mapping file: %s", mapping_path.c_str()); - auto header = raft::detail::numpy_serializer::read_header(mapping_stream); + auto header = raft::numpy_serializer::read_header(mapping_stream); label_header_size = static_cast(mapping_stream.tellg()); RAFT_EXPECTS(header.shape.size() == 1, "Mapping file should be 1D, got %zu dimensions", diff --git a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh index e29d1d9589..d6aa25ca23 100644 --- a/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh +++ b/cpp/src/neighbors/ivf_flat/ivf_flat_serialize.cuh @@ -11,8 +11,8 @@ #include #include -#include #include +#include #include #include #include @@ -44,7 +44,7 @@ void serialize(raft::resources const& handle, std::ostream& os, const index(index_.size()), index_.dim()); - std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + std::string dtype_string = raft::numpy_serializer::get_numpy_dtype().to_string(); dtype_string.resize(4); os << dtype_string; diff --git a/cpp/src/neighbors/mg/snmg.cuh b/cpp/src/neighbors/mg/snmg.cuh index d2e98f1c1a..43e4aa4471 100644 --- a/cpp/src/neighbors/mg/snmg.cuh +++ b/cpp/src/neighbors/mg/snmg.cuh @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -738,7 +739,7 @@ void serialize(const raft::resources& clique, std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } - std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + std::string dtype_string = raft::numpy_serializer::get_numpy_dtype().to_string(); dtype_string.resize(4); of << dtype_string;