From 198b2ede9d959150df70cfedcb92002225911727 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Wed, 22 Apr 2026 08:59:11 -0700 Subject: [PATCH 01/11] Cleanup and refactor --- sdk/cs/src/OpenAI/EmbeddingClient.cs | 8 ++++--- .../EmbeddingClientTests.cs | 15 +++++-------- sdk/cs/test/FoundryLocal.Tests/Utils.cs | 22 ------------------- sdk/rust/docs/api.md | 1 - 4 files changed, 10 insertions(+), 36 deletions(-) diff --git a/sdk/cs/src/OpenAI/EmbeddingClient.cs b/sdk/cs/src/OpenAI/EmbeddingClient.cs index 91877f47d..4486a6060 100644 --- a/sdk/cs/src/OpenAI/EmbeddingClient.cs +++ b/sdk/cs/src/OpenAI/EmbeddingClient.cs @@ -77,12 +77,14 @@ private async Task GenerateEmbeddingImplAsync(string in private async Task GenerateEmbeddingsImplAsync(IEnumerable inputs, CancellationToken? ct) { - if (inputs == null || !inputs.Any()) + var inputList = inputs?.ToList(); + + if (inputList == null || inputList.Count == 0) { throw new ArgumentException("Inputs must be a non-empty array of strings.", nameof(inputs)); } - foreach (var input in inputs) + foreach (var input in inputList) { if (string.IsNullOrWhiteSpace(input)) { @@ -90,7 +92,7 @@ private async Task GenerateEmbeddingsImplAsync(IEnumera } } - var embeddingRequest = EmbeddingCreateRequestExtended.FromUserInput(_modelId, inputs); + var embeddingRequest = EmbeddingCreateRequestExtended.FromUserInput(_modelId, inputList); var embeddingRequestJson = embeddingRequest.ToJson(); var request = new CoreInteropRequest { Params = new() { { "OpenAICreateRequest", embeddingRequestJson } } }; diff --git a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs index bed3a8ea5..d4d521970 100644 --- a/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs +++ b/sdk/cs/test/FoundryLocal.Tests/EmbeddingClientTests.cs @@ -19,19 +19,14 @@ public static async Task Setup() var manager = FoundryLocalManager.Instance; // initialized by Utils var catalog = await 
manager.GetCatalogAsync(); - // Reduce max_length in the embedding model's genai_config.json to avoid OOM - // when allocating the KV cache. Embedding models only need a single forward pass - // so a large max_length is unnecessary. - Utils.PatchModelMaxLength("qwen3-0.6b-embedding-generic-cpu-1", "v1"); - // Load the specific cached model variant directly - var model = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false); - await Assert.That(model).IsNotNull(); + var loadedModel = await catalog.GetModelVariantAsync("qwen3-0.6b-embedding-generic-cpu:1").ConfigureAwait(false); + await Assert.That(loadedModel).IsNotNull(); - await model!.LoadAsync().ConfigureAwait(false); - await Assert.That(await model.IsLoadedAsync()).IsTrue(); + await loadedModel!.LoadAsync().ConfigureAwait(false); + await Assert.That(await loadedModel.IsLoadedAsync()).IsTrue(); - EmbeddingClientTests.model = model; + EmbeddingClientTests.model = loadedModel; } [After(Class)] diff --git a/sdk/cs/test/FoundryLocal.Tests/Utils.cs b/sdk/cs/test/FoundryLocal.Tests/Utils.cs index f89698539..8b25ba052 100644 --- a/sdk/cs/test/FoundryLocal.Tests/Utils.cs +++ b/sdk/cs/test/FoundryLocal.Tests/Utils.cs @@ -483,26 +483,4 @@ private static string GetRepoRoot() throw new InvalidOperationException("Could not find git repository root from test file location"); } - - /// - /// Patches max_length in a cached model's genai_config.json to a small value. - /// ORT GenAI allocates a KV cache sized by max_length; the default (32768) can cause - /// OOM when multiple models are loaded. Embedding models only need a single forward pass - /// so a small max_length is sufficient. 
- /// - internal static void PatchModelMaxLength(string modelDirName, string variantSubDir, int newMaxLength = 512) - { - var repoRoot = new DirectoryInfo(GetRepoRoot()); - var configPath = Path.Combine(repoRoot.Parent!.FullName, "test-data-shared", - modelDirName, variantSubDir, "genai_config.json"); - - if (!File.Exists(configPath)) return; - - var json = File.ReadAllText(configPath); - if (json.Contains("\"max_length\": 32768")) - { - json = json.Replace("\"max_length\": 32768", $"\"max_length\": {newMaxLength}"); - File.WriteAllText(configPath, json); - } - } } diff --git a/sdk/rust/docs/api.md b/sdk/rust/docs/api.md index 8dcb0c292..a2045f0c8 100644 --- a/sdk/rust/docs/api.md +++ b/sdk/rust/docs/api.md @@ -16,7 +16,6 @@ - [ChatClient](#chatclient) - [ChatCompletionStream](#chatcompletionstream) - [EmbeddingClient](#embeddingclient) - - [EmbeddingResponse](#embeddingresponse) - [AudioClient](#audioclient) - [AudioTranscriptionStream](#audiotranscriptionstream) - [AudioTranscriptionResponse](#audiotranscriptionresponse) From c2bff89de99cd4bbbc398637d010fbd6070140a5 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Wed, 22 Apr 2026 09:00:11 -0700 Subject: [PATCH 02/11] Added embedding for cpp SDK --- sdk/cpp/CMakeLists.txt | 9 + sdk/cpp/include/foundry_local.h | 1 + sdk/cpp/include/model.h | 2 + .../include/openai/openai_embedding_client.h | 65 +++++ sdk/cpp/sample/main.cpp | 43 +++ sdk/cpp/src/openai_embedding_client.cpp | 91 +++++++ sdk/cpp/src/parser.h | 37 +++ sdk/cpp/test/client_test.cpp | 157 +++++++++++ sdk/cpp/test/e2e_test.cpp | 253 ++++++++++++++++++ 9 files changed, 658 insertions(+) create mode 100644 sdk/cpp/include/openai/openai_embedding_client.h create mode 100644 sdk/cpp/src/openai_embedding_client.cpp diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index 014024841..908a4d918 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -54,6 +54,7 @@ add_library(CppSdk STATIC src/audio_client.cpp 
src/live_audio_types.cpp src/live_audio_session.cpp + src/openai_embedding_client.cpp src/foundry_local_manager.cpp ) @@ -228,6 +229,14 @@ if (BUILD_TESTING) ${CMAKE_CURRENT_SOURCE_DIR}/src ) + # Point E2E tests at the sibling test-data-shared repo (same convention as + # the C#/JS/Python/Rust SDK test suites). At runtime, SetUpTestSuite uses + # this path as `Configuration::model_cache_dir` when the directory exists, + # so locally-cached test models are discoverable without a download. + target_compile_definitions(CppSdkE2ETests PRIVATE + CPPSDK_TEST_DATA_SHARED_DIR="${CMAKE_CURRENT_SOURCE_DIR}/../../../test-data-shared" + ) + target_link_libraries(CppSdkE2ETests PRIVATE CppSdk diff --git a/sdk/cpp/include/foundry_local.h b/sdk/cpp/include/foundry_local.h index cdf9c92b5..1c78ffdd2 100644 --- a/sdk/cpp/include/foundry_local.h +++ b/sdk/cpp/include/foundry_local.h @@ -18,3 +18,4 @@ #include "openai/audio_client.h" #include "openai/live_audio_types.h" #include "openai/live_audio_session.h" +#include "openai/openai_embedding_client.h" diff --git a/sdk/cpp/include/model.h b/sdk/cpp/include/model.h index b52fae76c..f3258b117 100644 --- a/sdk/cpp/include/model.h +++ b/sdk/cpp/include/model.h @@ -19,6 +19,7 @@ namespace foundry_local { class OpenAIChatClient; class OpenAIAudioClient; + class OpenAIEmbeddingClient; } namespace foundry_local::Internal { @@ -59,6 +60,7 @@ namespace foundry_local { friend class OpenAIChatClient; friend class OpenAIAudioClient; + friend class OpenAIEmbeddingClient; }; enum class DeviceType { diff --git a/sdk/cpp/include/openai/openai_embedding_client.h b/sdk/cpp/include/openai/openai_embedding_client.h new file mode 100644 index 000000000..5bb7319cf --- /dev/null +++ b/sdk/cpp/include/openai/openai_embedding_client.h @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace foundry_local::Internal { + struct IFoundryLocalCore; +} + +namespace foundry_local { + class ILogger; + class IModel; + + struct EmbeddingObject { + int index = 0; + std::vector embedding; + }; + + struct EmbeddingUsage { + std::optional prompt_tokens; + std::optional total_tokens; + }; + + struct EmbeddingCreateResponse { + std::string model; + std::string object; ///< Always "list" + std::vector data; + std::optional usage; + }; + + class OpenAIEmbeddingClient final { + public: + explicit OpenAIEmbeddingClient(const IModel& model); + + /// Returns the model ID this client was created for. + const std::string& GetModelId() const noexcept { return modelId_; } + + /// Generate embedding for a single input string. + EmbeddingCreateResponse GenerateEmbedding(std::string_view input) const; + + /// Generate embeddings for multiple input strings in a single request. + EmbeddingCreateResponse GenerateEmbeddings(gsl::span inputs) const; + + private: + OpenAIEmbeddingClient(gsl::not_null core, std::string_view modelId, + gsl::not_null logger); + + std::string BuildSingleRequestJson(std::string_view input) const; + std::string BuildBatchRequestJson(gsl::span inputs) const; + + std::string modelId_; + gsl::not_null core_; + gsl::not_null logger_; + }; + +} // namespace foundry_local diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 7c377da99..27defa750 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -362,6 +362,46 @@ void ChatWithToolCalling(Manager& manager, const std::string& alias) { std::cout << "Model unloaded.\n"; } +// --------------------------------------------------------------------------- +// Example 6 – Embeddings (single and batch) +// --------------------------------------------------------------------------- +void GenerateEmbeddings(Manager& manager, const std::string& alias) { + std::cout << "\n=== Example 6: 
Embeddings ===\n"; + + auto& catalog = manager.GetCatalog(); + + auto* model = catalog.GetModel(alias); + if (!model) { + std::cerr << "Model '" << alias << "' not found in catalog.\n"; + return; + } + + model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; }); + std::cout << "\n"; + + model->Load(); + + OpenAIEmbeddingClient embeddings(*model); + + // Single input + auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog"); + if (!single.data.empty()) { + std::cout << "Single embedding: dim=" << single.data[0].embedding.size() << "\n"; + } + + // Batch input + std::vector inputs = {"The capital of France is Paris", "Machine learning is a subset of AI"}; + auto batch = embeddings.GenerateEmbeddings(inputs); + std::cout << "Batch embeddings: count=" << batch.data.size(); + if (!batch.data.empty()) { + std::cout << " dim=" << batch.data[0].embedding.size(); + } + std::cout << "\n"; + + model->Unload(); + std::cout << "Model unloaded.\n"; +} + // --------------------------------------------------------------------------- // main // --------------------------------------------------------------------------- @@ -450,6 +490,9 @@ int main(int argc, char* argv[]) { std::cerr << "Example 5 failed: " << ex.what() << "\n"; } + // 6. Embeddings (uncomment and set a valid embedding model alias) + // GenerateEmbeddings(manager, "qwen3-0.6b-embedding"); + Manager::Destroy(); return 0; } diff --git a/sdk/cpp/src/openai_embedding_client.cpp b/sdk/cpp/src/openai_embedding_client.cpp new file mode 100644 index 000000000..6c393db89 --- /dev/null +++ b/sdk/cpp/src/openai_embedding_client.cpp @@ -0,0 +1,91 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include +#include +#include +#include + +#include +#include + +#include "foundry_local.h" +#include "foundry_local_internal_core.h" +#include "foundry_local_exception.h" +#include "core_interop_request.h" +#include "parser.h" +#include "logger.h" + +namespace foundry_local { + + OpenAIEmbeddingClient::OpenAIEmbeddingClient(gsl::not_null core, + std::string_view modelId, gsl::not_null logger) + : core_(core), modelId_(modelId), logger_(logger) {} + + std::string OpenAIEmbeddingClient::BuildSingleRequestJson(std::string_view input) const { + nlohmann::json req = {{"model", modelId_}, {"input", std::string(input)}}; + return req.dump(); + } + + std::string OpenAIEmbeddingClient::BuildBatchRequestJson(gsl::span inputs) const { + nlohmann::json jInputs = nlohmann::json::array(); + for (const auto& s : inputs) { + jInputs.push_back(s); + } + nlohmann::json req = {{"model", modelId_}, {"input", std::move(jInputs)}}; + return req.dump(); + } + + EmbeddingCreateResponse OpenAIEmbeddingClient::GenerateEmbedding(std::string_view input) const { + if (input.empty()) { + throw Exception("Embedding input must be a non-empty string.", *logger_); + } + + std::string openAiReqJson = BuildSingleRequestJson(input); + + CoreInteropRequest req("embeddings"); + req.AddParam("OpenAICreateRequest", openAiReqJson); + + std::string json = req.ToJson(); + auto response = core_->call(req.Command(), *logger_, &json); + if (response.HasError()) { + throw Exception("Embedding generation failed: " + response.error, *logger_); + } + + return nlohmann::json::parse(response.data).get(); + } + + EmbeddingCreateResponse OpenAIEmbeddingClient::GenerateEmbeddings(gsl::span inputs) const { + if (inputs.empty()) { + throw Exception("Embedding inputs must be a non-empty array of strings.", *logger_); + } + for (const auto& s : inputs) { + if (s.empty()) { + throw Exception("Each embedding input must be a non-empty string.", *logger_); + } + } + + std::string openAiReqJson = 
BuildBatchRequestJson(inputs); + + CoreInteropRequest req("embeddings"); + req.AddParam("OpenAICreateRequest", openAiReqJson); + + std::string json = req.ToJson(); + auto response = core_->call(req.Command(), *logger_, &json); + if (response.HasError()) { + throw Exception("Batch embedding generation failed: " + response.error, *logger_); + } + + return nlohmann::json::parse(response.data).get(); + } + + OpenAIEmbeddingClient::OpenAIEmbeddingClient(const IModel& model) + : OpenAIEmbeddingClient(model.GetCoreAccess().core, model.GetCoreAccess().modelName, + model.GetCoreAccess().logger) { + if (!model.IsLoaded()) { + throw Exception("Model " + model.GetCoreAccess().modelName + " is not loaded. Call Load() first.", + *model.GetCoreAccess().logger); + } + } + +} // namespace foundry_local diff --git a/sdk/cpp/src/parser.h b/sdk/cpp/src/parser.h index 3596579cb..3da60271d 100644 --- a/sdk/cpp/src/parser.h +++ b/sdk/cpp/src/parser.h @@ -292,6 +292,43 @@ namespace foundry_local { c.delta = j.at("delta").get(); } + inline void from_json(const nlohmann::json& j, EmbeddingObject& e) { + if (j.contains("index")) + j.at("index").get_to(e.index); + e.embedding.clear(); + if (j.contains("embedding") && j.at("embedding").is_array()) { + const auto& arr = j.at("embedding"); + e.embedding.reserve(arr.size()); + for (const auto& v : arr) { + if (v.is_number()) { + e.embedding.push_back(v.get()); + } + } + } + } + + inline void from_json(const nlohmann::json& j, EmbeddingUsage& u) { + u.prompt_tokens = ParsingUtils::get_opt_int(j, "prompt_tokens"); + u.total_tokens = ParsingUtils::get_opt_int(j, "total_tokens"); + } + + inline void from_json(const nlohmann::json& j, EmbeddingCreateResponse& r) { + r.model = ParsingUtils::get_string_or_empty(j, "model"); + r.object = ParsingUtils::get_string_or_empty(j, "object"); + + r.data.clear(); + if (j.contains("data") && j.at("data").is_array()) { + r.data = j.at("data").get>(); + } + + if (j.contains("usage") && j.at("usage").is_object()) { + 
r.usage = j.at("usage").get(); + } + else { + r.usage.reset(); + } + } + inline void from_json(const nlohmann::json& j, ChatCompletionCreateResponse& r) { if (j.contains("created")) j.at("created").get_to(r.created); diff --git a/sdk/cpp/test/client_test.cpp b/sdk/cpp/test/client_test.cpp index 6f083cef9..868ede024 100644 --- a/sdk/cpp/test/client_test.cpp +++ b/sdk/cpp/test/client_test.cpp @@ -743,3 +743,160 @@ TEST_F(OpenAIChatClientTest, CompleteChat_ToolCallRoundTrip) { EXPECT_EQ("call_1", openAiReq["messages"][3]["tool_call_id"].get()); EXPECT_EQ("auto", openAiReq["tool_choice"].get()); } + +// ===================================================================== +// OpenAIEmbeddingClient tests +// ===================================================================== + +class OpenAIEmbeddingClientTest : public ::testing::Test { +protected: + MockCore core_; + NullLogger logger_; + + static std::string MakeEmbeddingResponseJson(const std::vector>& vectors, + const std::string& modelName = "embedding-model") { + nlohmann::json data = nlohmann::json::array(); + for (size_t i = 0; i < vectors.size(); ++i) { + data.push_back({{"index", static_cast(i)}, {"object", "embedding"}, {"embedding", vectors[i]}}); + } + nlohmann::json resp = {{"model", modelName}, + {"object", "list"}, + {"data", std::move(data)}, + {"usage", {{"prompt_tokens", 5}, {"total_tokens", 5}}}}; + return resp.dump(); + } + + ModelVariant MakeLoadedVariant(const std::string& name = "embedding-model") { + core_.OnCall("list_loaded_models", "[\"" + name + ":1\"]"); + return Factory::CreateModelVariant(&core_, Factory::MakeModelInfo(name, "alias"), &logger_); + } +}; + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_BasicResponse) { + core_.OnCall("embeddings", MakeEmbeddingResponseJson({{0.1f, 0.2f, 0.3f, 0.4f}})); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + auto response = 
client.GenerateEmbedding("hello world"); + + EXPECT_EQ("embedding-model", response.model); + EXPECT_EQ("list", response.object); + ASSERT_EQ(1u, response.data.size()); + EXPECT_EQ(0, response.data[0].index); + ASSERT_EQ(4u, response.data[0].embedding.size()); + EXPECT_NEAR(0.1f, response.data[0].embedding[0], 1e-5f); + EXPECT_NEAR(0.4f, response.data[0].embedding[3], 1e-5f); + ASSERT_TRUE(response.usage.has_value()); + EXPECT_EQ(5, *response.usage->prompt_tokens); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_RequestFormat) { + core_.OnCall("embeddings", MakeEmbeddingResponseJson({{0.0f}})); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + client.GenerateEmbedding("hello world"); + + auto requestJson = nlohmann::json::parse(core_.GetLastDataArg("embeddings")); + auto openAiReq = nlohmann::json::parse(requestJson["Params"]["OpenAICreateRequest"].get()); + EXPECT_EQ("embedding-model", openAiReq["model"].get()); + EXPECT_EQ("hello world", openAiReq["input"].get()); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_BasicResponse) { + core_.OnCall("embeddings", MakeEmbeddingResponseJson({{0.1f, 0.2f}, {0.3f, 0.4f}, {0.5f, 0.6f}})); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + std::vector inputs = {"first", "second", "third"}; + auto response = client.GenerateEmbeddings(inputs); + + ASSERT_EQ(3u, response.data.size()); + EXPECT_EQ(0, response.data[0].index); + EXPECT_EQ(1, response.data[1].index); + EXPECT_EQ(2, response.data[2].index); + EXPECT_NEAR(0.5f, response.data[2].embedding[0], 1e-5f); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_RequestFormat) { + core_.OnCall("embeddings", MakeEmbeddingResponseJson({{0.0f}, {0.0f}})); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + 
OpenAIEmbeddingClient client(variant); + + std::vector inputs = {"a", "b"}; + client.GenerateEmbeddings(inputs); + + auto requestJson = nlohmann::json::parse(core_.GetLastDataArg("embeddings")); + auto openAiReq = nlohmann::json::parse(requestJson["Params"]["OpenAICreateRequest"].get()); + EXPECT_EQ("embedding-model", openAiReq["model"].get()); + ASSERT_TRUE(openAiReq["input"].is_array()); + ASSERT_EQ(2u, openAiReq["input"].size()); + EXPECT_EQ("a", openAiReq["input"][0].get()); + EXPECT_EQ("b", openAiReq["input"][1].get()); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_EmptyInput_Throws) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + EXPECT_THROW(client.GenerateEmbedding(""), Exception); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_EmptyList_Throws) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + std::vector empty; + EXPECT_THROW(client.GenerateEmbeddings(empty), Exception); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_ListWithEmptyString_Throws) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + std::vector inputs = {"valid", "", "also valid"}; + EXPECT_THROW(client.GenerateEmbeddings(inputs), Exception); +} + +TEST_F(OpenAIEmbeddingClientTest, Constructor_ThrowsIfNotLoaded) { + core_.OnCall("list_loaded_models", R"([])"); + auto variant = Factory::CreateModelVariant(&core_, Factory::MakeModelInfo("unloaded-model", "alias"), &logger_); + EXPECT_THROW(OpenAIEmbeddingClient client(variant), Exception); +} + +TEST_F(OpenAIEmbeddingClientTest, GetModelId) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + 
EXPECT_EQ("embedding-model", client.GetModelId()); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_CoreError_Throws) { + core_.OnCallThrow("embeddings", "embedding generation failed"); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + EXPECT_THROW(client.GenerateEmbedding("test"), Exception); +} + +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_CoreError_Throws) { + core_.OnCallThrow("embeddings", "batch embedding generation failed"); + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + std::vector inputs = {"a", "b"}; + EXPECT_THROW(client.GenerateEmbeddings(inputs), Exception); +} diff --git a/sdk/cpp/test/e2e_test.cpp b/sdk/cpp/test/e2e_test.cpp index b49626120..0ef7d1b4c 100644 --- a/sdk/cpp/test/e2e_test.cpp +++ b/sdk/cpp/test/e2e_test.cpp @@ -42,6 +42,29 @@ class EndToEndTest : public ::testing::Test { static void SetUpTestSuite() { Configuration config("CppSdkE2ETest"); config.log_level = LogLevel::Information; + + // Match the C#/JS/Python/Rust SDK test suites: if the sibling + // test-data-shared repo exists (via compile-time path or explicit + // env override), use it as the model cache so locally cached test + // models are discoverable without a download. 
+ std::filesystem::path testDataSharedDir; + if (const char* override = std::getenv("FOUNDRY_LOCAL_TEST_DATA_SHARED_DIR")) { + testDataSharedDir = override; + } +#ifdef CPPSDK_TEST_DATA_SHARED_DIR + else { + testDataSharedDir = CPPSDK_TEST_DATA_SHARED_DIR; + } +#endif + if (!testDataSharedDir.empty() && std::filesystem::exists(testDataSharedDir)) { + std::cout << "[E2E] Using test-data-shared model cache: " << testDataSharedDir << "\n"; + config.model_cache_dir = testDataSharedDir; + } + else if (!testDataSharedDir.empty()) { + std::cout << "[E2E] test-data-shared path not found: " << testDataSharedDir + << " (falling back to default cache)\n"; + } + try { Manager::Create(std::move(config)); } @@ -61,6 +84,19 @@ class EndToEndTest : public ::testing::Test { static bool IsAudioModel(const std::string& alias) { return alias.find("whisper") != std::string::npos; } + static bool IsEmbeddingModel(const std::string& alias) { return alias.find("embedding") != std::string::npos; } + + /// Variant ID the other SDK test suites use and that test-data-shared ships. + static constexpr const char* kTestEmbeddingModelVariantId = "qwen3-0.6b-embedding-generic-cpu:1"; + + /// Returns the specific embedding model variant shipped by the sibling + /// test-data-shared repo. Mirrors the C#/JS/Python/Rust SDK test suites, + /// which all load `qwen3-0.6b-embedding-generic-cpu:1` directly rather + /// than picking whatever happens to be cached. + static IModel* FindEmbeddingModel(Catalog& catalog) { + return catalog.GetModelVariant(kTestEmbeddingModelVariantId); + } + /// Find a chat-capable model, preferring cached, then known small models, then any. /// Selects the CPU variant when available to avoid GPU/EP dependency issues. static IModel* FindChatModel(Catalog& catalog) { @@ -561,3 +597,220 @@ TEST_F(EndToEndTest, DISABLED_DownloadAndRemoveFromCache) { std::cout << "[E2E] RemoveFromCache completed for: " << target->GetAlias() << " (IsCached=" << (target->IsCached() ? 
"true" : "false") << ")\n"; } + +// =========================================================================== +// Download, load, embeddings (single and batch), unload +// =========================================================================== +// +// The embedding tests below mirror the integration test suites in the C#, JS, +// Python, and Rust SDKs. They all require a real embedding model (loaded via +// the Catalog); they are DISABLED_ by default and run only with +// --gtest_also_run_disabled_tests. +// +// Each test prepares an OpenAIEmbeddingClient over a loaded variant and relies +// on the suite's SetUp/TearDown to bring up the Manager. + +namespace { + +inline double L2Norm(const std::vector& v) { + double sum = 0; + for (float x : v) { + sum += static_cast(x) * static_cast(x); + } + return std::sqrt(sum); +} + +constexpr std::size_t kExpectedEmbeddingDim = 1024u; + +} // namespace + +// Each embedding test below starts with the same preamble: skip in CI, find +// a cached embedding model (skip if none), download + load, construct a +// client. We can't factor this into a helper because FindEmbeddingModel is +// a protected static on EndToEndTest — only reachable from inside TEST_F. 
+ +TEST_F(EndToEndTest, DISABLED_DownloadLoadEmbeddingUnload) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + + std::cout << "[E2E] Using embedding model: " << target->GetAlias() + << " variant: " << target->GetId() << "\n"; + target->Download(); + EXPECT_TRUE(target->IsCached()); + target->Load(); + EXPECT_TRUE(target->IsLoaded()); + + OpenAIEmbeddingClient client(*target); + + auto single = client.GenerateEmbedding("The capital of France is Paris"); + ASSERT_FALSE(single.data.empty()); + EXPECT_FALSE(single.data[0].embedding.empty()); + std::cout << "[E2E] Single embedding dim: " << single.data[0].embedding.size() << "\n"; + + std::vector inputs = {"short", "a longer sentence for embedding"}; + auto batch = client.GenerateEmbeddings(inputs); + ASSERT_EQ(2u, batch.data.size()); + EXPECT_EQ(single.data[0].embedding.size(), batch.data[0].embedding.size()); + EXPECT_EQ(single.data[0].embedding.size(), batch.data[1].embedding.size()); + + target->Unload(); + EXPECT_FALSE(target->IsLoaded()); +} + +TEST_F(EndToEndTest, DISABLED_Embedding_BasicRequest_Succeeds) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + auto response = client.GenerateEmbedding("The quick brown fox jumps over the lazy dog"); + EXPECT_EQ("list", response.object); + ASSERT_EQ(1u, response.data.size()); + EXPECT_EQ(0, response.data[0].index); + EXPECT_EQ(kExpectedEmbeddingDim, response.data[0].embedding.size()); +} + +TEST_F(EndToEndTest, DISABLED_Embedding_IsNormalized) { + if (IsRunningInCI()) GTEST_SKIP() << 
"Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + const std::vector inputs = {"The quick brown fox jumps over the lazy dog", + "Machine learning is a subset of artificial intelligence", + "The capital of France is Paris"}; + for (const auto& input : inputs) { + auto response = client.GenerateEmbedding(input); + ASSERT_FALSE(response.data.empty()); + const auto& embedding = response.data[0].embedding; + EXPECT_EQ(kExpectedEmbeddingDim, embedding.size()); + double norm = L2Norm(embedding); + EXPECT_GE(norm, 0.99); + EXPECT_LE(norm, 1.01); + } +} + +TEST_F(EndToEndTest, DISABLED_Embedding_DifferentInputs_ProduceDifferentEmbeddings) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + auto a = client.GenerateEmbedding("The quick brown fox"); + auto b = client.GenerateEmbedding("The capital of France is Paris"); + ASSERT_EQ(a.data[0].embedding.size(), b.data[0].embedding.size()); + + // Inputs are L2-normalized, so dot product == cosine similarity. 
+ double dot = 0; + for (std::size_t i = 0; i < a.data[0].embedding.size(); ++i) { + dot += static_cast(a.data[0].embedding[i]) * static_cast(b.data[0].embedding[i]); + } + EXPECT_LT(dot, 0.99); +} + +TEST_F(EndToEndTest, DISABLED_Embedding_SameInput_ProducesSameEmbedding) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + const std::string input = "Deterministic embedding test"; + auto first = client.GenerateEmbedding(input); + auto second = client.GenerateEmbedding(input); + ASSERT_EQ(first.data[0].embedding.size(), second.data[0].embedding.size()); + for (std::size_t i = 0; i < first.data[0].embedding.size(); ++i) { + EXPECT_FLOAT_EQ(first.data[0].embedding[i], second.data[0].embedding[i]); + } +} + +TEST_F(EndToEndTest, DISABLED_Embedding_KnownValues_CapitalOfFrance) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + auto response = client.GenerateEmbedding("The capital of France is Paris"); + ASSERT_FALSE(response.data.empty()); + const auto& embedding = response.data[0].embedding; + ASSERT_EQ(kExpectedEmbeddingDim, embedding.size()); + + // Tolerance-based comparison — float32 outputs vary across hardware/ORT builds. 
+ constexpr double kTolerance = 1e-3; + EXPECT_NEAR(static_cast(embedding[0]), -0.02815740555524826, kTolerance); + EXPECT_NEAR(static_cast(embedding[1023]), -0.00887922290712595, kTolerance); +} + +TEST_F(EndToEndTest, DISABLED_Embedding_Batch_ReturnsMultipleEmbeddings) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + const std::vector inputs = {"The quick brown fox jumps over the lazy dog", + "Machine learning is a subset of artificial intelligence", + "The capital of France is Paris"}; + auto response = client.GenerateEmbeddings(inputs); + ASSERT_EQ(3u, response.data.size()); + for (std::size_t i = 0; i < response.data.size(); ++i) { + EXPECT_EQ(static_cast(i), response.data[i].index); + EXPECT_EQ(kExpectedEmbeddingDim, response.data[i].embedding.size()); + } +} + +TEST_F(EndToEndTest, DISABLED_Embedding_Batch_EachEmbeddingIsNormalized) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + const std::vector inputs = {"Hello world", "Goodbye world"}; + auto response = client.GenerateEmbeddings(inputs); + ASSERT_EQ(2u, response.data.size()); + for (const auto& obj : response.data) { + double norm = L2Norm(obj.embedding); + EXPECT_GE(norm, 0.99); + EXPECT_LE(norm, 1.01); + } +} + +TEST_F(EndToEndTest, DISABLED_Embedding_Batch_MatchesSingleInputResults) { + if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; + auto& catalog = Manager::Instance().GetCatalog(); + auto* target = 
FindEmbeddingModel(catalog); + if (!target) GTEST_SKIP() << "No embedding model found in catalog"; + target->Download(); + target->Load(); + OpenAIEmbeddingClient client(*target); + + const std::string input = "The capital of France is Paris"; + auto single = client.GenerateEmbedding(input); + auto batch = client.GenerateEmbeddings(std::vector{input}); + ASSERT_EQ(1u, batch.data.size()); + ASSERT_EQ(single.data[0].embedding.size(), batch.data[0].embedding.size()); + for (std::size_t i = 0; i < single.data[0].embedding.size(); ++i) { + EXPECT_FLOAT_EQ(single.data[0].embedding[i], batch.data[0].embedding[i]); + } +} From 021d2bbc219fb95b4fee9cadc28fb8e4f39cfb99 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Wed, 22 Apr 2026 23:13:12 -0700 Subject: [PATCH 03/11] refactor --- sdk/cpp/CMakeLists.txt | 8 - .../include/openai/openai_embedding_client.h | 1 - sdk/cpp/src/openai_embedding_client.cpp | 19 +- sdk/cpp/test/client_test.cpp | 17 ++ sdk/cpp/test/e2e_test.cpp | 200 +----------------- 5 files changed, 35 insertions(+), 210 deletions(-) diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index 908a4d918..b2ce1cb11 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -229,14 +229,6 @@ if (BUILD_TESTING) ${CMAKE_CURRENT_SOURCE_DIR}/src ) - # Point E2E tests at the sibling test-data-shared repo (same convention as - # the C#/JS/Python/Rust SDK test suites). At runtime, SetUpTestSuite uses - # this path as `Configuration::model_cache_dir` when the directory exists, - # so locally-cached test models are discoverable without a download. 
- target_compile_definitions(CppSdkE2ETests PRIVATE - CPPSDK_TEST_DATA_SHARED_DIR="${CMAKE_CURRENT_SOURCE_DIR}/../../../test-data-shared" - ) - target_link_libraries(CppSdkE2ETests PRIVATE CppSdk diff --git a/sdk/cpp/include/openai/openai_embedding_client.h b/sdk/cpp/include/openai/openai_embedding_client.h index 5bb7319cf..795e68138 100644 --- a/sdk/cpp/include/openai/openai_embedding_client.h +++ b/sdk/cpp/include/openai/openai_embedding_client.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/sdk/cpp/src/openai_embedding_client.cpp b/sdk/cpp/src/openai_embedding_client.cpp index 6c393db89..5ac766d65 100644 --- a/sdk/cpp/src/openai_embedding_client.cpp +++ b/sdk/cpp/src/openai_embedding_client.cpp @@ -1,10 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include #include #include #include -#include #include #include @@ -18,6 +18,19 @@ namespace foundry_local { + namespace { + /// True for strings that are empty or contain only whitespace characters. + /// Equivalent to C#'s IsNullOrWhiteSpace, JS's trim() === '', Python's .strip() == "". 
+ bool IsBlank(std::string_view s) { + for (char c : s) { + if (!std::isspace(static_cast(c))) { + return false; + } + } + return true; + } + } // namespace + OpenAIEmbeddingClient::OpenAIEmbeddingClient(gsl::not_null core, std::string_view modelId, gsl::not_null logger) : core_(core), modelId_(modelId), logger_(logger) {} @@ -37,7 +50,7 @@ namespace foundry_local { } EmbeddingCreateResponse OpenAIEmbeddingClient::GenerateEmbedding(std::string_view input) const { - if (input.empty()) { + if (IsBlank(input)) { throw Exception("Embedding input must be a non-empty string.", *logger_); } @@ -60,7 +73,7 @@ namespace foundry_local { throw Exception("Embedding inputs must be a non-empty array of strings.", *logger_); } for (const auto& s : inputs) { - if (s.empty()) { + if (IsBlank(s)) { throw Exception("Each embedding input must be a non-empty string.", *logger_); } } diff --git a/sdk/cpp/test/client_test.cpp b/sdk/cpp/test/client_test.cpp index 868ede024..53a5353af 100644 --- a/sdk/cpp/test/client_test.cpp +++ b/sdk/cpp/test/client_test.cpp @@ -849,6 +849,14 @@ TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_EmptyInput_Throws) { EXPECT_THROW(client.GenerateEmbedding(""), Exception); } +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbedding_WhitespaceOnlyInput_Throws) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + EXPECT_THROW(client.GenerateEmbedding(" \t\n "), Exception); +} + TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_EmptyList_Throws) { core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); auto variant = MakeLoadedVariant(); @@ -867,6 +875,15 @@ TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_ListWithEmptyString_Throws) EXPECT_THROW(client.GenerateEmbeddings(inputs), Exception); } +TEST_F(OpenAIEmbeddingClientTest, GenerateEmbeddings_ListWithWhitespaceOnlyString_Throws) { + core_.OnCall("list_loaded_models", R"(["embedding-model:1"])"); + 
auto variant = MakeLoadedVariant(); + OpenAIEmbeddingClient client(variant); + + std::vector inputs = {"valid", " ", "also valid"}; + EXPECT_THROW(client.GenerateEmbeddings(inputs), Exception); +} + TEST_F(OpenAIEmbeddingClientTest, Constructor_ThrowsIfNotLoaded) { core_.OnCall("list_loaded_models", R"([])"); auto variant = Factory::CreateModelVariant(&core_, Factory::MakeModelInfo("unloaded-model", "alias"), &logger_); diff --git a/sdk/cpp/test/e2e_test.cpp b/sdk/cpp/test/e2e_test.cpp index 0ef7d1b4c..704f36df3 100644 --- a/sdk/cpp/test/e2e_test.cpp +++ b/sdk/cpp/test/e2e_test.cpp @@ -42,29 +42,6 @@ class EndToEndTest : public ::testing::Test { static void SetUpTestSuite() { Configuration config("CppSdkE2ETest"); config.log_level = LogLevel::Information; - - // Match the C#/JS/Python/Rust SDK test suites: if the sibling - // test-data-shared repo exists (via compile-time path or explicit - // env override), use it as the model cache so locally cached test - // models are discoverable without a download. - std::filesystem::path testDataSharedDir; - if (const char* override = std::getenv("FOUNDRY_LOCAL_TEST_DATA_SHARED_DIR")) { - testDataSharedDir = override; - } -#ifdef CPPSDK_TEST_DATA_SHARED_DIR - else { - testDataSharedDir = CPPSDK_TEST_DATA_SHARED_DIR; - } -#endif - if (!testDataSharedDir.empty() && std::filesystem::exists(testDataSharedDir)) { - std::cout << "[E2E] Using test-data-shared model cache: " << testDataSharedDir << "\n"; - config.model_cache_dir = testDataSharedDir; - } - else if (!testDataSharedDir.empty()) { - std::cout << "[E2E] test-data-shared path not found: " << testDataSharedDir - << " (falling back to default cache)\n"; - } - try { Manager::Create(std::move(config)); } @@ -610,25 +587,6 @@ TEST_F(EndToEndTest, DISABLED_DownloadAndRemoveFromCache) { // Each test prepares an OpenAIEmbeddingClient over a loaded variant and relies // on the suite's SetUp/TearDown to bring up the Manager. 
-namespace { - -inline double L2Norm(const std::vector& v) { - double sum = 0; - for (float x : v) { - sum += static_cast(x) * static_cast(x); - } - return std::sqrt(sum); -} - -constexpr std::size_t kExpectedEmbeddingDim = 1024u; - -} // namespace - -// Each embedding test below starts with the same preamble: skip in CI, find -// a cached embedding model (skip if none), download + load, construct a -// client. We can't factor this into a helper because FindEmbeddingModel is -// a protected static on EndToEndTest — only reachable from inside TEST_F. - TEST_F(EndToEndTest, DISABLED_DownloadLoadEmbeddingUnload) { if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; auto& catalog = Manager::Instance().GetCatalog(); @@ -644,11 +602,13 @@ TEST_F(EndToEndTest, DISABLED_DownloadLoadEmbeddingUnload) { OpenAIEmbeddingClient client(*target); + // Single input auto single = client.GenerateEmbedding("The capital of France is Paris"); ASSERT_FALSE(single.data.empty()); EXPECT_FALSE(single.data[0].embedding.empty()); std::cout << "[E2E] Single embedding dim: " << single.data[0].embedding.size() << "\n"; + // Batch input std::vector inputs = {"short", "a longer sentence for embedding"}; auto batch = client.GenerateEmbeddings(inputs); ASSERT_EQ(2u, batch.data.size()); @@ -658,159 +618,3 @@ TEST_F(EndToEndTest, DISABLED_DownloadLoadEmbeddingUnload) { target->Unload(); EXPECT_FALSE(target->IsLoaded()); } - -TEST_F(EndToEndTest, DISABLED_Embedding_BasicRequest_Succeeds) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - auto response = client.GenerateEmbedding("The quick brown fox jumps over the lazy dog"); - EXPECT_EQ("list", response.object); - ASSERT_EQ(1u, 
response.data.size()); - EXPECT_EQ(0, response.data[0].index); - EXPECT_EQ(kExpectedEmbeddingDim, response.data[0].embedding.size()); -} - -TEST_F(EndToEndTest, DISABLED_Embedding_IsNormalized) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - const std::vector inputs = {"The quick brown fox jumps over the lazy dog", - "Machine learning is a subset of artificial intelligence", - "The capital of France is Paris"}; - for (const auto& input : inputs) { - auto response = client.GenerateEmbedding(input); - ASSERT_FALSE(response.data.empty()); - const auto& embedding = response.data[0].embedding; - EXPECT_EQ(kExpectedEmbeddingDim, embedding.size()); - double norm = L2Norm(embedding); - EXPECT_GE(norm, 0.99); - EXPECT_LE(norm, 1.01); - } -} - -TEST_F(EndToEndTest, DISABLED_Embedding_DifferentInputs_ProduceDifferentEmbeddings) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - auto a = client.GenerateEmbedding("The quick brown fox"); - auto b = client.GenerateEmbedding("The capital of France is Paris"); - ASSERT_EQ(a.data[0].embedding.size(), b.data[0].embedding.size()); - - // Inputs are L2-normalized, so dot product == cosine similarity. 
- double dot = 0; - for (std::size_t i = 0; i < a.data[0].embedding.size(); ++i) { - dot += static_cast(a.data[0].embedding[i]) * static_cast(b.data[0].embedding[i]); - } - EXPECT_LT(dot, 0.99); -} - -TEST_F(EndToEndTest, DISABLED_Embedding_SameInput_ProducesSameEmbedding) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - const std::string input = "Deterministic embedding test"; - auto first = client.GenerateEmbedding(input); - auto second = client.GenerateEmbedding(input); - ASSERT_EQ(first.data[0].embedding.size(), second.data[0].embedding.size()); - for (std::size_t i = 0; i < first.data[0].embedding.size(); ++i) { - EXPECT_FLOAT_EQ(first.data[0].embedding[i], second.data[0].embedding[i]); - } -} - -TEST_F(EndToEndTest, DISABLED_Embedding_KnownValues_CapitalOfFrance) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - auto response = client.GenerateEmbedding("The capital of France is Paris"); - ASSERT_FALSE(response.data.empty()); - const auto& embedding = response.data[0].embedding; - ASSERT_EQ(kExpectedEmbeddingDim, embedding.size()); - - // Tolerance-based comparison — float32 outputs vary across hardware/ORT builds. 
- constexpr double kTolerance = 1e-3; - EXPECT_NEAR(static_cast(embedding[0]), -0.02815740555524826, kTolerance); - EXPECT_NEAR(static_cast(embedding[1023]), -0.00887922290712595, kTolerance); -} - -TEST_F(EndToEndTest, DISABLED_Embedding_Batch_ReturnsMultipleEmbeddings) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - const std::vector inputs = {"The quick brown fox jumps over the lazy dog", - "Machine learning is a subset of artificial intelligence", - "The capital of France is Paris"}; - auto response = client.GenerateEmbeddings(inputs); - ASSERT_EQ(3u, response.data.size()); - for (std::size_t i = 0; i < response.data.size(); ++i) { - EXPECT_EQ(static_cast(i), response.data[i].index); - EXPECT_EQ(kExpectedEmbeddingDim, response.data[i].embedding.size()); - } -} - -TEST_F(EndToEndTest, DISABLED_Embedding_Batch_EachEmbeddingIsNormalized) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - const std::vector inputs = {"Hello world", "Goodbye world"}; - auto response = client.GenerateEmbeddings(inputs); - ASSERT_EQ(2u, response.data.size()); - for (const auto& obj : response.data) { - double norm = L2Norm(obj.embedding); - EXPECT_GE(norm, 0.99); - EXPECT_LE(norm, 1.01); - } -} - -TEST_F(EndToEndTest, DISABLED_Embedding_Batch_MatchesSingleInputResults) { - if (IsRunningInCI()) GTEST_SKIP() << "Skipped in CI (requires model download)"; - auto& catalog = Manager::Instance().GetCatalog(); - auto* target = 
FindEmbeddingModel(catalog); - if (!target) GTEST_SKIP() << "No embedding model found in catalog"; - target->Download(); - target->Load(); - OpenAIEmbeddingClient client(*target); - - const std::string input = "The capital of France is Paris"; - auto single = client.GenerateEmbedding(input); - auto batch = client.GenerateEmbeddings(std::vector{input}); - ASSERT_EQ(1u, batch.data.size()); - ASSERT_EQ(single.data[0].embedding.size(), batch.data[0].embedding.size()); - for (std::size_t i = 0; i < single.data[0].embedding.size(); ++i) { - EXPECT_FLOAT_EQ(single.data[0].embedding[i], batch.data[0].embedding[i]); - } -} From 9946ead5fbda38d838fb57fd6a6cced00ea8a252 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 23 Apr 2026 00:22:15 -0700 Subject: [PATCH 04/11] Updated embedding model alias in samples --- samples/cs/embeddings/Program.cs | 2 +- samples/js/embeddings/app.js | 2 +- samples/python/embeddings/src/app.py | 2 +- samples/rust/embeddings/src/main.rs | 2 +- sdk/cpp/sample/main.cpp | 2 +- sdk/cpp/test/e2e_test.cpp | 28 +++++++++++++++++++++------- 6 files changed, 26 insertions(+), 12 deletions(-) diff --git a/samples/cs/embeddings/Program.cs b/samples/cs/embeddings/Program.cs index 348bc3461..724ab531e 100644 --- a/samples/cs/embeddings/Program.cs +++ b/samples/cs/embeddings/Program.cs @@ -20,7 +20,7 @@ var catalog = await mgr.GetCatalogAsync(); // Get an embedding model -var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found"); +var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? 
throw new Exception("Embedding model not found"); // Download the model (the method skips download if already cached) await model.DownloadAsync(progress => diff --git a/samples/js/embeddings/app.js b/samples/js/embeddings/app.js index ea6ff1858..b50a31a82 100644 --- a/samples/js/embeddings/app.js +++ b/samples/js/embeddings/app.js @@ -16,7 +16,7 @@ console.log('✓ SDK initialized successfully'); // // Get an embedding model -const modelAlias = 'qwen3-0.6b-embedding'; +const modelAlias = 'qwen3-embedding-0.6b'; const model = await manager.catalog.getModel(modelAlias); // Download the model diff --git a/samples/python/embeddings/src/app.py b/samples/python/embeddings/src/app.py index 30ade4b20..fcf36d201 100644 --- a/samples/python/embeddings/src/app.py +++ b/samples/python/embeddings/src/app.py @@ -12,7 +12,7 @@ def main(): manager = FoundryLocalManager.instance # Select and load an embedding model from the catalog - model = manager.catalog.get_model("qwen3-0.6b-embedding") + model = manager.catalog.get_model("qwen3-embedding-0.6b") model.download( lambda progress: print( f"\rDownloading model: {progress:.2f}%", diff --git a/samples/rust/embeddings/src/main.rs b/samples/rust/embeddings/src/main.rs index 9b5550f05..b76e53b01 100644 --- a/samples/rust/embeddings/src/main.rs +++ b/samples/rust/embeddings/src/main.rs @@ -6,7 +6,7 @@ use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; // -const ALIAS: &str = "qwen3-0.6b-embedding"; +const ALIAS: &str = "qwen3-embedding-0.6b"; #[tokio::main] async fn main() -> Result<(), Box> { diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 27defa750..b12f808c7 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -491,7 +491,7 @@ int main(int argc, char* argv[]) { } // 6. 
Embeddings (uncomment and set a valid embedding model alias) - // GenerateEmbeddings(manager, "qwen3-0.6b-embedding"); + GenerateEmbeddings(manager, "qwen3-embedding-0.6b"); Manager::Destroy(); return 0; diff --git a/sdk/cpp/test/e2e_test.cpp b/sdk/cpp/test/e2e_test.cpp index 704f36df3..7fdbf6ba4 100644 --- a/sdk/cpp/test/e2e_test.cpp +++ b/sdk/cpp/test/e2e_test.cpp @@ -63,15 +63,29 @@ class EndToEndTest : public ::testing::Test { static bool IsEmbeddingModel(const std::string& alias) { return alias.find("embedding") != std::string::npos; } - /// Variant ID the other SDK test suites use and that test-data-shared ships. - static constexpr const char* kTestEmbeddingModelVariantId = "qwen3-0.6b-embedding-generic-cpu:1"; - /// Returns the specific embedding model variant shipped by the sibling - /// test-data-shared repo. Mirrors the C#/JS/Python/Rust SDK test suites, - /// which all load `qwen3-0.6b-embedding-generic-cpu:1` directly rather - /// than picking whatever happens to be cached. + /// Find an embedding model, preferring cached. static IModel* FindEmbeddingModel(Catalog& catalog) { - return catalog.GetModelVariant(kTestEmbeddingModelVariantId); + IModel* target = nullptr; + + auto cached = catalog.GetCachedModels(); + for (auto* variant : cached) { + if (IsEmbeddingModel(variant->GetAlias())) { + target = catalog.GetModel(variant->GetAlias()); + if (target) + break; + } + } + + if (!target) { + for (const auto& alias : {"qwen3-embedding-0.6b"}) { + target = catalog.GetModel(alias); + if (target) + break; + } + } + + return target; } /// Find a chat-capable model, preferring cached, then known small models, then any. 
From f57e37794a938a646f3a8db5a32da00af675b011 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 23 Apr 2026 00:37:19 -0700 Subject: [PATCH 05/11] Updated embedding samples to be consistent with other samples --- samples/cs/embeddings/Program.cs | 39 +++++++++++++++++++++++++++- samples/js/embeddings/app.js | 30 +++++++++++++++++++++ samples/python/embeddings/src/app.py | 27 +++++++++++++++++++ samples/rust/embeddings/src/main.rs | 35 +++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 1 deletion(-) diff --git a/samples/cs/embeddings/Program.cs b/samples/cs/embeddings/Program.cs index 724ab531e..53bbca83a 100644 --- a/samples/cs/embeddings/Program.cs +++ b/samples/cs/embeddings/Program.cs @@ -13,6 +13,44 @@ // Initialize the singleton instance. await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger()); var mgr = FoundryLocalManager.Instance; + +// Discover available execution providers and their registration status. +var eps = mgr.DiscoverEps(); +int maxNameLen = 30; +Console.WriteLine("Available execution providers:"); +Console.WriteLine($" {"Name".PadRight(maxNameLen)} Registered"); +Console.WriteLine($" {new string('─', maxNameLen)} {"──────────"}"); +foreach (var ep in eps) +{ + Console.WriteLine($" {ep.Name.PadRight(maxNameLen)} {ep.IsRegistered}"); +} + +// Download and register all execution providers with per-EP progress. +// EP packages include dependencies and may be large. +// Download is only required again if a new version of the EP is released. +// For cross platform builds there is no dynamic EP download and this will return immediately. 
+Console.WriteLine("\nDownloading execution providers:"); +if (eps.Length > 0) +{ + string currentEp = ""; + await mgr.DownloadAndRegisterEpsAsync((epName, percent) => + { + if (epName != currentEp) + { + if (currentEp != "") + { + Console.WriteLine(); + } + currentEp = epName; + } + Console.Write($"\r {epName.PadRight(maxNameLen)} {percent,6:F1}%"); + }); + Console.WriteLine(); +} +else +{ + Console.WriteLine("No execution providers to download."); +} // // @@ -69,6 +107,5 @@ await model.DownloadAsync(progress => // // Tidy up - unload the model await model.UnloadAsync(); -Console.WriteLine("\nModel unloaded."); // // diff --git a/samples/js/embeddings/app.js b/samples/js/embeddings/app.js index b50a31a82..5577566a2 100644 --- a/samples/js/embeddings/app.js +++ b/samples/js/embeddings/app.js @@ -14,6 +14,36 @@ const manager = FoundryLocalManager.create({ // console.log('✓ SDK initialized successfully'); +// Discover available execution providers and their registration status. +const eps = manager.discoverEps(); +const maxNameLen = 30; +console.log('\nAvailable execution providers:'); +console.log(` ${'Name'.padEnd(maxNameLen)} Registered`); +console.log(` ${'─'.repeat(maxNameLen)} ──────────`); +for (const ep of eps) { + console.log(` ${ep.name.padEnd(maxNameLen)} ${ep.isRegistered}`); +} + +// Download and register all execution providers with per-EP progress. +// EP packages include dependencies and may be large. +// Download is only required again if a new version of the EP is released. 
+console.log('\nDownloading execution providers:'); +if (eps.length > 0) { + let currentEp = ''; + await manager.downloadAndRegisterEps((epName, percent) => { + if (epName !== currentEp) { + if (currentEp !== '') { + process.stdout.write('\n'); + } + currentEp = epName; + } + process.stdout.write(`\r ${epName.padEnd(maxNameLen)} ${percent.toFixed(1).padStart(5)}%`); + }); + process.stdout.write('\n'); +} else { + console.log('No execution providers to download.'); +} + // // Get an embedding model const modelAlias = 'qwen3-embedding-0.6b'; diff --git a/samples/python/embeddings/src/app.py b/samples/python/embeddings/src/app.py index fcf36d201..f10a71e44 100644 --- a/samples/python/embeddings/src/app.py +++ b/samples/python/embeddings/src/app.py @@ -11,6 +11,33 @@ def main(): FoundryLocalManager.initialize(config) manager = FoundryLocalManager.instance + # Discover available execution providers and their registration status. + eps = manager.discover_eps() + max_name_len = 30 + print("Available execution providers:") + print(f" {'Name':<{max_name_len}} Registered") + print(f" {'─' * max_name_len} ──────────") + for ep in eps: + print(f" {ep.name:<{max_name_len}} {ep.is_registered}") + + # Download and register all execution providers. 
+ print("\nDownloading execution providers:") + current_ep = "" + def ep_progress(ep_name: str, percent: float): + nonlocal current_ep + if ep_name != current_ep: + if current_ep: + print() + current_ep = ep_name + print(f"\r {ep_name:<{max_name_len}} {percent:5.1f}%", end="", flush=True) + + if eps: + manager.download_and_register_eps(progress_callback=ep_progress) + if current_ep: + print() + else: + print("No execution providers to download.") + # Select and load an embedding model from the catalog model = manager.catalog.get_model("qwen3-embedding-0.6b") model.download( diff --git a/samples/rust/embeddings/src/main.rs b/samples/rust/embeddings/src/main.rs index b76e53b01..2849edd87 100644 --- a/samples/rust/embeddings/src/main.rs +++ b/samples/rust/embeddings/src/main.rs @@ -3,6 +3,8 @@ // Licensed under the MIT License. // +use std::io::{self, Write}; + use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; // @@ -18,6 +20,39 @@ async fn main() -> Result<(), Box> { let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?; // + // Discover available execution providers and their registration status. 
+ let eps = manager.discover_eps()?; + let max_name_len = 30; + println!("Available execution providers:"); + println!(" {: let model = manager.catalog().get_model(ALIAS).await?; From ac2be1332ecae2ef5596a6e20420dfd75a16b522 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 23 Apr 2026 01:05:02 -0700 Subject: [PATCH 06/11] refactor --- sdk/cpp/src/openai_embedding_client.cpp | 1 - sdk/python/src/openai/embedding_client.py | 2 +- sdk/rust/src/openai/embedding_client.rs | 20 ++++++++------------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/sdk/cpp/src/openai_embedding_client.cpp b/sdk/cpp/src/openai_embedding_client.cpp index 5ac766d65..1ad19201c 100644 --- a/sdk/cpp/src/openai_embedding_client.cpp +++ b/sdk/cpp/src/openai_embedding_client.cpp @@ -20,7 +20,6 @@ namespace foundry_local { namespace { /// True for strings that are empty or contain only whitespace characters. - /// Equivalent to C#'s IsNullOrWhiteSpace, JS's trim() === '', Python's .strip() == "". bool IsBlank(std::string_view s) { for (char c : s) { if (!std::isspace(static_cast(c))) { diff --git a/sdk/python/src/openai/embedding_client.py b/sdk/python/src/openai/embedding_client.py index 89a3b8e55..069c6bcab 100644 --- a/sdk/python/src/openai/embedding_client.py +++ b/sdk/python/src/openai/embedding_client.py @@ -98,7 +98,7 @@ def generate_embeddings(self, inputs: List[str]) -> CreateEmbeddingResponse: ValueError: If *inputs* is empty or contains empty strings. FoundryLocalException: If the underlying native embeddings command fails. 
""" - if not inputs or len(inputs) == 0: + if not inputs: raise ValueError("Inputs must be a non-empty list of strings.") for text in inputs: diff --git a/sdk/rust/src/openai/embedding_client.rs b/sdk/rust/src/openai/embedding_client.rs index 5de080a0c..3215cb052 100644 --- a/sdk/rust/src/openai/embedding_client.rs +++ b/sdk/rust/src/openai/embedding_client.rs @@ -55,27 +55,23 @@ impl EmbeddingClient { .execute_command_async("embeddings".into(), Some(params)) .await?; - // Patch the response to add fields required by async_openai types - // that the server doesn't return (object on each item, usage) + // The server omits two fields that async_openai's CreateEmbeddingResponse + // requires: per-item `object` and top-level `usage`. Inject defaults before + // deserializing. let mut response_value: Value = serde_json::from_str(&raw)?; if let Some(data) = response_value .get_mut("data") .and_then(|d| d.as_array_mut()) { for item in data { - if item.get("object").is_none() { - item.as_object_mut() - .map(|m| m.insert("object".into(), json!("embedding"))); + if let Some(obj) = item.as_object_mut() { + obj.entry("object").or_insert_with(|| json!("embedding")); } } } - if response_value.get("usage").is_none() { - response_value.as_object_mut().map(|m| { - m.insert( - "usage".into(), - json!({"prompt_tokens": 0, "total_tokens": 0}), - ) - }); + if let Some(root) = response_value.as_object_mut() { + root.entry("usage") + .or_insert_with(|| json!({"prompt_tokens": 0, "total_tokens": 0})); } let parsed: CreateEmbeddingResponse = serde_json::from_value(response_value)?; From 5efba5ca69d0be6694335e742b7b66a6f33fcabb Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 23 Apr 2026 12:25:17 -0700 Subject: [PATCH 07/11] fix comment --- sdk/cpp/sample/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index b12f808c7..a1e33fe45 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ 
-490,7 +490,7 @@ int main(int argc, char* argv[]) { std::cerr << "Example 5 failed: " << ex.what() << "\n"; } - // 6. Embeddings (uncomment and set a valid embedding model alias) + // 6. Embeddings — generate single and batch embeddings GenerateEmbeddings(manager, "qwen3-embedding-0.6b"); Manager::Destroy(); From bc439760f7b5aff5d3a8a39f91d256e092138a41 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Mon, 4 May 2026 11:33:51 -0700 Subject: [PATCH 08/11] Added cpp sdk sample for embedding --- samples/cpp/embeddings/README.md | 21 +++++++++ samples/cpp/embeddings/main.cpp | 77 ++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 samples/cpp/embeddings/README.md create mode 100644 samples/cpp/embeddings/main.cpp diff --git a/samples/cpp/embeddings/README.md b/samples/cpp/embeddings/README.md new file mode 100644 index 000000000..4b951b512 --- /dev/null +++ b/samples/cpp/embeddings/README.md @@ -0,0 +1,21 @@ +# Embeddings Example (C++) + +Demonstrates single and batch text embedding generation using the Foundry Local C++ SDK. + +Loads the `qwen3-embedding-0.6b` model and exercises +`OpenAIEmbeddingClient::GenerateEmbedding` (single input) and +`OpenAIEmbeddingClient::GenerateEmbeddings` (batch input), printing the returned +dimensionality for each. + + +## Build + +```bash +g++ -std=c++17 main.cpp -lfoundry_local -o embeddings-example +``` + +## Run + +```bash +./embeddings-example +``` diff --git a/samples/cpp/embeddings/main.cpp b/samples/cpp/embeddings/main.cpp new file mode 100644 index 000000000..b1197122f --- /dev/null +++ b/samples/cpp/embeddings/main.cpp @@ -0,0 +1,77 @@ +// Embeddings — Foundry Local C++ SDK Example +// +// Demonstrates single-input and batch embedding generation using the +// OpenAI-compatible `OpenAIEmbeddingClient` against a locally loaded +// embedding model. 
+// +// Requires: Foundry Local C++ SDK +// +// Usage: ./embeddings-example + +#include +#include +#include +#include + +#include "foundry_local.h" + +int main() { + try { + std::cout << "===========================================================" << std::endl; + std::cout << " Foundry Local -- Embeddings Demo (C++)" << std::endl; + std::cout << "===========================================================" << std::endl; + std::cout << std::endl; + + foundry_local::Configuration config("foundry_local_samples"); + + foundry_local::Manager::Create(config); + auto& manager = foundry_local::Manager::Instance(); + manager.EnsureEpsDownloaded(); + + auto& catalog = manager.GetCatalog(); + auto* model = catalog.GetModel("qwen3-embedding-0.6b"); + if (!model) { + throw std::runtime_error("Model \"qwen3-embedding-0.6b\" not found in catalog"); + } + + std::cout << "Downloading model (if needed)..." << std::endl; + model->Download([](float pct) { + std::cout << "\rDownloading: " << pct << "% " << std::flush; + }); + std::cout << std::endl; + std::cout << "Loading model..." 
<< std::endl; + model->Load(); + std::cout << "Model loaded" << std::endl; + + foundry_local::OpenAIEmbeddingClient embeddings(*model); + + // Single input + std::cout << std::endl << "--- Single Embedding ---" << std::endl; + auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog"); + if (!single.data.empty()) { + std::cout << "Dimensions: " << single.data[0].embedding.size() << std::endl; + } + + // Batch input + std::cout << std::endl << "--- Batch Embeddings ---" << std::endl; + std::vector inputs = { + "Machine learning is a subset of artificial intelligence", + "The capital of France is Paris", + "Rust is a systems programming language", + }; + auto batch = embeddings.GenerateEmbeddings(inputs); + std::cout << "Number of embeddings: " << batch.data.size() << std::endl; + for (std::size_t i = 0; i < batch.data.size(); ++i) { + std::cout << " [" << i << "] Dimensions: " << batch.data[i].embedding.size() << std::endl; + } + + model->Unload(); + std::cout << std::endl << "Model unloaded" << std::endl; + + return 0; + } + catch (const std::exception& ex) { + std::cerr << "Fatal: " << ex.what() << std::endl; + return 1; + } +} From c0910e954c8d26748b8e06f62eef6186b2977d93 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Tue, 5 May 2026 13:43:54 -0700 Subject: [PATCH 09/11] Updated cpp SDK based on new pattern --- samples/cpp/embeddings/main.cpp | 1 + sdk/cpp/CMakeLists.txt | 2 +- sdk/cpp/include/foundry_local.h | 2 +- .../openai/{openai_embedding_client.h => embedding_client.h} | 0 sdk/cpp/sample/main.cpp | 2 +- .../src/{openai_embedding_client.cpp => embedding_client.cpp} | 0 6 files changed, 4 insertions(+), 3 deletions(-) rename sdk/cpp/include/openai/{openai_embedding_client.h => embedding_client.h} (100%) rename sdk/cpp/src/{openai_embedding_client.cpp => embedding_client.cpp} (100%) diff --git a/samples/cpp/embeddings/main.cpp b/samples/cpp/embeddings/main.cpp index b1197122f..dca549ba4 100644 --- 
a/samples/cpp/embeddings/main.cpp +++ b/samples/cpp/embeddings/main.cpp @@ -37,6 +37,7 @@ int main() { std::cout << "Downloading model (if needed)..." << std::endl; model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; + return true; }); std::cout << std::endl; std::cout << "Loading model..." << std::endl; diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index b2ce1cb11..a7a5f7feb 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -54,7 +54,7 @@ add_library(CppSdk STATIC src/audio_client.cpp src/live_audio_types.cpp src/live_audio_session.cpp - src/openai_embedding_client.cpp + src/embedding_client.cpp src/foundry_local_manager.cpp ) diff --git a/sdk/cpp/include/foundry_local.h b/sdk/cpp/include/foundry_local.h index 1c78ffdd2..06ea68098 100644 --- a/sdk/cpp/include/foundry_local.h +++ b/sdk/cpp/include/foundry_local.h @@ -18,4 +18,4 @@ #include "openai/audio_client.h" #include "openai/live_audio_types.h" #include "openai/live_audio_session.h" -#include "openai/openai_embedding_client.h" +#include "openai/embedding_client.h" diff --git a/sdk/cpp/include/openai/openai_embedding_client.h b/sdk/cpp/include/openai/embedding_client.h similarity index 100% rename from sdk/cpp/include/openai/openai_embedding_client.h rename to sdk/cpp/include/openai/embedding_client.h diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index a1e33fe45..676cc30d2 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -376,7 +376,7 @@ void GenerateEmbeddings(Manager& manager, const std::string& alias) { return; } - model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; }); + model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; return true; }); std::cout << "\n"; model->Load(); diff --git a/sdk/cpp/src/openai_embedding_client.cpp b/sdk/cpp/src/embedding_client.cpp similarity index 100% rename from 
sdk/cpp/src/openai_embedding_client.cpp rename to sdk/cpp/src/embedding_client.cpp From c7dfce838c07458122c77a14768c3c1edf5e1c57 Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Thu, 7 May 2026 15:33:20 -0700 Subject: [PATCH 10/11] fix sample --- samples/cpp/embeddings/main.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/samples/cpp/embeddings/main.cpp b/samples/cpp/embeddings/main.cpp index dca549ba4..94cbcdfa8 100644 --- a/samples/cpp/embeddings/main.cpp +++ b/samples/cpp/embeddings/main.cpp @@ -8,6 +8,7 @@ // // Usage: ./embeddings-example +#include <iomanip> #include <iostream> #include <stdexcept> #include <string> @@ -26,7 +27,27 @@ int main() { foundry_local::Manager::Create(config); auto& manager = foundry_local::Manager::Instance(); - manager.EnsureEpsDownloaded(); + + auto eps = manager.DiscoverEps(); + std::cout << "Available execution providers:" << std::endl; + for (const auto& ep : eps) { + std::cout << " " << ep.name << std::endl; + } + + if (!eps.empty()) { + std::cout << std::endl << "Downloading execution providers:" << std::endl; + std::string currentEp; + manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) std::cout << std::endl; + currentEp = epName; + } + std::cout << "\r " << std::left << std::setw(30) << epName + << " " << std::right << std::fixed << std::setprecision(1) + << std::setw(6) << percent << "% " << std::flush; + }); + if (!currentEp.empty()) std::cout << std::endl; + } auto& catalog = manager.GetCatalog(); auto* model = catalog.GetModel("qwen3-embedding-0.6b"); From 2fbf75f2db4a6199a9135d7aff324d36c9a4174c Mon Sep 17 00:00:00 2001 From: Raja Phanindra Chava Date: Fri, 8 May 2026 12:28:49 -0700 Subject: [PATCH 11/11] Updated cpp sdk documentation with embeddings api --- samples/cpp/embeddings/README.md | 9 ++++----- samples/cpp/embeddings/main.cpp | 4 +++- sdk/cpp/README.md | 33 +++++++++++++++++++++++++++++--- sdk/cpp/sample/main.cpp |
15 ++++++++++++++- 4 files changed, 51 insertions(+), 10 deletions(-) diff --git a/samples/cpp/embeddings/README.md b/samples/cpp/embeddings/README.md index 4b951b512..2425adc1d 100644 --- a/samples/cpp/embeddings/README.md +++ b/samples/cpp/embeddings/README.md @@ -1,11 +1,10 @@ # Embeddings Example (C++) -Demonstrates single and batch text embedding generation using the Foundry Local C++ SDK. +Demonstrates single-input and batch text embedding generation using the Foundry Local C++ SDK. -Loads the `qwen3-embedding-0.6b` model and exercises -`OpenAIEmbeddingClient::GenerateEmbedding` (single input) and -`OpenAIEmbeddingClient::GenerateEmbeddings` (batch input), printing the returned -dimensionality for each. +Loads the `qwen3-embedding-0.6b` embedding model, generates an embedding for a +single string and a batch of strings via `OpenAIEmbeddingClient`, and prints +the resulting vector dimensionality. ## Build diff --git a/samples/cpp/embeddings/main.cpp b/samples/cpp/embeddings/main.cpp index 94cbcdfa8..38a290259 100644 --- a/samples/cpp/embeddings/main.cpp +++ b/samples/cpp/embeddings/main.cpp @@ -8,6 +8,7 @@ // // Usage: ./embeddings-example +#include <cstdio> #include <iomanip> #include <iostream> #include <stdexcept> @@ -57,7 +58,8 @@ int main() { std::cout << "Downloading model (if needed)..."
<< std::endl; model->Download([](float pct) { - std::cout << "\rDownloading: " << pct << "% " << std::flush; + printf("\rDownloading: %5.1f%%", pct); + fflush(stdout); return true; }); std::cout << std::endl; diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 017f2fa6f..1030d8a82 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -10,6 +10,7 @@ The Foundry Local C++ SDK provides a C++17 static library for running AI models - **Lifecycle management** — download, load, unload, and remove models programmatically - **Chat completions** — synchronous and streaming via OpenAI-compatible types - **Audio transcription** — transcribe audio files with streaming support +- **Embeddings** — generate single and batch text embeddings via OpenAI-compatible types - **Tool calling** — define tools and handle tool-call responses in chat completions - **Download progress** — wire up a callback for real-time download percentage - **Model variants** — select specific hardware/quantization variants per model alias @@ -277,6 +278,30 @@ audio.TranscribeAudioStreaming(R"(C:\path\to\audio.wav)", [](const AudioCreateTr }); ``` +### Embeddings + +Generate text embeddings for a single input or for a batch in one request: + +```cpp +OpenAIEmbeddingClient embeddings(*model); + +// Single input +auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog"); +if (!single.data.empty()) { + std::cout << "Dimensions: " << single.data[0].embedding.size() << "\n"; +} + +// Batch input +std::vector<std::string> inputs = { + "Machine learning is a subset of AI", + "The capital of France is Paris" +}; +auto batch = embeddings.GenerateEmbeddings(inputs); +std::cout << "Got " << batch.data.size() << " embeddings\n"; +``` + +Empty and whitespace-only inputs are rejected client-side and throw `Exception`. + ### Tool Calling See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough.
@@ -449,6 +474,7 @@ Key types: | `ModelVariant` | A specific variant of a model (implements `IModel`) | | `OpenAIChatClient` | Chat completions (sync + streaming) | | `OpenAIAudioClient` | Audio transcription (sync + streaming) | +| `OpenAIEmbeddingClient` | Text embeddings (single + batch) | | `EpInfo` | Execution provider discovery info (name, registration status) | | `EpDownloadResult` | Result of EP download/registration (success, registered/failed EPs) | | `ChatSettings` | Chat generation parameters | @@ -478,9 +504,10 @@ sdk/cpp/ │ ├── model.h # Model & ModelVariant │ ├── logger.h # ILogger interface │ └── openai/ -│ ├── chat_client.h # Chat completion client -│ ├── audio_client.h # Audio transcription client -│ └── tool_types.h # Tool calling types +│ ├── chat_client.h # Chat completion client +│ ├── audio_client.h # Audio transcription client +│ ├── embedding_client.h # Embedding client +│ └── tool_types.h # Tool calling types ├── src/ # Private implementation ├── sample/ │ ├── main.cpp # Sample application diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 676cc30d2..f82711702 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -376,11 +376,24 @@ void GenerateEmbeddings(Manager& manager, const std::string& alias) { return; } - model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; return true; }); + // Prefer CPU variant to avoid DML/GPU provider issues + if (auto* concreteModel = dynamic_cast<Model*>(model)) { + PreferCpuVariant(*concreteModel); + } + + model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; }); std::cout << "\n"; model->Load(); + if (model->IsLoaded()) { + std::cout << "Model is loaded and ready for inference.\n"; + } + else { + std::cerr << "Failed to load model.\n"; + return; + } + OpenAIEmbeddingClient embeddings(*model); // Single input