microsoft · phanindraraja · Apr 22, 2026 · Apr 22, 2026 · Apr 23, 2026 · Apr 23, 2026
diff --git a/samples/cpp/embeddings/README.md b/samples/cpp/embeddings/README.md
@@ -0,0 +1,20 @@
+# Embeddings Example (C++)
+
+Demonstrates single-input and batch text embedding generation using the Foundry Local C++ SDK.
+
+Downloads and registers the available execution providers, loads the
+`qwen3-embedding-0.6b` model, generates embeddings for a single string and for a
+batch of strings via `OpenAIEmbeddingClient`, and prints each vector's dimensionality.
+
+
+## Build
+
+```bash
+g++ -std=c++17 main.cpp -lfoundry_local -o embeddings-example
+```
+
+## Run
+
+```bash
+./embeddings-example
+```
diff --git a/samples/cpp/embeddings/main.cpp b/samples/cpp/embeddings/main.cpp
@@ -0,0 +1,101 @@
+// Embeddings — Foundry Local C++ SDK Example
+//
+// Demonstrates single-input and batch embedding generation using the
+// OpenAI-compatible `OpenAIEmbeddingClient` against a locally loaded
+// embedding model.
+//
+// Requires: Foundry Local C++ SDK
+//
+// Usage: ./embeddings-example
+
+#include <cstdio>
+#include <iomanip>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "foundry_local.h"
+
+int main() {  // Runs the embeddings demo; exits 0 on success, 1 if a std::exception is caught.
+    try {
+        std::cout << "===========================================================" << std::endl;
+        std::cout << "   Foundry Local -- Embeddings Demo (C++)" << std::endl;
+        std::cout << "===========================================================" << std::endl;
+        std::cout << std::endl;
+        // Configure and create the manager, then reach it through Instance().
+        foundry_local::Configuration config("foundry_local_samples");
+
+        foundry_local::Manager::Create(config);
+        auto& manager = foundry_local::Manager::Instance();
+        // List the execution providers (EPs) reported by the manager.
+        auto eps = manager.DiscoverEps();
+        std::cout << "Available execution providers:" << std::endl;
+        for (const auto& ep : eps) {
+            std::cout << "  " << ep.name << std::endl;
+        }
+        // Download/register EPs; each EP's progress is redrawn in place using '\r'.
+        if (!eps.empty()) {
+            std::cout << std::endl << "Downloading execution providers:" << std::endl;
+            std::string currentEp;  // name of the EP whose progress line is currently on screen
+            manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) {
+                if (epName != currentEp) {
+                    if (!currentEp.empty()) std::cout << std::endl;  // close the previous EP's line
+                    currentEp = epName;
+                }
+                std::cout << "\r  " << std::left << std::setw(30) << epName
+                            << "  " << std::right << std::fixed << std::setprecision(1)
+                            << std::setw(6) << percent << "%   " << std::flush;
+            });
+            if (!currentEp.empty()) std::cout << std::endl;  // terminate the last progress line
+        }
+        // Look up the embedding model by alias; a null result is reported as "not found".
+        auto& catalog = manager.GetCatalog();
+        auto* model = catalog.GetModel("qwen3-embedding-0.6b");
+        if (!model) {
+            throw std::runtime_error("Model \"qwen3-embedding-0.6b\" not found in catalog");
+        }
+        // std::printf/std::fflush are qualified: <cstdio> only guarantees the std:: names.
+        std::cout << "Downloading model (if needed)..." << std::endl;
+        model->Download([](float pct) {
+            std::printf("\rDownloading: %5.1f%%", pct);
+            std::fflush(stdout);  // no newline is written, so flush to make the update visible
+            return true;  // NOTE(review): presumably signals "continue downloading" -- confirm against the SDK
+        });
+        std::cout << std::endl;
+        std::cout << "Loading model..." << std::endl;
+        model->Load();
+        std::cout << "Model loaded" << std::endl;
+
+        foundry_local::OpenAIEmbeddingClient embeddings(*model);
+
+        // Single input
+        std::cout << std::endl << "--- Single Embedding ---" << std::endl;
+        auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog");
+        if (!single.data.empty()) {  // guard the data[0] access below
+            std::cout << "Dimensions: " << single.data[0].embedding.size() << std::endl;
+        }
+
+        // Batch input
+        std::cout << std::endl << "--- Batch Embeddings ---" << std::endl;
+        std::vector<std::string> inputs = {
+            "Machine learning is a subset of artificial intelligence",
+            "The capital of France is Paris",
+            "Rust is a systems programming language",
+        };
+        auto batch = embeddings.GenerateEmbeddings(inputs);
+        std::cout << "Number of embeddings: " << batch.data.size() << std::endl;
+        for (std::size_t i = 0; i < batch.data.size(); ++i) {
+            std::cout << "  [" << i << "] Dimensions: " << batch.data[i].embedding.size() << std::endl;
+        }
+        // Explicit unload; not reached if any call above throws.
+        model->Unload();
+        std::cout << std::endl << "Model unloaded" << std::endl;
+
+        return 0;
+    }
+    catch (const std::exception& ex) {
+        std::cerr << "Fatal: " << ex.what() << std::endl;
+        return 1;
+    }
+}
diff --git a/samples/cs/embeddings/Program.cs b/samples/cs/embeddings/Program.cs
@@ -13,14 +13,52 @@
 // Initialize the singleton instance.
 await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
 var mgr = FoundryLocalManager.Instance;
+
+// Discover available execution providers and their registration status.
+var eps = mgr.DiscoverEps();
+int maxNameLen = 30;
+Console.WriteLine("Available execution providers:");
+Console.WriteLine($"  {"Name".PadRight(maxNameLen)}  Registered");
+Console.WriteLine($"  {new string('─', maxNameLen)}  {"──────────"}");
+foreach (var ep in eps)
+{
+    Console.WriteLine($"  {ep.Name.PadRight(maxNameLen)}  {ep.IsRegistered}");
+}
+
+// Download and register all execution providers with per-EP progress.
+// EP packages include dependencies and may be large.
+// Download is only required again if a new version of the EP is released.
+// For cross platform builds there is no dynamic EP download and this will return immediately.
+Console.WriteLine("\nDownloading execution providers:");
+if (eps.Length > 0)
+{
+    string currentEp = "";
+    await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
+    {
+        if (epName != currentEp)
+        {
+            if (currentEp != "")
+            {
+                Console.WriteLine();
+            }
+            currentEp = epName;
+        }
+        Console.Write($"\r  {epName.PadRight(maxNameLen)}  {percent,6:F1}%");
+    });
+    Console.WriteLine();
+}
+else
+{
+    Console.WriteLine("No execution providers to download.");
+}
 // </init>
 
 // <model_setup>
 // Get the model catalog
 var catalog = await mgr.GetCatalogAsync();
 
 // Get an embedding model
-var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
+var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");
 
 // Download the model (the method skips download if already cached)
 await model.DownloadAsync(progress =>
@@ -69,6 +107,5 @@ await model.DownloadAsync(progress =>
 // <cleanup>
 // Tidy up - unload the model
 await model.UnloadAsync();
-Console.WriteLine("\nModel unloaded.");
 // </cleanup>
 // </complete_code>
diff --git a/samples/js/embeddings/app.js b/samples/js/embeddings/app.js
@@ -14,9 +14,39 @@ const manager = FoundryLocalManager.create({
 // </init>
 console.log('✓ SDK initialized successfully');
 
+// Discover available execution providers and their registration status.
+const eps = manager.discoverEps();
+const maxNameLen = 30;
+console.log('\nAvailable execution providers:');
+console.log(`  ${'Name'.padEnd(maxNameLen)}  Registered`);
+console.log(`  ${'─'.repeat(maxNameLen)}  ──────────`);
+for (const ep of eps) {
+    console.log(`  ${ep.name.padEnd(maxNameLen)}  ${ep.isRegistered}`);
+}
+
+// Download and register all execution providers with per-EP progress.
+// EP packages include dependencies and may be large.
+// Download is only required again if a new version of the EP is released.
+console.log('\nDownloading execution providers:');
+if (eps.length > 0) {
+    let currentEp = '';
+    await manager.downloadAndRegisterEps((epName, percent) => {
+        if (epName !== currentEp) {
+            if (currentEp !== '') {
+                process.stdout.write('\n');
+            }
+            currentEp = epName;
+        }
+        process.stdout.write(`\r  ${epName.padEnd(maxNameLen)}  ${percent.toFixed(1).padStart(5)}%`);
+    });
+    process.stdout.write('\n');
+} else {
+    console.log('No execution providers to download.');
+}
+
 // <model_setup>
 // Get an embedding model
-const modelAlias = 'qwen3-0.6b-embedding';
+const modelAlias = 'qwen3-embedding-0.6b';
 const model = await manager.catalog.getModel(modelAlias);
 
 // Download the model

diff --git a/samples/python/embeddings/src/app.py b/samples/python/embeddings/src/app.py
@@ -11,8 +11,35 @@ def main():
     FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
+    # Discover available execution providers and their registration status.
+    eps = manager.discover_eps()
+    max_name_len = 30
+    print("Available execution providers:")
+    print(f"  {'Name':<{max_name_len}}  Registered")
+    print(f"  {'─' * max_name_len}  ──────────")
+    for ep in eps:
+        print(f"  {ep.name:<{max_name_len}}  {ep.is_registered}")
+
+    # Download and register all execution providers.
+    print("\nDownloading execution providers:")
+    current_ep = ""
+    def ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r  {ep_name:<{max_name_len}}  {percent:5.1f}%", end="", flush=True)
+
+    if eps:
+        manager.download_and_register_eps(progress_callback=ep_progress)
+        if current_ep:
+            print()
+    else:
+        print("No execution providers to download.")
+
     # Select and load an embedding model from the catalog
-    model = manager.catalog.get_model("qwen3-0.6b-embedding")
+    model = manager.catalog.get_model("qwen3-embedding-0.6b")
     model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",

diff --git a/samples/rust/embeddings/src/main.rs b/samples/rust/embeddings/src/main.rs
@@ -3,10 +3,12 @@
 // Licensed under the MIT License.
 
 // <imports>
+use std::io::{self, Write};
+
 use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
 // </imports>
 
-const ALIAS: &str = "qwen3-0.6b-embedding";
+const ALIAS: &str = "qwen3-embedding-0.6b";
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -18,6 +20,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
     // </init>
 
+    // Discover available execution providers and their registration status.
+    let eps = manager.discover_eps()?;
+    let max_name_len = 30;
+    println!("Available execution providers:");
+    println!("  {:<width$}  Registered", "Name", width = max_name_len);
+    println!("  {:─<width$}  ──────────", "", width = max_name_len);
+    for ep in &eps {
+        println!("  {:<width$}  {}", ep.name, ep.is_registered, width = max_name_len);
+    }
+
+    // Download and register all execution providers.
+    println!("\nDownloading execution providers:");
+    if !eps.is_empty() {
+        manager
+            .download_and_register_eps_with_progress(None, {
+                let mut current_ep = String::new();
+                move |ep_name: &str, percent: f64| {
+                    if ep_name != current_ep {
+                        if !current_ep.is_empty() {
+                            println!();
+                        }
+                        current_ep = ep_name.to_string();
+                    }
+                    print!("\r  {:<width$}  {:5.1}%", ep_name, percent, width = max_name_len);
+                    io::stdout().flush().ok();
+                }
+            })
+            .await?;
+        println!();
+    } else {
+        println!("No execution providers to download.");
+    }
+
     // ── 2. Pick a model and ensure it is downloaded ─────────────────────
     // <model_setup>
     let model = manager.catalog().get_model(ALIAS).await?;

diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt
@@ -54,6 +54,7 @@ add_library(CppSdk STATIC
   src/audio_client.cpp
   src/live_audio_types.cpp
   src/live_audio_session.cpp
+  src/embedding_client.cpp
   src/foundry_local_manager.cpp
 )
 

diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md
@@ -10,6 +10,7 @@ The Foundry Local C++ SDK provides a C++17 static library for running AI models
 - **Lifecycle management** — download, load, unload, and remove models programmatically
 - **Chat completions** — synchronous and streaming via OpenAI-compatible types
 - **Audio transcription** — transcribe audio files with streaming support
+- **Embeddings** — generate single and batch text embeddings via OpenAI-compatible types
 - **Tool calling** — define tools and handle tool-call responses in chat completions
 - **Download progress** — wire up a callback for real-time download percentage
 - **Model variants** — select specific hardware/quantization variants per model alias
@@ -277,6 +278,30 @@ audio.TranscribeAudioStreaming(R"(C:\path\to\audio.wav)", [](const AudioCreateTr
 });
 ```
 
+### Embeddings
+
+Generate text embeddings for a single input or for a batch in one request:
+
+```cpp
+OpenAIEmbeddingClient embeddings(*model);
+
+// Single input
+auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog");
+if (!single.data.empty()) {
+    std::cout << "Dimensions: " << single.data[0].embedding.size() << "\n";
+}
+
+// Batch input
+std::vector<std::string> inputs = {
+    "Machine learning is a subset of AI",
+    "The capital of France is Paris"
+};
+auto batch = embeddings.GenerateEmbeddings(inputs);
+std::cout << "Got " << batch.data.size() << " embeddings\n";
+```
+
+Empty and whitespace-only inputs are rejected client-side and throw `Exception`.
+
 ### Tool Calling
 
 See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough.
@@ -449,6 +474,7 @@ Key types:
 | `ModelVariant` | A specific variant of a model (implements `IModel`) |
 | `OpenAIChatClient` | Chat completions (sync + streaming) |
 | `OpenAIAudioClient` | Audio transcription (sync + streaming) |
+| `OpenAIEmbeddingClient` | Text embeddings (single + batch) |
 | `EpInfo` | Execution provider discovery info (name, registration status) |
 | `EpDownloadResult` | Result of EP download/registration (success, registered/failed EPs) |
 | `ChatSettings` | Chat generation parameters |
@@ -478,9 +504,10 @@ sdk/cpp/
 │   ├── model.h               # Model & ModelVariant
 │   ├── logger.h              # ILogger interface
 │   └── openai/
-│       ├── chat_client.h     # Chat completion client
-│       ├── audio_client.h    # Audio transcription client
-│       └── tool_types.h      # Tool calling types
+│       ├── chat_client.h        # Chat completion client
+│       ├── audio_client.h       # Audio transcription client
+│       ├── embedding_client.h   # Embedding client
+│       └── tool_types.h         # Tool calling types
 ├── src/                      # Private implementation
 ├── sample/
 │   ├── main.cpp              # Sample application

diff --git a/sdk/cpp/include/foundry_local.h b/sdk/cpp/include/foundry_local.h
@@ -18,3 +18,4 @@
 #include "openai/audio_client.h"
 #include "openai/live_audio_types.h"
 #include "openai/live_audio_session.h"
+#include "openai/embedding_client.h"