Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions samples/cpp/embeddings/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Embeddings Example (C++)

Demonstrates single-input and batch text embedding generation using the Foundry Local C++ SDK.

Loads the `qwen3-embedding-0.6b` embedding model, generates an embedding for a
single string and a batch of strings via `OpenAIEmbeddingClient`, and prints
the resulting vector dimensionality.


## Build

```bash
g++ -std=c++17 main.cpp -lfoundry_local -o embeddings-example
```

## Run

```bash
./embeddings-example
```
101 changes: 101 additions & 0 deletions samples/cpp/embeddings/main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Embeddings — Foundry Local C++ SDK Example
//
// Demonstrates single-input and batch embedding generation using the
// OpenAI-compatible `OpenAIEmbeddingClient` against a locally loaded
// embedding model.
//
// Requires: Foundry Local C++ SDK
//
// Usage: ./embeddings-example

#include <cstdio>
#include <iomanip>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "foundry_local.h"

/// Entry point for the embeddings sample.
///
/// Steps, in order: create the Foundry Local manager, list the discovered
/// execution providers (EPs) and download/register them, look up the
/// embedding model in the catalog, download and load it, generate one single
/// and one batch embedding, print the vector dimensionality of each result,
/// and finally unload the model.
///
/// @return 0 on success; 1 if any step throws a std::exception (the message
///         is written to stderr prefixed with "Fatal: ").
int main() {
    // Single source of truth for the model alias so the catalog lookup and
    // the "not found" diagnostic can never drift apart.
    static constexpr const char* kModelAlias = "qwen3-embedding-0.6b";

    try {
        const char* const rule = "===========================================================";
        std::cout << rule << '\n'
                  << " Foundry Local -- Embeddings Demo (C++)" << '\n'
                  << rule << '\n'
                  << std::endl;

        foundry_local::Configuration config("foundry_local_samples");

        // Create the singleton, then fetch a reference to it.
        foundry_local::Manager::Create(config);
        auto& manager = foundry_local::Manager::Instance();

        // Discover and list the available execution providers.
        const auto eps = manager.DiscoverEps();
        std::cout << "Available execution providers:" << '\n';
        for (const auto& ep : eps) {
            std::cout << " " << ep.name << '\n';
        }

        // Download and register all execution providers with per-EP progress.
        // The header and the "nothing to download" message mirror the other
        // language samples so every sample produces the same sections.
        std::cout << '\n' << "Downloading execution providers:" << std::endl;
        if (eps.empty()) {
            std::cout << "No execution providers to download." << '\n';
        } else {
            // Name of the EP whose progress line is currently being redrawn.
            std::string currentEp;
            // NOTE(review): the return value is ignored here, as in the
            // original sample — confirm whether failures should be reported.
            manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) {
                if (epName != currentEp) {
                    // Finish the previous EP's line before starting a new one.
                    if (!currentEp.empty()) {
                        std::cout << '\n';
                    }
                    currentEp = epName;
                }
                // '\r' rewinds to the line start so the percentage updates in place.
                std::cout << "\r " << std::left << std::setw(30) << epName
                          << " " << std::right << std::fixed << std::setprecision(1)
                          << std::setw(6) << percent << "% " << std::flush;
            });
            // Terminate the last progress line, if any progress was reported.
            if (!currentEp.empty()) {
                std::cout << std::endl;
            }
        }

        auto& catalog = manager.GetCatalog();
        auto* model = catalog.GetModel(kModelAlias);
        if (!model) {
            throw std::runtime_error(std::string("Model \"") + kModelAlias +
                                     "\" not found in catalog");
        }

        // std::endl (flush) is used before each blocking call so the status
        // line is visible even when stdout is not a terminal.
        std::cout << "Downloading model (if needed)..." << std::endl;
        model->Download([](float pct) {
            // Same layout as "%5.1f%%": right-aligned, width 5, one decimal.
            std::cout << "\rDownloading: " << std::right << std::fixed
                      << std::setprecision(1) << std::setw(5) << pct << '%'
                      << std::flush;
            return true;
        });
        std::cout << std::endl;
        std::cout << "Loading model..." << std::endl;
        model->Load();
        std::cout << "Model loaded" << std::endl;

        foundry_local::OpenAIEmbeddingClient embeddings(*model);

        // Single input
        std::cout << '\n' << "--- Single Embedding ---" << std::endl;
        const auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog");
        if (single.data.empty()) {
            // Make an empty response visible instead of printing nothing.
            std::cout << "No embedding returned" << '\n';
        } else {
            std::cout << "Dimensions: " << single.data[0].embedding.size() << '\n';
        }

        // Batch input
        std::cout << '\n' << "--- Batch Embeddings ---" << std::endl;
        const std::vector<std::string> inputs = {
            "Machine learning is a subset of artificial intelligence",
            "The capital of France is Paris",
            "Rust is a systems programming language",
        };
        const auto batch = embeddings.GenerateEmbeddings(inputs);
        std::cout << "Number of embeddings: " << batch.data.size() << '\n';
        for (std::size_t i = 0; i < batch.data.size(); ++i) {
            std::cout << " [" << i << "] Dimensions: " << batch.data[i].embedding.size() << '\n';
        }

        model->Unload();
        std::cout << '\n' << "Model unloaded" << std::endl;

        return 0;
    }
    catch (const std::exception& ex) {
        std::cerr << "Fatal: " << ex.what() << std::endl;
        return 1;
    }
}
41 changes: 39 additions & 2 deletions samples/cs/embeddings/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,52 @@
// Initialize the singleton instance.
await FoundryLocalManager.CreateAsync(config, Utils.GetAppLogger());
var mgr = FoundryLocalManager.Instance;

// Discover available execution providers and their registration status.
var eps = mgr.DiscoverEps();
int maxNameLen = 30;
Console.WriteLine("Available execution providers:");
Console.WriteLine($" {"Name".PadRight(maxNameLen)} Registered");
Console.WriteLine($" {new string('─', maxNameLen)} {"──────────"}");
foreach (var ep in eps)
{
Console.WriteLine($" {ep.Name.PadRight(maxNameLen)} {ep.IsRegistered}");
}

// Download and register all execution providers with per-EP progress.
// EP packages include dependencies and may be large.
// Download is only required again if a new version of the EP is released.
// For cross platform builds there is no dynamic EP download and this will return immediately.
Console.WriteLine("\nDownloading execution providers:");
if (eps.Length > 0)
{
string currentEp = "";
await mgr.DownloadAndRegisterEpsAsync((epName, percent) =>
{
if (epName != currentEp)
{
if (currentEp != "")
{
Console.WriteLine();
}
currentEp = epName;
}
Console.Write($"\r {epName.PadRight(maxNameLen)} {percent,6:F1}%");
});
Console.WriteLine();
}
else
{
Console.WriteLine("No execution providers to download.");
}
// </init>

// <model_setup>
// Get the model catalog
var catalog = await mgr.GetCatalogAsync();

// Get an embedding model
var model = await catalog.GetModelAsync("qwen3-0.6b-embedding") ?? throw new Exception("Embedding model not found");
var model = await catalog.GetModelAsync("qwen3-embedding-0.6b") ?? throw new Exception("Embedding model not found");

// Download the model (the method skips download if already cached)
await model.DownloadAsync(progress =>
Expand Down Expand Up @@ -69,6 +107,5 @@ await model.DownloadAsync(progress =>
// <cleanup>
// Tidy up - unload the model
await model.UnloadAsync();
Console.WriteLine("\nModel unloaded.");
// </cleanup>
// </complete_code>
32 changes: 31 additions & 1 deletion samples/js/embeddings/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,39 @@ const manager = FoundryLocalManager.create({
// </init>
console.log('✓ SDK initialized successfully');

// Discover available execution providers and their registration status.
const eps = manager.discoverEps();
const maxNameLen = 30;
console.log('\nAvailable execution providers:');
console.log(` ${'Name'.padEnd(maxNameLen)} Registered`);
console.log(` ${'─'.repeat(maxNameLen)} ──────────`);
for (const ep of eps) {
console.log(` ${ep.name.padEnd(maxNameLen)} ${ep.isRegistered}`);
}

// Download and register all execution providers with per-EP progress.
// EP packages include dependencies and may be large.
// Download is only required again if a new version of the EP is released.
console.log('\nDownloading execution providers:');
if (eps.length > 0) {
let currentEp = '';
await manager.downloadAndRegisterEps((epName, percent) => {
if (epName !== currentEp) {
if (currentEp !== '') {
process.stdout.write('\n');
}
currentEp = epName;
}
process.stdout.write(`\r ${epName.padEnd(maxNameLen)} ${percent.toFixed(1).padStart(5)}%`);
});
process.stdout.write('\n');
} else {
console.log('No execution providers to download.');
}

// <model_setup>
// Get an embedding model
const modelAlias = 'qwen3-0.6b-embedding';
const modelAlias = 'qwen3-embedding-0.6b';
const model = await manager.catalog.getModel(modelAlias);

// Download the model
Expand Down
29 changes: 28 additions & 1 deletion samples/python/embeddings/src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,35 @@ def main():
FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Discover available execution providers and their registration status.
eps = manager.discover_eps()
max_name_len = 30
print("Available execution providers:")
print(f" {'Name':<{max_name_len}} Registered")
print(f" {'─' * max_name_len} ──────────")
for ep in eps:
print(f" {ep.name:<{max_name_len}} {ep.is_registered}")

# Download and register all execution providers.
print("\nDownloading execution providers:")
current_ep = ""
def ep_progress(ep_name: str, percent: float):
nonlocal current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<{max_name_len}} {percent:5.1f}%", end="", flush=True)

if eps:
manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
else:
print("No execution providers to download.")

# Select and load an embedding model from the catalog
model = manager.catalog.get_model("qwen3-0.6b-embedding")
model = manager.catalog.get_model("qwen3-embedding-0.6b")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
Expand Down
37 changes: 36 additions & 1 deletion samples/rust/embeddings/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
// Licensed under the MIT License.

// <imports>
use std::io::{self, Write};

use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
// </imports>

const ALIAS: &str = "qwen3-0.6b-embedding";
const ALIAS: &str = "qwen3-embedding-0.6b";

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand All @@ -18,6 +20,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
// </init>

// Discover available execution providers and their registration status.
let eps = manager.discover_eps()?;
let max_name_len = 30;
println!("Available execution providers:");
println!(" {:<width$} Registered", "Name", width = max_name_len);
println!(" {:─<width$} ──────────", "", width = max_name_len);
for ep in &eps {
println!(" {:<width$} {}", ep.name, ep.is_registered, width = max_name_len);
}

// Download and register all execution providers.
println!("\nDownloading execution providers:");
if !eps.is_empty() {
manager
.download_and_register_eps_with_progress(None, {
let mut current_ep = String::new();
move |ep_name: &str, percent: f64| {
if ep_name != current_ep {
if !current_ep.is_empty() {
println!();
}
current_ep = ep_name.to_string();
}
print!("\r {:<width$} {:5.1}%", ep_name, percent, width = max_name_len);
io::stdout().flush().ok();
}
})
.await?;
println!();
} else {
println!("No execution providers to download.");
}

// ── 2. Pick a model and ensure it is downloaded ─────────────────────
// <model_setup>
let model = manager.catalog().get_model(ALIAS).await?;
Expand Down
1 change: 1 addition & 0 deletions sdk/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ add_library(CppSdk STATIC
src/audio_client.cpp
src/live_audio_types.cpp
src/live_audio_session.cpp
src/embedding_client.cpp
src/foundry_local_manager.cpp
)

Expand Down
33 changes: 30 additions & 3 deletions sdk/cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The Foundry Local C++ SDK provides a C++17 static library for running AI models
- **Lifecycle management** — download, load, unload, and remove models programmatically
- **Chat completions** — synchronous and streaming via OpenAI-compatible types
- **Audio transcription** — transcribe audio files with streaming support
- **Embeddings** — generate single and batch text embeddings via OpenAI-compatible types
- **Tool calling** — define tools and handle tool-call responses in chat completions
- **Download progress** — wire up a callback for real-time download percentage
- **Model variants** — select specific hardware/quantization variants per model alias
Expand Down Expand Up @@ -277,6 +278,30 @@ audio.TranscribeAudioStreaming(R"(C:\path\to\audio.wav)", [](const AudioCreateTr
});
```

### Embeddings

Generate text embeddings for a single input or for a batch in one request:

```cpp
OpenAIEmbeddingClient embeddings(*model);

// Single input
auto single = embeddings.GenerateEmbedding("The quick brown fox jumps over the lazy dog");
if (!single.data.empty()) {
std::cout << "Dimensions: " << single.data[0].embedding.size() << "\n";
}

// Batch input
std::vector<std::string> inputs = {
"Machine learning is a subset of AI",
"The capital of France is Paris"
};
auto batch = embeddings.GenerateEmbeddings(inputs);
std::cout << "Got " << batch.data.size() << " embeddings\n";
```

Empty or whitespace-only inputs are rejected client-side: the call throws `Exception`.

### Tool Calling

See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough.
Expand Down Expand Up @@ -449,6 +474,7 @@ Key types:
| `ModelVariant` | A specific variant of a model (implements `IModel`) |
| `OpenAIChatClient` | Chat completions (sync + streaming) |
| `OpenAIAudioClient` | Audio transcription (sync + streaming) |
| `OpenAIEmbeddingClient` | Text embeddings (single + batch) |
| `EpInfo` | Execution provider discovery info (name, registration status) |
| `EpDownloadResult` | Result of EP download/registration (success, registered/failed EPs) |
| `ChatSettings` | Chat generation parameters |
Expand Down Expand Up @@ -478,9 +504,10 @@ sdk/cpp/
│ ├── model.h # Model & ModelVariant
│ ├── logger.h # ILogger interface
│ └── openai/
│ ├── chat_client.h # Chat completion client
│ ├── audio_client.h # Audio transcription client
│ └── tool_types.h # Tool calling types
│ ├── chat_client.h # Chat completion client
│ ├── audio_client.h # Audio transcription client
│ ├── embedding_client.h # Embedding client
│ └── tool_types.h # Tool calling types
├── src/ # Private implementation
├── sample/
│ ├── main.cpp # Sample application
Expand Down
1 change: 1 addition & 0 deletions sdk/cpp/include/foundry_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
#include "openai/audio_client.h"
#include "openai/live_audio_types.h"
#include "openai/live_audio_session.h"
#include "openai/embedding_client.h"
Loading
Loading