From f79ac76b666e6d1d26ecdc24f5dae1b862a261cc Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 22:28:12 -0700 Subject: [PATCH 01/18] Add ResponseAPI vision sample for CPP SDK --- .../CMakeLists.txt | 9 + .../cpp/web-server-responses-vision/README.md | 97 ++++++ .../cpp/web-server-responses-vision/main.cpp | 293 ++++++++++++++++++ .../web-server-responses-vision/stb_impl.cpp | 8 + .../test_image.jpg | Bin 0 -> 6828 bytes sdk/cpp/CMakeLists.txt | 23 ++ sdk/cpp/triplets/x64-windows-static-md.cmake | 3 - sdk/cpp/vcpkg.json | 4 +- 8 files changed, 433 insertions(+), 4 deletions(-) create mode 100644 samples/cpp/web-server-responses-vision/CMakeLists.txt create mode 100644 samples/cpp/web-server-responses-vision/README.md create mode 100644 samples/cpp/web-server-responses-vision/main.cpp create mode 100644 samples/cpp/web-server-responses-vision/stb_impl.cpp create mode 100644 samples/cpp/web-server-responses-vision/test_image.jpg delete mode 100644 sdk/cpp/triplets/x64-windows-static-md.cmake diff --git a/samples/cpp/web-server-responses-vision/CMakeLists.txt b/samples/cpp/web-server-responses-vision/CMakeLists.txt new file mode 100644 index 000000000..a5eae8015 --- /dev/null +++ b/samples/cpp/web-server-responses-vision/CMakeLists.txt @@ -0,0 +1,9 @@ +# This sample is built as part of the C++ SDK. +# See sdk/cpp/CMakeLists.txt for the build target (WebServerResponsesVision). +# +# Build from sdk/cpp: +# cmake --preset x64-debug +# cmake --build --preset x64-debug +# +# The built executable will be at: +# sdk/cpp/out/build/x64-debug/WebServerResponsesVision.exe diff --git a/samples/cpp/web-server-responses-vision/README.md b/samples/cpp/web-server-responses-vision/README.md new file mode 100644 index 000000000..bdb036b7e --- /dev/null +++ b/samples/cpp/web-server-responses-vision/README.md @@ -0,0 +1,97 @@ +# Foundry Local C++ Vision Sample (Responses API) + +This sample demonstrates vision (image understanding) capabilities using the Foundry Local web service and the OpenAI Responses API. + +> **Windows-only** — requires MSVC or clang-cl (MSVC-compatible toolchain). + +## Features + +- **Vision inference** — send an image to a vision-capable model and get a description +- **Streaming** — token-by-token output via Server-Sent Events (SSE) +- **Responses API** — uses the `/v1/responses` endpoint (not chat completions) +- Uses a default test image (`test_image.jpg`) if no image path is provided + +## Prerequisites + +| Requirement | Notes | +|---|---| +| **Foundry Local / AI Toolkit** | Install via `winget install Microsoft.AIToolkit` or the VS Code AI Toolkit extension | +| **CMake >= 3.20** | Ships with Visual Studio 2022 | +| **Ninja** | Ships with Visual Studio 2022 | +| **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | +| **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | + +The sample downloads the specified model the first time it runs (skips if already cached). + +## Build + +This sample is built as part of the C++ SDK. Open an **x64 Native Tools Command Prompt for VS 2022** (or run `vcvars64.bat`), then navigate to `sdk/cpp`: + +```bash +cd sdk/cpp +``` + +### 1. 
Download native dependencies + +Download the required NuGet packages to `sdk/cpp/_native_deps` (needed for both build and runtime): + +```bash +nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory _native_deps +nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory _native_deps +nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory _native_deps +nuget install Microsoft.Windows.AI.MachineLearning -Version 2.0.300 -OutputDirectory _native_deps +``` + +### 2. Build + +```bash +cmake --preset x64-debug +cmake --build --preset x64-debug --target WebServerResponsesVision +``` + +The built executable will be at `sdk/cpp/out/build/x64-debug/WebServerResponsesVision.exe`. + +### 3. Copy runtime DLLs + +Copy the `win-x64` DLLs next to the executable: + +```bash +copy _native_deps\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll out\build\x64-debug\ +copy _native_deps\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll out\build\x64-debug\ +copy _native_deps\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll out\build\x64-debug\ +copy _native_deps\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\Microsoft.Windows.AI.MachineLearning.dll out\build\x64-debug\ +copy _native_deps\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\DirectML.dll out\build\x64-debug\ +``` + +## Run the sample + +```bash +.\out\build\x64-debug\WebServerResponsesVision.exe qwen3.5-0.8b +``` + +The sample starts the local web service, sends vision requests via the Responses API to `http://localhost:/v1`, prints the model output, and then stops the web service. + +## How it works + +1. **Initialize** — creates the `Manager` singleton with web service configuration +2. **Execution providers** — discovers and installs compatible EPs (including WebGPU) via the Windows ML EP Catalog +3. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory +4. **Web service** — starts the local Foundry web service on a random port +5. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG +6. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts +7. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive +8. **Cleanup** — stops the web service, unloads the model, and destroys the manager + +## Troubleshooting + +| Error | Cause | Fix | +|---|---|---| +| `Cannot open file: test_image.jpg` | Default image not found | Ensure `test_image.jpg` is present next to the source file | +| `Model 'xyz' not found in catalog` | Invalid model alias | Check available models printed in the error output | +| `Microsoft.Windows.AI.MachineLearning.dll was not found` | WinML DLL missing | Copy the DLL from `_native_deps` to the build output (see DLL dependencies) | +| `WebGPU execution provider is not supported` | WebGPUExecutionProvider not available | WebGPU models are not supported yet; the sample automatically falls back to the CPU variant | +| cURL connection refused | Web service failed to start | Ensure `config.web` is set and no port conflicts exist | + +## License + +Licensed under the MIT License. 
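For orientation before the full main.cpp below: step 6 of "How it works" assembles the Responses API body from `input_text` and `input_image` content parts. A condensed sketch of that body follows — field names are taken from this sample, while the `MakeVisionRequest` helper is hypothetical and introduced only for illustration:

```cpp
#include <string>
#include <nlohmann/json.hpp>
using json = nlohmann::json;

// Hypothetical helper; mirrors the request body that main.cpp builds inline.
json MakeVisionRequest(const std::string& modelId,
                       const std::string& imageB64,
                       const std::string& mediaType) {
  json input = json::array({{
      {"type", "message"},
      {"role", "user"},
      {"content", json::array({
          {{"type", "input_text"}, {"text", "Describe this image."}},
          {{"type", "input_image"},
           {"image_data", imageB64},
           {"media_type", mediaType}},
      })},
  }});
  return {{"model", modelId}, {"input", input}, {"stream", true}};
}
```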
diff --git a/samples/cpp/web-server-responses-vision/main.cpp b/samples/cpp/web-server-responses-vision/main.cpp
new file mode 100644
index 000000000..7de142ee0
--- /dev/null
+++ b/samples/cpp/web-server-responses-vision/main.cpp
@@ -0,0 +1,293 @@
+//
+//
+#include <algorithm>
+#include <cstdint>
+#include <filesystem>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <curl/curl.h>
+#include <nlohmann/json.hpp>
+#include <stb_image.h>
+#include <stb_image_resize2.h>
+#include <stb_image_write.h>
+
+#include "foundry_local.h"
+//
+
+#ifdef _WIN32
+#include <windows.h>
+#include
+#endif
+
+using json = nlohmann::json;
+
+// ─── Base64 encoding ──────────────────────────────────────────────────────
+
+static const char kBase64Chars[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+std::string Base64Encode(const std::vector<unsigned char>& data) {
+  std::string out;
+  out.reserve(((data.size() + 2) / 3) * 4);
+  size_t i = 0;
+  while (i < data.size()) {
+    uint32_t octet_a = i < data.size() ? data[i++] : 0;
+    uint32_t octet_b = i < data.size() ? data[i++] : 0;
+    uint32_t octet_c = i < data.size() ? data[i++] : 0;
+    uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c;
+    out.push_back(kBase64Chars[(triple >> 18) & 0x3F]);
+    out.push_back(kBase64Chars[(triple >> 12) & 0x3F]);
+    out.push_back(kBase64Chars[(triple >> 6) & 0x3F]);
+    out.push_back(kBase64Chars[triple & 0x3F]);
+  }
+  size_t mod = data.size() % 3;
+  if (mod == 1) {
+    out[out.size() - 2] = '=';
+    out[out.size() - 1] = '=';
+  } else if (mod == 2) {
+    out[out.size() - 1] = '=';
+  }
+  return out;
+}
+
+// Load and resize a local image, returning (base64_str, media_type).
+// Mirrors the Python sample's resize_and_encode(path, max_dim=512).
+std::pair<std::string, std::string> ResizeAndEncode(const std::filesystem::path& path, int maxDim = 512) {
+  int w = 0, h = 0, channels = 0;
+  unsigned char* img = stbi_load(path.string().c_str(), &w, &h, &channels, 3);
+  if (!img) {
+    throw std::runtime_error("Failed to load image: " + path.string());
+  }
+
+  int newW = w, newH = h;
+  if ((std::max)(w, h) > maxDim) {
+    if (w >= h) {
+      newW = maxDim;
+      newH = static_cast<int>(static_cast<float>(h) * maxDim / w);
+    } else {
+      newH = maxDim;
+      newW = static_cast<int>(static_cast<float>(w) * maxDim / h);
+    }
+    std::vector<unsigned char> resized(newW * newH * 3);
+    stbir_resize_uint8_linear(img, w, h, 0, resized.data(), newW, newH, 0, STBIR_RGB);
+    stbi_image_free(img);
+
+    std::cout << " (resized to " << newW << "x" << newH << ")" << std::endl;
+
+    // Encode resized image to JPEG in memory
+    std::vector<unsigned char> jpegBuf;
+    stbi_write_jpg_to_func(
+        [](void* ctx, void* data, int size) {
+          auto* buf = static_cast<std::vector<unsigned char>*>(ctx);
+          auto* bytes = static_cast<unsigned char*>(data);
+          buf->insert(buf->end(), bytes, bytes + size);
+        },
+        &jpegBuf, newW, newH, 3, resized.data(), 90);
+
+    return {Base64Encode(jpegBuf), "image/jpeg"};
+  }
+
+  // No resize needed — encode original to JPEG
+  std::vector<unsigned char> jpegBuf;
+  stbi_write_jpg_to_func(
+      [](void* ctx, void* data, int size) {
+        auto* buf = static_cast<std::vector<unsigned char>*>(ctx);
+        auto* bytes = static_cast<unsigned char*>(data);
+        buf->insert(buf->end(), bytes, bytes + size);
+      },
+      &jpegBuf, w, h, 3, img, 90);
+  stbi_image_free(img);
+
+  return {Base64Encode(jpegBuf), "image/jpeg"};
+}
+
+// cURL SSE streaming callback
+static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb,
+                                  void* /*userdata*/) {
+  size_t totalBytes = size * nmemb;
+  std::string chunk(ptr, totalBytes);
+
+  std::istringstream stream(chunk);
+  std::string line;
+  while (std::getline(stream, line)) {
+    if (!line.empty() && line.back() == '\r') {
+      line.pop_back();
+    }
+    if (line.rfind("data: ", 0) != 0) continue;
+    std::string data = line.substr(6);
+    if (data == "[DONE]")
+      break;
+
+    try {
+      auto j = json::parse(data);
+      std::string type = j.value("type", "");
+      if (type == "response.output_text.delta") {
+        std::string delta = j.value("delta", "");
+        std::cout << delta << std::flush;
+      }
+    } catch (...) {
+      // Skip malformed JSON fragments
+    }
+  }
+  return totalBytes;
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 2) {
+    std::cout << "Usage: web-server-responses-vision <model_alias>" << std::endl;
+    std::cout << " Example: web-server-responses-vision qwen3.5-0.8b" << std::endl;
+    return 1;
+  }
+
+  const std::string modelAlias = argv[1];
+  const std::filesystem::path imagePath =
+      std::filesystem::path(__FILE__).parent_path() / "test_image.jpg";
+
+  try {
+    //
+    foundry_local::Configuration config("foundry_local_samples");
+    config.web = foundry_local::WebServiceConfig{};
+
+    foundry_local::Manager::Create(config);
+    auto& manager = foundry_local::Manager::Instance();
+
+    // Download and register execution providers
+    std::cout << "\nDownloading execution providers:" << std::endl;
+#ifdef _WIN32
+    {
+      WinMLEpCatalogHandle epCatalog = nullptr;
+      HRESULT hr = WinMLEpCatalogCreate(&epCatalog);
+      if (SUCCEEDED(hr)) {
+        WinMLEpCatalogEnumProviders(epCatalog,
+            [](WinMLEpHandle ep, const WinMLEpInfo* info, void*) -> BOOL {
+              if (info == nullptr) return TRUE;
+              if (info->readyState == WinMLEpReadyState_NotPresent ||
+                  info->readyState == WinMLEpReadyState_NotReady) {
+                std::cout << " Installing: " << info->name << std::endl;
+                WinMLEpEnsureReady(ep);
+              }
+              return TRUE;
+            }, nullptr);
+        WinMLEpCatalogRelease(epCatalog);
+        std::cout << " Done." << std::endl;
+      } else {
+        std::cout << " EP catalog not available." << std::endl;
+      }
+    }
+#endif
+    //
+
+    //
+    auto& catalog = manager.GetCatalog();
+    auto* model = catalog.GetModel(modelAlias);
+    if (!model) {
+      auto models = catalog.GetModels();
+      std::cout << "\nModel '" << modelAlias << "' not found in catalog." << std::endl;
+      std::cout << "Available models:";
+      for (auto* m : models) {
+        std::cout << " " << m->GetAlias();
+      }
+      std::cout << std::endl;
+      return 1;
+    }
+
+    if (!model->IsCached()) {
+      std::cout << "\nDownloading model " << modelAlias << "..." << std::endl;
+      model->Download([](float pct) {
+        std::cout << "\rDownloading model: " << pct << "% " << std::flush;
+        return true;
+      });
+      std::cout << "\nModel downloaded" << std::endl;
+    }
+
+    std::cout << "\nLoading model..." << std::endl;
+    model->Load();
+    std::cout << "Model loaded" << std::endl;
+    //
+
+    //
+    std::cout << "\nStarting web service..."
+              << std::endl;
+    manager.StartWebService();
+    auto endpoints = manager.GetWebServiceEndpoints();
+    if (endpoints.empty()) {
+      throw std::runtime_error("No web service endpoints available");
+    }
+    std::string baseUrl = endpoints[0];
+    if (!baseUrl.empty() && baseUrl.back() == '/') {
+      baseUrl.pop_back();
+    }
+    baseUrl += "/v1";
+    std::cout << "Web service started" << std::endl;
+
+    // Use cURL to call the local Foundry web service Responses API
+    // (C++ equivalent of OpenAI SDK used in the Python sample)
+    std::string responsesUrl = baseUrl + "/responses";
+    //
+
+    //
+    std::cout << "\nPreparing image: " << imagePath.string() << std::endl;
+    auto [imageB64, mediaType] = ResizeAndEncode(imagePath);
+
+    json visionInput = json::array({
+        {
+            {"type", "message"},
+            {"role", "user"},
+            {"content", json::array({
+                {{"type", "input_text"}, {"text", "Describe this image."}},
+                {
+                    {"type", "input_image"},
+                    {"image_data", imageB64},
+                    {"media_type", mediaType},
+                }
+            })}
+        }
+    });
+
+    std::cout << "\nStreaming vision response..." << std::endl;
+
+    json requestBody = {
+        {"model", model->GetId()},
+        {"input", visionInput},
+        {"stream", true},
+    };
+    std::string body = requestBody.dump();
+
+    CURL* curl = curl_easy_init();
+    if (!curl) {
+      throw std::runtime_error("Failed to initialize cURL");
+    }
+
+    struct curl_slist* headers = nullptr;
+    headers = curl_slist_append(headers, "Content-Type: application/json");
+    headers = curl_slist_append(headers, "Accept: text/event-stream");
+
+    curl_easy_setopt(curl, CURLOPT_URL, responsesUrl.c_str());
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str());
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, StreamWriteCallback);
+
+    std::cout << "[ASSISTANT]: " << std::flush;
+    CURLcode res = curl_easy_perform(curl);
+    std::cout << std::endl;
+
+    if (res != CURLE_OK) {
+      std::cerr << "cURL error: " << curl_easy_strerror(res) << std::endl;
+    }
+
+    curl_slist_free_all(headers);
+    curl_easy_cleanup(curl);
+    //
+
+    manager.StopWebService();
+    model->Unload();
+    foundry_local::Manager::Destroy();
+
+  } catch (const std::exception& ex) {
+    std::cerr << "Error: " << ex.what() << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/samples/cpp/web-server-responses-vision/stb_impl.cpp b/samples/cpp/web-server-responses-vision/stb_impl.cpp
new file mode 100644
index 000000000..ae81fc3b8
--- /dev/null
+++ b/samples/cpp/web-server-responses-vision/stb_impl.cpp
@@ -0,0 +1,8 @@
+// stb implementation file — include exactly once per project.
+#define STB_IMAGE_IMPLEMENTATION +#define STB_IMAGE_RESIZE2_IMPLEMENTATION +#define STB_IMAGE_WRITE_IMPLEMENTATION + +#include +#include +#include diff --git a/samples/cpp/web-server-responses-vision/test_image.jpg b/samples/cpp/web-server-responses-vision/test_image.jpg new file mode 100644 index 0000000000000000000000000000000000000000..73a4e8004db0fd82a2913bd14ad8b97672097ac5 GIT binary patch literal 6828 zcmc&&cT^MmwjOE#k)nx!lu)HAB1n;DIROy`K_EyK5D_?l^r8?20gV(1f*z`X2+|_b zM0)R{(v%{-w-8zqQr>XxJ?HAVuiX3IA8)euT9Y+1d;h-K-*4}4Htj2I0^qu&qo)JV z(E$J*_ycGozy*MXnHj>&!~%gpSXo)v*r5m64;*0UJ9L;6DtHtwBzROnKv?{Qq_C)* zn1FzkinQEG1tldVxTKn<>M4yAib|*UozSteva%mw=RJ6k_mqf$$f zpc4h?x#<|V>1fRW99$<8-R}qR_k)g}fsu(B!otdS0KB1$3!tZCV4!DYU}9oq1n&+8 zuLF$SOov1i&M_Z0vV(|vLQg%2%U}^ZU)%sQ{)`h>wD$^SWjn&d%Xd`bn52}njM8ak z6;-t}7k)wL=w8&jbj`%n3~7Gd!r`XlEvMVgF5W)Ae*OW0L17QWBO)I?Mq}ciB_uw7 z@mo@6)~oE-Ik|80-j$S=l~=s4{Lt9c+|t_C-qHD`uYX{0=xkPlAYPx`Yw@#N?MXjWe+_u4j#5;eh}NpKCDt**m%bxufL0ye8TH$; zEv`y?$%Y1Wj8lhne~31O`K&u6O9NCU!YGztp!p%{75UBo9)c8U7BxraLgfeAl1ZS; zY^qZss&Rz|ytzvQBvE_TL4FCrl$=-BZHXba!!Kjpl1nPQ!lHItU1x(x{*B{!soQni zlMw0?i`9mqH^x`4j1r3^I&m*#OlBh^W!=WjNP*?9MhMsfTnK7p6&pL%gmJJ8~q4kHU@WQ^ryZ;f0i zP=?pav9^7O3yZ~v)=w`fZqB1aL<#XNBU%0O_wp5KzzM(lHXhtIpG4)=5c@mcQAk-^ z=g>(eM>(tQRrn?ikl0kqenh=CewuTb$1ukCs7K_QyU0l5-Q@7_u$voY7eWZdOA_QUm!Ozg`3J9Ta`wGTc^qFZJFzo+b_llRVx#jeN+u2m z%!eY;w_j*@>9kzu6$oXS;kr9?ME2p^1lU4H17(>^0}f@`=zFg&$tZ-U^NEiUA}~B0YIK$fG@&aSCDa?JoedgLy4>M9SgVZc z=A?et-boES9$?-(-8D;u=i$c5=tb1XIO!U=Su11jNM)jdA5C^~`~a#wBa;RUJ%p2v zyUxk{%#p2A>uBB9jaZy?P+j|ACc1OuL=`0Bo(ig9v}j?~Uhc5Chj(MqS=BBs#Tdn; z9p^^VV;M|-TQ)iMDsBe4Q#sAbxLe7vv=r{Ly8{mV3XGO7r@LkQFe8Oy!nY=hg;g+AII3`d$ZQJ$wK+*QV3QUefQ|2W{qnUSpiWJNW{`-xyz5CY${# zzMu}*;Zt@0KJhfLQ&y(mdGPgOKk~`gLFLrXJFlbjEnJ6Bzs|ScD=f-9bnK#p_kaN_ z-Gg>94K9QYHi^m}(6a?UbmFm>ch1tgJo@Xg2`VC&9>i@5NKfzO2+M0MC zEqd_eh1lYh$w8@G^B;?#V9p%(bN2s`FDwILZ%QZKxEMEjB7)zS2gz@)u*olC66JW& zDy{l4M1aADXe&gRzvgD!W-+$-UjO0UjziK84Q9~>{S5-##mF>DAhve$vP^!IuVX2U z>VY7orwk=zIizQzWw+v^eq_@Ax$JUXabaou@7}-|J-pE>Tk;<$SSKp^M=dqug;j%5i_8f!; zc)<4zsuQc_oHl0hXq%ZsepKDcghv zt513mp-Q$STo?^N6bAA6w}?##v=N{+wll8CPFvWP@+YdOi)$GfNqF%Hf1+=p>z+yv_oZ*z(B&mnMg2j-^|~ zEwT?WJ#{cv$uK216HWPL@}HChNp|8`#Gn?mdSC{L>~}gKWDdJM)y@vg2(AjNQ5U8w zahm*kRYPUTS7cgLti~>hT@F1zpD#dv{sG276-e6S*ZJb6BzUy%Qt_4Y!D5y^VfwF2 zZ2*1qOni*qv0B|Ho_T3URUNBS$HB@M=24jn74!Gnh8o$X3EF`vgT&wal?J?kQ>sy` zXYdx=*y!|41{zQiOUbd(w>hFnz!O7sm#S50fanL?on7iPFfT<8qyLQJr+!OB*QVks z$mpZ;lz?+IAak&Wy=p3MquPPYW`o6_&QqH}&#?-pi6`=zyozhjNa&B^IyVujZt@IS zkXoi&50}2Dk5j5;+hFBhIb*%7k%%y#g>t`W7f&gg<(n4{8xr<3-SK!-t^S&jqm!&Z zAhIbOpWrw-<@x#3j$Qg3$2nng!!#-~sqVpibWx|(*UK$-RSyMU%}koZKQyyX#|<$~ zOm9_3YDAn!%D6rZzL$<_%3TUI#n;)ZsxU?eEror3@*fDXZUk@KEnBT|iwzSg5jKg) zD>w9y^a|Y(1L(p^Qsxgv!8BCKjSQVQ@|w@J^1W`9sRpn%S94 zgZjHkEHB{6YV}TxcOSHd++r94X)f93;NsxgSz|pu@j-sMe=W+y^eHGv+OHZRpma0i zKeD|xxAsfvs$th&aCqVP^VGC^vd2cJRokak;Rz-*;Gt#H_!iw48i3=_#$mN;*-T~B zaw>QDY!7{DuoH=V#_w5MVQq9bCI$@M59UmRBjJW)F+5O4X9V80gbT)M71UjI4dYm1Au87z@}Z2 zO>UsR|3m}Q4cB$MO^|(cKe~jX8jnw|lYm7O0Rkr&QBvjSIJa9P$s|;emVIU|s6T&N ze%4>Ztt_HWqKcHX=lk|P|IYxRC?|bl7{xOK)THU(V_)R|-1l$)C0e0sW(ti z^!ZHT##{jaSdh&>7~x~-mL?RKcCGtOBUhN2e}6F4$U3qoTvvnJszLm4Gp0%A8Ac6y z>O)*8iw2jj3kH3-JeN~o!?51 z7;cL1wDr+B7Sju*ON?<+lDE4uWH3mr#`L+IP@gT{L=&p>Qgmd7j5SH_D=K= zov~U-K$(R*cy;U|J5<1z54JUyn(#W+C9z!j?TDpr>eB|^U#wD6=;~+U3H{qG=xMe- zvy3acJtZ)cq;BB+J@Q^D4hJPP* z#p*I8+gVLp(FbD&b`dj%e{8rPKeq}vVT5;GjbBe&E{mCG-QbNW_$v7nvSi|! 
z^7z>^)7fIh6gwVi&4VQAIDG-!t55A1glL5Bg@hY{jNJX#meBvXmdZ`1bhotS@ zf*hMvwbsLaULzleqmOM%fF0jxzy1DK#-3;1NmYu>gAVbu&|2?}w#TnF zR37D|I{FodNBKQ6?Bw!%&(40NbxkLTqp^m6suyi(&=-HcOf_vsbMw6%8zKczXc)b~ z(0lfrK$wlRjoF#9@e*6b=I&yikWNh%&F7OUXzI`{(m~2ccagc?Zg(3Eudvw3)}Z*X z`o^K0q0V8OY|pHv#NqHPx3uqPmpg#zTvg$Ts8ID#*TrZEGScSL%JxO({`aw-HU_N% zr)D0coD@@?ht;xGXaou^Ity;(Ut*3q)&9kdGh(=y=@mvUYMu2sb zqxx#C7;Xun6f+!-DL^tkAV#Xg#ZxdmX++3<1?%u&#$!1!iGEU&FYKap+bvL$80~l5 z--VLt+8o|a7@qnbW?#JKxcOR6YhfZ-wth;kYsp;aC55<9hs2YIPiJvBbA$bWXg8|I z_?jJ*DfadcKL$gb18dGWZV|^?d0XIZ)O2{H4=H?cYA0Z{90P|iOM)X zu#tU=Ir~FO|DN8P@7fsUmA;63>SU*mPMUD&iElyay1|J<%fZQ5j?(fkqDwpnXD#U& zZqR_k6KQARMwm+8)0>4Jgs$f3sdVluxr_m;jgJba@41_|6{%zN)}y8+VGnGM$W)Z2 zC5z1!zle3ab|tX^6o+c<`%26`K)=~Lz_yudyscYu*PpdiA>c97{SE;f%ZwLuKsz?u zK2dE_FC$JIjbM+7l&QzFh~;AA^S^3w2V~!@(BiLD#c1M(i)9$3)o!*KpOYISNWQdK zuOFO!UJ`70?Rc|S*EfBqiZ@Hoj1WP8=)ZHq^m_Gz3+}^jWo^1^hj*|XVHw|D;-fn}geIz^R{~7i zA$<1Co~^EP!Pku651!UfJ-=FE=qchi^HlK@JLV2EAb20v6kXUMgs39NswgzyRlTda z1`4%osFBUN2~Z3Ft%8}qt^3d?pHcHJb)qu7Li|ZTd`X=vKn#sVu#T804g2f%3keM! zzlX>iSyC=~c=4s)(elg2H{37O&jd5;aGsYnu9lqiKwIh#@#v+@^dX>P-e=1M$gF_Jz0i|q-PLDeSb8qcW$;LTLMOmf)w*s5&bQb?PhbmAB)EztTWkJy zb0RmeBw|`4ycKM86;}~~+fZb0f9{fANBVJ#{CpKPUyvs+yML@+vI~ZhHd40S1Ygl4 zUAOZcX0oC6OCwGQs%kI0LR|dl&1qqw zfzJ_zD>)%iOAS++(fT!M^L;)h$`x0?e{1k}-89@WIKiErqc z19&2{m}ie`uYRwaUFb#0%^S@Y6uQ-6MWbOJds?>Zl5?oW*cBqGI{i0rorLrijW+7N zlA)cqYbKe6G{C8gGDGeKC4aj7Zv3Y9r0$N811QVQ@0$C|7@5(4l36M~#EC3L?R~!2 zK}^SG?QtrADV5VKP*0G`M}5Q7fIw|UDk$mya1{9h9A+m94GMNXGp+kGu=byhnz~W* zU7W5Qv!?b1ym!vE7ruCPsJTcTo$<#K%-rlSKi=6?w7+kXIB{W3HF literal 0 HcmV?d00001 diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index 855e6be15..ebd58ec5c 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -83,6 +83,29 @@ add_executable(CppSdkSample target_link_libraries(CppSdkSample PRIVATE CppSdk) +# ----------------------------- +# Vision sample (Responses API) +# ----------------------------- +find_package(CURL REQUIRED) + +set(VISION_SAMPLE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../samples/cpp/web-server-responses-vision") +set(WINML_DEPS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/_native_deps/Microsoft.Windows.AI.MachineLearning.2.0.300") + +add_executable(WebServerResponsesVision + ${VISION_SAMPLE_DIR}/main.cpp + ${VISION_SAMPLE_DIR}/stb_impl.cpp +) + +target_include_directories(WebServerResponsesVision PRIVATE + ${WINML_DEPS_DIR}/include +) + +target_link_libraries(WebServerResponsesVision PRIVATE + CppSdk + CURL::libcurl + ${WINML_DEPS_DIR}/lib/native/x64/Microsoft.Windows.AI.MachineLearning.lib +) + # ----------------------------- # Unit tests # ----------------------------- diff --git a/sdk/cpp/triplets/x64-windows-static-md.cmake b/sdk/cpp/triplets/x64-windows-static-md.cmake deleted file mode 100644 index 63d6cde24..000000000 --- a/sdk/cpp/triplets/x64-windows-static-md.cmake +++ /dev/null @@ -1,3 +0,0 @@ -set(VCPKG_TARGET_ARCHITECTURE x64) -set(VCPKG_CRT_LINKAGE dynamic) -set(VCPKG_LIBRARY_LINKAGE static) diff --git a/sdk/cpp/vcpkg.json b/sdk/cpp/vcpkg.json index 459a72c15..6845f8eed 100644 --- a/sdk/cpp/vcpkg.json +++ b/sdk/cpp/vcpkg.json @@ -4,6 +4,8 @@ "dependencies": [ "nlohmann-json", "ms-gsl", - "gtest" + "gtest", + "curl", + "stb" ] } From 8de30125458a27f6790e4f7ccf69b9ca07fecd2a Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 22:37:54 -0700 Subject: [PATCH 02/18] update the sample --- .../CMakeLists.txt | 40 ++++++++++++++----- .../cpp/web-server-responses-vision/README.md | 31 +++++++------- .../vcpkg-configuration.json | 6 +++ 
.../web-server-responses-vision/vcpkg.json | 10 +++++ 4 files changed, 63 insertions(+), 24 deletions(-) create mode 100644 samples/cpp/web-server-responses-vision/vcpkg-configuration.json create mode 100644 samples/cpp/web-server-responses-vision/vcpkg.json diff --git a/samples/cpp/web-server-responses-vision/CMakeLists.txt b/samples/cpp/web-server-responses-vision/CMakeLists.txt index a5eae8015..3a38288ff 100644 --- a/samples/cpp/web-server-responses-vision/CMakeLists.txt +++ b/samples/cpp/web-server-responses-vision/CMakeLists.txt @@ -1,9 +1,31 @@ -# This sample is built as part of the C++ SDK. -# See sdk/cpp/CMakeLists.txt for the build target (WebServerResponsesVision). -# -# Build from sdk/cpp: -# cmake --preset x64-debug -# cmake --build --preset x64-debug -# -# The built executable will be at: -# sdk/cpp/out/build/x64-debug/WebServerResponsesVision.exe +cmake_minimum_required(VERSION 3.20) +project(web-server-responses-vision LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Match the SDK's vcpkg triplet +set(VCPKG_TARGET_TRIPLET "x64-windows-static-md" CACHE STRING "") + +# Build the Foundry Local C++ SDK as a subdirectory +set(FOUNDRY_SDK_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../sdk/cpp") +set(BUILD_TESTING OFF CACHE BOOL "" FORCE) +add_subdirectory(${FOUNDRY_SDK_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CppSdk) + +# curl is installed by vcpkg via the SDK's vcpkg.json +find_package(CURL REQUIRED) + +# WinML EP Catalog (from NuGet _native_deps) +set(WINML_DEPS_DIR "${FOUNDRY_SDK_DIR}/_native_deps/Microsoft.Windows.AI.MachineLearning.2.0.300") + +add_executable(web-server-responses-vision main.cpp stb_impl.cpp) + +target_include_directories(web-server-responses-vision PRIVATE + ${WINML_DEPS_DIR}/include +) + +target_link_libraries(web-server-responses-vision PRIVATE + CppSdk + CURL::libcurl + ${WINML_DEPS_DIR}/lib/native/x64/Microsoft.Windows.AI.MachineLearning.lib +) diff --git a/samples/cpp/web-server-responses-vision/README.md b/samples/cpp/web-server-responses-vision/README.md index bdb036b7e..e550da764 100644 --- a/samples/cpp/web-server-responses-vision/README.md +++ b/samples/cpp/web-server-responses-vision/README.md @@ -25,10 +25,10 @@ The sample downloads the specified model the first time it runs (skips if alread ## Build -This sample is built as part of the C++ SDK. Open an **x64 Native Tools Command Prompt for VS 2022** (or run `vcvars64.bat`), then navigate to `sdk/cpp`: +Open an **x64 Native Tools Command Prompt for VS 2022** (or run `vcvars64.bat`), then navigate to the sample directory: ```bash -cd sdk/cpp +cd samples/cpp/web-server-responses-vision ``` ### 1. 
Download native dependencies @@ -36,37 +36,38 @@ cd sdk/cpp Download the required NuGet packages to `sdk/cpp/_native_deps` (needed for both build and runtime): ```bash -nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory _native_deps -nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory _native_deps -nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory _native_deps -nuget install Microsoft.Windows.AI.MachineLearning -Version 2.0.300 -OutputDirectory _native_deps +nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory ../../../sdk/cpp/_native_deps +nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory ../../../sdk/cpp/_native_deps +nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory ../../../sdk/cpp/_native_deps +nuget install Microsoft.Windows.AI.MachineLearning -Version 2.0.300 -OutputDirectory ../../../sdk/cpp/_native_deps ``` ### 2. Build ```bash -cmake --preset x64-debug -cmake --build --preset x64-debug --target WebServerResponsesVision +cmake -G Ninja -B build -DCMAKE_TOOLCHAIN_FILE="%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake" -DVCPKG_TARGET_TRIPLET=x64-windows-static-md +cmake --build build ``` -The built executable will be at `sdk/cpp/out/build/x64-debug/WebServerResponsesVision.exe`. +The built executable will be at `build/web-server-responses-vision.exe`. ### 3. Copy runtime DLLs Copy the `win-x64` DLLs next to the executable: ```bash -copy _native_deps\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll out\build\x64-debug\ -copy _native_deps\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll out\build\x64-debug\ -copy _native_deps\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll out\build\x64-debug\ -copy _native_deps\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\Microsoft.Windows.AI.MachineLearning.dll out\build\x64-debug\ -copy _native_deps\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\DirectML.dll out\build\x64-debug\ +set DEPS=..\..\..\sdk\cpp\_native_deps +copy %DEPS%\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll build\ +copy %DEPS%\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll build\ +copy %DEPS%\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll build\ +copy %DEPS%\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\Microsoft.Windows.AI.MachineLearning.dll build\ +copy %DEPS%\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\DirectML.dll build\ ``` ## Run the sample ```bash -.\out\build\x64-debug\WebServerResponsesVision.exe qwen3.5-0.8b +.\build\web-server-responses-vision.exe qwen3.5-0.8b ``` The sample starts the local web service, sends vision requests via the Responses API to `http://localhost:/v1`, prints the model output, and then stops the web service. 
diff --git a/samples/cpp/web-server-responses-vision/vcpkg-configuration.json b/samples/cpp/web-server-responses-vision/vcpkg-configuration.json new file mode 100644 index 000000000..a5253fb7a --- /dev/null +++ b/samples/cpp/web-server-responses-vision/vcpkg-configuration.json @@ -0,0 +1,6 @@ +{ + "default-registry": { + "kind": "builtin", + "baseline": "a9f0cd0345fb29cd227d802f1fd1917c28f8e5a3" + } +} diff --git a/samples/cpp/web-server-responses-vision/vcpkg.json b/samples/cpp/web-server-responses-vision/vcpkg.json new file mode 100644 index 000000000..7d7d593e4 --- /dev/null +++ b/samples/cpp/web-server-responses-vision/vcpkg.json @@ -0,0 +1,10 @@ +{ + "name": "web-server-responses-vision", + "version-string": "0.1.0", + "dependencies": [ + "nlohmann-json", + "ms-gsl", + "curl", + "stb" + ] +} From 49bfc7489781f86d5b1ab2eaf4a832d0d32e2942 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 22:40:29 -0700 Subject: [PATCH 03/18] Add triplets back --- sdk/cpp/triplets/x64-windows-static-md.cmake | 3 +++ sdk/cpp/vcpkg.json | 4 +--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 sdk/cpp/triplets/x64-windows-static-md.cmake diff --git a/sdk/cpp/triplets/x64-windows-static-md.cmake b/sdk/cpp/triplets/x64-windows-static-md.cmake new file mode 100644 index 000000000..63d6cde24 --- /dev/null +++ b/sdk/cpp/triplets/x64-windows-static-md.cmake @@ -0,0 +1,3 @@ +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) diff --git a/sdk/cpp/vcpkg.json b/sdk/cpp/vcpkg.json index 6845f8eed..459a72c15 100644 --- a/sdk/cpp/vcpkg.json +++ b/sdk/cpp/vcpkg.json @@ -4,8 +4,6 @@ "dependencies": [ "nlohmann-json", "ms-gsl", - "gtest", - "curl", - "stb" + "gtest" ] } From 555d8415eb29e70da8b6122af39942e89fdcb773 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 22:41:32 -0700 Subject: [PATCH 04/18] revert cmakelists changes --- sdk/cpp/CMakeLists.txt | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index ebd58ec5c..855e6be15 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -83,29 +83,6 @@ add_executable(CppSdkSample target_link_libraries(CppSdkSample PRIVATE CppSdk) -# ----------------------------- -# Vision sample (Responses API) -# ----------------------------- -find_package(CURL REQUIRED) - -set(VISION_SAMPLE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../samples/cpp/web-server-responses-vision") -set(WINML_DEPS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/_native_deps/Microsoft.Windows.AI.MachineLearning.2.0.300") - -add_executable(WebServerResponsesVision - ${VISION_SAMPLE_DIR}/main.cpp - ${VISION_SAMPLE_DIR}/stb_impl.cpp -) - -target_include_directories(WebServerResponsesVision PRIVATE - ${WINML_DEPS_DIR}/include -) - -target_link_libraries(WebServerResponsesVision PRIVATE - CppSdk - CURL::libcurl - ${WINML_DEPS_DIR}/lib/native/x64/Microsoft.Windows.AI.MachineLearning.lib -) - # ----------------------------- # Unit tests # ----------------------------- From 356c8948a1a350ca95a68ad155795dc07abdddf9 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 22:51:11 -0700 Subject: [PATCH 05/18] remove winml installation --- .../CMakeLists.txt | 8 ----- .../cpp/web-server-responses-vision/README.md | 32 ++++++++----------- .../cpp/web-server-responses-vision/main.cpp | 26 --------------- 3 files changed, 13 insertions(+), 53 deletions(-) diff --git a/samples/cpp/web-server-responses-vision/CMakeLists.txt 
b/samples/cpp/web-server-responses-vision/CMakeLists.txt index 3a38288ff..de7bc2be6 100644 --- a/samples/cpp/web-server-responses-vision/CMakeLists.txt +++ b/samples/cpp/web-server-responses-vision/CMakeLists.txt @@ -15,17 +15,9 @@ add_subdirectory(${FOUNDRY_SDK_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CppSdk) # curl is installed by vcpkg via the SDK's vcpkg.json find_package(CURL REQUIRED) -# WinML EP Catalog (from NuGet _native_deps) -set(WINML_DEPS_DIR "${FOUNDRY_SDK_DIR}/_native_deps/Microsoft.Windows.AI.MachineLearning.2.0.300") - add_executable(web-server-responses-vision main.cpp stb_impl.cpp) -target_include_directories(web-server-responses-vision PRIVATE - ${WINML_DEPS_DIR}/include -) - target_link_libraries(web-server-responses-vision PRIVATE CppSdk CURL::libcurl - ${WINML_DEPS_DIR}/lib/native/x64/Microsoft.Windows.AI.MachineLearning.lib ) diff --git a/samples/cpp/web-server-responses-vision/README.md b/samples/cpp/web-server-responses-vision/README.md index e550da764..d0e6bca94 100644 --- a/samples/cpp/web-server-responses-vision/README.md +++ b/samples/cpp/web-server-responses-vision/README.md @@ -33,13 +33,12 @@ cd samples/cpp/web-server-responses-vision ### 1. Download native dependencies -Download the required NuGet packages to `sdk/cpp/_native_deps` (needed for both build and runtime): +Download the required NuGet packages to `_native_deps` (needed for runtime DLLs): ```bash -nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory ../../../sdk/cpp/_native_deps -nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory ../../../sdk/cpp/_native_deps -nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory ../../../sdk/cpp/_native_deps -nuget install Microsoft.Windows.AI.MachineLearning -Version 2.0.300 -OutputDirectory ../../../sdk/cpp/_native_deps +nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory _native_deps +nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory _native_deps +nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory _native_deps ``` ### 2. Build @@ -56,12 +55,9 @@ The built executable will be at `build/web-server-responses-vision.exe`. Copy the `win-x64` DLLs next to the executable: ```bash -set DEPS=..\..\..\sdk\cpp\_native_deps -copy %DEPS%\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll build\ -copy %DEPS%\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll build\ -copy %DEPS%\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll build\ -copy %DEPS%\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\Microsoft.Windows.AI.MachineLearning.dll build\ -copy %DEPS%\Microsoft.Windows.AI.MachineLearning.2.0.300\runtimes\win-x64\native\DirectML.dll build\ +copy _native_deps\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll build\ +copy _native_deps\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll build\ +copy _native_deps\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll build\ ``` ## Run the sample @@ -75,13 +71,12 @@ The sample starts the local web service, sends vision requests via the Responses ## How it works 1. **Initialize** — creates the `Manager` singleton with web service configuration -2. **Execution providers** — discovers and installs compatible EPs (including WebGPU) via the Windows ML EP Catalog -3. 
**Model setup** — resolves the model alias, downloads if not cached, and loads into memory -4. **Web service** — starts the local Foundry web service on a random port -5. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG -6. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts -7. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive -8. **Cleanup** — stops the web service, unloads the model, and destroys the manager +2. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory +3. **Web service** — starts the local Foundry web service on a random port +4. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG +5. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts +6. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive +7. **Cleanup** — stops the web service, unloads the model, and destroys the manager ## Troubleshooting @@ -89,7 +84,6 @@ The sample starts the local web service, sends vision requests via the Responses |---|---|---| | `Cannot open file: test_image.jpg` | Default image not found | Ensure `test_image.jpg` is present next to the source file | | `Model 'xyz' not found in catalog` | Invalid model alias | Check available models printed in the error output | -| `Microsoft.Windows.AI.MachineLearning.dll was not found` | WinML DLL missing | Copy the DLL from `_native_deps` to the build output (see DLL dependencies) | | `WebGPU execution provider is not supported` | WebGPUExecutionProvider not available | WebGPU models are not supported yet; the sample automatically falls back to the CPU variant | | cURL connection refused | Web service failed to start | Ensure `config.web` is set and no port conflicts exist | diff --git a/samples/cpp/web-server-responses-vision/main.cpp b/samples/cpp/web-server-responses-vision/main.cpp index 7de142ee0..8d0152595 100644 --- a/samples/cpp/web-server-responses-vision/main.cpp +++ b/samples/cpp/web-server-responses-vision/main.cpp @@ -19,7 +19,6 @@ #ifdef _WIN32 #include -#include #endif using json = nlohmann::json; @@ -152,31 +151,6 @@ int main(int argc, char* argv[]) { foundry_local::Manager::Create(config); auto& manager = foundry_local::Manager::Instance(); - - // Download and register execution providers - std::cout << "\nDownloading execution providers:" << std::endl; -#ifdef _WIN32 - { - WinMLEpCatalogHandle epCatalog = nullptr; - HRESULT hr = WinMLEpCatalogCreate(&epCatalog); - if (SUCCEEDED(hr)) { - WinMLEpCatalogEnumProviders(epCatalog, - [](WinMLEpHandle ep, const WinMLEpInfo* info, void*) -> BOOL { - if (info == nullptr) return TRUE; - if (info->readyState == WinMLEpReadyState_NotPresent || - info->readyState == WinMLEpReadyState_NotReady) { - std::cout << " Installing: " << info->name << std::endl; - WinMLEpEnsureReady(ep); - } - return TRUE; - }, nullptr); - WinMLEpCatalogRelease(epCatalog); - std::cout << " Done." << std::endl; - } else { - std::cout << " EP catalog not available." 
<< std::endl; - } - } -#endif // // From 14d2c97697a23944df9741f991838df66801fa09 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 23:01:16 -0700 Subject: [PATCH 06/18] Address copilot comments --- .../cpp/web-server-responses-vision/README.md | 2 +- .../cpp/web-server-responses-vision/main.cpp | 99 ++++++++++++++----- 2 files changed, 77 insertions(+), 24 deletions(-) diff --git a/samples/cpp/web-server-responses-vision/README.md b/samples/cpp/web-server-responses-vision/README.md index d0e6bca94..08002a285 100644 --- a/samples/cpp/web-server-responses-vision/README.md +++ b/samples/cpp/web-server-responses-vision/README.md @@ -9,7 +9,7 @@ This sample demonstrates vision (image understanding) capabilities using the Fou - **Vision inference** — send an image to a vision-capable model and get a description - **Streaming** — token-by-token output via Server-Sent Events (SSE) - **Responses API** — uses the `/v1/responses` endpoint (not chat completions) -- Uses a default test image (`test_image.jpg`) if no image path is provided +- Uses a default test image (`test_image.jpg`) ## Prerequisites diff --git a/samples/cpp/web-server-responses-vision/main.cpp b/samples/cpp/web-server-responses-vision/main.cpp index 8d0152595..79f0d8956 100644 --- a/samples/cpp/web-server-responses-vision/main.cpp +++ b/samples/cpp/web-server-responses-vision/main.cpp @@ -1,11 +1,14 @@ // // +#include #include #include #include #include #include +#include #include +#include #include #include @@ -70,28 +73,39 @@ std::pair ResizeAndEncode(const std::filesystem::path& newH = maxDim; newW = static_cast(static_cast(w) * maxDim / h); } + // Clamp to at least 1 pixel for extreme aspect ratios + newW = (std::max)(newW, 1); + newH = (std::max)(newH, 1); + std::vector resized(newW * newH * 3); - stbir_resize_uint8_linear(img, w, h, 0, resized.data(), newW, newH, 0, STBIR_RGB); + unsigned char* result = stbir_resize_uint8_linear( + img, w, h, 0, resized.data(), newW, newH, 0, STBIR_RGB); stbi_image_free(img); + if (!result) { + throw std::runtime_error("Failed to resize image"); + } std::cout << " (resized to " << newW << "x" << newH << ")" << std::endl; // Encode resized image to JPEG in memory std::vector jpegBuf; - stbi_write_jpg_to_func( + int writeOk = stbi_write_jpg_to_func( [](void* ctx, void* data, int size) { auto* buf = static_cast*>(ctx); auto* bytes = static_cast(data); buf->insert(buf->end(), bytes, bytes + size); }, &jpegBuf, newW, newH, 3, resized.data(), 90); + if (!writeOk) { + throw std::runtime_error("Failed to encode resized image to JPEG"); + } return {Base64Encode(jpegBuf), "image/jpeg"}; } // No resize needed — encode original to JPEG std::vector jpegBuf; - stbi_write_jpg_to_func( + int writeOk = stbi_write_jpg_to_func( [](void* ctx, void* data, int size) { auto* buf = static_cast*>(ctx); auto* bytes = static_cast(data); @@ -99,37 +113,69 @@ std::pair ResizeAndEncode(const std::filesystem::path& }, &jpegBuf, w, h, 3, img, 90); stbi_image_free(img); + if (!writeOk) { + throw std::runtime_error("Failed to encode image to JPEG"); + } return {Base64Encode(jpegBuf), "image/jpeg"}; } -// cURL SSE streaming callback +// Persistent buffer for SSE parsing across cURL callbacks. +struct SseBuffer { + std::string partial; // incomplete line carried over between callbacks + bool done = false; // set when [DONE] is received +}; + +// Process a single complete SSE line. Returns true if [DONE] was received. 
+static bool ProcessSseLine(const std::string& line) { + if (line.rfind("data: ", 0) != 0) return false; + std::string data = line.substr(6); + if (data == "[DONE]") return true; + + try { + auto j = json::parse(data); + std::string type = j.value("type", ""); + if (type == "response.output_text.delta") { + std::string delta = j.value("delta", ""); + std::cout << delta << std::flush; + } + } catch (...) { + // Skip malformed JSON + } + return false; +} + +// cURL SSE streaming callback — appends to a persistent buffer and +// processes only complete lines, retaining any trailing partial line. +// Returns 0 to abort the transfer once [DONE] is observed. static size_t StreamWriteCallback(char* ptr, size_t size, size_t nmemb, - void* /*userdata*/) { + void* userdata) { size_t totalBytes = size * nmemb; - std::string chunk(ptr, totalBytes); + auto* buf = static_cast(userdata); + + if (buf->done) return 0; // abort transfer + + buf->partial.append(ptr, totalBytes); - std::istringstream stream(chunk); - std::string line; - while (std::getline(stream, line)) { + // Process all complete lines (terminated by \n) + std::string::size_type pos = 0; + std::string::size_type newline; + while ((newline = buf->partial.find('\n', pos)) != std::string::npos) { + std::string line = buf->partial.substr(pos, newline - pos); + // Strip trailing \r if (!line.empty() && line.back() == '\r') { line.pop_back(); } - if (line.rfind("data: ", 0) != 0) continue; - std::string data = line.substr(6); - if (data == "[DONE]") break; - - try { - auto j = json::parse(data); - std::string type = j.value("type", ""); - if (type == "response.output_text.delta") { - std::string delta = j.value("delta", ""); - std::cout << delta << std::flush; - } - } catch (...) { - // Skip malformed JSON fragments + if (ProcessSseLine(line)) { + buf->done = true; + buf->partial.clear(); + return 0; // abort transfer cleanly } + pos = newline + 1; } + // Retain any trailing partial line for the next callback + buf->partial.erase(0, pos); + return totalBytes; } @@ -144,6 +190,8 @@ int main(int argc, char* argv[]) { const std::filesystem::path imagePath = std::filesystem::path(__FILE__).parent_path() / "test_image.jpg"; + curl_global_init(CURL_GLOBAL_DEFAULT); + try { // foundry_local::Configuration config("foundry_local_samples"); @@ -242,11 +290,14 @@ int main(int argc, char* argv[]) { curl_easy_setopt(curl, CURLOPT_POSTFIELDS, body.c_str()); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, StreamWriteCallback); + SseBuffer sseBuf; + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &sseBuf); + std::cout << "[ASSISTANT]: " << std::flush; CURLcode res = curl_easy_perform(curl); std::cout << std::endl; - if (res != CURLE_OK) { + if (res != CURLE_OK && !(res == CURLE_WRITE_ERROR && sseBuf.done)) { std::cerr << "cURL error: " << curl_easy_strerror(res) << std::endl; } @@ -260,8 +311,10 @@ int main(int argc, char* argv[]) { } catch (const std::exception& ex) { std::cerr << "Error: " << ex.what() << std::endl; + curl_global_cleanup(); return 1; } + curl_global_cleanup(); return 0; } From 4fb33727ab9fe934f5c5a94dfe3a22592d95f23f Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 23:11:11 -0700 Subject: [PATCH 07/18] Make this sample self contained. 
Update CmakeLists to take care of all the tasks --- .../CMakeLists.txt | 50 +++++++++++++++++++ .../cpp/web-server-responses-vision/README.md | 30 +++-------- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/samples/cpp/web-server-responses-vision/CMakeLists.txt b/samples/cpp/web-server-responses-vision/CMakeLists.txt index de7bc2be6..4f8cb5f86 100644 --- a/samples/cpp/web-server-responses-vision/CMakeLists.txt +++ b/samples/cpp/web-server-responses-vision/CMakeLists.txt @@ -7,6 +7,42 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Match the SDK's vcpkg triplet set(VCPKG_TARGET_TRIPLET "x64-windows-static-md" CACHE STRING "") +# ── Auto-download NuGet native dependencies ────────────────────────────── +# These provide the Foundry Local Core DLL and ONNX Runtime DLLs needed at +# runtime. Versions are kept in sync with the SDK's deps_versions.json. +set(CORE_VERSION "1.1.0") +set(ORT_VERSION "1.25.1") +set(ORTGENAI_VERSION "0.13.2") +set(NATIVE_DEPS_DIR "${CMAKE_CURRENT_BINARY_DIR}/_native_deps") + +function(ensure_nuget_package PKG_NAME PKG_VERSION) + set(PKG_DIR "${NATIVE_DEPS_DIR}/${PKG_NAME}.${PKG_VERSION}") + if(NOT EXISTS "${PKG_DIR}") + message(STATUS "Downloading ${PKG_NAME} ${PKG_VERSION}...") + find_program(NUGET_EXE nuget) + if(NOT NUGET_EXE) + message(FATAL_ERROR "nuget.exe not found on PATH. Install NuGet CLI: https://www.nuget.org/downloads") + endif() + execute_process( + COMMAND ${NUGET_EXE} install ${PKG_NAME} -Version ${PKG_VERSION} + -OutputDirectory ${NATIVE_DEPS_DIR} -NonInteractive + RESULT_VARIABLE result + ) + if(NOT result EQUAL 0) + message(FATAL_ERROR "Failed to download ${PKG_NAME} ${PKG_VERSION}") + endif() + endif() +endfunction() + +ensure_nuget_package("Microsoft.AI.Foundry.Local.Core" ${CORE_VERSION}) +ensure_nuget_package("Microsoft.ML.OnnxRuntime.Foundry" ${ORT_VERSION}) +ensure_nuget_package("Microsoft.ML.OnnxRuntimeGenAI.Foundry" ${ORTGENAI_VERSION}) + +# ── Copy runtime DLLs to build output after build ──────────────────────── +set(CORE_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.AI.Foundry.Local.Core.${CORE_VERSION}/runtimes/win-x64/native") +set(ORT_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntime.Foundry.${ORT_VERSION}/runtimes/win-x64/native") +set(ORTGENAI_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntimeGenAI.Foundry.${ORTGENAI_VERSION}/runtimes/win-x64/native") + # Build the Foundry Local C++ SDK as a subdirectory set(FOUNDRY_SDK_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../sdk/cpp") set(BUILD_TESTING OFF CACHE BOOL "" FORCE) @@ -21,3 +57,17 @@ target_link_libraries(web-server-responses-vision PRIVATE CppSdk CURL::libcurl ) + +# Copy runtime DLLs next to the executable after build +add_custom_command(TARGET web-server-responses-vision POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${CORE_DLL_DIR}/Microsoft.AI.Foundry.Local.Core.dll" + $ + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${ORT_DLL_DIR}" + $ + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${ORTGENAI_DLL_DIR}" + $ + COMMENT "Copying runtime DLLs..." +) diff --git a/samples/cpp/web-server-responses-vision/README.md b/samples/cpp/web-server-responses-vision/README.md index 08002a285..dd56f3db5 100644 --- a/samples/cpp/web-server-responses-vision/README.md +++ b/samples/cpp/web-server-responses-vision/README.md @@ -29,36 +29,18 @@ Open an **x64 Native Tools Command Prompt for VS 2022** (or run `vcvars64.bat`), ```bash cd samples/cpp/web-server-responses-vision -``` - -### 1. 
Download native dependencies - -Download the required NuGet packages to `_native_deps` (needed for runtime DLLs): - -```bash -nuget install Microsoft.AI.Foundry.Local.Core -Version 1.1.0 -OutputDirectory _native_deps -nuget install Microsoft.ML.OnnxRuntime.Foundry -Version 1.25.1 -OutputDirectory _native_deps -nuget install Microsoft.ML.OnnxRuntimeGenAI.Foundry -Version 0.13.2 -OutputDirectory _native_deps -``` - -### 2. Build - -```bash cmake -G Ninja -B build -DCMAKE_TOOLCHAIN_FILE="%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake" -DVCPKG_TARGET_TRIPLET=x64-windows-static-md cmake --build build ``` -The built executable will be at `build/web-server-responses-vision.exe`. - -### 3. Copy runtime DLLs +CMake will automatically: +- Install vcpkg dependencies (`nlohmann-json`, `ms-gsl`, `curl`, `stb`) +- Download the required NuGet packages (`Microsoft.AI.Foundry.Local.Core`, `Microsoft.ML.OnnxRuntime.Foundry`, `Microsoft.ML.OnnxRuntimeGenAI.Foundry`) +- Copy runtime DLLs next to the executable after build -Copy the `win-x64` DLLs next to the executable: +The built executable will be at `build/web-server-responses-vision.exe`. -```bash -copy _native_deps\Microsoft.AI.Foundry.Local.Core.1.1.0\runtimes\win-x64\native\*.dll build\ -copy _native_deps\Microsoft.ML.OnnxRuntime.Foundry.1.25.1\runtimes\win-x64\native\*.dll build\ -copy _native_deps\Microsoft.ML.OnnxRuntimeGenAI.Foundry.0.13.2\runtimes\win-x64\native\*.dll build\ -``` +> **Note:** `nuget.exe` must be on the PATH for auto-download to work. Install from [nuget.org/downloads](https://www.nuget.org/downloads). ## Run the sample From d5b6cc4481c20944241965b9a679c1e1ed133eae Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Wed, 6 May 2026 23:14:08 -0700 Subject: [PATCH 08/18] Fix overflow issue --- samples/cpp/web-server-responses-vision/main.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/cpp/web-server-responses-vision/main.cpp b/samples/cpp/web-server-responses-vision/main.cpp index 79f0d8956..92d630f8e 100644 --- a/samples/cpp/web-server-responses-vision/main.cpp +++ b/samples/cpp/web-server-responses-vision/main.cpp @@ -77,7 +77,11 @@ std::pair ResizeAndEncode(const std::filesystem::path& newW = (std::max)(newW, 1); newH = (std::max)(newH, 1); - std::vector resized(newW * newH * 3); + const auto resizedSize = + static_cast::size_type>(newW) * + static_cast::size_type>(newH) * + static_cast::size_type>(3); + std::vector resized(resizedSize); unsigned char* result = stbir_resize_uint8_linear( img, w, h, 0, resized.data(), newW, newH, 0, STBIR_RGB); stbi_image_free(img); From 37868ab28c7833ca6b59165cd2f32fffd5eca123 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 00:06:44 -0700 Subject: [PATCH 09/18] Add vision sample, auto-download NuGet deps, and SDK README --- .../CMakeLists.txt | 73 --- sdk/cpp/CMakeLists.txt | 113 +++++ sdk/cpp/README.md | 470 ++++++++++++++++++ sdk/cpp/cmake/FoundryLocalConfig.cmake | 46 ++ .../CMakeLists.txt | 33 ++ .../CMakePresets.json | 31 ++ .../web-server-responses-vision/README.md | 22 +- .../web-server-responses-vision/main.cpp | 0 .../web-server-responses-vision/stb_impl.cpp | 0 .../test_image.jpg | Bin .../vcpkg-configuration.json | 0 .../web-server-responses-vision/vcpkg.json | 0 sdk/cpp/test/model_variant_test.cpp | 2 +- sdk/cpp/vcpkg.json | 4 +- 14 files changed, 713 insertions(+), 81 deletions(-) delete mode 100644 samples/cpp/web-server-responses-vision/CMakeLists.txt create mode 100644 sdk/cpp/README.md create mode 100644 
sdk/cpp/cmake/FoundryLocalConfig.cmake create mode 100644 sdk/cpp/sample/web-server-responses-vision/CMakeLists.txt create mode 100644 sdk/cpp/sample/web-server-responses-vision/CMakePresets.json rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/README.md (87%) rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/main.cpp (100%) rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/stb_impl.cpp (100%) rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/test_image.jpg (100%) rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/vcpkg-configuration.json (100%) rename {samples/cpp => sdk/cpp/sample}/web-server-responses-vision/vcpkg.json (100%) diff --git a/samples/cpp/web-server-responses-vision/CMakeLists.txt b/samples/cpp/web-server-responses-vision/CMakeLists.txt deleted file mode 100644 index 4f8cb5f86..000000000 --- a/samples/cpp/web-server-responses-vision/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -cmake_minimum_required(VERSION 3.20) -project(web-server-responses-vision LANGUAGES CXX) - -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_STANDARD_REQUIRED ON) - -# Match the SDK's vcpkg triplet -set(VCPKG_TARGET_TRIPLET "x64-windows-static-md" CACHE STRING "") - -# ── Auto-download NuGet native dependencies ────────────────────────────── -# These provide the Foundry Local Core DLL and ONNX Runtime DLLs needed at -# runtime. Versions are kept in sync with the SDK's deps_versions.json. -set(CORE_VERSION "1.1.0") -set(ORT_VERSION "1.25.1") -set(ORTGENAI_VERSION "0.13.2") -set(NATIVE_DEPS_DIR "${CMAKE_CURRENT_BINARY_DIR}/_native_deps") - -function(ensure_nuget_package PKG_NAME PKG_VERSION) - set(PKG_DIR "${NATIVE_DEPS_DIR}/${PKG_NAME}.${PKG_VERSION}") - if(NOT EXISTS "${PKG_DIR}") - message(STATUS "Downloading ${PKG_NAME} ${PKG_VERSION}...") - find_program(NUGET_EXE nuget) - if(NOT NUGET_EXE) - message(FATAL_ERROR "nuget.exe not found on PATH. 
Install NuGet CLI: https://www.nuget.org/downloads") - endif() - execute_process( - COMMAND ${NUGET_EXE} install ${PKG_NAME} -Version ${PKG_VERSION} - -OutputDirectory ${NATIVE_DEPS_DIR} -NonInteractive - RESULT_VARIABLE result - ) - if(NOT result EQUAL 0) - message(FATAL_ERROR "Failed to download ${PKG_NAME} ${PKG_VERSION}") - endif() - endif() -endfunction() - -ensure_nuget_package("Microsoft.AI.Foundry.Local.Core" ${CORE_VERSION}) -ensure_nuget_package("Microsoft.ML.OnnxRuntime.Foundry" ${ORT_VERSION}) -ensure_nuget_package("Microsoft.ML.OnnxRuntimeGenAI.Foundry" ${ORTGENAI_VERSION}) - -# ── Copy runtime DLLs to build output after build ──────────────────────── -set(CORE_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.AI.Foundry.Local.Core.${CORE_VERSION}/runtimes/win-x64/native") -set(ORT_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntime.Foundry.${ORT_VERSION}/runtimes/win-x64/native") -set(ORTGENAI_DLL_DIR "${NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntimeGenAI.Foundry.${ORTGENAI_VERSION}/runtimes/win-x64/native") - -# Build the Foundry Local C++ SDK as a subdirectory -set(FOUNDRY_SDK_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../sdk/cpp") -set(BUILD_TESTING OFF CACHE BOOL "" FORCE) -add_subdirectory(${FOUNDRY_SDK_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CppSdk) - -# curl is installed by vcpkg via the SDK's vcpkg.json -find_package(CURL REQUIRED) - -add_executable(web-server-responses-vision main.cpp stb_impl.cpp) - -target_link_libraries(web-server-responses-vision PRIVATE - CppSdk - CURL::libcurl -) - -# Copy runtime DLLs next to the executable after build -add_custom_command(TARGET web-server-responses-vision POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${CORE_DLL_DIR}/Microsoft.AI.Foundry.Local.Core.dll" - $ - COMMAND ${CMAKE_COMMAND} -E copy_directory - "${ORT_DLL_DIR}" - $ - COMMAND ${CMAKE_COMMAND} -E copy_directory - "${ORTGENAI_DLL_DIR}" - $ - COMMENT "Copying runtime DLLs..." -) diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index 855e6be15..455b57b2c 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -74,6 +74,59 @@ if (UNIX) target_link_libraries(CppSdk PUBLIC ${CMAKE_DL_LIBS}) endif() +# ----------------------------- +# Native dependencies (NuGet) +# Auto-download Core DLL and ONNX Runtime DLLs needed at runtime. +# Versions are kept in sync with deps_versions.json. +# ----------------------------- +set(FL_CORE_VERSION "1.1.0" CACHE STRING "Microsoft.AI.Foundry.Local.Core NuGet version") +set(FL_ORT_VERSION "1.25.1" CACHE STRING "Microsoft.ML.OnnxRuntime.Foundry NuGet version") +set(FL_ORTGENAI_VERSION "0.13.2" CACHE STRING "Microsoft.ML.OnnxRuntimeGenAI.Foundry NuGet version") +set(FL_NATIVE_DEPS_DIR "${CMAKE_CURRENT_BINARY_DIR}/_native_deps") + +function(fl_ensure_nuget_package PKG_NAME PKG_VERSION) + set(PKG_DIR "${FL_NATIVE_DEPS_DIR}/${PKG_NAME}.${PKG_VERSION}") + if(NOT EXISTS "${PKG_DIR}") + message(STATUS "Downloading ${PKG_NAME} ${PKG_VERSION}...") + find_program(NUGET_EXE nuget) + if(NOT NUGET_EXE) + message(FATAL_ERROR "nuget.exe not found on PATH. 
Install NuGet CLI: https://www.nuget.org/downloads") + endif() + execute_process( + COMMAND ${NUGET_EXE} install ${PKG_NAME} -Version ${PKG_VERSION} + -OutputDirectory ${FL_NATIVE_DEPS_DIR} -NonInteractive + RESULT_VARIABLE result + ) + if(NOT result EQUAL 0) + message(FATAL_ERROR "Failed to download ${PKG_NAME} ${PKG_VERSION}") + endif() + endif() +endfunction() + +fl_ensure_nuget_package("Microsoft.AI.Foundry.Local.Core" ${FL_CORE_VERSION}) +fl_ensure_nuget_package("Microsoft.ML.OnnxRuntime.Foundry" ${FL_ORT_VERSION}) +fl_ensure_nuget_package("Microsoft.ML.OnnxRuntimeGenAI.Foundry" ${FL_ORTGENAI_VERSION}) + +set(FL_CORE_DLL_DIR "${FL_NATIVE_DEPS_DIR}/Microsoft.AI.Foundry.Local.Core.${FL_CORE_VERSION}/runtimes/win-x64/native") +set(FL_ORT_DLL_DIR "${FL_NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntime.Foundry.${FL_ORT_VERSION}/runtimes/win-x64/native") +set(FL_ORTGENAI_DLL_DIR "${FL_NATIVE_DEPS_DIR}/Microsoft.ML.OnnxRuntimeGenAI.Foundry.${FL_ORTGENAI_VERSION}/runtimes/win-x64/native") + +# Helper function: copy runtime DLLs next to any target that links CppSdk +function(fl_copy_runtime_dlls TARGET_NAME) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${FL_CORE_DLL_DIR}/Microsoft.AI.Foundry.Local.Core.dll" + $ + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${FL_ORT_DLL_DIR}" + $ + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${FL_ORTGENAI_DLL_DIR}" + $ + COMMENT "Copying Foundry Local runtime DLLs for ${TARGET_NAME}..." + ) +endfunction() + # ----------------------------- # Sample executable # ----------------------------- @@ -83,6 +136,32 @@ add_executable(CppSdkSample target_link_libraries(CppSdkSample PRIVATE CppSdk) +# Copy DLLs for the SDK sample +fl_copy_runtime_dlls(CppSdkSample) + +# ----------------------------- +# Vision sample (Responses API) — built if present +# ----------------------------- +set(VISION_SAMPLE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/sample/web-server-responses-vision") + +if(EXISTS "${VISION_SAMPLE_DIR}/main.cpp") + find_package(CURL QUIET) + if(CURL_FOUND) + add_executable(WebServerResponsesVision + ${VISION_SAMPLE_DIR}/main.cpp + ${VISION_SAMPLE_DIR}/stb_impl.cpp + ) + + target_link_libraries(WebServerResponsesVision PRIVATE CppSdk CURL::libcurl) + fl_copy_runtime_dlls(WebServerResponsesVision) + message(STATUS "Vision sample: enabled") + else() + message(STATUS "Vision sample: disabled (curl not found — add 'curl' to vcpkg.json)") + endif() +else() + message(STATUS "Vision sample: not found") +endif() + # ----------------------------- # Unit tests # ----------------------------- @@ -152,3 +231,37 @@ endif() # Make Visual Studio start/debug this target by default set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CppSdkSample) + +# ----------------------------- +# Install — produces the redistributable SDK zip layout: +# lib/CppSdk.lib +# include/... 
+# bin/*.dll (runtime DLLs) +# cmake/FoundryLocalConfig.cmake +# ----------------------------- +install(TARGETS CppSdk + ARCHIVE DESTINATION lib +) + +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ + DESTINATION include + FILES_MATCHING PATTERN "*.h" +) + +# Install runtime DLLs into bin/ +install(DIRECTORY "${FL_CORE_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") +install(DIRECTORY "${FL_ORT_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") +install(DIRECTORY "${FL_ORTGENAI_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") + +# Install CMake config file +install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/FoundryLocalConfig.cmake DESTINATION cmake) + +# Install vision sample into the zip +if(EXISTS "${VISION_SAMPLE_DIR}/main.cpp") + install(DIRECTORY "${VISION_SAMPLE_DIR}/" + DESTINATION sample/web-server-responses-vision + PATTERN "build" EXCLUDE + PATTERN "_native_deps" EXCLUDE + PATTERN "vcpkg_installed" EXCLUDE + ) +endif() diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md new file mode 100644 index 000000000..03f849088 --- /dev/null +++ b/sdk/cpp/README.md @@ -0,0 +1,470 @@ +# Foundry Local C++ SDK + +The Foundry Local C++ SDK provides a C++17 static library for running AI models locally via [Foundry Local](https://www.foundrylocal.ai/). Discover, download, load, and run inference entirely on your own machine — no cloud required. + +> **Windows-only** — requires MSVC or clang-cl (MSVC-compatible toolchain). + +## Features + +- **Model catalog** — browse and search all available models; filter by cached or loaded state +- **Lifecycle management** — download, load, unload, and remove models programmatically +- **Chat completions** — synchronous and streaming via OpenAI-compatible types +- **Audio transcription** — transcribe audio files with streaming support +- **Tool calling** — define tools and handle tool-call responses in chat completions +- **Download progress** — wire up a callback for real-time download percentage +- **Model variants** — select specific hardware/quantization variants per model alias +- **Optional web service** — start an OpenAI-compatible REST endpoint +- **Execution providers** — ensure EPs are downloaded and registered for hardware acceleration +- **Auto NuGet download** — CMake auto-downloads native runtime DLLs at configure time + +## Prerequisites + +| Requirement | Notes | +|---|---| +| **CMake >= 3.20** | Ships with Visual Studio 2022 | +| **Ninja** | Ships with Visual Studio 2022 | +| **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | +| **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | +| **NuGet CLI** | Required for auto-downloading native runtime DLLs. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | + +## Building from Source + +### 0. Open an x64 developer environment + +All commands below must run in a shell that has the x64 MSVC toolchain on the PATH. +Choose **one** of the following: + +| Method | How to open | +|---|---| +| **Developer Command Prompt** | Start Menu → *"x64 Native Tools Command Prompt for VS 2022"* | +| **Developer PowerShell** | Start Menu → *"Developer PowerShell for VS 2022"* | +| **Inside an existing cmd** | Run `"\VC\Auxiliary\Build\vcvars64.bat"` where `` is your Visual Studio installation path (e.g. 
`C:\Program Files\Microsoft Visual Studio\2022\Enterprise`) | +| **VS Code terminal** | Open the project folder in VS Code with the CMake Tools extension; it configures the environment automatically | + +Verify by running `cl.exe` — the banner should say **x64**. + +### 1. Clone & navigate + +```bash +git clone https://github.com/microsoft/Foundry-Local.git +cd Foundry-Local/sdk/cpp +``` + +### 2. Configure (CMake + vcpkg) + +```bash +cmake --preset x64-debug +``` + +This uses the `x64-debug` preset which: +- Uses the **Ninja** generator +- Resolves C++ dependencies via **vcpkg** (`nlohmann-json`, `ms-gsl`, `gtest`) +- Builds with the `x64-windows-static-md` triplet +- Auto-downloads native runtime DLLs via **NuGet**: + - `Microsoft.AI.Foundry.Local.Core` (1.1.0) — Foundry Local core runtime + - `Microsoft.ML.OnnxRuntime.Foundry` (1.25.1) — ONNX Runtime + - `Microsoft.ML.OnnxRuntimeGenAI.Foundry` (0.13.2) — ONNX Runtime GenAI + +NuGet packages are cached in `out/build//_native_deps/` and only downloaded on first configure. Runtime DLLs are automatically copied next to executables via post-build steps. + +### 3. Build + +```bash +cmake --build --preset x64-debug +``` + +### Release build + +```bash +cmake --preset x64-release +cmake --build --preset x64-release +ctest --preset x64-release +``` + +## Quick Start + +```cpp +#include "foundry_local.h" +#include + +using namespace foundry_local; + +int main() { + // 1. Create the manager + Manager::Create({"MyApp"}); + auto& manager = Manager::Instance(); + + // 2. Get a model from the catalog + auto& catalog = manager.GetCatalog(); + auto* model = catalog.GetModel("phi-3.5-mini"); + if (!model) return 1; + + // 3. Download (if needed) and load + model->Download(); + model->Load(); + + // 4. Chat + OpenAIChatClient chat(*model); + std::vector messages = {{"user", "Hello!"}}; + ChatSettings settings; + settings.max_tokens = 128; + + auto response = chat.CompleteChat(messages, settings); + if (!response.choices.empty() && response.choices[0].message) { + std::cout << response.choices[0].message->content << "\n"; + } + + // 5. Cleanup + model->Unload(); + Manager::Destroy(); +} +``` + +### Vision Sample (Responses API) + +A complete vision sample is included at `sample/web-server-responses-vision/`. It demonstrates image understanding using the Responses API with streaming via cURL. + +Build and run from the SDK root: + +```bash +cmake --preset x64-debug +cmake --build --preset x64-debug --target WebServerResponsesVision +.\out\build\x64-debug\WebServerResponsesVision.exe qwen3.5-0.8b +``` + +Or build standalone from the sample directory: + +```bash +cd sample/web-server-responses-vision +cmake --preset x64-debug +cmake --build --preset x64-debug +.\out\build\x64-debug\web-server-responses-vision.exe qwen3.5-0.8b +``` + +See [sample/web-server-responses-vision/README.md](sample/web-server-responses-vision/README.md) for full details. + +## Usage + +### Initialization + +`Manager` is a singleton. Call `Create` once at startup: + +```cpp +Manager::Create(Configuration{"MyApp"}, &myLogger); +``` + +Access it anywhere afterward via `Manager::Instance()`. Check `Manager::IsInitialized()` to verify creation. + +Call `Manager::Destroy()` to perform deterministic cleanup when done. 
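+
+For applications with several exit paths, pairing `Create` and `Destroy` in a small RAII guard keeps cleanup deterministic. A minimal sketch, assuming only the `Manager` API shown above; the `ManagerGuard` type is illustrative and not part of the SDK:
+
+```cpp
+#include "foundry_local.h"
+#include <utility>
+
+// Illustrative helper (not part of the SDK): keeps Create/Destroy balanced.
+struct ManagerGuard {
+    explicit ManagerGuard(foundry_local::Configuration config) {
+        foundry_local::Manager::Create(std::move(config));
+    }
+    ~ManagerGuard() {
+        if (foundry_local::Manager::IsInitialized()) {
+            foundry_local::Manager::Destroy();
+        }
+    }
+    ManagerGuard(const ManagerGuard&) = delete;
+    ManagerGuard& operator=(const ManagerGuard&) = delete;
+};
+
+int main() {
+    ManagerGuard guard{foundry_local::Configuration{"MyApp"}};
+    auto& manager = foundry_local::Manager::Instance();
+    // ... use manager (catalog, chat, web service) ...
+    return 0;  // Destroy() runs when guard goes out of scope
+}
+```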
+ +### Catalog + +The catalog lists all models known to the Foundry Local Core: + +```cpp +auto& catalog = Manager::Instance().GetCatalog(); + +// List all available models +auto models = catalog.GetModels(); +for (auto* m : models) + std::cout << m->GetAlias() << " — " << m->GetId() << "\n"; + +// Get a specific model by alias +auto* model = catalog.GetModel("phi-3.5-mini"); + +// Get a specific variant by its unique model ID +auto* variant = catalog.GetModelVariant("phi-3.5-mini-generic-gpu-4"); + +// List models already downloaded to the local cache +auto cached = catalog.GetCachedModels(); + +// List models currently loaded in memory +auto loaded = catalog.GetLoadedModels(); +``` + +### Model Lifecycle + +Each model may have multiple variants (different quantizations, hardware targets). The SDK auto-selects the best variant, or you can pick one. + +```cpp +// Check and select variants +if (auto* concrete = dynamic_cast(model)) { + for (const auto& v : concrete->GetVariants()) { + std::cout << v.GetId() << " (cached: " << v.IsCached() << ")\n"; + } + // Switch to a specific variant (e.g., CPU) + for (const auto& variant : concrete->GetVariants()) { + if (variant.GetInfo().runtime && + variant.GetInfo().runtime->device_type == DeviceType::CPU) { + concrete->SelectVariant(variant); + break; + } + } +} +``` + +Download, load, and unload: + +```cpp +// Download with progress reporting +model->Download([](float progress) { + std::cout << "Download: " << progress << "%\n"; +}); + +// Load into memory +model->Load(); + +// Unload when done +model->Unload(); + +// Remove from local cache entirely +model->RemoveFromCache(); +``` + +### Chat Completions + +```cpp +OpenAIChatClient chat(*model); + +std::vector messages = { + {"system", "You are a helpful assistant."}, + {"user", "Explain async/await in C#."} +}; +ChatSettings settings; + +auto response = chat.CompleteChat(messages, settings); +if (!response.choices.empty() && response.choices[0].message) { + std::cout << response.choices[0].message->content << "\n"; +} +``` + +### Streaming + +Use a callback for token-by-token output: + +```cpp +chat.CompleteChatStreaming(messages, settings, [](const ChatCompletionCreateResponse& chunk) { + if (!chunk.choices.empty() && chunk.choices[0].delta) { + std::cout << chunk.choices[0].delta->content << std::flush; + } +}); +``` + +### Chat Settings + +Tune generation parameters per request: + +```cpp +ChatSettings settings; +settings.temperature = 0.7f; +settings.max_tokens = 256; +settings.top_p = 0.9f; +settings.frequency_penalty = 0.5f; +``` + +### Audio Transcription + +```cpp +OpenAIAudioClient audio(*model); + +// One-shot transcription +auto result = audio.TranscribeAudio(R"(C:\path\to\audio.wav)"); +std::cout << result.text << "\n"; + +// Streaming transcription +audio.TranscribeAudioStreaming(R"(C:\path\to\audio.wav)", [](const AudioCreateTranscriptionResponse& chunk) { + std::cout << chunk.text; +}); +``` + +### Tool Calling + +See `sample/main.cpp` (Example 5) for a full tool-calling walkthrough. + +### Web Service + +Start an OpenAI-compatible REST endpoint for use by external tools or processes: + +```cpp +Configuration config{"MyApp"}; +config.web = WebServiceConfig{ "http://127.0.0.1:5000" }; +Manager::Create(std::move(config)); + +Manager::Instance().StartWebService(); +auto urls = Manager::Instance().GetWebServiceEndpoints(); +for (const auto& url : urls) + std::cout << "Listening on: " << url << "\n"; + +// ... use the service ... 
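+// (Sketch) At this point a separate process can issue OpenAI-compatible
+// HTTP requests against one of the printed URLs, for example a POST to
+// "<url>/v1/chat/completions". The exact routes are defined by the
+// Foundry Local web service, not by this SDK call.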
+ +Manager::Instance().StopWebService(); +``` + +### Execution Providers + +Ensure EPs are downloaded and registered for hardware acceleration: + +```cpp +Manager::Instance().EnsureEpsDownloaded(); +``` + +### Using the Prebuilt SDK (Zip) + +#### Creating the zip + +Build and install the SDK to produce the redistributable layout: + +```bash +cd sdk/cpp +cmake --preset x64-release +cmake --build --preset x64-release +cmake --install out/build/x64-release --prefix out/foundry-local-cpp-sdk +``` + +This creates: + +``` +out/foundry-local-cpp-sdk/ +├── include/ # Public headers +├── lib/CppSdk.lib # Prebuilt static library +├── bin/ # Runtime DLLs (Core, OnnxRuntime, OnnxRuntimeGenAI) +├── cmake/ # FoundryLocalConfig.cmake +└── README.md +``` + +Zip the `out/foundry-local-cpp-sdk/` folder and distribute. + +#### Using the zip in your project + +1. Unzip to a folder (e.g. `foundry-local-cpp-sdk/`) +2. In your `CMakeLists.txt`: + +```cmake +cmake_minimum_required(VERSION 3.20) +project(my-app) + +set(CMAKE_CXX_STANDARD 17) +set(VCPKG_TARGET_TRIPLET "x64-windows-static-md" CACHE STRING "") + +list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/foundry-local-cpp-sdk") +find_package(FoundryLocal REQUIRED) + +add_executable(my-app main.cpp) +target_link_libraries(my-app PRIVATE FoundryLocal::FoundryLocal) + +# Auto-copies Core DLL, ORT DLLs next to the exe +fl_copy_runtime_dlls(my-app) +``` + +3. Create a `vcpkg.json` with the required transitive dependencies: + +```json +{ + "dependencies": ["nlohmann-json", "ms-gsl"] +} +``` + +4. Build: + +```bash +cmake -G Ninja -B build -DCMAKE_TOOLCHAIN_FILE="%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake" +cmake --build build +``` + +### Using the SDK from Source + +Include the SDK via `add_subdirectory` (e.g. from the repo): + +```cmake +add_subdirectory(path/to/sdk/cpp ${CMAKE_CURRENT_BINARY_DIR}/CppSdk) + +add_executable(my_app main.cpp) +target_link_libraries(my_app PRIVATE CppSdk) +fl_copy_runtime_dlls(my_app) +``` + +## Configuration + +| Property | Type | Default | Description | +|---|---|---|---| +| `app_name` | `std::string` | (required) | Your application name | +| `app_data_dir` | `optional` | `~/.{app_name}` | Application data directory | +| `model_cache_dir` | `optional` | `{app_data_dir}/cache/models` | Where models are stored locally | +| `logs_dir` | `optional` | `{app_data_dir}/logs` | Log output directory | +| `log_level` | `LogLevel` | `Warning` | Verbose, Debug, Information, Warning, Error, Fatal | +| `web` | `optional` | `nullopt` | Web service configuration (see below) | +| `additional_settings` | `optional` | `nullopt` | Extra key-value settings passed to Core | + +**WebServiceConfig** + +| Property | Type | Default | Description | +|---|---|---|---| +| `urls` | `optional` | `127.0.0.1:0` | Bind address; semicolon-separated for multiple | +| `external_url` | `optional` | `nullopt` | URI for accessing the web service in a separate process | + +## API Reference + +Key types: + +| Type | Description | +|---|---| +| `Manager` | Singleton entry point — create, catalog, web service | +| `Configuration` | Initialization settings | +| `Catalog` | Model catalog — list, search, filter | +| `IModel` | Model interface — identity, metadata, lifecycle | +| `Model` | Model with variant selection (implements `IModel`) | +| `ModelVariant` | A specific variant of a model (implements `IModel`) | +| `OpenAIChatClient` | Chat completions (sync + streaming) | +| `OpenAIAudioClient` | Audio transcription (sync + streaming) | +| `ChatSettings` | Chat 
generation parameters | +| `ModelInfo` | Full model metadata record | + +## Tests + +```bash +ctest --preset x64-debug +``` + +Or run the test executable directly: + +```bash +.\out\build\x64-debug\CppSdkTests.exe +``` + +## Project Structure + +``` +sdk/cpp/ +├── include/ # Public headers +│ ├── foundry_local.h # Umbrella header (include this) +│ ├── configuration.h # Configuration struct +│ ├── foundry_local_manager.h # Manager singleton +│ ├── catalog.h # Model catalog +│ ├── model.h # Model & ModelVariant +│ ├── logger.h # ILogger interface +│ └── openai/ +│ ├── chat_client.h # Chat completion client +│ ├── audio_client.h # Audio transcription client +│ └── tool_types.h # Tool calling types +├── src/ # Private implementation +├── sample/ +│ └── main.cpp # Sample application +├── test/ # Unit & E2E tests (GTest) +├── CMakeLists.txt +├── CMakePresets.json +├── vcpkg.json # vcpkg dependencies +└── vcpkg-configuration.json +``` + +## Troubleshooting + +| Error | Cause | Fix | +|---|---|---| +| `DML provider requested, but GenAI has not been built with DML support` | GPU variant selected but ONNX Runtime GenAI lacks DML | Select a CPU variant or update Foundry Local | +| `OgaGenerator_TokenCount not found in onnxruntime-genai` | Version mismatch between Foundry Local components | Update NuGet package versions in CMakeLists.txt | +| `API version [N] is not available` | ONNX Runtime version too old for the Foundry Local service | Update NuGet package versions in CMakeLists.txt | +| `nuget.exe not found on PATH` | NuGet CLI not installed | Install from [nuget.org/downloads](https://www.nuget.org/downloads) | + +## License + +Licensed under the MIT License. diff --git a/sdk/cpp/cmake/FoundryLocalConfig.cmake b/sdk/cpp/cmake/FoundryLocalConfig.cmake new file mode 100644 index 000000000..ffd6e417a --- /dev/null +++ b/sdk/cpp/cmake/FoundryLocalConfig.cmake @@ -0,0 +1,46 @@ +# FoundryLocalConfig.cmake +# +# Imported target: FoundryLocal::FoundryLocal +# +# Usage in your CMakeLists.txt: +# list(APPEND CMAKE_PREFIX_PATH "") +# find_package(FoundryLocal REQUIRED) +# target_link_libraries(my_app PRIVATE FoundryLocal::FoundryLocal) +# fl_copy_runtime_dlls(my_app) + +get_filename_component(_FL_SDK_DIR "${CMAKE_CURRENT_LIST_DIR}/.." ABSOLUTE) + +# Create imported static library target +if(NOT TARGET FoundryLocal::FoundryLocal) + add_library(FoundryLocal::FoundryLocal STATIC IMPORTED) + + set_target_properties(FoundryLocal::FoundryLocal PROPERTIES + IMPORTED_LOCATION "${_FL_SDK_DIR}/lib/CppSdk.lib" + INTERFACE_INCLUDE_DIRECTORIES "${_FL_SDK_DIR}/include" + ) + + # Require nlohmann_json and GSL from vcpkg (consumer must have these) + find_package(nlohmann_json CONFIG REQUIRED) + find_package(Microsoft.GSL CONFIG REQUIRED) + + set_property(TARGET FoundryLocal::FoundryLocal APPEND PROPERTY + INTERFACE_LINK_LIBRARIES + nlohmann_json::nlohmann_json + Microsoft.GSL::GSL + ) +endif() + +# Runtime DLLs directory +set(FL_RUNTIME_DLL_DIR "${_FL_SDK_DIR}/bin") + +# Helper function: copy Foundry Local runtime DLLs next to an executable +function(fl_copy_runtime_dlls TARGET_NAME) + add_custom_command(TARGET ${TARGET_NAME} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + "${FL_RUNTIME_DLL_DIR}" + $ + COMMENT "Copying Foundry Local runtime DLLs for ${TARGET_NAME}..." 
+ ) +endfunction() + +set(FoundryLocal_FOUND TRUE) diff --git a/sdk/cpp/sample/web-server-responses-vision/CMakeLists.txt b/sdk/cpp/sample/web-server-responses-vision/CMakeLists.txt new file mode 100644 index 000000000..b6dbcbd04 --- /dev/null +++ b/sdk/cpp/sample/web-server-responses-vision/CMakeLists.txt @@ -0,0 +1,33 @@ +cmake_minimum_required(VERSION 3.20) +project(web-server-responses-vision LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Match the SDK's vcpkg triplet +set(VCPKG_TARGET_TRIPLET "x64-windows-static-md" CACHE STRING "") + +# Try find_package first (prebuilt SDK zip), fall back to add_subdirectory (repo) +find_package(FoundryLocal QUIET) +if(NOT FoundryLocal_FOUND) + message(STATUS "Prebuilt FoundryLocal not found — building SDK from source") + set(FOUNDRY_SDK_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../..") + set(BUILD_TESTING OFF CACHE BOOL "" FORCE) + add_subdirectory(${FOUNDRY_SDK_DIR} ${CMAKE_CURRENT_BINARY_DIR}/CppSdk) + set(FL_SDK_TARGET CppSdk) +else() + message(STATUS "Using prebuilt FoundryLocal SDK") + set(FL_SDK_TARGET FoundryLocal::FoundryLocal) +endif() + +find_package(CURL REQUIRED) + +add_executable(web-server-responses-vision main.cpp stb_impl.cpp) + +target_link_libraries(web-server-responses-vision PRIVATE + ${FL_SDK_TARGET} + CURL::libcurl +) + +# Copy runtime DLLs next to the executable +fl_copy_runtime_dlls(web-server-responses-vision) diff --git a/sdk/cpp/sample/web-server-responses-vision/CMakePresets.json b/sdk/cpp/sample/web-server-responses-vision/CMakePresets.json new file mode 100644 index 000000000..1ec183fe2 --- /dev/null +++ b/sdk/cpp/sample/web-server-responses-vision/CMakePresets.json @@ -0,0 +1,31 @@ +{ + "version": 6, + "configurePresets": [ + { + "name": "x64-debug", + "displayName": "MSVC x64 Debug", + "inherits": [], + "generator": "Ninja", + "binaryDir": "${sourceDir}/out/build/${presetName}", + "toolchainFile": "$env{VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake", + "cacheVariables": { + "CMAKE_C_COMPILER": "cl.exe", + "CMAKE_CXX_COMPILER": "cl.exe", + "CMAKE_BUILD_TYPE": "Debug", + "VCPKG_TARGET_TRIPLET": "x64-windows-static-md", + "VCPKG_OVERLAY_TRIPLETS": "${sourceDir}/../../triplets" + }, + "condition": { + "type": "equals", + "lhs": "${hostSystemName}", + "rhs": "Windows" + } + } + ], + "buildPresets": [ + { + "name": "x64-debug", + "configurePreset": "x64-debug" + } + ] +} diff --git a/samples/cpp/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md similarity index 87% rename from samples/cpp/web-server-responses-vision/README.md rename to sdk/cpp/sample/web-server-responses-vision/README.md index dd56f3db5..8e4c028e1 100644 --- a/samples/cpp/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -15,11 +15,11 @@ This sample demonstrates vision (image understanding) capabilities using the Fou | Requirement | Notes | |---|---| -| **Foundry Local / AI Toolkit** | Install via `winget install Microsoft.AIToolkit` or the VS Code AI Toolkit extension | | **CMake >= 3.20** | Ships with Visual Studio 2022 | | **Ninja** | Ships with Visual Studio 2022 | | **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | | **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | +| **NuGet CLI** | Auto-downloaded by CMake. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | The sample downloads the specified model the first time it runs (skips if already cached). 
@@ -28,9 +28,19 @@ The sample downloads the specified model the first time it runs (skips if alread Open an **x64 Native Tools Command Prompt for VS 2022** (or run `vcvars64.bat`), then navigate to the sample directory: ```bash -cd samples/cpp/web-server-responses-vision -cmake -G Ninja -B build -DCMAKE_TOOLCHAIN_FILE="%VCPKG_ROOT%/scripts/buildsystems/vcpkg.cmake" -DVCPKG_TARGET_TRIPLET=x64-windows-static-md -cmake --build build +cd sdk/cpp/sample/web-server-responses-vision +``` + +### Configure (CMake + vcpkg) + +```bash +cmake --preset x64-debug +``` + +### Build + +```bash +cmake --build --preset x64-debug ``` CMake will automatically: @@ -38,14 +48,14 @@ CMake will automatically: - Download the required NuGet packages (`Microsoft.AI.Foundry.Local.Core`, `Microsoft.ML.OnnxRuntime.Foundry`, `Microsoft.ML.OnnxRuntimeGenAI.Foundry`) - Copy runtime DLLs next to the executable after build -The built executable will be at `build/web-server-responses-vision.exe`. +The built executable will be at `out/build/x64-debug/web-server-responses-vision.exe`. > **Note:** `nuget.exe` must be on the PATH for auto-download to work. Install from [nuget.org/downloads](https://www.nuget.org/downloads). ## Run the sample ```bash -.\build\web-server-responses-vision.exe qwen3.5-0.8b +.\out\build\x64-debug\web-server-responses-vision.exe qwen3.5-0.8b ``` The sample starts the local web service, sends vision requests via the Responses API to `http://localhost:/v1`, prints the model output, and then stops the web service. diff --git a/samples/cpp/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp similarity index 100% rename from samples/cpp/web-server-responses-vision/main.cpp rename to sdk/cpp/sample/web-server-responses-vision/main.cpp diff --git a/samples/cpp/web-server-responses-vision/stb_impl.cpp b/sdk/cpp/sample/web-server-responses-vision/stb_impl.cpp similarity index 100% rename from samples/cpp/web-server-responses-vision/stb_impl.cpp rename to sdk/cpp/sample/web-server-responses-vision/stb_impl.cpp diff --git a/samples/cpp/web-server-responses-vision/test_image.jpg b/sdk/cpp/sample/web-server-responses-vision/test_image.jpg similarity index 100% rename from samples/cpp/web-server-responses-vision/test_image.jpg rename to sdk/cpp/sample/web-server-responses-vision/test_image.jpg diff --git a/samples/cpp/web-server-responses-vision/vcpkg-configuration.json b/sdk/cpp/sample/web-server-responses-vision/vcpkg-configuration.json similarity index 100% rename from samples/cpp/web-server-responses-vision/vcpkg-configuration.json rename to sdk/cpp/sample/web-server-responses-vision/vcpkg-configuration.json diff --git a/samples/cpp/web-server-responses-vision/vcpkg.json b/sdk/cpp/sample/web-server-responses-vision/vcpkg.json similarity index 100% rename from samples/cpp/web-server-responses-vision/vcpkg.json rename to sdk/cpp/sample/web-server-responses-vision/vcpkg.json diff --git a/sdk/cpp/test/model_variant_test.cpp b/sdk/cpp/test/model_variant_test.cpp index 7544a8e1f..c631f8ff3 100644 --- a/sdk/cpp/test/model_variant_test.cpp +++ b/sdk/cpp/test/model_variant_test.cpp @@ -143,7 +143,7 @@ TEST_F(ModelVariantTest, Download_WithCallback_ReturnsZeroToContinue) { }); auto variant = MakeVariant("test-model"); - variant.Download([&](float) {}); + variant.Download([&](float) { return true; }); } TEST_F(ModelVariantTest, RemoveFromCache_CallsCore) { diff --git a/sdk/cpp/vcpkg.json b/sdk/cpp/vcpkg.json index 459a72c15..6845f8eed 100644 --- a/sdk/cpp/vcpkg.json +++ 
b/sdk/cpp/vcpkg.json @@ -4,6 +4,8 @@ "dependencies": [ "nlohmann-json", "ms-gsl", - "gtest" + "gtest", + "curl", + "stb" ] } From 1d19fc9dea993dd1ab9944ea0cc8c46d7d3ae114 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 00:10:43 -0700 Subject: [PATCH 10/18] remove unused imports --- sdk/cpp/sample/web-server-responses-vision/main.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index 92d630f8e..d73a6bc4c 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -2,10 +2,8 @@ // #include #include -#include #include #include -#include #include #include #include @@ -216,6 +214,7 @@ int main(int argc, char* argv[]) { std::cout << " " << m->GetAlias(); } std::cout << std::endl; + curl_global_cleanup(); return 1; } From aac8956b803672d220402b7e181f954a5204a826 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 00:24:49 -0700 Subject: [PATCH 11/18] Address copilot comments --- sdk/cpp/CMakeLists.txt | 25 +++++++--- sdk/cpp/README.md | 9 ++++ sdk/cpp/cmake/FoundryLocalConfig.cmake | 50 +++++++++++++++++-- .../web-server-responses-vision/README.md | 6 +-- .../web-server-responses-vision/main.cpp | 16 +++--- sdk/deps_versions.json | 4 +- 6 files changed, 87 insertions(+), 23 deletions(-) diff --git a/sdk/cpp/CMakeLists.txt b/sdk/cpp/CMakeLists.txt index 455b57b2c..83efb81e7 100644 --- a/sdk/cpp/CMakeLists.txt +++ b/sdk/cpp/CMakeLists.txt @@ -75,10 +75,12 @@ if (UNIX) endif() # ----------------------------- -# Native dependencies (NuGet) +# Native dependencies (NuGet) — Windows only # Auto-download Core DLL and ONNX Runtime DLLs needed at runtime. # Versions are kept in sync with deps_versions.json. 
# ----------------------------- +if(WIN32) + set(FL_CORE_VERSION "1.1.0" CACHE STRING "Microsoft.AI.Foundry.Local.Core NuGet version") set(FL_ORT_VERSION "1.25.1" CACHE STRING "Microsoft.ML.OnnxRuntime.Foundry NuGet version") set(FL_ORTGENAI_VERSION "0.13.2" CACHE STRING "Microsoft.ML.OnnxRuntimeGenAI.Foundry NuGet version") @@ -127,6 +129,14 @@ function(fl_copy_runtime_dlls TARGET_NAME) ) endfunction() +else() # Non-Windows: provide a no-op fl_copy_runtime_dlls + +function(fl_copy_runtime_dlls TARGET_NAME) + # No DLL copying needed on non-Windows platforms +endfunction() + +endif() # WIN32 + # ----------------------------- # Sample executable # ----------------------------- @@ -240,7 +250,7 @@ set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} # cmake/FoundryLocalConfig.cmake # ----------------------------- install(TARGETS CppSdk - ARCHIVE DESTINATION lib + ARCHIVE DESTINATION $,lib/debug,lib> ) install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ @@ -248,10 +258,12 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/ FILES_MATCHING PATTERN "*.h" ) -# Install runtime DLLs into bin/ -install(DIRECTORY "${FL_CORE_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") -install(DIRECTORY "${FL_ORT_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") -install(DIRECTORY "${FL_ORTGENAI_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") +# Install runtime DLLs into bin/ (Windows only) +if(WIN32) + install(DIRECTORY "${FL_CORE_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") + install(DIRECTORY "${FL_ORT_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") + install(DIRECTORY "${FL_ORTGENAI_DLL_DIR}/" DESTINATION bin FILES_MATCHING PATTERN "*.dll") +endif() # Install CMake config file install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/FoundryLocalConfig.cmake DESTINATION cmake) @@ -261,6 +273,7 @@ if(EXISTS "${VISION_SAMPLE_DIR}/main.cpp") install(DIRECTORY "${VISION_SAMPLE_DIR}/" DESTINATION sample/web-server-responses-vision PATTERN "build" EXCLUDE + PATTERN "out" EXCLUDE PATTERN "_native_deps" EXCLUDE PATTERN "vcpkg_installed" EXCLUDE ) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 03f849088..9472b4aae 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -316,9 +316,16 @@ Build and install the SDK to produce the redistributable layout: ```bash cd sdk/cpp + +# Release build cmake --preset x64-release cmake --build --preset x64-release cmake --install out/build/x64-release --prefix out/foundry-local-cpp-sdk + +# Optional: also install Debug lib for consumers who need Debug builds +cmake --preset x64-debug +cmake --build --preset x64-debug +cmake --install out/build/x64-debug --prefix out/foundry-local-cpp-sdk ``` This creates: @@ -371,6 +378,8 @@ cmake -G Ninja -B build -DCMAKE_TOOLCHAIN_FILE="%VCPKG_ROOT%/scripts/buildsystem cmake --build build ``` +> **Note:** Match your build type to the SDK's. If the zip only contains a Release lib, build your project in Release (`-DCMAKE_BUILD_TYPE=Release`) to avoid MSVC runtime-library mismatches. If both Debug and Release libs are included, CMake selects the correct one automatically. + ### Using the SDK from Source Include the SDK via `add_subdirectory` (e.g. from the repo): diff --git a/sdk/cpp/cmake/FoundryLocalConfig.cmake b/sdk/cpp/cmake/FoundryLocalConfig.cmake index ffd6e417a..fe1fe84e8 100644 --- a/sdk/cpp/cmake/FoundryLocalConfig.cmake +++ b/sdk/cpp/cmake/FoundryLocalConfig.cmake @@ -10,14 +10,56 @@ get_filename_component(_FL_SDK_DIR "${CMAKE_CURRENT_LIST_DIR}/.." 
ABSOLUTE) +# Validate SDK layout +if(NOT EXISTS "${_FL_SDK_DIR}/include/foundry_local.h") + message(FATAL_ERROR + "FoundryLocal SDK incomplete: include/foundry_local.h not found at ${_FL_SDK_DIR}/include/. " + "Ensure CMAKE_PREFIX_PATH points to the correct SDK directory." + ) +endif() + +if(NOT EXISTS "${_FL_SDK_DIR}/lib/CppSdk.lib" AND NOT EXISTS "${_FL_SDK_DIR}/lib/debug/CppSdk.lib") + message(FATAL_ERROR + "FoundryLocal SDK incomplete: CppSdk.lib not found at ${_FL_SDK_DIR}/lib/. " + "Build and install the SDK first: cmake --install out/build/x64-release --prefix " + ) +endif() + +if(WIN32 AND NOT EXISTS "${_FL_SDK_DIR}/bin") + message(FATAL_ERROR + "FoundryLocal SDK incomplete: bin/ directory not found at ${_FL_SDK_DIR}/bin/. " + "Runtime DLLs are required. Rebuild and install the SDK." + ) +endif() + # Create imported static library target if(NOT TARGET FoundryLocal::FoundryLocal) add_library(FoundryLocal::FoundryLocal STATIC IMPORTED) - set_target_properties(FoundryLocal::FoundryLocal PROPERTIES - IMPORTED_LOCATION "${_FL_SDK_DIR}/lib/CppSdk.lib" - INTERFACE_INCLUDE_DIRECTORIES "${_FL_SDK_DIR}/include" - ) + # Support both Debug and Release libs if available + if(EXISTS "${_FL_SDK_DIR}/lib/debug/CppSdk.lib") + set_target_properties(FoundryLocal::FoundryLocal PROPERTIES + IMPORTED_LOCATION_RELEASE "${_FL_SDK_DIR}/lib/CppSdk.lib" + IMPORTED_LOCATION_DEBUG "${_FL_SDK_DIR}/lib/debug/CppSdk.lib" + INTERFACE_INCLUDE_DIRECTORIES "${_FL_SDK_DIR}/include" + ) + elseif(EXISTS "${_FL_SDK_DIR}/lib/CppSdk.lib") + # Single-config: warn if consumer build type doesn't match + if(CMAKE_BUILD_TYPE AND NOT CMAKE_BUILD_TYPE STREQUAL "Release") + message(WARNING + "FoundryLocal SDK was built in Release mode. " + "Linking with CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} may cause " + "MSVC runtime-library mismatches. Consider using Release or " + "rebuilding the SDK in Debug mode." + ) + endif() + set_target_properties(FoundryLocal::FoundryLocal PROPERTIES + IMPORTED_LOCATION "${_FL_SDK_DIR}/lib/CppSdk.lib" + INTERFACE_INCLUDE_DIRECTORIES "${_FL_SDK_DIR}/include" + ) + else() + message(FATAL_ERROR "FoundryLocal SDK library not found at ${_FL_SDK_DIR}/lib/") + endif() # Require nlohmann_json and GSL from vcpkg (consumer must have these) find_package(nlohmann_json CONFIG REQUIRED) diff --git a/sdk/cpp/sample/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md index 8e4c028e1..a3758af3d 100644 --- a/sdk/cpp/sample/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -19,7 +19,7 @@ This sample demonstrates vision (image understanding) capabilities using the Fou | **Ninja** | Ships with Visual Studio 2022 | | **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | | **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | -| **NuGet CLI** | Auto-downloaded by CMake. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | +| **NuGet CLI** | Must be on PATH. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | The sample downloads the specified model the first time it runs (skips if already cached). @@ -50,8 +50,6 @@ CMake will automatically: The built executable will be at `out/build/x64-debug/web-server-responses-vision.exe`. -> **Note:** `nuget.exe` must be on the PATH for auto-download to work. Install from [nuget.org/downloads](https://www.nuget.org/downloads). 
- ## Run the sample ```bash @@ -74,7 +72,7 @@ The sample starts the local web service, sends vision requests via the Responses | Error | Cause | Fix | |---|---|---| -| `Cannot open file: test_image.jpg` | Default image not found | Ensure `test_image.jpg` is present next to the source file | +| `Failed to load image: ` | Default image not found | Ensure `test_image.jpg` is present next to the source file | | `Model 'xyz' not found in catalog` | Invalid model alias | Check available models printed in the error output | | `WebGPU execution provider is not supported` | WebGPUExecutionProvider not available | WebGPU models are not supported yet; the sample automatically falls back to the CPU variant | | cURL connection refused | Web service failed to start | Ensure `config.web` is set and no port conflicts exist | diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index d73a6bc4c..aea9b0c48 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -192,7 +192,11 @@ int main(int argc, char* argv[]) { const std::filesystem::path imagePath = std::filesystem::path(__FILE__).parent_path() / "test_image.jpg"; - curl_global_init(CURL_GLOBAL_DEFAULT); + CURLcode globalRes = curl_global_init(CURL_GLOBAL_DEFAULT); + if (globalRes != CURLE_OK) { + std::cerr << "Error: curl_global_init failed: " << curl_easy_strerror(globalRes) << std::endl; + return 1; + } try { // @@ -208,14 +212,12 @@ int main(int argc, char* argv[]) { auto* model = catalog.GetModel(modelAlias); if (!model) { auto models = catalog.GetModels(); - std::cout << "\nModel '" << modelAlias << "' not found in catalog." << std::endl; - std::cout << "Available models:"; + std::string available; for (auto* m : models) { - std::cout << " " << m->GetAlias(); + available += " " + m->GetAlias(); } - std::cout << std::endl; - curl_global_cleanup(); - return 1; + throw std::runtime_error( + "Model '" + modelAlias + "' not found in catalog. Available:" + available); } if (!model->IsCached()) { diff --git a/sdk/deps_versions.json b/sdk/deps_versions.json index 4a7795af3..19c318ebe 100644 --- a/sdk/deps_versions.json +++ b/sdk/deps_versions.json @@ -1,7 +1,7 @@ { "foundry-local-core": { - "nuget": "1.0.0", - "python": "1.0.0" + "nuget": "1.1.0", + "python": "1.1.0" }, "onnxruntime": { "version": "1.25.1" From 8a13c8daf135be9e191641e4865c05882b7bd4c5 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 09:53:41 -0700 Subject: [PATCH 12/18] add DiscoverEps and DownloadAndRegisterEps to Manager --- sdk/cpp/README.md | 30 ++++- sdk/cpp/include/foundry_local_manager.h | 35 ++++++ sdk/cpp/sample/main.cpp | 25 ++++ .../web-server-responses-vision/README.md | 13 ++- .../web-server-responses-vision/main.cpp | 45 +++++-- sdk/cpp/src/foundry_local_manager.cpp | 110 ++++++++++++++++++ 6 files changed, 241 insertions(+), 17 deletions(-) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 9472b4aae..348ebae97 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -302,12 +302,38 @@ Manager::Instance().StopWebService(); ### Execution Providers -Ensure EPs are downloaded and registered for hardware acceleration: +Discover and download execution providers for hardware acceleration: ```cpp -Manager::Instance().EnsureEpsDownloaded(); +// Discover available EPs +auto eps = manager.DiscoverEps(); +for (const auto& ep : eps) { + std::cout << ep.name << " — registered: " << (ep.is_registered ? 
"yes" : "no") << "\n"; +} + +// Download and register all EPs with progress +std::string currentEp; +auto result = manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) std::cout << "\n"; + currentEp = epName; + } + std::cout << "\r " << epName << " " << percent << "%" << std::flush; +}); +std::cout << "\n"; + +// Or download specific EPs only +auto result2 = manager.DownloadAndRegisterEps({"WebGPUExecutionProvider"}); + +// Check results +if (result.success) { + for (const auto& ep : result.registered_eps) + std::cout << "Registered: " << ep << "\n"; +} ``` +The legacy `EnsureEpsDownloaded()` method is also available but does not support per-EP progress or selective download. + ### Using the Prebuilt SDK (Zip) #### Creating the zip diff --git a/sdk/cpp/include/foundry_local_manager.h b/sdk/cpp/include/foundry_local_manager.h index 074f5673f..cef09e902 100644 --- a/sdk/cpp/include/foundry_local_manager.h +++ b/sdk/cpp/include/foundry_local_manager.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -20,6 +22,23 @@ namespace foundry_local::Internal { namespace foundry_local { + /// Information about a discoverable execution provider. + struct EpInfo { + std::string name; + bool is_registered = false; + }; + + /// Result of an EP download and registration operation. + struct EpDownloadResult { + bool success = false; + std::string status; + std::vector registered_eps; + std::vector failed_eps; + }; + + /// Callback for EP download progress. Parameters: (ep_name, percent 0-100). + using EpProgressCallback = std::function; + class Manager final { public: Manager(const Manager&) = delete; @@ -63,6 +82,22 @@ namespace foundry_local { /// Once downloaded, EPs are not re-downloaded unless a new version is available. void EnsureEpsDownloaded() const; + /// Discover available execution providers and their registration status. + /// @return Vector of EpInfo describing each available EP. + std::vector DiscoverEps() const; + + /// Download and register all available execution providers. + /// @param progressCallback Optional callback invoked with (ep_name, percent) during download. + /// @return Result describing which EPs were registered or failed. + EpDownloadResult DownloadAndRegisterEps(EpProgressCallback progressCallback = nullptr) const; + + /// Download and register specific execution providers by name. + /// @param names EP names to download (as returned by DiscoverEps). + /// @param progressCallback Optional callback invoked with (ep_name, percent) during download. + /// @return Result describing which EPs were registered or failed. 
+ EpDownloadResult DownloadAndRegisterEps(const std::vector& names, + EpProgressCallback progressCallback = nullptr) const; + private: explicit Manager(Configuration configuration, ILogger* logger); ~Manager(); diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 8926a6c7e..8df941298 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -3,6 +3,7 @@ #include "foundry_local.h" +#include #include #include #include @@ -379,6 +380,30 @@ int main(int argc, char* argv[]) { Manager::Create({"SampleApp"}, &logger); auto& manager = Manager::Instance(); + // Discover and download execution providers + auto eps = manager.DiscoverEps(); + std::cout << "\nAvailable execution providers:\n"; + for (const auto& ep : eps) { + std::cout << " " << ep.name << "\n"; + } + + if (!eps.empty()) { + std::cout << "\nDownloading execution providers:\n"; + std::string currentEp; + manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) std::cout << "\n"; + currentEp = epName; + } + std::cout << "\r " << std::left << std::setw(30) << epName + << " " << std::right << std::fixed << std::setprecision(1) + << std::setw(6) << percent << "% " << std::flush; + }); + if (!currentEp.empty()) std::cout << "\n"; + } else { + std::cout << "\nNo execution providers to download.\n"; + } + // 1. Browse the full catalog try { BrowseCatalog(manager); diff --git a/sdk/cpp/sample/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md index a3758af3d..723da69dc 100644 --- a/sdk/cpp/sample/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -61,12 +61,13 @@ The sample starts the local web service, sends vision requests via the Responses ## How it works 1. **Initialize** — creates the `Manager` singleton with web service configuration -2. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory -3. **Web service** — starts the local Foundry web service on a random port -4. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG -5. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts -6. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive -7. **Cleanup** — stops the web service, unloads the model, and destroys the manager +2. **Execution providers** — discovers available EPs via `DiscoverEps()` and downloads them with per-EP progress via `DownloadAndRegisterEps()` +3. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory +4. **Web service** — starts the local Foundry web service on a random port +5. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG +6. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts +7. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive +8. 
**Cleanup** — stops the web service, unloads the model, and destroys the manager ## Troubleshooting diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index aea9b0c48..66e05b9ba 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -18,10 +19,6 @@ #include "foundry_local.h" // -#ifdef _WIN32 -#include -#endif - using json = nlohmann::json; // ─── Base64 encoding ──────────────────────────────────────────────────────── @@ -33,17 +30,18 @@ std::string Base64Encode(const std::vector& data) { std::string out; out.reserve(((data.size() + 2) / 3) * 4); size_t i = 0; - while (i < data.size()) { - uint32_t octet_a = i < data.size() ? data[i++] : 0; - uint32_t octet_b = i < data.size() ? data[i++] : 0; - uint32_t octet_c = i < data.size() ? data[i++] : 0; + const size_t len = data.size(); + while (i < len) { + uint32_t octet_a = data[i++]; + uint32_t octet_b = (i < len) ? data[i++] : 0; + uint32_t octet_c = (i < len) ? data[i++] : 0; uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c; out.push_back(kBase64Chars[(triple >> 18) & 0x3F]); out.push_back(kBase64Chars[(triple >> 12) & 0x3F]); out.push_back(kBase64Chars[(triple >> 6) & 0x3F]); out.push_back(kBase64Chars[triple & 0x3F]); } - size_t mod = data.size() % 3; + size_t mod = len % 3; if (mod == 1) { out[out.size() - 2] = '='; out[out.size() - 1] = '='; @@ -205,6 +203,35 @@ int main(int argc, char* argv[]) { foundry_local::Manager::Create(config); auto& manager = foundry_local::Manager::Instance(); + + // Discover and download execution providers (like C# sample) + auto eps = manager.DiscoverEps(); + std::cout << "\nAvailable execution providers:" << std::endl; + for (const auto& ep : eps) { + std::cout << " " << ep.name << std::endl; + } + + if (!eps.empty()) { + std::cout << "\nDownloading execution providers:" << std::endl; + std::string currentEp; + manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) { + std::cout << std::endl; + } + currentEp = epName; + } + // Fixed-width output to overwrite previous line cleanly + std::cout << "\r " << std::left << std::setw(30) << epName + << " " << std::right << std::fixed << std::setprecision(1) + << std::setw(6) << percent << "% " << std::flush; + }); + if (!currentEp.empty()) { + std::cout << std::endl; + } + } else { + std::cout << "\nNo execution providers to download." 
<< std::endl; + } // // diff --git a/sdk/cpp/src/foundry_local_manager.cpp b/sdk/cpp/src/foundry_local_manager.cpp index dfaef291a..cf3d44c68 100644 --- a/sdk/cpp/src/foundry_local_manager.cpp +++ b/sdk/cpp/src/foundry_local_manager.cpp @@ -5,8 +5,10 @@ #include #include #include +#include #include +#include #include "foundry_local.h" #include "foundry_local_internal_core.h" @@ -136,6 +138,114 @@ void Manager::Cleanup() noexcept { } } + std::vector Manager::DiscoverEps() const { + auto response = core_->call("discover_eps", *logger_); + if (response.HasError()) { + throw Exception(std::string("Error discovering execution providers: ") + response.error, *logger_); + } + + std::vector result; + if (response.data.empty()) { + return result; + } + + auto json = nlohmann::json::parse(response.data, nullptr, false); + if (json.is_discarded() || !json.is_array()) { + return result; + } + + for (const auto& item : json) { + EpInfo ep; + ep.name = item.value("Name", ""); + ep.is_registered = item.value("IsRegistered", false); + result.push_back(std::move(ep)); + } + return result; + } + + namespace { + struct EpCallbackContext { + EpProgressCallback* callback; + }; + + int EpProgressNativeCallback(void* data, int32_t dataLength, void* userData) { + auto* ctx = static_cast(userData); + if (!ctx || !ctx->callback || !*ctx->callback) return 0; + + std::string progressStr(static_cast(data), static_cast(dataLength)); + auto sepIndex = progressStr.find('|'); + if (sepIndex != std::string::npos) { + std::string name = progressStr.substr(0, sepIndex); + try { + double percent = std::stod(progressStr.substr(sepIndex + 1)); + (*ctx->callback)(name, percent); + } catch (...) { + // Skip malformed progress strings + } + } + return 0; + } + } + + EpDownloadResult Manager::DownloadAndRegisterEps(EpProgressCallback progressCallback) const { + return DownloadAndRegisterEps({}, std::move(progressCallback)); + } + + EpDownloadResult Manager::DownloadAndRegisterEps(const std::vector& names, + EpProgressCallback progressCallback) const { + std::string requestData; + std::string* requestDataPtr = nullptr; + + if (!names.empty()) { + CoreInteropRequest request("download_and_register_eps"); + std::string namesList; + for (size_t i = 0; i < names.size(); ++i) { + if (i > 0) namesList += ","; + namesList += names[i]; + } + request.AddParam("Names", namesList); + requestData = request.ToJson(); + requestDataPtr = &requestData; + } + + CoreResponse response; + if (progressCallback) { + EpCallbackContext ctx{&progressCallback}; + response = core_->call("download_and_register_eps", *logger_, + requestDataPtr, EpProgressNativeCallback, &ctx); + } else { + response = core_->call("download_and_register_eps", *logger_, requestDataPtr); + } + + if (response.HasError()) { + throw Exception(std::string("Error downloading execution providers: ") + response.error, *logger_); + } + + EpDownloadResult result; + if (!response.data.empty()) { + auto json = nlohmann::json::parse(response.data, nullptr, false); + if (!json.is_discarded()) { + result.success = json.value("Success", false); + result.status = json.value("Status", ""); + if (json.contains("RegisteredEps") && json["RegisteredEps"].is_array()) { + for (const auto& ep : json["RegisteredEps"]) { + result.registered_eps.push_back(ep.get()); + } + } + if (json.contains("FailedEps") && json["FailedEps"].is_array()) { + for (const auto& ep : json["FailedEps"]) { + result.failed_eps.push_back(ep.get()); + } + } + } + } else { + result.success = true; + result.status = "Completed"; + } + 
+ return result; + } + void Manager::Initialize() { config_.Validate(); From 9cfead65ea8b03fde7ac2229a45b50d7bde3ea67 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 10:12:11 -0700 Subject: [PATCH 13/18] Revert "add DiscoverEps and DownloadAndRegisterEps to Manager" This reverts commit 8a13c8daf135be9e191641e4865c05882b7bd4c5. --- sdk/cpp/README.md | 30 +---- sdk/cpp/include/foundry_local_manager.h | 35 ------ sdk/cpp/sample/main.cpp | 25 ---- .../web-server-responses-vision/README.md | 13 +-- .../web-server-responses-vision/main.cpp | 45 ++----- sdk/cpp/src/foundry_local_manager.cpp | 110 ------------------ 6 files changed, 17 insertions(+), 241 deletions(-) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 348ebae97..9472b4aae 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -302,38 +302,12 @@ Manager::Instance().StopWebService(); ### Execution Providers -Discover and download execution providers for hardware acceleration: +Ensure EPs are downloaded and registered for hardware acceleration: ```cpp -// Discover available EPs -auto eps = manager.DiscoverEps(); -for (const auto& ep : eps) { - std::cout << ep.name << " — registered: " << (ep.is_registered ? "yes" : "no") << "\n"; -} - -// Download and register all EPs with progress -std::string currentEp; -auto result = manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { - if (epName != currentEp) { - if (!currentEp.empty()) std::cout << "\n"; - currentEp = epName; - } - std::cout << "\r " << epName << " " << percent << "%" << std::flush; -}); -std::cout << "\n"; - -// Or download specific EPs only -auto result2 = manager.DownloadAndRegisterEps({"WebGPUExecutionProvider"}); - -// Check results -if (result.success) { - for (const auto& ep : result.registered_eps) - std::cout << "Registered: " << ep << "\n"; -} +Manager::Instance().EnsureEpsDownloaded(); ``` -The legacy `EnsureEpsDownloaded()` method is also available but does not support per-EP progress or selective download. - ### Using the Prebuilt SDK (Zip) #### Creating the zip diff --git a/sdk/cpp/include/foundry_local_manager.h b/sdk/cpp/include/foundry_local_manager.h index cef09e902..074f5673f 100644 --- a/sdk/cpp/include/foundry_local_manager.h +++ b/sdk/cpp/include/foundry_local_manager.h @@ -6,8 +6,6 @@ #include #include #include -#include -#include #include #include @@ -22,23 +20,6 @@ namespace foundry_local::Internal { namespace foundry_local { - /// Information about a discoverable execution provider. - struct EpInfo { - std::string name; - bool is_registered = false; - }; - - /// Result of an EP download and registration operation. - struct EpDownloadResult { - bool success = false; - std::string status; - std::vector registered_eps; - std::vector failed_eps; - }; - - /// Callback for EP download progress. Parameters: (ep_name, percent 0-100). - using EpProgressCallback = std::function; - class Manager final { public: Manager(const Manager&) = delete; @@ -82,22 +63,6 @@ namespace foundry_local { /// Once downloaded, EPs are not re-downloaded unless a new version is available. void EnsureEpsDownloaded() const; - /// Discover available execution providers and their registration status. - /// @return Vector of EpInfo describing each available EP. - std::vector DiscoverEps() const; - - /// Download and register all available execution providers. - /// @param progressCallback Optional callback invoked with (ep_name, percent) during download. - /// @return Result describing which EPs were registered or failed. 
- EpDownloadResult DownloadAndRegisterEps(EpProgressCallback progressCallback = nullptr) const; - - /// Download and register specific execution providers by name. - /// @param names EP names to download (as returned by DiscoverEps). - /// @param progressCallback Optional callback invoked with (ep_name, percent) during download. - /// @return Result describing which EPs were registered or failed. - EpDownloadResult DownloadAndRegisterEps(const std::vector& names, - EpProgressCallback progressCallback = nullptr) const; - private: explicit Manager(Configuration configuration, ILogger* logger); ~Manager(); diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 8df941298..8926a6c7e 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -3,7 +3,6 @@ #include "foundry_local.h" -#include #include #include #include @@ -380,30 +379,6 @@ int main(int argc, char* argv[]) { Manager::Create({"SampleApp"}, &logger); auto& manager = Manager::Instance(); - // Discover and download execution providers - auto eps = manager.DiscoverEps(); - std::cout << "\nAvailable execution providers:\n"; - for (const auto& ep : eps) { - std::cout << " " << ep.name << "\n"; - } - - if (!eps.empty()) { - std::cout << "\nDownloading execution providers:\n"; - std::string currentEp; - manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { - if (epName != currentEp) { - if (!currentEp.empty()) std::cout << "\n"; - currentEp = epName; - } - std::cout << "\r " << std::left << std::setw(30) << epName - << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "% " << std::flush; - }); - if (!currentEp.empty()) std::cout << "\n"; - } else { - std::cout << "\nNo execution providers to download.\n"; - } - // 1. Browse the full catalog try { BrowseCatalog(manager); diff --git a/sdk/cpp/sample/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md index 723da69dc..a3758af3d 100644 --- a/sdk/cpp/sample/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -61,13 +61,12 @@ The sample starts the local web service, sends vision requests via the Responses ## How it works 1. **Initialize** — creates the `Manager` singleton with web service configuration -2. **Execution providers** — discovers available EPs via `DiscoverEps()` and downloads them with per-EP progress via `DownloadAndRegisterEps()` -3. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory -4. **Web service** — starts the local Foundry web service on a random port -5. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG -6. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts -7. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive -8. **Cleanup** — stops the web service, unloads the model, and destroys the manager +2. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory +3. **Web service** — starts the local Foundry web service on a random port +4. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG +5. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts +6. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive +7. 
**Cleanup** — stops the web service, unloads the model, and destroys the manager ## Troubleshooting diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index 66e05b9ba..aea9b0c48 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -19,6 +18,10 @@ #include "foundry_local.h" // +#ifdef _WIN32 +#include +#endif + using json = nlohmann::json; // ─── Base64 encoding ──────────────────────────────────────────────────────── @@ -30,18 +33,17 @@ std::string Base64Encode(const std::vector& data) { std::string out; out.reserve(((data.size() + 2) / 3) * 4); size_t i = 0; - const size_t len = data.size(); - while (i < len) { - uint32_t octet_a = data[i++]; - uint32_t octet_b = (i < len) ? data[i++] : 0; - uint32_t octet_c = (i < len) ? data[i++] : 0; + while (i < data.size()) { + uint32_t octet_a = i < data.size() ? data[i++] : 0; + uint32_t octet_b = i < data.size() ? data[i++] : 0; + uint32_t octet_c = i < data.size() ? data[i++] : 0; uint32_t triple = (octet_a << 16) | (octet_b << 8) | octet_c; out.push_back(kBase64Chars[(triple >> 18) & 0x3F]); out.push_back(kBase64Chars[(triple >> 12) & 0x3F]); out.push_back(kBase64Chars[(triple >> 6) & 0x3F]); out.push_back(kBase64Chars[triple & 0x3F]); } - size_t mod = len % 3; + size_t mod = data.size() % 3; if (mod == 1) { out[out.size() - 2] = '='; out[out.size() - 1] = '='; @@ -203,35 +205,6 @@ int main(int argc, char* argv[]) { foundry_local::Manager::Create(config); auto& manager = foundry_local::Manager::Instance(); - - // Discover and download execution providers (like C# sample) - auto eps = manager.DiscoverEps(); - std::cout << "\nAvailable execution providers:" << std::endl; - for (const auto& ep : eps) { - std::cout << " " << ep.name << std::endl; - } - - if (!eps.empty()) { - std::cout << "\nDownloading execution providers:" << std::endl; - std::string currentEp; - manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { - if (epName != currentEp) { - if (!currentEp.empty()) { - std::cout << std::endl; - } - currentEp = epName; - } - // Fixed-width output to overwrite previous line cleanly - std::cout << "\r " << std::left << std::setw(30) << epName - << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "% " << std::flush; - }); - if (!currentEp.empty()) { - std::cout << std::endl; - } - } else { - std::cout << "\nNo execution providers to download." 
<< std::endl; - } // // diff --git a/sdk/cpp/src/foundry_local_manager.cpp b/sdk/cpp/src/foundry_local_manager.cpp index cf3d44c68..dfaef291a 100644 --- a/sdk/cpp/src/foundry_local_manager.cpp +++ b/sdk/cpp/src/foundry_local_manager.cpp @@ -5,10 +5,8 @@ #include #include #include -#include #include -#include #include "foundry_local.h" #include "foundry_local_internal_core.h" @@ -138,114 +136,6 @@ void Manager::Cleanup() noexcept { } } - std::vector Manager::DiscoverEps() const { - auto response = core_->call("discover_eps", *logger_); - if (response.HasError()) { - throw Exception(std::string("Error discovering execution providers: ") + response.error, *logger_); - } - - std::vector result; - if (response.data.empty()) { - return result; - } - - auto json = nlohmann::json::parse(response.data, nullptr, false); - if (json.is_discarded() || !json.is_array()) { - return result; - } - - for (const auto& item : json) { - EpInfo ep; - ep.name = item.value("Name", ""); - ep.is_registered = item.value("IsRegistered", false); - result.push_back(std::move(ep)); - } - return result; - } - - namespace { - struct EpCallbackContext { - EpProgressCallback* callback; - }; - - int EpProgressNativeCallback(void* data, int32_t dataLength, void* userData) { - auto* ctx = static_cast(userData); - if (!ctx || !ctx->callback || !*ctx->callback) return 0; - - std::string progressStr(static_cast(data), static_cast(dataLength)); - auto sepIndex = progressStr.find('|'); - if (sepIndex != std::string::npos) { - std::string name = progressStr.substr(0, sepIndex); - try { - double percent = std::stod(progressStr.substr(sepIndex + 1)); - (*ctx->callback)(name, percent); - } catch (...) { - // Skip malformed progress strings - } - } - return 0; - } - } - - EpDownloadResult Manager::DownloadAndRegisterEps(EpProgressCallback progressCallback) const { - return DownloadAndRegisterEps({}, std::move(progressCallback)); - } - - EpDownloadResult Manager::DownloadAndRegisterEps(const std::vector& names, - EpProgressCallback progressCallback) const { - std::string requestData; - std::string* requestDataPtr = nullptr; - - if (!names.empty()) { - CoreInteropRequest request("download_and_register_eps"); - std::string namesList; - for (size_t i = 0; i < names.size(); ++i) { - if (i > 0) namesList += ","; - namesList += names[i]; - } - request.AddParam("Names", namesList); - requestData = request.ToJson(); - requestDataPtr = &requestData; - } - - CoreResponse response; - if (progressCallback) { - EpCallbackContext ctx{&progressCallback}; - response = core_->call("download_and_register_eps", *logger_, - requestDataPtr, EpProgressNativeCallback, &ctx); - } else { - response = core_->call("download_and_register_eps", *logger_, requestDataPtr); - } - - if (response.HasError()) { - throw Exception(std::string("Error downloading execution providers: ") + response.error, *logger_); - } - - EpDownloadResult result; - if (!response.data.empty()) { - auto json = nlohmann::json::parse(response.data, nullptr, false); - if (!json.is_discarded()) { - result.success = json.value("Success", false); - result.status = json.value("Status", ""); - if (json.contains("RegisteredEps") && json["RegisteredEps"].is_array()) { - for (const auto& ep : json["RegisteredEps"]) { - result.registered_eps.push_back(ep.get()); - } - } - if (json.contains("FailedEps") && json["FailedEps"].is_array()) { - for (const auto& ep : json["FailedEps"]) { - result.failed_eps.push_back(ep.get()); - } - } - } - } else { - result.success = true; - result.status = "Completed"; - } - 
- return result; - } - void Manager::Initialize() { config_.Validate(); From 251af8355a21478167c2dba9af5ff188f273fe9b Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 14:46:58 -0700 Subject: [PATCH 14/18] Update Readme --- sdk/cpp/README.md | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 0fce2d0c8..4446dffd1 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -14,7 +14,7 @@ The Foundry Local C++ SDK provides a C++17 static library for running AI models - **Download progress** — wire up a callback for real-time download percentage - **Model variants** — select specific hardware/quantization variants per model alias - **Optional web service** — start an OpenAI-compatible REST endpoint -- **Execution providers** — ensure EPs are downloaded and registered for hardware acceleration +- **Execution providers** — discover, download, and register EPs with per-EP progress reporting - **Auto NuGet download** — CMake auto-downloads native runtime DLLs at configure time ## Prerequisites @@ -302,10 +302,34 @@ Manager::Instance().StopWebService(); ### Execution Providers -Ensure EPs are downloaded and registered for hardware acceleration: +Discover and download execution providers for hardware acceleration: ```cpp -Manager::Instance().EnsureEpsDownloaded(); +// Discover available EPs +auto eps = manager.DiscoverEps(); +for (const auto& ep : eps) { + std::cout << ep.name << " — registered: " << (ep.is_registered ? "yes" : "no") << "\n"; +} + +// Download and register all EPs with progress +std::string currentEp; +auto result = manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) std::cout << "\n"; + currentEp = epName; + } + std::cout << "\r " << epName << " " << percent << "%" << std::flush; +}); +std::cout << "\n"; + +// Or download specific EPs only +auto result2 = manager.DownloadAndRegisterEps({"WebGpuExecutionProvider"}); + +// Check results +if (result.success) { + for (const auto& ep : result.registered_eps) + std::cout << "Registered: " << ep << "\n"; +} ``` ### Using the Prebuilt SDK (Zip) @@ -459,7 +483,8 @@ sdk/cpp/ │ └── tool_types.h # Tool calling types ├── src/ # Private implementation ├── sample/ -│ └── main.cpp # Sample application +│ ├── main.cpp # Sample application +│ └── web-server-responses-vision/ # Vision sample (Responses API) ├── test/ # Unit & E2E tests (GTest) ├── CMakeLists.txt ├── CMakePresets.json @@ -475,7 +500,7 @@ sdk/cpp/ | `OgaGenerator_TokenCount not found in onnxruntime-genai` | Version mismatch between Foundry Local components | Update NuGet package versions in CMakeLists.txt | | `API version [N] is not available` | ONNX Runtime version too old for the Foundry Local service | Update NuGet package versions in CMakeLists.txt | | `nuget.exe not found on PATH` | NuGet CLI not installed | Install from [nuget.org/downloads](https://www.nuget.org/downloads) | -| `Failed to load shared library: Microsoft.AI.Foundry.Local.Core.dll` | Runtime DLLs not next to executable | Copy DLLs from NuGet packages (see step 3 in Building from Source) | +| `Failed to load shared library: Microsoft.AI.Foundry.Local.Core.dll` | Runtime DLLs not next to executable | Reconfigure with `cmake --preset x64-debug` to re-download NuGet packages, then rebuild | ## License From 534d58d812466ee55725a2c218d29f82b3e48c60 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 15:29:27 
-0700 Subject: [PATCH 15/18] Add discover and download eps in the sample --- .../web-server-responses-vision/README.md | 15 +++++----- .../web-server-responses-vision/main.cpp | 29 +++++++++++++++++++ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/sdk/cpp/sample/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md index a3758af3d..dc7465c7a 100644 --- a/sdk/cpp/sample/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -61,12 +61,13 @@ The sample starts the local web service, sends vision requests via the Responses ## How it works 1. **Initialize** — creates the `Manager` singleton with web service configuration -2. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory -3. **Web service** — starts the local Foundry web service on a random port -4. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG -5. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts -6. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive -7. **Cleanup** — stops the web service, unloads the model, and destroys the manager +2. **Execution providers** — discovers and downloads compatible EPs via `DiscoverEps()` and `DownloadAndRegisterEps()` +3. **Model setup** — resolves the model alias, downloads if not cached, and loads into memory +4. **Web service** — starts the local Foundry web service on a random port +5. **Image encoding** — loads the image via stb, resizes to max 512px (preserving aspect ratio), and base64-encodes as JPEG +6. **Vision request** — builds the Responses API request body with `input_text` + `input_image` content parts +7. **Streaming** — sends the request via cURL with SSE streaming, printing tokens as they arrive +8. 
**Cleanup** — stops the web service, unloads the model, and destroys the manager ## Troubleshooting @@ -74,7 +75,7 @@ The sample starts the local web service, sends vision requests via the Responses |---|---|---| | `Failed to load image: ` | Default image not found | Ensure `test_image.jpg` is present next to the source file | | `Model 'xyz' not found in catalog` | Invalid model alias | Check available models printed in the error output | -| `WebGPU execution provider is not supported` | WebGPUExecutionProvider not available | WebGPU models are not supported yet; the sample automatically falls back to the CPU variant | +| `WebGPU execution provider is not supported` | WebGPU EP not available in this OnnxRuntime build | Ensure `DownloadAndRegisterEps()` runs before model load to install the EP | | cURL connection refused | Web service failed to start | Ensure `config.web` is set and no port conflicts exist | ## License diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index aea9b0c48..b457be9cb 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -205,6 +206,34 @@ int main(int argc, char* argv[]) { foundry_local::Manager::Create(config); auto& manager = foundry_local::Manager::Instance(); + + // Discover and download execution providers + try { + auto eps = manager.DiscoverEps(); + std::cout << "\nAvailable execution providers:" << std::endl; + for (const auto& ep : eps) { + std::cout << " " << ep.name << std::endl; + } + + if (!eps.empty()) { + std::cout << "\nDownloading execution providers:" << std::endl; + std::string currentEp; + manager.DownloadAndRegisterEps([&](const std::string& epName, double percent) { + if (epName != currentEp) { + if (!currentEp.empty()) std::cout << std::endl; + currentEp = epName; + } + std::cout << "\r " << std::left << std::setw(30) << epName + << " " << std::right << std::fixed << std::setprecision(1) + << std::setw(6) << percent << "% " << std::flush; + }); + if (!currentEp.empty()) std::cout << std::endl; + } else { + std::cout << "\nNo execution providers to download." << std::endl; + } + } catch (const std::exception& ex) { + std::cerr << "EP discovery/download skipped: " << ex.what() << std::endl; + } // // From be8a853db5b4ede58f6d1b9d37f080f705d31536 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 15:37:31 -0700 Subject: [PATCH 16/18] Update Readme --- sdk/cpp/README.md | 5 +++-- sdk/cpp/sample/web-server-responses-vision/README.md | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sdk/cpp/README.md b/sdk/cpp/README.md index 4446dffd1..017f2fa6f 100644 --- a/sdk/cpp/README.md +++ b/sdk/cpp/README.md @@ -25,7 +25,7 @@ The Foundry Local C++ SDK provides a C++17 static library for running AI models | **Ninja** | Ships with Visual Studio 2022 | | **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | | **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | -| **NuGet CLI** | Required for auto-downloading native runtime DLLs. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | +| **NuGet CLI** | Required for auto-downloading native runtime DLLs. 
Install via `winget install Microsoft.NuGet` | ## Building from Source @@ -499,8 +499,9 @@ sdk/cpp/ | `DML provider requested, but GenAI has not been built with DML support` | GPU variant selected but ONNX Runtime GenAI lacks DML | Select a CPU variant or update Foundry Local | | `OgaGenerator_TokenCount not found in onnxruntime-genai` | Version mismatch between Foundry Local components | Update NuGet package versions in CMakeLists.txt | | `API version [N] is not available` | ONNX Runtime version too old for the Foundry Local service | Update NuGet package versions in CMakeLists.txt | -| `nuget.exe not found on PATH` | NuGet CLI not installed | Install from [nuget.org/downloads](https://www.nuget.org/downloads) | +| `nuget.exe not found on PATH` | NuGet CLI not installed | Install via `winget install Microsoft.NuGet` | | `Failed to load shared library: Microsoft.AI.Foundry.Local.Core.dll` | Runtime DLLs not next to executable | Reconfigure with `cmake --preset x64-debug` to re-download NuGet packages, then rebuild | +| NuGet packages not installed or DLLs not copied correctly | Stale or corrupted build cache | Delete the `out` folder (`rmdir /s /q out`) and reconfigure from scratch: `cmake --preset x64-debug && cmake --build --preset x64-debug` | ## License diff --git a/sdk/cpp/sample/web-server-responses-vision/README.md b/sdk/cpp/sample/web-server-responses-vision/README.md index dc7465c7a..cf9f343d8 100644 --- a/sdk/cpp/sample/web-server-responses-vision/README.md +++ b/sdk/cpp/sample/web-server-responses-vision/README.md @@ -19,7 +19,7 @@ This sample demonstrates vision (image understanding) capabilities using the Fou | **Ninja** | Ships with Visual Studio 2022 | | **vcpkg** | Set the `VCPKG_ROOT` environment variable to your vcpkg installation | | **MSVC** (or clang-cl) | Visual Studio 2022 Build Tools or full IDE | -| **NuGet CLI** | Must be on PATH. Install from [nuget.org/downloads](https://www.nuget.org/downloads) | +| **NuGet CLI** | Must be on PATH. Install via `winget install Microsoft.NuGet` | The sample downloads the specified model the first time it runs (skips if already cached). 
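For readers following step 6 of the vision sample's "How it works" list (the Responses API request with `input_text` + `input_image` content parts), the sketch below shows roughly what that request body looks like when assembled with nlohmann::json, which the sample already uses. It is illustrative only: the model alias is taken from the run example, the data URI is a truncated placeholder, and the exact construction in the sample's main.cpp may differ.

```cpp
// responses_vision_body_sketch.cpp: illustrative /v1/responses request body for a
// vision prompt. Model alias and base64 payload are placeholders, not real values.
#include <iostream>
#include <string>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // In the real sample this data URI comes from stb-loaded, resized, JPEG-encoded,
    // base64-encoded image bytes; here it is truncated for brevity.
    const std::string dataUri = "data:image/jpeg;base64,/9j/4AAQ...";

    json body = {
        {"model", "qwen3.5-0.8b"},   // alias from the run example above
        {"stream", true},            // enables SSE token streaming
        {"input", json::array({
            {
                {"role", "user"},
                {"content", json::array({
                    {{"type", "input_text"}, {"text", "Describe this image."}},
                    {{"type", "input_image"}, {"image_url", dataUri}}
                })}
            }
        })}
    };

    // Print the body that would be POSTed to http://localhost:<port>/v1/responses.
    std::cout << body.dump(2) << std::endl;
    return 0;
}
```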
@@ -77,6 +77,7 @@ The sample starts the local web service, sends vision requests via the Responses | `Model 'xyz' not found in catalog` | Invalid model alias | Check available models printed in the error output | | `WebGPU execution provider is not supported` | WebGPU EP not available in this OnnxRuntime build | Ensure `DownloadAndRegisterEps()` runs before model load to install the EP | | cURL connection refused | Web service failed to start | Ensure `config.web` is set and no port conflicts exist | +| NuGet packages not installed or DLLs not copied correctly | Stale or corrupted build cache | Delete the `out` folder (`rmdir /s /q out`) and reconfigure from scratch: `cmake --preset x64-debug && cmake --build --preset x64-debug` | ## License From 054ee0df1371199772c05bc7c7af9505caa0300c Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 15:43:10 -0700 Subject: [PATCH 17/18] Fix spacing for ep download --- sdk/cpp/sample/main.cpp | 5 +++-- sdk/cpp/sample/web-server-responses-vision/main.cpp | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 1812956d7..28d4e182e 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -397,9 +397,10 @@ int main(int argc, char* argv[]) { if (!currentEp.empty()) std::cout << "\n"; currentEp = epName; } - std::cout << "\r " << std::left << std::setw(30) << epName + std::cout << "\r" << std::string(60, ' ') << "\r" + << " " << std::left << std::setw(30) << epName << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "% " << std::flush; + << std::setw(6) << percent << "%" << std::flush; }); if (!currentEp.empty()) std::cout << "\n"; } else { diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index b457be9cb..887f916f9 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -223,9 +223,11 @@ int main(int argc, char* argv[]) { if (!currentEp.empty()) std::cout << std::endl; currentEp = epName; } - std::cout << "\r " << std::left << std::setw(30) << epName + // Clear the entire line, then print progress + std::cout << "\r" << std::string(60, ' ') << "\r" + << " " << std::left << std::setw(30) << epName << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "% " << std::flush; + << std::setw(6) << percent << "%" << std::flush; }); if (!currentEp.empty()) std::cout << std::endl; } else { From 83ae3f10f8be6088c2df27af51c7d53da059d467 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Thu, 7 May 2026 15:57:11 -0700 Subject: [PATCH 18/18] Update progress bars --- sdk/cpp/sample/main.cpp | 14 ++++++-------- .../sample/web-server-responses-vision/main.cpp | 13 ++++++------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/sdk/cpp/sample/main.cpp b/sdk/cpp/sample/main.cpp index 28d4e182e..7c377da99 100644 --- a/sdk/cpp/sample/main.cpp +++ b/sdk/cpp/sample/main.cpp @@ -3,7 +3,7 @@ #include "foundry_local.h" -#include +#include #include #include #include @@ -118,7 +118,7 @@ void ChatNonStreaming(Manager& manager, const std::string& alias) { PreferCpuVariant(*concreteModel); } - model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; return true; }); + model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; }); std::cout << "\n"; model->Load(); @@ -211,7 +211,7 @@ 
void TranscribeAudio(Manager& manager, const std::string& alias, const std::stri PreferCpuVariant(*concreteModel); } - model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; return true; }); + model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; }); std::cout << "\n"; model->Load(); @@ -263,7 +263,7 @@ void ChatWithToolCalling(Manager& manager, const std::string& alias) { PreferCpuVariant(*concreteModel); } - model->Download([](float pct) { std::cout << "\rDownloading: " << pct << "% " << std::flush; return true; }); + model->Download([](float pct) { printf("\rDownloading: %5.1f%%", pct); fflush(stdout); return true; }); std::cout << "\n"; model->Load(); @@ -397,10 +397,8 @@ int main(int argc, char* argv[]) { if (!currentEp.empty()) std::cout << "\n"; currentEp = epName; } - std::cout << "\r" << std::string(60, ' ') << "\r" - << " " << std::left << std::setw(30) << epName - << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "%" << std::flush; + printf("\r %-30s %5.1f%%", epName.c_str(), percent); + fflush(stdout); }); if (!currentEp.empty()) std::cout << "\n"; } else { diff --git a/sdk/cpp/sample/web-server-responses-vision/main.cpp b/sdk/cpp/sample/web-server-responses-vision/main.cpp index 887f916f9..cd13db152 100644 --- a/sdk/cpp/sample/web-server-responses-vision/main.cpp +++ b/sdk/cpp/sample/web-server-responses-vision/main.cpp @@ -2,8 +2,8 @@ // #include #include +#include #include -#include #include #include #include @@ -223,11 +223,9 @@ int main(int argc, char* argv[]) { if (!currentEp.empty()) std::cout << std::endl; currentEp = epName; } - // Clear the entire line, then print progress - std::cout << "\r" << std::string(60, ' ') << "\r" - << " " << std::left << std::setw(30) << epName - << " " << std::right << std::fixed << std::setprecision(1) - << std::setw(6) << percent << "%" << std::flush; + // Matches Python: print(f"\r {ep_name:<30} {percent:5.1f}%", ...) + printf("\r %-30s %5.1f%%", epName.c_str(), percent); + fflush(stdout); }); if (!currentEp.empty()) std::cout << std::endl; } else { @@ -254,7 +252,8 @@ int main(int argc, char* argv[]) { if (!model->IsCached()) { std::cout << "\nDownloading model " << modelAlias << "..." << std::endl; model->Download([](float pct) { - std::cout << "\rDownloading model: " << pct << "% " << std::flush; + printf("\rDownloading model: %5.1f%%", pct); + fflush(stdout); return true; }); std::cout << "\nModel downloaded" << std::endl;
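Patches 17 and 18 settle on a printf-based, single-line progress display for EP and model downloads. As a quick way to exercise that formatting in isolation, here is a minimal standalone sketch; the EP names and the fake progress loop are invented for the demo, and only the `"\r  %-30s %5.1f%%"` format string follows the one the samples use.

```cpp
// progress_format_demo.cpp: standalone sketch of the single-line progress output.
// The EP names and the simulated download loop are hypothetical; only the printf
// format string mirrors the samples' progress callbacks.
#include <chrono>
#include <cstdio>
#include <string>
#include <thread>
#include <vector>

int main() {
    const std::vector<std::string> eps = {"WebGpuExecutionProvider", "CpuExecutionProvider"};
    for (const auto& ep : eps) {
        for (double percent = 0.0; percent <= 100.0; percent += 25.0) {
            // Left-align the EP name in a 30-character field, print a fixed-width
            // percentage, and return to column 0 so the next update overwrites it.
            std::printf("\r  %-30s %5.1f%%", ep.c_str(), percent);
            std::fflush(stdout);
            std::this_thread::sleep_for(std::chrono::milliseconds(50));
        }
        std::printf("\n");  // keep the completed line and start a fresh one for the next EP
    }
    return 0;
}
```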