From 47c6aef38c83bd761af8199fee2042ffdbdb056b Mon Sep 17 00:00:00 2001
From: maanavd
Date: Tue, 28 Apr 2026 02:27:02 -0400
Subject: [PATCH 1/5] Add Responses API to Rust SDK

Implements the HTTP Responses API client, wire types, SSE streaming parser,
manager factory, tests, and example for the Rust SDK. Also aligns the Rust
implementation with resolved Responses API review feedback: opt-in storage,
request timeouts, image source validation, optional media type, list
pagination fields, get_responses_client naming, and server-matching
streaming event shapes.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/rust/Cargo.toml                          |   8 +-
 sdk/rust/examples/responses.rs               | 245 +++++++
 sdk/rust/src/foundry_local_manager.rs        |  15 +
 sdk/rust/src/lib.rs                          |  10 +
 sdk/rust/src/openai/mod.rs                   |  10 +
 sdk/rust/src/openai/responses_client.rs      | 637 ++++++++++++++++++
 sdk/rust/src/openai/responses_types.rs       | 661 +++++++++++++++++++
 sdk/rust/tests/integration/main.rs           |   1 +
 sdk/rust/tests/integration/responses_test.rs | 454 +++++++++++++
 sdk/rust/tests/unit/main.rs                  |   5 +
 sdk/rust/tests/unit/responses_test.rs        | 487 ++++++++++++++
 11 files changed, 2532 insertions(+), 1 deletion(-)
 create mode 100644 sdk/rust/examples/responses.rs
 create mode 100644 sdk/rust/src/openai/responses_client.rs
 create mode 100644 sdk/rust/src/openai/responses_types.rs
 create mode 100644 sdk/rust/tests/integration/responses_test.rs
 create mode 100644 sdk/rust/tests/unit/main.rs
 create mode 100644 sdk/rust/tests/unit/responses_test.rs

diff --git a/sdk/rust/Cargo.toml b/sdk/rust/Cargo.toml
index 94794697e..d11a15c2e 100644
--- a/sdk/rust/Cargo.toml
+++ b/sdk/rust/Cargo.toml
@@ -24,7 +24,9 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
 tokio-stream = "0.1"
 tokio-util = "0.7"
 futures-core = "0.3"
-reqwest = { version = "0.12", features = ["json"] }
+reqwest = { version = "0.12", features = ["json", "stream"] }
+bytes = "1"
+async-stream = "0.3"
 urlencoding = "2"
 async-openai = { version = "0.33", default-features = false, features = ["chat-completion-types", "embedding-types"] }
@@ -34,6 +36,10 @@ zip = "2"
 serde_json = "1"
 serde = { version = "1", features = ["derive"] }

+[[example]]
+name = "responses"
+path = "examples/responses.rs"
+
 [[example]]
 name = "chat_completion"
 path = "examples/chat_completion.rs"
diff --git a/sdk/rust/examples/responses.rs b/sdk/rust/examples/responses.rs
new file mode 100644
index 000000000..f96fb11f4
--- /dev/null
+++ b/sdk/rust/examples/responses.rs
@@ -0,0 +1,245 @@
+//! Responses API example demonstrating non-streaming and streaming usage,
+//! tool calling, and multi-turn conversations.
+
+use std::io::{self, Write};
+
+use foundry_local_sdk::{
+    FoundryLocalConfig, FoundryLocalError, FoundryLocalManager, FunctionToolDefinition,
+    ResponseInput, ResponseItem, StreamingEvent,
+};
+use serde_json::json;
+use tokio_stream::StreamExt;
+
+type Result<T> = std::result::Result<T, FoundryLocalError>;
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    // ── 1. Initialise the manager ───────────────────────────────────────────
+    let config = FoundryLocalConfig::new("foundry_local_responses_example");
+    let manager = FoundryLocalManager::create(config)?;
+
+    // ── 2. Start the web service ────────────────────────────────────────────
+    println!("Starting web service…");
+    manager.start_web_service().await?;
+    println!("Web service URLs: {:?}", manager.urls()?);
+
+    // ── 3. Pick a model ─────────────────────────────────────────────────────
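+    // Prefer a small, known-good alias if one is in the catalog; otherwise
+    // fall back to the first available model.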
+    let models = manager.catalog().get_models().await?;
+    let model_alias = ["phi-4-mini", "phi-3.5-mini", "qwen2.5-0.5b"]
+        .iter()
+        .find(|alias| models.iter().any(|m| m.alias() == **alias))
+        .map(|s| s.to_string())
+        .or_else(|| models.first().map(|m| m.alias().to_string()))
+        .expect("No models available in the catalog");
+
+    println!("Using model: {model_alias}");
+    let model = manager.catalog().get_model(&model_alias).await?;
+
+    if !model.is_cached().await? {
+        println!("Downloading model {model_alias}…");
+        model.download(None).await?;
+    }
+    model.load().await?;
+    println!("Model loaded.");
+
+    // ── 4. Create the Responses client ──────────────────────────────────────
+    let mut client = manager.get_responses_client(Some(&model.info().id))?;
+    client.settings.store = Some(true);
+
+    // ── 5. Non-streaming request ────────────────────────────────────────────
+    println!("\n─── Non-streaming ───────────────────────────────────────────────");
+    let response = client
+        .create(
+            ResponseInput::Text("What is the capital of France? Reply in one word.".into()),
+            None,
+        )
+        .await?;
+
+    println!("Status : {}", response.status);
+    println!("Answer : {}", response.output_text());
+    if let Some(usage) = &response.usage {
+        println!(
+            "Tokens : {} in / {} out",
+            usage.input_tokens, usage.output_tokens
+        );
+    }
+
+    // ── 6. Streaming request ────────────────────────────────────────────────
+    println!("\n─── Streaming ───────────────────────────────────────────────────");
+    print!("Story : ");
+    io::stdout().flush().ok();
+
+    let mut stream = client
+        .create_streaming(
+            ResponseInput::Text(
+                "Tell me a two-sentence story about a robot that loves ice cream.".into(),
+            ),
+            None,
+        )
+        .await?;
+
+    let mut full_text = String::new();
+    while let Some(event) = stream.next().await {
+        match event? {
+            StreamingEvent::OutputTextDelta { delta, .. } => {
+                print!("{delta}");
+                io::stdout().flush().ok();
+                full_text.push_str(&delta);
+            }
+            StreamingEvent::ResponseCompleted { response, .. } => {
+                if let Some(usage) = response.usage.as_ref() {
+                    println!("\n[completed, {} output tokens]", usage.output_tokens);
+                } else {
+                    println!("\n[completed]");
+                }
+            }
+            _ => {}
+        }
+    }
+
+    // ── 7. Multi-turn: follow-up using previous_response_id ─────────────────
+    println!("\n─── Multi-turn ──────────────────────────────────────────────────");
+    let first = client
+        .create(
+            ResponseInput::Text("My favourite number is 42. Remember this.".into()),
+            None,
+        )
+        .await?;
+    println!("Turn 1: {}", first.output_text());
+
+    let follow_up_opts = foundry_local_sdk::ResponseCreateRequest {
+        model: model.info().id.clone(),
+        input: ResponseInput::Text("What is my favourite number?".into()),
+        previous_response_id: Some(first.id.clone()),
+        instructions: None,
+        tools: None,
+        tool_choice: None,
+        stream: None,
+        store: Some(true),
+        temperature: Some(0.0),
+        top_p: None,
+        max_output_tokens: None,
+        frequency_penalty: None,
+        presence_penalty: None,
+        seed: None,
+        truncation: None,
+        parallel_tool_calls: None,
+        metadata: None,
+        user: None,
+        reasoning: None,
+        text: None,
+    };
+
+    let second = client
+        .create(
+            ResponseInput::Text("What is my favourite number?".into()),
+            Some(follow_up_opts),
+        )
+        .await?;
+    println!("Turn 2: {}", second.output_text());
+
+    // ── 8. Tool calling ─────────────────────────────────────────────────────
+    println!("\n─── Tool calling ────────────────────────────────────────────────");
+    let add_tool = FunctionToolDefinition {
+        tool_type: "function".into(),
+        name: "add".into(),
+        description: Some("Add two integers and return the sum.".into()),
+        parameters: Some(json!({
+            "type": "object",
+            "properties": {
+                "a": { "type": "integer", "description": "First addend" },
+                "b": { "type": "integer", "description": "Second addend" }
+            },
+            "required": ["a", "b"]
+        })),
+        strict: None,
+    };
+
+    let tool_opts = foundry_local_sdk::ResponseCreateRequest {
+        model: model.info().id.clone(),
+        input: ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
+        tools: Some(vec![add_tool]),
+        tool_choice: Some(json!("required")),
+        instructions: None,
+        previous_response_id: None,
+        stream: None,
+        store: Some(true),
+        temperature: Some(0.0),
+        top_p: None,
+        max_output_tokens: None,
+        frequency_penalty: None,
+        presence_penalty: None,
+        seed: None,
+        truncation: None,
+        parallel_tool_calls: None,
+        metadata: None,
+        user: None,
+        reasoning: None,
+        text: None,
+    };
+
+    let tool_response = client
+        .create(
+            ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
+            Some(tool_opts),
+        )
+        .await?;
+
+    if let Some(ResponseItem::FunctionCall {
+        call_id,
+        name,
+        arguments,
+        ..
+    }) = tool_response
+        .output
+        .iter()
+        .find(|i| matches!(i, ResponseItem::FunctionCall { .. }))
+    {
+        println!("Model called tool: {name}({arguments})");
+        let args: serde_json::Value = serde_json::from_str(arguments)?;
+        let a = args["a"].as_i64().unwrap_or(0);
+        let b = args["b"].as_i64().unwrap_or(0);
+        let sum = a + b;
+
+        let result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
+            id: None,
+            call_id: call_id.clone(),
+            output: sum.to_string(),
+            status: None,
+        }]);
+
+        let final_opts = foundry_local_sdk::ResponseCreateRequest {
+            model: model.info().id.clone(),
+            input: result_input.clone(),
+            previous_response_id: Some(tool_response.id.clone()),
+            instructions: None,
+            tools: None,
+            tool_choice: None,
+            stream: None,
+            store: Some(true),
+            temperature: Some(0.0),
+            top_p: None,
+            max_output_tokens: None,
+            frequency_penalty: None,
+            presence_penalty: None,
+            seed: None,
+            truncation: None,
+            parallel_tool_calls: None,
+            metadata: None,
+            user: None,
+            reasoning: None,
+            text: None,
+        };
+
+        let final_response = client.create(result_input, Some(final_opts)).await?;
+        println!("Tool result: {}", final_response.output_text());
+    } else {
+        println!("No tool call in response (model may not support tool calling)");
+    }
+
+    // ── 9. Clean up ─────────────────────────────────────────────────────────
+    model.unload().await?;
+    manager.stop_web_service().await?;
+    println!("\nDone.");
+    Ok(())
+}
diff --git a/sdk/rust/src/foundry_local_manager.rs b/sdk/rust/src/foundry_local_manager.rs
index 0c22ef154..bfaf3af5c 100644
--- a/sdk/rust/src/foundry_local_manager.rs
+++ b/sdk/rust/src/foundry_local_manager.rs
@@ -13,6 +13,7 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger};
 use crate::detail::core_interop::CoreInterop;
 use crate::detail::ModelLoadManager;
 use crate::error::{FoundryLocalError, Result};
+use crate::openai::responses_client::ResponsesClient;
 use crate::types::{EpDownloadResult, EpInfo};

 /// Global singleton holder — only stores a successfully initialised manager.
@@ -135,6 +136,20 @@ impl FoundryLocalManager {
         Ok(())
     }

+    /// Get a [`ResponsesClient`] for the given model.
+    ///
+    /// The web service must be started before using the returned client.
+    /// Pass `model_id = None` to defer model selection to per-request options.
+    pub fn get_responses_client(&self, model_id: Option<&str>) -> Result<ResponsesClient> {
+        let urls = self.urls()?;
+        let base_url = urls.first().ok_or_else(|| FoundryLocalError::Validation {
+            reason:
+                "Web service not started. Call start_web_service() before getting a ResponsesClient."
+                    .into(),
+        })?;
+        Ok(ResponsesClient::new(base_url, model_id))
+    }
+
     /// Discover available execution providers and their registration status.
     pub fn discover_eps(&self) -> Result<Vec<EpInfo>> {
         let raw = self.core.execute_command("discover_eps", None)?;
diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs
index 9fb4bb85b..52fab6e60 100644
--- a/sdk/rust/src/lib.rs
+++ b/sdk/rust/src/lib.rs
@@ -43,3 +43,13 @@ pub use async_openai::types::chat::{
     CreateChatCompletionResponse, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
     FunctionCallStream,
 };
+
+// Re-export Responses API types.
+pub use crate::openai::{
+    Annotation, DeleteResponseResult, FunctionToolDefinition, IncompleteDetails,
+    InputItemsListResponse, InputTokensDetails, ListResponsesOptions, ListResponsesResult, LogProb,
+    MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateRequest, ResponseError,
+    ResponseInput, ResponseItem, ResponseObject, ResponseUsage, ResponsesClient,
+    ResponsesClientSettings, ResponsesContentPart, SseStream, StreamingEvent, TextConfig,
+    TextFormat,
+};
diff --git a/sdk/rust/src/openai/mod.rs b/sdk/rust/src/openai/mod.rs
index ae0f1996a..4c607457e 100644
--- a/sdk/rust/src/openai/mod.rs
+++ b/sdk/rust/src/openai/mod.rs
@@ -3,6 +3,8 @@ mod chat_client;
 mod embedding_client;
 mod json_stream;
 mod live_audio_client;
+pub mod responses_client;
+pub mod responses_types;

 pub use self::audio_client::{
     AudioClient, AudioClientSettings, AudioTranscriptionResponse, AudioTranscriptionStream,
@@ -15,3 +17,11 @@ pub use self::live_audio_client::{
     ContentPart, CoreErrorResponse, LiveAudioTranscriptionOptions, LiveAudioTranscriptionResponse,
     LiveAudioTranscriptionSession, LiveAudioTranscriptionStream,
 };
+pub use self::responses_client::{ResponsesClient, ResponsesClientSettings, SseStream};
+pub use self::responses_types::{
+    Annotation, ContentPart as ResponsesContentPart, DeleteResponseResult, FunctionToolDefinition,
+    IncompleteDetails, InputItemsListResponse, InputTokensDetails, ListResponsesOptions,
+    ListResponsesResult, LogProb, MessageContent, OutputTokensDetails, ReasoningConfig,
+    ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
+    ResponseUsage, StreamingEvent, TextConfig, TextFormat,
+};
diff --git a/sdk/rust/src/openai/responses_client.rs b/sdk/rust/src/openai/responses_client.rs
new file mode 100644
index 000000000..2f301c405
--- /dev/null
+++ b/sdk/rust/src/openai/responses_client.rs
@@ -0,0 +1,637 @@
+//! HTTP client for the OpenAI Responses API.
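+//!
+//! Covers `POST /v1/responses` (non-streaming and SSE streaming), `GET` and
+//! `DELETE /v1/responses/{id}`, `POST /v1/responses/{id}/cancel`,
+//! `GET /v1/responses/{id}/input_items`, and the `GET /v1/responses` list
+//! extension endpoint.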
+
+use std::collections::HashMap;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+use std::time::Duration;
+
+use async_stream::try_stream;
+use bytes::Bytes;
+use futures_core::Stream;
+use reqwest::Client;
+use serde_json::Value;
+
+use crate::error::{FoundryLocalError, Result};
+
+use super::responses_types::{
+    DeleteResponseResult, FunctionToolDefinition, InputItemsListResponse, ListResponsesResult,
+    ReasoningConfig, ResponseCreateRequest, ResponseInput, ResponseObject, StreamingEvent,
+    TextConfig,
+};
+
+// ============================================================================
+// Settings
+// ============================================================================
+
+/// Configuration applied to every request made by a [`ResponsesClient`].
+///
+/// Use the public fields to set defaults; individual calls can override them
+/// via the `options` parameter.
+#[derive(Debug, Clone)]
+pub struct ResponsesClientSettings {
+    pub instructions: Option<String>,
+    pub temperature: Option<f32>,
+    pub top_p: Option<f32>,
+    pub max_output_tokens: Option<u32>,
+    pub frequency_penalty: Option<f32>,
+    pub presence_penalty: Option<f32>,
+    /// Tool choice strategy (serialised as JSON).
+    pub tool_choice: Option<Value>,
+    /// Truncation strategy: `"auto"` or `"disabled"`.
+    pub truncation: Option<String>,
+    pub parallel_tool_calls: Option<bool>,
+    /// Whether to persist the response for later retrieval.
+    ///
+    /// Defaults to `None`, which omits the field and lets the server decide.
+    pub store: Option<bool>,
+    pub metadata: Option<HashMap<String, String>>,
+    pub reasoning: Option<ReasoningConfig>,
+    pub text: Option<TextConfig>,
+    pub seed: Option<i64>,
+    /// Request timeout used for non-streaming calls; streaming calls use this as
+    /// a connect timeout so long-running streams are not cut off mid-response.
+    pub timeout: Duration,
+}
+
+impl Default for ResponsesClientSettings {
+    fn default() -> Self {
+        Self {
+            store: None,
+            instructions: None,
+            temperature: None,
+            top_p: None,
+            max_output_tokens: None,
+            frequency_penalty: None,
+            presence_penalty: None,
+            tool_choice: None,
+            truncation: None,
+            parallel_tool_calls: None,
+            metadata: None,
+            reasoning: None,
+            text: None,
+            seed: None,
+            timeout: Duration::from_secs(60),
+        }
+    }
+}
+
+impl ResponsesClientSettings {
+    /// Create settings with sensible defaults.
+    pub fn new() -> Self {
+        Self::default()
+    }
+}
+
+// ============================================================================
+// SSE Stream
+// ============================================================================
+
+/// A stream of [`StreamingEvent`]s parsed from a Server-Sent Events response body.
+pub struct SseStream {
+    inner: Pin<Box<dyn Stream<Item = Result<StreamingEvent>> + Send>>,
+}
+
+impl Stream for SseStream {
+    type Item = Result<StreamingEvent>;
+
+    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
+        self.inner.as_mut().poll_next(cx)
+    }
+}
+
+// ============================================================================
+// Client
+// ============================================================================
+
+/// Client for the OpenAI Responses API served by Foundry Local's embedded web service.
+///
+/// Unlike the chat/audio/embedding clients (which use FFI via CoreInterop),
+/// this client is HTTP-only and communicates directly with the embedded web service.
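+///
+/// Shared defaults, including the request timeout, live in
+/// [`ResponsesClientSettings`]; per-call `options` override them field by field.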
+///
+/// # Example
+/// ```ignore
+/// let manager = FoundryLocalManager::create(config)?;
+/// manager.start_web_service().await?;
+/// let client = manager.get_responses_client(Some("my-model-id"))?;
+///
+/// // Non-streaming
+/// let response = client.create(ResponseInput::Text("Hello!".into()), None).await?;
+/// println!("{}", response.output_text());
+///
+/// // Streaming
+/// use tokio_stream::StreamExt;
+/// let mut stream = client.create_streaming(ResponseInput::Text("Tell me a story".into()), None).await?;
+/// while let Some(event) = stream.next().await {
+///     if let Ok(StreamingEvent::OutputTextDelta { delta, .. }) = event {
+///         print!("{delta}");
+///     }
+/// }
+/// ```
+pub struct ResponsesClient {
+    http: Client,
+    base_url: String,
+    model_id: Option<String>,
+    /// Shared settings applied to every request. Modify via `client.settings`.
+    pub settings: ResponsesClientSettings,
+}
+
+impl ResponsesClient {
+    /// Create a new [`ResponsesClient`].
+    ///
+    /// - `base_url` — base URL of the Foundry Local web service (e.g. `"http://127.0.0.1:5273"`).
+    ///   Trailing slashes are stripped.
+    /// - `model_id` — default model used when not specified per-request.
+    pub fn new(base_url: &str, model_id: Option<&str>) -> Self {
+        let base_url = base_url.trim_end_matches('/').to_owned();
+        Self {
+            http: Client::new(),
+            base_url,
+            model_id: model_id.map(str::to_owned),
+            settings: ResponsesClientSettings::default(),
+        }
+    }
+
+    // ── Public API ───────────────────────────────────────────────────────────
+
+    /// Create a model response (non-streaming).
+    ///
+    /// Settings are merged in order: `model + input` → `self.settings` → `options`.
+    pub async fn create(
+        &self,
+        input: ResponseInput,
+        options: Option<ResponseCreateRequest>,
+    ) -> Result<ResponseObject> {
+        self.validate_input(&input)?;
+        if let Some(ref opts) = options {
+            self.validate_tools(opts.tools.as_deref())?;
+        }
+
+        let body = self.build_request(input, options, false)?;
+        let resp = self
+            .http
+            .post(self.url("/v1/responses"))
+            .timeout(self.request_timeout()?)
+            .json(&body)
+            .send()
+            .await?;
+
+        self.parse_json_response(resp).await
+    }
+
+    /// Create a model response with streaming via Server-Sent Events.
+    ///
+    /// Returns an [`SseStream`] (an `impl Stream<Item = Result<StreamingEvent>>`)
+    /// that yields parsed events as they arrive. Use `tokio_stream::StreamExt`
+    /// to iterate:
+    ///
+    /// ```ignore
+    /// use tokio_stream::StreamExt;
+    /// let mut stream = client.create_streaming(input, None).await?;
+    /// while let Some(event) = stream.next().await {
+    ///     // …
+    /// }
+    /// ```
+    pub async fn create_streaming(
+        &self,
+        input: ResponseInput,
+        options: Option<ResponseCreateRequest>,
+    ) -> Result<SseStream> {
+        self.validate_input(&input)?;
+        if let Some(ref opts) = options {
+            self.validate_tools(opts.tools.as_deref())?;
+        }
+
+        let body = self.build_request(input, options, true)?;
+        let http = Client::builder()
+            .connect_timeout(self.request_timeout()?)
+            .build()?;
+        let resp = http
+            .post(self.url("/v1/responses"))
+            .header("Accept", "text/event-stream")
+            .json(&body)
+            .send()
+            .await?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let text = resp.text().await.unwrap_or_else(|_| status.to_string());
+            return Err(FoundryLocalError::Validation {
+                reason: format!("Responses API error ({status}): {text}"),
+            });
+        }
+
+        let byte_stream = resp.bytes_stream();
+        let parsed = parse_sse_stream(byte_stream);
+        Ok(SseStream {
+            inner: Box::pin(parsed),
+        })
+    }
+
+    /// Retrieve a stored response by ID.
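+    ///
+    /// Retrieval only succeeds for responses that were persisted server-side
+    /// (see [`ResponsesClientSettings::store`] and the request `store` flag).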
+    pub async fn get(&self, response_id: &str) -> Result<ResponseObject> {
+        self.validate_id(response_id, "response_id")?;
+        let url = self.url(&format!(
+            "/v1/responses/{}",
+            urlencoding::encode(response_id)
+        ));
+        let resp = self
+            .http
+            .get(url)
+            .timeout(self.request_timeout()?)
+            .send()
+            .await?;
+        self.parse_json_response(resp).await
+    }
+
+    /// Delete a stored response by ID.
+    pub async fn delete(&self, response_id: &str) -> Result<DeleteResponseResult> {
+        self.validate_id(response_id, "response_id")?;
+        let url = self.url(&format!(
+            "/v1/responses/{}",
+            urlencoding::encode(response_id)
+        ));
+        let resp = self
+            .http
+            .delete(url)
+            .timeout(self.request_timeout()?)
+            .send()
+            .await?;
+        self.parse_json_response(resp).await
+    }
+
+    /// Cancel an in-progress response.
+    pub async fn cancel(&self, response_id: &str) -> Result<ResponseObject> {
+        self.validate_id(response_id, "response_id")?;
+        let url = self.url(&format!(
+            "/v1/responses/{}/cancel",
+            urlencoding::encode(response_id)
+        ));
+        let resp = self
+            .http
+            .post(url)
+            .timeout(self.request_timeout()?)
+            .send()
+            .await?;
+        self.parse_json_response(resp).await
+    }
+
+    /// Retrieve the input items for a stored response.
+    pub async fn get_input_items(&self, response_id: &str) -> Result<InputItemsListResponse> {
+        self.validate_id(response_id, "response_id")?;
+        let url = self.url(&format!(
+            "/v1/responses/{}/input_items",
+            urlencoding::encode(response_id)
+        ));
+        let resp = self
+            .http
+            .get(url)
+            .timeout(self.request_timeout()?)
+            .send()
+            .await?;
+        self.parse_json_response(resp).await
+    }
+
+    /// List all stored responses (extension endpoint).
+    pub async fn list(&self) -> Result<ListResponsesResult> {
+        self.list_with_options(None).await
+    }
+
+    /// List stored responses with optional pagination controls.
+    pub async fn list_with_options(
+        &self,
+        options: Option<&super::responses_types::ListResponsesOptions>,
+    ) -> Result<ListResponsesResult> {
+        let mut req = self.http.get(self.url("/v1/responses"));
+        if let Some(options) = options {
+            let mut query = Vec::new();
+            if let Some(limit) = options.limit {
+                if limit == 0 {
+                    return Err(FoundryLocalError::Validation {
+                        reason: "list limit must be greater than zero.".into(),
+                    });
+                }
+                query.push(("limit", limit.to_string()));
+            }
+            if let Some(order) = &options.order {
+                if order != "asc" && order != "desc" {
+                    return Err(FoundryLocalError::Validation {
+                        reason: "list order must be either \"asc\" or \"desc\".".into(),
+                    });
+                }
+                query.push(("order", order.clone()));
+            }
+            if let Some(after) = &options.after {
+                self.validate_id(after, "after")?;
+                query.push(("after", after.clone()));
+            }
+            req = req.query(&query);
+        }
+        let resp = req.timeout(self.request_timeout()?).send().await?;
+        self.parse_json_response(resp).await
+    }
+
+    // ── Private helpers ──────────────────────────────────────────────────────
+
+    fn url(&self, path: &str) -> String {
+        format!("{}{}", self.base_url, path)
+    }
+
+    /// Merge `input`, `self.settings`, and caller `options` into a single
+    /// `ResponseCreateRequest`.
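+    ///
+    /// Precedence is lowest to highest: `self.settings` defaults first, then
+    /// any `Some` field in `options`. For example, `settings.temperature =
+    /// Some(0.7)` combined with `options.temperature = Some(0.0)` sends
+    /// `temperature = 0.0`.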
+    fn build_request(
+        &self,
+        input: ResponseInput,
+        options: Option<ResponseCreateRequest>,
+        stream: bool,
+    ) -> Result<ResponseCreateRequest> {
+        // Determine model: options override self.model_id
+        let model = options
+            .as_ref()
+            .map(|o| o.model.clone())
+            .filter(|m| !m.trim().is_empty())
+            .or_else(|| self.model_id.clone())
+            .ok_or_else(|| FoundryLocalError::Validation {
+                reason: "Model must be specified in the constructor or via options.model.".into(),
+            })?;
+
+        // Start with settings defaults
+        let s = &self.settings;
+
+        let mut req = ResponseCreateRequest {
+            model,
+            input,
+            stream: Some(stream),
+            // Settings defaults
+            instructions: s.instructions.clone(),
+            temperature: s.temperature,
+            top_p: s.top_p,
+            max_output_tokens: s.max_output_tokens,
+            frequency_penalty: s.frequency_penalty,
+            presence_penalty: s.presence_penalty,
+            tool_choice: s.tool_choice.clone(),
+            truncation: s.truncation.clone(),
+            parallel_tool_calls: s.parallel_tool_calls,
+            store: s.store,
+            metadata: s.metadata.clone(),
+            reasoning: s.reasoning.clone(),
+            text: s.text.clone(),
+            seed: s.seed,
+            // Not in settings
+            previous_response_id: None,
+            tools: None,
+            user: None,
+        };
+
+        // Apply per-call overrides. The positional `input` argument always
+        // wins; any `options.input` is ignored.
+        if let Some(opts) = options {
+            if !opts.model.trim().is_empty() {
+                req.model = opts.model;
+            }
+            if let Some(v) = opts.instructions {
+                req.instructions = Some(v);
+            }
+            if let Some(v) = opts.previous_response_id {
+                req.previous_response_id = Some(v);
+            }
+            if let Some(v) = opts.tools {
+                req.tools = Some(v);
+            }
+            if let Some(v) = opts.tool_choice {
+                req.tool_choice = Some(v);
+            }
+            if let Some(v) = opts.temperature {
+                req.temperature = Some(v);
+            }
+            if let Some(v) = opts.top_p {
+                req.top_p = Some(v);
+            }
+            if let Some(v) = opts.max_output_tokens {
+                req.max_output_tokens = Some(v);
+            }
+            if let Some(v) = opts.frequency_penalty {
+                req.frequency_penalty = Some(v);
+            }
+            if let Some(v) = opts.presence_penalty {
+                req.presence_penalty = Some(v);
+            }
+            if let Some(v) = opts.seed {
+                req.seed = Some(v);
+            }
+            if let Some(v) = opts.truncation {
+                req.truncation = Some(v);
+            }
+            if let Some(v) = opts.parallel_tool_calls {
+                req.parallel_tool_calls = Some(v);
+            }
+            if let Some(v) = opts.store {
+                req.store = Some(v);
+            }
+            if let Some(v) = opts.metadata {
+                req.metadata = Some(v);
+            }
+            if let Some(v) = opts.user {
+                req.user = Some(v);
+            }
+            if let Some(v) = opts.reasoning {
+                req.reasoning = Some(v);
+            }
+            if let Some(v) = opts.text {
+                req.text = Some(v);
+            }
+        }
+
+        Ok(req)
+    }
+
+    fn validate_input(&self, input: &ResponseInput) -> Result<()> {
+        match input {
+            ResponseInput::Text(s) if s.trim().is_empty() => Err(FoundryLocalError::Validation {
+                reason: "Input string cannot be empty.".into(),
+            }),
+            ResponseInput::Items(items) if items.is_empty() => Err(FoundryLocalError::Validation {
+                reason: "Input items array cannot be empty.".into(),
+            }),
+            ResponseInput::Items(items) => {
+                for item in items {
+                    Self::validate_response_item(item)?;
+                }
+                Ok(())
+            }
+            _ => Ok(()),
+        }
+    }
+
+    fn validate_response_item(item: &super::responses_types::ResponseItem) -> Result<()> {
+        match item {
+            super::responses_types::ResponseItem::Message {
+                content: super::responses_types::MessageContent::Parts(parts),
+                ..
+            } => {
+                for part in parts {
+                    Self::validate_content_part(part)?;
+                }
+            }
+            super::responses_types::ResponseItem::Reasoning {
+                content: Some(parts),
+                ..
+            } => {
+                for part in parts {
+                    Self::validate_content_part(part)?;
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn validate_content_part(part: &super::responses_types::ContentPart) -> Result<()> {
+        if let super::responses_types::ContentPart::InputImage {
+            image_url,
+            image_data,
+            ..
+        } = part
+        {
+            let has_image_url = image_url.as_ref().is_some_and(|v| !v.trim().is_empty());
+            let has_image_data = image_data.as_ref().is_some_and(|v| !v.trim().is_empty());
+            if has_image_url == has_image_data {
+                return Err(FoundryLocalError::Validation {
+                    reason:
+                        "Provide exactly one of image_url or image_data for input_image content."
+                            .into(),
+                });
+            }
+        }
+        Ok(())
+    }
+
+    fn validate_tools(&self, tools: Option<&[FunctionToolDefinition]>) -> Result<()> {
+        let Some(tools) = tools else {
+            return Ok(());
+        };
+        for tool in tools {
+            if tool.tool_type != "function" {
+                return Err(FoundryLocalError::Validation {
+                    reason: format!(
+                        "Each tool must have type \"function\", got \"{}\".",
+                        tool.tool_type
+                    ),
+                });
+            }
+            if tool.name.trim().is_empty() {
+                return Err(FoundryLocalError::Validation {
+                    reason: "Each tool must have a non-empty \"name\".".into(),
+                });
+            }
+        }
+        Ok(())
+    }
+
+    fn validate_id(&self, id: &str, param: &str) -> Result<()> {
+        if id.trim().is_empty() {
+            return Err(FoundryLocalError::Validation {
+                reason: format!("{param} must be a non-empty string."),
+            });
+        }
+        // OpenAI does not publish a max ID length; keep this aligned with the
+        // JS SDK to avoid surprising client-side rejections of valid server IDs.
+        if id.len() > 1024 {
+            return Err(FoundryLocalError::Validation {
+                reason: format!("{param} exceeds maximum length (1024)."),
+            });
+        }
+        Ok(())
+    }
+
+    fn request_timeout(&self) -> Result<Duration> {
+        if self.settings.timeout.is_zero() {
+            return Err(FoundryLocalError::Validation {
+                reason: "ResponsesClientSettings.timeout must be greater than zero.".into(),
+            });
+        }
+        Ok(self.settings.timeout)
+    }
+
+    async fn parse_json_response<T>(&self, resp: reqwest::Response) -> Result<T>
+    where
+        T: serde::de::DeserializeOwned,
+    {
+        let status = resp.status();
+        let text = resp.text().await?;
+        if !status.is_success() {
+            return Err(FoundryLocalError::Validation {
+                reason: format!("Responses API error ({status}): {text}"),
+            });
+        }
+        serde_json::from_str(&text).map_err(FoundryLocalError::from)
+    }
+}
+
+// ============================================================================
+// SSE parser
+// ============================================================================
+
+/// Parse a raw bytes stream (from `reqwest`) as Server-Sent Events.
+///
+/// Each complete SSE block (`\n\n`-separated) is parsed into a [`StreamingEvent`].
+/// The stream ends on `data: [DONE]` or when the source is exhausted.
+fn parse_sse_stream<S>(byte_stream: S) -> impl Stream<Item = Result<StreamingEvent>> + Send
+where
+    S: Stream<Item = reqwest::Result<Bytes>> + Send + 'static,
+{
+    try_stream! {
+        use tokio_stream::StreamExt as _;
+
+        let mut byte_stream = std::pin::pin!(byte_stream);
+        // Buffer accumulates bytes until we have complete SSE blocks.
+        let mut buf = String::new();
+
+        while let Some(chunk) = byte_stream.next().await {
+            let bytes: Bytes = chunk.map_err(FoundryLocalError::from)?;
+            // SSE is always UTF-8
+            let text = std::str::from_utf8(&bytes).map_err(|e| FoundryLocalError::Validation {
+                reason: format!("SSE stream contained invalid UTF-8: {e}"),
+            })?;
+            buf.push_str(text);
+
+            // Process all complete SSE blocks (separated by double newlines).
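+            // A complete block typically looks like:
+            //   event: response.output_text.delta
+            //   data: {"type":"response.output_text.delta","delta":"Hi",…}
+            // The `event:` field is redundant for us: the JSON payload carries
+            // its own "type" discriminator, so only `data:` lines are kept.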
+            loop {
+                let Some(block_end) = buf.find("\n\n") else {
+                    break;
+                };
+                let block = buf[..block_end].to_owned();
+                buf = buf[block_end + 2..].to_owned();
+
+                let trimmed = block.trim();
+                if trimmed.is_empty() {
+                    continue;
+                }
+
+                // Terminal signal
+                if trimmed == "data: [DONE]" {
+                    return;
+                }
+
+                // Collect `data:` lines (per SSE spec, multiple are joined with \n)
+                let mut data_lines: Vec<&str> = Vec::new();
+                for line in trimmed.split('\n') {
+                    if let Some(rest) = line.strip_prefix("data: ") {
+                        data_lines.push(rest);
+                    } else if line == "data:" {
+                        data_lines.push("");
+                    }
+                    // `event:` lines are informational; the type lives inside the JSON.
+                }
+
+                if data_lines.is_empty() {
+                    continue;
+                }
+
+                let event_json = data_lines.join("\n");
+                let event: StreamingEvent =
+                    serde_json::from_str(&event_json).map_err(FoundryLocalError::from)?;
+                yield event;
+            }
+        }
+    }
+}
diff --git a/sdk/rust/src/openai/responses_types.rs b/sdk/rust/src/openai/responses_types.rs
new file mode 100644
index 000000000..ffb2d2200
--- /dev/null
+++ b/sdk/rust/src/openai/responses_types.rs
@@ -0,0 +1,661 @@
+//! Type definitions for the OpenAI Responses API.
+
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+// ============================================================================
+// Content Parts
+// ============================================================================
+
+/// An annotation attached to an output-text content part.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Annotation {
+    #[serde(rename = "type")]
+    pub annotation_type: String,
+    pub start_index: u32,
+    pub end_index: u32,
+    /// URL for url_citation annotations.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub url: Option<String>,
+    /// Title for url_citation annotations.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub title: Option<String>,
+}
+
+/// Log probability for a token.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LogProb {
+    pub token: String,
+    pub logprob: f64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub bytes: Option<Vec<u8>>,
+}
+
+/// A content part within a message or response.
+///
+/// Discriminated on the `"type"` field.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub enum ContentPart {
+    /// Plain text input content.
+    #[serde(rename = "input_text")]
+    InputText { text: String },
+
+    /// Image input content (vision).
+    ///
+    /// This models Foundry Local's server contract. The server accepts either
+    /// `image_url` or `image_data`; when `image_data` is used, `media_type` lets
+    /// the server build the underlying data URI. If omitted, the server may infer
+    /// the media type.
+    #[serde(rename = "input_image")]
+    InputImage {
+        /// URL of the image (mutually exclusive with `image_data`).
+        #[serde(skip_serializing_if = "Option::is_none")]
+        image_url: Option<String>,
+        /// Base64-encoded image bytes (mutually exclusive with `image_url`).
+        #[serde(skip_serializing_if = "Option::is_none")]
+        image_data: Option<String>,
+        /// MIME type of the image, e.g. `"image/png"`.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        media_type: Option<String>,
+        /// Detail level: `"low"`, `"high"`, or `"auto"`.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        detail: Option<String>,
+    },
+
+    /// File input content.
+    #[serde(rename = "input_file")]
+    InputFile { filename: String, file_url: String },
+
+    /// Text produced by the model.
+    #[serde(rename = "output_text")]
+    OutputText {
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        annotations: Option<Vec<Annotation>>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        logprobs: Option<Vec<LogProb>>,
+    },
+
+    /// Model refusal.
+    #[serde(rename = "refusal")]
+    Refusal { refusal: String },
+}
+
+// ============================================================================
+// Message Content (string | ContentPart[])
+// ============================================================================
+
+/// The content of a message item — either a plain string or a list of content parts.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum MessageContent {
+    Text(String),
+    Parts(Vec<ContentPart>),
+}
+
+// ============================================================================
+// Response Items
+// ============================================================================
+
+/// An item in a request or response — discriminated on `"type"`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub enum ResponseItem {
+    #[serde(rename = "message")]
+    Message {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        id: Option<String>,
+        role: String,
+        content: MessageContent,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        status: Option<String>,
+    },
+
+    #[serde(rename = "function_call")]
+    FunctionCall {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        id: Option<String>,
+        call_id: String,
+        name: String,
+        arguments: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        status: Option<String>,
+    },
+
+    #[serde(rename = "function_call_output")]
+    FunctionCallOutput {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        id: Option<String>,
+        call_id: String,
+        output: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        status: Option<String>,
+    },
+
+    #[serde(rename = "item_reference")]
+    ItemReference { id: String },
+
+    #[serde(rename = "reasoning")]
+    Reasoning {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        id: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        content: Option<Vec<ContentPart>>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        encrypted_content: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        summary: Option<Value>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        status: Option<String>,
+    },
+}
+
+// ============================================================================
+// Response Input
+// ============================================================================
+
+/// The `input` field of a [`ResponseCreateRequest`]: either a plain string prompt
+/// or a structured list of [`ResponseItem`]s.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum ResponseInput {
+    Text(String),
+    Items(Vec<ResponseItem>),
+}
+
+// ============================================================================
+// Tool Definitions
+// ============================================================================
+
+/// A function tool definition passed to the model.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FunctionToolDefinition {
+    /// Always `"function"`.
+    #[serde(rename = "type")]
+    pub tool_type: String,
+    pub name: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+    /// JSON Schema for the function parameters.
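+    /// e.g. `{"type": "object", "properties": {"a": {"type": "integer"}}, "required": ["a"]}`.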
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parameters: Option<Value>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub strict: Option<bool>,
+}
+
+// ============================================================================
+// Text & Reasoning Config
+// ============================================================================
+
+/// Format constraints for model text output (constrained generation).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TextFormat {
+    /// `"text"`, `"json_object"`, `"json_schema"`, `"lark_grammar"`, or `"regex"`.
+    #[serde(rename = "type")]
+    pub format_type: String,
+    /// Schema name (for `json_schema`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+    /// Schema description (for `json_schema`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+    /// JSON Schema object (for `json_schema`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub schema: Option<Value>,
+    /// Strict mode (for `json_schema`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub strict: Option<bool>,
+}
+
+/// Text output configuration.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TextConfig {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub format: Option<TextFormat>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub verbosity: Option<String>,
+}
+
+/// Reasoning configuration for reasoning-capable models.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReasoningConfig {
+    /// Effort level: `"low"`, `"medium"`, or `"high"`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<String>,
+    /// Summary style: `"auto"`, `"concise"`, or `"detailed"`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: Option<String>,
+}
+
+// ============================================================================
+// Request
+// ============================================================================
+
+/// Request body for `POST /v1/responses`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResponseCreateRequest {
+    pub model: String,
+    pub input: ResponseInput,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<FunctionToolDefinition>>,
+    /// `"none"` | `"auto"` | `"required"` | `{ "type": "function", "name": "..." }`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<Value>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub frequency_penalty: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub presence_penalty: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub seed: Option<i64>,
+    /// `"auto"` or `"disabled"`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<TextConfig>,
+}
+
+// ============================================================================
+// Response Object
+// ============================================================================
+
+/// Usage statistics attached to a completed response.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResponseUsage {
+    pub input_tokens: u32,
+    pub output_tokens: u32,
+    pub total_tokens: u32,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_tokens_details: Option<InputTokensDetails>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_tokens_details: Option<OutputTokensDetails>,
+}
+
+/// Details about input token counts.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct InputTokensDetails {
+    pub cached_tokens: u32,
+}
+
+/// Details about output token counts.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OutputTokensDetails {
+    pub reasoning_tokens: u32,
+}
+
+/// An error payload inside a response object.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResponseError {
+    pub code: String,
+    pub message: String,
+}
+
+/// Optional details about why a response is incomplete.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct IncompleteDetails {
+    pub reason: String,
+}
+
+/// A completed (or failed) response from the Responses API.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ResponseObject {
+    pub id: String,
+    pub object: String,
+    pub created_at: i64,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub completed_at: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub failed_at: Option<i64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub cancelled_at: Option<i64>,
+    /// `"queued"`, `"in_progress"`, `"completed"`, `"failed"`, `"incomplete"`, or `"cancelled"`.
+    pub status: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub incomplete_details: Option<IncompleteDetails>,
+    pub model: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    pub output: Vec<ResponseItem>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ResponseError>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<FunctionToolDefinition>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<Value>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<TextConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub presence_penalty: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub frequency_penalty: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub usage: Option<ResponseUsage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user: Option<String>,
+}
+
+impl ResponseObject {
+    /// Concatenates text from the first assistant `message` item in `output`.
+    ///
+    /// Equivalent to the Python SDK's `response.output_text` property.
+    pub fn output_text(&self) -> String {
+        for item in &self.output {
+            if let ResponseItem::Message { role, content, .. } = item {
+                if role == "assistant" {
+                    return match content {
+                        MessageContent::Text(s) => s.clone(),
+                        MessageContent::Parts(parts) => parts
+                            .iter()
+                            .filter_map(|p| match p {
+                                ContentPart::OutputText { text, .. } => Some(text.as_str()),
+                                _ => None,
+                            })
+                            .collect::<Vec<&str>>()
+                            .join(""),
+                    };
+                }
+            }
+        }
+        String::new()
+    }
+}
+
+// ============================================================================
+// Streaming Events
+// ============================================================================
+
+/// A single Server-Sent Event emitted by the streaming Responses API.
+///
+/// Discriminated on the `"type"` field.
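+///
+/// For example, a text delta arrives as JSON shaped like
+/// `{"type": "response.output_text.delta", "item_id": "…", "output_index": 0,
+/// "content_index": 0, "delta": "Hi", "sequence_number": 3}` and deserialises
+/// into [`StreamingEvent::OutputTextDelta`].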
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(tag = "type")]
+pub enum StreamingEvent {
+    // ── Response lifecycle ───────────────────────────────────────────────────
+    #[serde(rename = "response.created")]
+    ResponseCreated {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.queued")]
+    ResponseQueued {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.in_progress")]
+    ResponseInProgress {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.completed")]
+    ResponseCompleted {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.failed")]
+    ResponseFailed {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.incomplete")]
+    ResponseIncomplete {
+        response: ResponseObject,
+        sequence_number: u64,
+    },
+
+    // ── Output items ─────────────────────────────────────────────────────────
+    #[serde(rename = "response.output_item.added")]
+    OutputItemAdded {
+        item_id: String,
+        output_index: u32,
+        item: ResponseItem,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.output_item.done")]
+    OutputItemDone {
+        item_id: String,
+        output_index: u32,
+        item: ResponseItem,
+        sequence_number: u64,
+    },
+
+    // ── Content parts ────────────────────────────────────────────────────────
+    #[serde(rename = "response.content_part.added")]
+    ContentPartAdded {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        part: ContentPart,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.content_part.done")]
+    ContentPartDone {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        part: ContentPart,
+        sequence_number: u64,
+    },
+
+    // ── Text deltas ──────────────────────────────────────────────────────────
+    #[serde(rename = "response.output_text.delta")]
+    OutputTextDelta {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        delta: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        logprobs: Option<Vec<LogProb>>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        obfuscation: Option<String>,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.output_text.done")]
+    OutputTextDone {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        logprobs: Option<Vec<LogProb>>,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.output_text.annotation.added")]
+    OutputTextAnnotationAdded {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        annotation_index: u32,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        annotation: Option<Annotation>,
+        sequence_number: u64,
+    },
+
+    // ── Refusal ──────────────────────────────────────────────────────────────
+    #[serde(rename = "response.refusal.delta")]
+    RefusalDelta {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        delta: String,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.refusal.done")]
+    RefusalDone {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        refusal: String,
+        sequence_number: u64,
+    },
+
+    // ── Function calls ───────────────────────────────────────────────────────
+    #[serde(rename = "response.function_call_arguments.delta")]
+    FunctionCallArgumentsDelta {
+        item_id: String,
+        output_index: u32,
+        call_id: String,
+        delta: String,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.function_call_arguments.done")]
+    FunctionCallArgumentsDone {
+        item_id: String,
+        output_index: u32,
+        call_id: String,
+        arguments: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        name: Option<String>,
+        sequence_number: u64,
+    },
+
+    // ── Reasoning ────────────────────────────────────────────────────────────
+    #[serde(rename = "response.reasoning_summary_part.added")]
+    ReasoningSummaryPartAdded {
+        item_id: String,
+        output_index: u32,
+        summary_index: u32,
+        part: ContentPart,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.reasoning_summary_part.done")]
+    ReasoningSummaryPartDone {
+        item_id: String,
+        output_index: u32,
+        summary_index: u32,
+        part: ContentPart,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.reasoning.delta")]
+    ReasoningDelta {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        delta: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        obfuscation: Option<String>,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.reasoning.done")]
+    ReasoningDone {
+        item_id: String,
+        output_index: u32,
+        content_index: u32,
+        text: String,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.reasoning_summary_text.delta")]
+    ReasoningSummaryTextDelta {
+        item_id: String,
+        output_index: u32,
+        summary_index: u32,
+        delta: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        obfuscation: Option<String>,
+        sequence_number: u64,
+    },
+    #[serde(rename = "response.reasoning_summary_text.done")]
+    ReasoningSummaryTextDone {
+        item_id: String,
+        output_index: u32,
+        summary_index: u32,
+        text: String,
+        sequence_number: u64,
+    },
+
+    // ── Error ────────────────────────────────────────────────────────────────
+    #[serde(rename = "error")]
+    Error {
+        #[serde(skip_serializing_if = "Option::is_none")]
+        code: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        message: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        param: Option<String>,
+        sequence_number: u64,
+    },
+}
+
+// ============================================================================
+// List / Delete Results
+// ============================================================================
+
+/// Result of `DELETE /v1/responses/{id}`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DeleteResponseResult {
+    pub id: String,
+    pub object: String,
+    pub deleted: bool,
+}
+
+/// Response from `GET /v1/responses/{id}/input_items`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct InputItemsListResponse {
+    pub object: String,
+    pub data: Vec<ResponseItem>,
+}
+
+/// Response from `GET /v1/responses` (extension endpoint).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ListResponsesResult {
+    pub object: String,
+    pub data: Vec<ResponseObject>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub first_id: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub last_id: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub has_more: Option<bool>,
+}
+
+/// Optional query parameters for `GET /v1/responses`.
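+///
+/// `limit` caps the page size, `order` is `"asc"` or `"desc"`, and `after` is
+/// a response ID cursor; a previous page's [`ListResponsesResult::last_id`]
+/// can be passed as `after` to fetch the next page.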
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct ListResponsesOptions {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub limit: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub order: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub after: Option<String>,
+}
diff --git a/sdk/rust/tests/integration/main.rs b/sdk/rust/tests/integration/main.rs
index 055760003..d1366adc4 100644
--- a/sdk/rust/tests/integration/main.rs
+++ b/sdk/rust/tests/integration/main.rs
@@ -15,4 +15,5 @@ mod embedding_client_test;
 mod live_audio_test;
 mod manager_test;
 mod model_test;
+mod responses_test;
 mod web_service_test;
diff --git a/sdk/rust/tests/integration/responses_test.rs b/sdk/rust/tests/integration/responses_test.rs
new file mode 100644
index 000000000..f40f354fe
--- /dev/null
+++ b/sdk/rust/tests/integration/responses_test.rs
@@ -0,0 +1,454 @@
+//! Integration tests for the Responses API.
+//!
+//! These tests require a running Foundry Local web service with a loaded model.
+//! They are compiled only when the "integration" Cargo feature is enabled, and
+//! skipped automatically in CI when no model is available.
+
+use std::sync::Arc;
+
+use super::common;
+use foundry_local_sdk::{
+    FunctionToolDefinition, ListResponsesOptions, MessageContent, Model, ResponseInput,
+    ResponseItem, ResponsesClient, ResponsesContentPart as ContentPart,
+};
+use serde_json::json;
+use tokio_stream::StreamExt;
+
+/// The model alias used for Responses API integration tests.
+const RESPONSES_MODEL_ALIAS: &str = common::TEST_MODEL_ALIAS;
+
+async fn setup_responses_client() -> (ResponsesClient, Arc<Model>) {
+    let manager = common::get_test_manager();
+    manager
+        .start_web_service()
+        .await
+        .expect("start_web_service failed");
+    let catalog = manager.catalog();
+    let model = catalog
+        .get_model(RESPONSES_MODEL_ALIAS)
+        .await
+        .expect("get_model failed");
+    model.load().await.expect("model.load() failed");
+
+    let mut client = manager
+        .get_responses_client(Some(&model.info().id))
+        .expect("get_responses_client failed");
+    client.settings.store = Some(true);
+    (client, model)
+}
+
+#[tokio::test]
+async fn non_streaming_simple_string() {
+    let (client, model) = setup_responses_client().await;
+
+    let response = client
+        .create(
+            ResponseInput::Text("What is 2+2? Respond with just the number.".into()),
+            None,
+        )
+        .await
+        .expect("create failed");
+
+    println!("Status: {}", response.status);
+    println!("Output: {}", response.output_text());
+
+    assert_eq!(response.status, "completed");
+    assert!(
+        !response.output_text().is_empty(),
+        "output_text should be non-empty"
+    );
+    assert!(response.usage.is_some(), "usage should be present");
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn non_streaming_with_options() {
+    let (client, model) = setup_responses_client().await;
+
+    let opts = foundry_local_sdk::ResponseCreateRequest {
+        model: model.info().id.clone(),
+        input: ResponseInput::Text("Say hello.".into()),
+        temperature: Some(0.0),
+        max_output_tokens: Some(50),
+        instructions: None,
+        previous_response_id: None,
+        tools: None,
+        tool_choice: None,
+        stream: None,
+        store: Some(true),
+        top_p: None,
+        frequency_penalty: None,
+        presence_penalty: None,
+        seed: None,
+        truncation: None,
+        parallel_tool_calls: None,
+        metadata: None,
+        user: None,
+        reasoning: None,
+        text: None,
+    };
+
+    let response = client
+        .create(ResponseInput::Text("Say hello.".into()), Some(opts))
+        .await
+        .expect("create with options failed");
+
+    assert_eq!(response.status, "completed");
+    assert!(!response.output_text().is_empty());
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn streaming_receives_deltas() {
+    let (client, model) = setup_responses_client().await;
+
+    let mut stream = client
+        .create_streaming(ResponseInput::Text("Count from 1 to 5.".into()), None)
+        .await
+        .expect("create_streaming failed");
+
+    let mut delta_count = 0usize;
+    let mut full_text = String::new();
+    let mut completed = false;
+
+    while let Some(event) = stream.next().await {
+        let event = event.expect("stream event error");
+        match event {
+            foundry_local_sdk::StreamingEvent::OutputTextDelta { delta, .. } => {
+                full_text.push_str(&delta);
+                delta_count += 1;
+            }
+            foundry_local_sdk::StreamingEvent::ResponseCompleted { .. } => {
+                completed = true;
+            }
+            _ => {}
+        }
+    }
+
+    println!("Received {delta_count} deltas, text: {full_text}");
+    assert!(delta_count > 0, "Expected at least one delta event");
+    assert!(completed, "Expected a ResponseCompleted event");
+    assert!(!full_text.is_empty(), "Expected non-empty accumulated text");
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn multi_turn_previous_response_id() {
+    let (client, model) = setup_responses_client().await;
+
+    // First turn
+    let first = client
+        .create(
+            ResponseInput::Text("My favourite colour is blue. Remember this.".into()),
+            None,
+        )
+        .await
+        .expect("first create failed");
+    assert_eq!(first.status, "completed");
+    let first_id = first.id.clone();
+
+    // Second turn referencing the first
+    let opts = foundry_local_sdk::ResponseCreateRequest {
+        model: model.info().id.clone(),
+        input: ResponseInput::Text("What is my favourite colour?".into()),
+        previous_response_id: Some(first_id),
+        instructions: None,
+        tools: None,
+        tool_choice: None,
+        stream: None,
+        store: Some(true),
+        temperature: None,
+        top_p: None,
+        max_output_tokens: None,
+        frequency_penalty: None,
+        presence_penalty: None,
+        seed: None,
+        truncation: None,
+        parallel_tool_calls: None,
+        metadata: None,
+        user: None,
+        reasoning: None,
+        text: None,
+    };
+
+    let second = client
+        .create(
+            ResponseInput::Text("What is my favourite colour?".into()),
+            Some(opts),
+        )
+        .await
+        .expect("second create failed");
+
+    println!("Multi-turn response: {}", second.output_text());
+    assert_eq!(second.status, "completed");
+    let text = second.output_text().to_lowercase();
+    assert!(
+        text.contains("blue"),
+        "Second response should reference 'blue', got: {text}"
+    );
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn get_stored_response() {
+    let (client, model) = setup_responses_client().await;
+
+    let created = client
+        .create(ResponseInput::Text("Hello.".into()), None)
+        .await
+        .expect("create failed");
+    let response_id = created.id.clone();
+
+    let fetched = client.get(&response_id).await.expect("get failed");
+    assert_eq!(fetched.id, response_id);
+    assert_eq!(fetched.status, "completed");
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn delete_response() {
+    let (client, model) = setup_responses_client().await;
+
+    let created = client
+        .create(ResponseInput::Text("I will be deleted.".into()), None)
+        .await
+        .expect("create failed");
+    let response_id = created.id.clone();
+
+    let result = client.delete(&response_id).await.expect("delete failed");
+    assert_eq!(result.id, response_id);
+    assert!(result.deleted);
+
+    // Getting the deleted response should fail
+    let get_result = client.get(&response_id).await;
+    assert!(
+        get_result.is_err(),
+        "Expected error after deleting response"
+    );
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn list_responses() {
+    let (client, model) = setup_responses_client().await;
+
+    // Create a response to ensure there is at least one
+    let _ = client
+        .create(ResponseInput::Text("List test.".into()), None)
+        .await
+        .expect("create failed");
+
+    let list_options = ListResponsesOptions {
+        limit: Some(10),
+        order: Some("desc".into()),
+        after: None,
+    };
+    let list = client
+        .list_with_options(Some(&list_options))
+        .await
+        .expect("list failed");
+    assert_eq!(list.object, "list");
+    assert!(
+        !list.data.is_empty(),
+        "Expected at least one response in list"
+    );
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn get_input_items() {
+    let (client, model) = setup_responses_client().await;
+
+    let created = client
+        .create(ResponseInput::Text("Input items test.".into()), None)
+        .await
+        .expect("create failed");
+
+    let items = client
+        .get_input_items(&created.id)
+        .await
+        .expect("get_input_items failed");
+
+    assert_eq!(items.object, "list");
+
+    model.unload().await.expect("unload failed");
+}
+
+#[tokio::test]
+async fn tool_calling_round_trip() {
+    let (client, model) = setup_responses_client().await;
+
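+    // Define a single function tool; with tool_choice "required" the model
+    // should answer with a function_call item rather than plain text.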
FunctionToolDefinition { + tool_type: "function".into(), + name: "multiply".into(), + description: Some("Multiply two numbers".into()), + parameters: Some(json!({ + "type": "object", + "properties": { + "a": { "type": "number" }, + "b": { "type": "number" } + }, + "required": ["a", "b"] + })), + strict: None, + }; + + let opts = foundry_local_sdk::ResponseCreateRequest { + model: model.info().id.clone(), + input: ResponseInput::Text("What is 6 times 7? Use the multiply tool.".into()), + tools: Some(vec![multiply_tool]), + tool_choice: Some(json!("required")), + instructions: None, + previous_response_id: None, + stream: None, + store: Some(true), + temperature: Some(0.0), + top_p: None, + max_output_tokens: None, + frequency_penalty: None, + presence_penalty: None, + seed: None, + truncation: None, + parallel_tool_calls: None, + metadata: None, + user: None, + reasoning: None, + text: None, + }; + + let response = client + .create( + ResponseInput::Text("What is 6 times 7? Use the multiply tool.".into()), + Some(opts), + ) + .await + .expect("create with tool failed"); + + // Find the function_call item + let func_call = response.output.iter().find_map(|item| { + if let ResponseItem::FunctionCall { + call_id, + name, + arguments, + .. + } = item + { + Some((call_id.clone(), name.clone(), arguments.clone())) + } else { + None + } + }); + + assert!(func_call.is_some(), "Expected a function_call output item"); + let (call_id, name, args_str) = func_call.unwrap(); + assert_eq!(name, "multiply"); + + let args: serde_json::Value = serde_json::from_str(&args_str).expect("failed to parse args"); + let a = args["a"].as_f64().unwrap_or(0.0); + let b = args["b"].as_f64().unwrap_or(0.0); + let product = (a * b) as i64; + + // Send back the tool result + let tool_result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput { + id: None, + call_id, + output: product.to_string(), + status: None, + }]); + + let final_opts = foundry_local_sdk::ResponseCreateRequest { + model: model.info().id.clone(), + input: tool_result_input.clone(), + previous_response_id: Some(response.id.clone()), + instructions: None, + tools: None, + tool_choice: None, + stream: None, + store: Some(true), + temperature: Some(0.0), + top_p: None, + max_output_tokens: None, + frequency_penalty: None, + presence_penalty: None, + seed: None, + truncation: None, + parallel_tool_calls: None, + metadata: None, + user: None, + reasoning: None, + text: None, + }; + + let final_response = client + .create(tool_result_input, Some(final_opts)) + .await + .expect("tool result create failed"); + + let result_text = final_response.output_text(); + println!("Tool call final answer: {result_text}"); + assert!( + result_text.contains("42"), + "Expected '42' in final answer, got: {result_text}" + ); + + model.unload().await.expect("unload failed"); +} + +#[tokio::test] +async fn vision_image_base64() { + // This test requires a vision-capable model (phi-4-multimodal or similar). + // It is skipped if no such model is available. 
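+    // To exercise a specific model, set FOUNDRY_VISION_MODEL_ID (read below via
+    // std::env::var) to a vision-capable model id before running this test.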
+ let manager = common::get_test_manager(); + manager + .start_web_service() + .await + .expect("start_web_service failed"); + + // Small 1x1 red PNG, base64-encoded + let tiny_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI6QAAAABJRU5ErkJggg=="; + + // Try to use the test model (may not be vision-capable; test would then fail at API level) + let vision_model_id = + std::env::var("FOUNDRY_VISION_MODEL_ID").unwrap_or_else(|_| "phi-4-multimodal".to_string()); + + let client = ResponsesClient::new( + manager.urls().expect("urls").first().expect("url"), + Some(&vision_model_id), + ); + + let input = ResponseInput::Items(vec![ResponseItem::Message { + id: None, + role: "user".into(), + content: MessageContent::Parts(vec![ + ContentPart::InputText { + text: "What colour is this image?".into(), + }, + ContentPart::InputImage { + image_url: None, + image_data: Some(tiny_png_b64.into()), + media_type: Some("image/png".into()), + detail: Some("auto".into()), + }, + ]), + status: None, + }]); + + let result = client.create(input, None).await; + match result { + Ok(resp) => { + println!("Vision response: {}", resp.output_text()); + assert_eq!(resp.status, "completed"); + } + Err(e) => { + // Model may not be loaded; skip gracefully + println!("Vision test skipped (model not available): {e}"); + } + } +} diff --git a/sdk/rust/tests/unit/main.rs b/sdk/rust/tests/unit/main.rs new file mode 100644 index 000000000..25d6f8d23 --- /dev/null +++ b/sdk/rust/tests/unit/main.rs @@ -0,0 +1,5 @@ +//! Unit test binary for the Foundry Local Rust SDK — Responses API. +//! +//! These tests do not require a running server. + +mod responses_test; diff --git a/sdk/rust/tests/unit/responses_test.rs b/sdk/rust/tests/unit/responses_test.rs new file mode 100644 index 000000000..26f7bec7c --- /dev/null +++ b/sdk/rust/tests/unit/responses_test.rs @@ -0,0 +1,487 @@ +//! Unit tests for the Responses API types and SSE parsing. +//! +//! All tests run without a server. 
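+//!
+//! They should be runnable on their own with `cargo test --test unit`, since
+//! Cargo discovers `tests/unit/main.rs` as a test target named `unit`.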
+ +use foundry_local_sdk::{ + ListResponsesOptions, ListResponsesResult, MessageContent, ReasoningConfig, ResponseInput, + ResponseItem, ResponseObject, ResponsesClient, ResponsesClientSettings, + ResponsesContentPart as ContentPart, StreamingEvent, TextConfig, TextFormat, +}; +use serde_json::json; +use std::time::Duration; + +// ── Settings defaults ──────────────────────────────────────────────────────── + +#[test] +fn settings_defaults_omit_store() { + let settings = ResponsesClientSettings::new(); + assert_eq!( + settings.store, None, + "store should be omitted unless callers explicitly opt in" + ); +} + +#[test] +fn settings_default_trait_also_omits_store() { + let settings = ResponsesClientSettings::default(); + assert_eq!(settings.store, None); +} + +#[test] +fn settings_default_timeout_is_sixty_seconds() { + let settings = ResponsesClientSettings::default(); + assert_eq!(settings.timeout, Duration::from_secs(60)); +} + +#[test] +fn settings_all_other_fields_default_to_none() { + let s = ResponsesClientSettings::new(); + assert!(s.instructions.is_none()); + assert!(s.temperature.is_none()); + assert!(s.top_p.is_none()); + assert!(s.max_output_tokens.is_none()); + assert!(s.frequency_penalty.is_none()); + assert!(s.presence_penalty.is_none()); + assert!(s.tool_choice.is_none()); + assert!(s.truncation.is_none()); + assert!(s.parallel_tool_calls.is_none()); + assert!(s.metadata.is_none()); + assert!(s.reasoning.is_none()); + assert!(s.text.is_none()); + assert!(s.seed.is_none()); +} + +// ── output_text ────────────────────────────────────────────────────────────── + +fn make_response_with_text(role: &str, text: &str) -> ResponseObject { + serde_json::from_value(json!({ + "id": "resp_test", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "test-model", + "output": [ + { + "type": "message", + "role": role, + "content": [{ "type": "output_text", "text": text }] + } + ] + })) + .expect("failed to deserialize test ResponseObject") +} + +fn make_response_with_string_content(text: &str) -> ResponseObject { + serde_json::from_value(json!({ + "id": "resp_test", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "test-model", + "output": [ + { + "type": "message", + "role": "assistant", + "content": text + } + ] + })) + .expect("failed to deserialize test ResponseObject") +} + +#[test] +fn output_text_extracts_assistant_message_parts() { + let resp = make_response_with_text("assistant", "Hello, world!"); + assert_eq!(resp.output_text(), "Hello, world!"); +} + +#[test] +fn output_text_extracts_assistant_string_content() { + let resp = make_response_with_string_content("Direct string content"); + assert_eq!(resp.output_text(), "Direct string content"); +} + +#[test] +fn output_text_skips_non_assistant_messages() { + let resp = make_response_with_text("user", "I am the user"); + assert_eq!( + resp.output_text(), + "", + "user message should not be returned" + ); +} + +#[test] +fn output_text_returns_empty_for_no_output() { + let resp: ResponseObject = serde_json::from_value(json!({ + "id": "resp_test", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "test-model", + "output": [] + })) + .unwrap(); + assert_eq!(resp.output_text(), ""); +} + +#[test] +fn output_text_concatenates_multiple_parts() { + let resp: ResponseObject = serde_json::from_value(json!({ + "id": "resp_test", + "object": "response", + "created_at": 0, + "status": "completed", + "model": "test-model", + "output": [{ + "type": "message", + 
"role": "assistant", + "content": [ + { "type": "output_text", "text": "Hello" }, + { "type": "output_text", "text": ", world!" } + ] + }] + })) + .unwrap(); + assert_eq!(resp.output_text(), "Hello, world!"); +} + +// ── Content part serialisation ─────────────────────────────────────────────── + +#[test] +fn content_part_input_text_serializes_correctly() { + let part = ContentPart::InputText { + text: "hello".into(), + }; + let json = serde_json::to_value(&part).unwrap(); + assert_eq!(json["type"], "input_text"); + assert_eq!(json["text"], "hello"); +} + +#[test] +fn content_part_output_text_serializes_correctly() { + let part = ContentPart::OutputText { + text: "hi".into(), + annotations: None, + logprobs: None, + }; + let json = serde_json::to_value(&part).unwrap(); + assert_eq!(json["type"], "output_text"); + assert_eq!(json["text"], "hi"); + // skip_serializing_if = None omits the field + assert!(json.get("annotations").is_none()); +} + +#[test] +fn content_part_refusal_roundtrips() { + let part = ContentPart::Refusal { + refusal: "I can't do that".into(), + }; + let json = serde_json::to_string(&part).unwrap(); + let back: ContentPart = serde_json::from_str(&json).unwrap(); + let ContentPart::Refusal { refusal } = back else { + panic!("Expected Refusal variant"); + }; + assert_eq!(refusal, "I can't do that"); +} + +#[test] +fn input_image_content_serializes_with_base64() { + let part = ContentPart::InputImage { + image_url: None, + image_data: Some("base64data==".into()), + media_type: Some("image/png".into()), + detail: Some("auto".into()), + }; + let json = serde_json::to_value(&part).unwrap(); + assert_eq!(json["type"], "input_image"); + assert_eq!(json["image_data"], "base64data=="); + assert_eq!(json["media_type"], "image/png"); + assert_eq!(json["detail"], "auto"); + // image_url should be omitted (None) + assert!(json.get("image_url").is_none()); +} + +#[test] +fn input_image_content_serializes_with_url() { + let part = ContentPart::InputImage { + image_url: Some("https://example.com/img.png".into()), + image_data: None, + media_type: None, + detail: None, + }; + let json = serde_json::to_value(&part).unwrap(); + assert_eq!(json["image_url"], "https://example.com/img.png"); + assert!(json.get("image_data").is_none()); + assert!(json.get("media_type").is_none()); + assert!(json.get("detail").is_none()); +} + +#[tokio::test] +async fn input_image_requires_exactly_one_source() { + let client = ResponsesClient::new("http://127.0.0.1:1", Some("test-model")); + let invalid_input = ResponseInput::Items(vec![ResponseItem::Message { + id: None, + role: "user".into(), + content: MessageContent::Parts(vec![ContentPart::InputImage { + image_url: Some("https://example.com/img.png".into()), + image_data: Some("base64data==".into()), + media_type: Some("image/png".into()), + detail: None, + }]), + status: None, + }]); + + let err = client + .create(invalid_input, None) + .await + .expect_err("invalid input_image should fail before network request"); + assert!(err + .to_string() + .contains("Provide exactly one of image_url or image_data")); +} + +#[tokio::test] +async fn timeout_must_be_positive() { + let mut client = ResponsesClient::new("http://127.0.0.1:1", Some("test-model")); + client.settings.timeout = Duration::ZERO; + + let err = client + .list() + .await + .expect_err("zero timeout should fail before network request"); + assert!(err + .to_string() + .contains("timeout must be greater than zero")); +} + +#[test] +fn list_response_result_deserializes_pagination_fields() { + let 
result: ListResponsesResult = serde_json::from_value(json!({ + "object": "list", + "data": [], + "first_id": "resp_first", + "last_id": "resp_last", + "has_more": true + })) + .unwrap(); + + assert_eq!(result.first_id.as_deref(), Some("resp_first")); + assert_eq!(result.last_id.as_deref(), Some("resp_last")); + assert_eq!(result.has_more, Some(true)); +} + +#[test] +fn list_options_serialize_query_fields() { + let options = ListResponsesOptions { + limit: Some(10), + order: Some("desc".into()), + after: Some("resp_123".into()), + }; + let json = serde_json::to_value(options).unwrap(); + assert_eq!(json["limit"], 10); + assert_eq!(json["order"], "desc"); + assert_eq!(json["after"], "resp_123"); +} + +// ── ResponseItem serialisation ─────────────────────────────────────────────── + +#[test] +fn response_item_function_call_roundtrips() { + let item = ResponseItem::FunctionCall { + id: Some("fc_1".into()), + call_id: "call_abc".into(), + name: "get_weather".into(), + arguments: r#"{"city":"London"}"#.into(), + status: Some("completed".into()), + }; + let json = serde_json::to_string(&item).unwrap(); + let back: ResponseItem = serde_json::from_str(&json).unwrap(); + let ResponseItem::FunctionCall { name, .. } = back else { + panic!("Expected FunctionCall variant"); + }; + assert_eq!(name, "get_weather"); +} + +#[test] +fn response_item_message_with_string_content_roundtrips() { + let json = json!({ + "type": "message", + "role": "user", + "content": "Hello" + }); + let item: ResponseItem = serde_json::from_value(json).unwrap(); + let ResponseItem::Message { content, .. } = &item else { + panic!("Expected Message variant"); + }; + assert!(matches!(content, MessageContent::Text(_))); +} + +// ── Streaming event deserialisation ───────────────────────────────────────── + +#[test] +fn streaming_event_output_text_delta_deserializes() { + let json = json!({ + "type": "response.output_text.delta", + "item_id": "item_1", + "output_index": 0, + "content_index": 0, + "delta": "Hello", + "sequence_number": 5 + }); + let event: StreamingEvent = serde_json::from_value(json).unwrap(); + let StreamingEvent::OutputTextDelta { + delta, + sequence_number, + .. + } = event + else { + panic!("Expected OutputTextDelta variant"); + }; + assert_eq!(delta, "Hello"); + assert_eq!(sequence_number, 5); +} + +#[test] +fn streaming_event_response_completed_deserializes() { + let json = json!({ + "type": "response.completed", + "sequence_number": 10, + "response": { + "id": "resp_1", + "object": "response", + "created_at": 1234567890_i64, + "status": "completed", + "model": "test", + "output": [] + } + }); + let event: StreamingEvent = serde_json::from_value(json).unwrap(); + assert!(matches!(event, StreamingEvent::ResponseCompleted { .. })); +} + +#[test] +fn streaming_event_error_deserializes() { + let json = json!({ + "type": "error", + "code": "model_error", + "message": "Something went wrong", + "sequence_number": 2 + }); + let event: StreamingEvent = serde_json::from_value(json).unwrap(); + let StreamingEvent::Error { code, message, .. } = event else { + panic!("Expected Error variant"); + }; + assert_eq!(code.as_deref(), Some("model_error")); + assert_eq!(message.as_deref(), Some("Something went wrong")); +} + +// ── SSE parser ─────────────────────────────────────────────────────────────── + +/// Build a minimal SSE block string from event type and JSON data. 
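+///
+/// For example, `sse_block("response.completed", &json!({"x": 1}))` produces
+/// `"event: response.completed\ndata: {\"x\":1}\n\n"`.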
+fn sse_block(event_type: &str, data: &serde_json::Value) -> String { + format!("event: {event_type}\ndata: {data}\n\n") +} + +#[tokio::test] +async fn sse_parser_handles_complete_events() { + use bytes::Bytes; + + // Build a minimal SSE payload with one delta event followed by [DONE] + let delta_json = json!({ + "type": "response.output_text.delta", + "item_id": "item_1", + "output_index": 0, + "content_index": 0, + "delta": "Hi", + "sequence_number": 1 + }); + + let payload = format!( + "{}{}", + sse_block("response.output_text.delta", &delta_json), + "data: [DONE]\n\n" + ); + + let bytes = Bytes::from(payload); + + // Test the SSE logic by parsing the byte buffer as the SSE parser would. + let content = std::str::from_utf8(&bytes).unwrap().to_string(); + let blocks: Vec<&str> = content + .split("\n\n") + .filter(|b| !b.trim().is_empty()) + .collect(); + + for block in &blocks { + let trimmed = block.trim(); + if trimmed == "data: [DONE]" { + break; + } + let data_line = trimmed + .split('\n') + .find(|l| l.starts_with("data: ")) + .map(|l| &l[6..]); + if let Some(json_str) = data_line { + let event: StreamingEvent = serde_json::from_str(json_str).unwrap(); + assert!(matches!(event, StreamingEvent::OutputTextDelta { .. })); + } + } +} + +#[test] +fn sse_done_signal_is_recognized() { + let block = "data: [DONE]"; + assert!(block.trim() == "data: [DONE]"); +} + +// ── ResponseInput serde ────────────────────────────────────────────────────── + +#[test] +fn response_input_text_serializes_as_string() { + let input = ResponseInput::Text("what is 2+2?".into()); + let json = serde_json::to_value(&input).unwrap(); + assert_eq!(json, json!("what is 2+2?")); +} + +#[test] +fn response_input_items_serializes_as_array() { + let input = ResponseInput::Items(vec![ResponseItem::Message { + id: None, + role: "user".into(), + content: MessageContent::Text("hello".into()), + status: None, + }]); + let json = serde_json::to_value(&input).unwrap(); + assert!(json.is_array()); +} + +// ── TextConfig / ReasoningConfig ───────────────────────────────────────────── + +#[test] +fn text_config_with_json_schema_serializes() { + let cfg = TextConfig { + format: Some(TextFormat { + format_type: "json_schema".into(), + name: Some("MySchema".into()), + schema: Some(json!({"type": "object"})), + description: None, + strict: Some(true), + }), + verbosity: None, + }; + let json = serde_json::to_value(&cfg).unwrap(); + assert_eq!(json["format"]["type"], "json_schema"); + assert_eq!(json["format"]["name"], "MySchema"); + assert_eq!(json["format"]["strict"], true); +} + +#[test] +fn reasoning_config_serializes_correctly() { + let cfg = ReasoningConfig { + effort: Some("high".into()), + summary: Some("concise".into()), + }; + let json = serde_json::to_value(&cfg).unwrap(); + assert_eq!(json["effort"], "high"); + assert_eq!(json["summary"], "concise"); +} From 5457ac3f590a0ddf9a56576e54feeb7c9381b11f Mon Sep 17 00:00:00 2001 From: maanavd Date: Wed, 29 Apr 2026 15:24:22 -0400 Subject: [PATCH 2/5] rust: address responses API review feedback - Make responses_client/responses_types modules private; expose only the re-exported public surface (matching audio_client/chat_client/etc). - Introduce ResponseCreateOptions for per-call overrides so callers no longer need to materialize a full ResponseCreateRequest; ResponseCreateRequest stays as the wire-serialized request body. - Move SSE parser tests inline into responses_client.rs so they exercise parse_sse_stream directly instead of duplicating the framing logic in tests/unit. 
- Skip vision_image_base64 only when FOUNDRY_VISION_MODEL_ID is unset; when set, surface real failures so regressions are caught. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/rust/examples/responses.rs | 59 +--------- sdk/rust/src/foundry_local_manager.rs | 2 +- sdk/rust/src/lib.rs | 8 +- sdk/rust/src/openai/mod.rs | 8 +- sdk/rust/src/openai/responses_client.rs | 105 +++++++++++++++-- sdk/rust/src/openai/responses_types.rs | 34 ++++++ sdk/rust/tests/integration/responses_test.rs | 113 ++++--------------- sdk/rust/tests/unit/responses_test.rs | 55 +-------- 8 files changed, 173 insertions(+), 211 deletions(-) diff --git a/sdk/rust/examples/responses.rs b/sdk/rust/examples/responses.rs index f96fb11f4..8d697988a 100644 --- a/sdk/rust/examples/responses.rs +++ b/sdk/rust/examples/responses.rs @@ -107,27 +107,11 @@ async fn main() -> Result<()> { .await?; println!("Turn 1: {}", first.output_text()); - let follow_up_opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: ResponseInput::Text("What is my favourite number?".into()), + let follow_up_opts = foundry_local_sdk::ResponseCreateOptions { previous_response_id: Some(first.id.clone()), - instructions: None, - tools: None, - tool_choice: None, - stream: None, store: Some(true), temperature: Some(0.0), - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let second = client @@ -155,27 +139,12 @@ async fn main() -> Result<()> { strict: None, }; - let tool_opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: ResponseInput::Text("What is 123 + 456? 
Use the add tool.".into()), + let tool_opts = foundry_local_sdk::ResponseCreateOptions { tools: Some(vec![add_tool]), tool_choice: Some(json!("required")), - instructions: None, - previous_response_id: None, - stream: None, store: Some(true), temperature: Some(0.0), - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let tool_response = client @@ -208,27 +177,11 @@ async fn main() -> Result<()> { status: None, }]); - let final_opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: result_input.clone(), + let final_opts = foundry_local_sdk::ResponseCreateOptions { previous_response_id: Some(tool_response.id.clone()), - instructions: None, - tools: None, - tool_choice: None, - stream: None, store: Some(true), temperature: Some(0.0), - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let final_response = client.create(result_input, Some(final_opts)).await?; diff --git a/sdk/rust/src/foundry_local_manager.rs b/sdk/rust/src/foundry_local_manager.rs index bfaf3af5c..95ba087ed 100644 --- a/sdk/rust/src/foundry_local_manager.rs +++ b/sdk/rust/src/foundry_local_manager.rs @@ -13,7 +13,7 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger}; use crate::detail::core_interop::CoreInterop; use crate::detail::ModelLoadManager; use crate::error::{FoundryLocalError, Result}; -use crate::openai::responses_client::ResponsesClient; +use crate::openai::ResponsesClient; use crate::types::{EpDownloadResult, EpInfo}; /// Global singleton holder — only stores a successfully initialised manager. 
diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs
index 52fab6e60..cbf7800db 100644
--- a/sdk/rust/src/lib.rs
+++ b/sdk/rust/src/lib.rs
@@ -48,8 +48,8 @@ pub use async_openai::types::chat::{
 pub use crate::openai::{
     Annotation, DeleteResponseResult, FunctionToolDefinition, IncompleteDetails,
     InputItemsListResponse, InputTokensDetails, ListResponsesOptions, ListResponsesResult, LogProb,
-    MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateRequest, ResponseError,
-    ResponseInput, ResponseItem, ResponseObject, ResponseUsage, ResponsesClient,
-    ResponsesClientSettings, ResponsesContentPart, SseStream, StreamingEvent, TextConfig,
-    TextFormat,
+    MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateOptions,
+    ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
+    ResponseUsage, ResponsesClient, ResponsesClientSettings, ResponsesContentPart, SseStream,
+    StreamingEvent, TextConfig, TextFormat,
 };
diff --git a/sdk/rust/src/openai/mod.rs b/sdk/rust/src/openai/mod.rs
index 4c607457e..17f38e1d0 100644
--- a/sdk/rust/src/openai/mod.rs
+++ b/sdk/rust/src/openai/mod.rs
@@ -3,8 +3,8 @@ mod chat_client;
 mod embedding_client;
 mod json_stream;
 mod live_audio_client;
-pub mod responses_client;
-pub mod responses_types;
+mod responses_client;
+mod responses_types;
 
 pub use self::audio_client::{
     AudioClient, AudioClientSettings, AudioTranscriptionResponse, AudioTranscriptionStream,
@@ -22,6 +22,6 @@ pub use self::responses_types::{
     Annotation, ContentPart as ResponsesContentPart, DeleteResponseResult, FunctionToolDefinition,
     IncompleteDetails, InputItemsListResponse, InputTokensDetails, ListResponsesOptions,
     ListResponsesResult, LogProb, MessageContent, OutputTokensDetails, ReasoningConfig,
-    ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
-    ResponseUsage, StreamingEvent, TextConfig, TextFormat,
+    ResponseCreateOptions, ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem,
+    ResponseObject, ResponseUsage, StreamingEvent, TextConfig, TextFormat,
 };
diff --git a/sdk/rust/src/openai/responses_client.rs b/sdk/rust/src/openai/responses_client.rs
index 2f301c405..b88e88f34 100644
--- a/sdk/rust/src/openai/responses_client.rs
+++ b/sdk/rust/src/openai/responses_client.rs
@@ -15,8 +15,8 @@ use crate::error::{FoundryLocalError, Result};
 
 use super::responses_types::{
     DeleteResponseResult, FunctionToolDefinition, InputItemsListResponse, ListResponsesResult,
-    ReasoningConfig, ResponseCreateRequest, ResponseInput, ResponseObject, StreamingEvent,
-    TextConfig,
+    ReasoningConfig, ResponseCreateOptions, ResponseCreateRequest, ResponseInput, ResponseObject,
+    StreamingEvent, TextConfig,
 };
 
 // ============================================================================
@@ -159,7 +159,7 @@ impl ResponsesClient {
     pub async fn create(
         &self,
         input: ResponseInput,
-        options: Option<ResponseCreateRequest>,
+        options: Option<ResponseCreateOptions>,
     ) -> Result<ResponseObject> {
         self.validate_input(&input)?;
         if let Some(ref opts) = options {
@@ -193,7 +193,7 @@
     pub async fn create_streaming(
         &self,
         input: ResponseInput,
-        options: Option<ResponseCreateRequest>,
+        options: Option<ResponseCreateOptions>,
     ) -> Result<SseStream> {
         self.validate_input(&input)?;
         if let Some(ref opts) = options {
@@ -340,13 +340,13 @@
     fn build_request(
         &self,
         input: ResponseInput,
-        options: Option<ResponseCreateRequest>,
+        options: Option<ResponseCreateOptions>,
         stream: bool,
     ) -> Result<ResponseCreateRequest> {
         // Determine model: options override self.model_id
         let model = options
             .as_ref()
-            .map(|o| o.model.clone())
+            .and_then(|o| o.model.clone())
             .filter(|m|
!m.trim().is_empty())
             .or_else(|| self.model_id.clone())
             .ok_or_else(|| FoundryLocalError::Validation {
@@ -383,11 +383,9 @@
 
         // Apply per-call overrides
         if let Some(opts) = options {
-            if !opts.model.trim().is_empty() {
-                req.model = opts.model;
+            if let Some(m) = opts.model.filter(|m| !m.trim().is_empty()) {
+                req.model = m;
             }
-            // Only override input if the caller passed an options object with explicit input;
-            // in practice options.input will always be overwritten by the positional `input`.
             if let Some(v) = opts.instructions {
                 req.instructions = Some(v);
             }
@@ -635,3 +633,90 @@
         }
     }
 }
+
+// ============================================================================
+// Inline tests
+// ============================================================================
+//
+// These tests live alongside `parse_sse_stream` so they exercise the real
+// implementation rather than reimplementing SSE framing in an external test
+// crate. Anything that only depends on public APIs lives in `tests/unit/`.
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use async_stream::stream;
+
+    /// Drive `parse_sse_stream` from a hand-constructed byte stream and collect
+    /// its yielded events.
+    async fn collect_events(chunks: Vec<&'static str>) -> Vec<StreamingEvent> {
+        let byte_stream = stream! {
+            for chunk in chunks {
+                yield Ok::<Bytes, reqwest::Error>(Bytes::from_static(chunk.as_bytes()));
+            }
+        };
+
+        let parsed = parse_sse_stream(byte_stream);
+        let mut parsed = std::pin::pin!(parsed);
+
+        let mut events = Vec::new();
+        use tokio_stream::StreamExt as _;
+        while let Some(event) = parsed.next().await {
+            events.push(event.expect("SSE event failed to parse"));
+        }
+        events
+    }
+
+    #[tokio::test]
+    async fn parses_complete_event_block() {
+        let payload = "data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\
+            \"output_index\":0,\"content_index\":0,\"delta\":\"Hi\",\"sequence_number\":1}\n\n\
+            data: [DONE]\n\n";
+
+        let events = collect_events(vec![payload]).await;
+        assert_eq!(events.len(), 1);
+        assert!(matches!(
+            events[0],
+            StreamingEvent::OutputTextDelta { ref delta, .. } if delta == "Hi"
+        ));
+    }
+
+    #[tokio::test]
+    async fn done_signal_terminates_stream() {
+        let payload = "data: [DONE]\n\n\
+            data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\
+            \"output_index\":0,\"content_index\":0,\"delta\":\"after-done\",\
+            \"sequence_number\":2}\n\n";
+
+        let events = collect_events(vec![payload]).await;
+        assert!(events.is_empty(), "events after [DONE] must be ignored");
+    }
+
+    #[tokio::test]
+    async fn handles_event_split_across_chunks() {
+        // Split a single SSE block across two byte chunks to make sure the
+        // parser buffers correctly.
+        let part1 = "data: {\"type\":\"response.output_text.delta\",\
+            \"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,";
+        let part2 = "\"delta\":\"split\",\"sequence_number\":3}\n\ndata: [DONE]\n\n";
+
+        let events = collect_events(vec![part1, part2]).await;
+        assert_eq!(events.len(), 1);
+        assert!(matches!(
+            events[0],
+            StreamingEvent::OutputTextDelta { ref delta, ..
} if delta == "split" + )); + } + + #[tokio::test] + async fn skips_event_lines_and_blank_blocks() { + let payload = "event: response.output_text.delta\n\ + data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\ + \"output_index\":0,\"content_index\":0,\"delta\":\"ok\",\"sequence_number\":4}\n\n\ + \n\n\ + data: [DONE]\n\n"; + + let events = collect_events(vec![payload]).await; + assert_eq!(events.len(), 1); + } +} diff --git a/sdk/rust/src/openai/responses_types.rs b/sdk/rust/src/openai/responses_types.rs index ffb2d2200..59860c382 100644 --- a/sdk/rust/src/openai/responses_types.rs +++ b/sdk/rust/src/openai/responses_types.rs @@ -279,6 +279,40 @@ pub struct ResponseCreateRequest { pub text: Option, } +/// Per-call overrides for [`ResponsesClient::create`] and +/// [`ResponsesClient::create_streaming`]. +/// +/// Every field is optional — the client merges these on top of +/// [`ResponsesClientSettings`] and the constructor-supplied model. Unlike +/// [`ResponseCreateRequest`] (the wire-serialised request body), this type is +/// intended purely as caller-friendly input and never needs to be fully +/// populated. +/// +/// [`ResponsesClient::create`]: crate::ResponsesClient::create +/// [`ResponsesClient::create_streaming`]: crate::ResponsesClient::create_streaming +/// [`ResponsesClientSettings`]: crate::ResponsesClientSettings +#[derive(Debug, Clone, Default)] +pub struct ResponseCreateOptions { + pub model: Option, + pub instructions: Option, + pub previous_response_id: Option, + pub tools: Option>, + pub tool_choice: Option, + pub store: Option, + pub temperature: Option, + pub top_p: Option, + pub max_output_tokens: Option, + pub frequency_penalty: Option, + pub presence_penalty: Option, + pub seed: Option, + pub truncation: Option, + pub parallel_tool_calls: Option, + pub metadata: Option>, + pub user: Option, + pub reasoning: Option, + pub text: Option, +} + // ============================================================================ // Response Object // ============================================================================ diff --git a/sdk/rust/tests/integration/responses_test.rs b/sdk/rust/tests/integration/responses_test.rs index f40f354fe..7944e7319 100644 --- a/sdk/rust/tests/integration/responses_test.rs +++ b/sdk/rust/tests/integration/responses_test.rs @@ -66,27 +66,11 @@ async fn non_streaming_simple_string() { async fn non_streaming_with_options() { let (client, model) = setup_responses_client().await; - let opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: ResponseInput::Text("Say hello.".into()), + let opts = foundry_local_sdk::ResponseCreateOptions { temperature: Some(0.0), max_output_tokens: Some(50), - instructions: None, - previous_response_id: None, - tools: None, - tool_choice: None, - stream: None, store: Some(true), - top_p: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let response = client @@ -151,27 +135,10 @@ async fn multi_turn_previous_response_id() { let first_id = first.id.clone(); // Second turn referencing the first - let opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: ResponseInput::Text("What is my favourite colour?".into()), + let opts = foundry_local_sdk::ResponseCreateOptions { previous_response_id: Some(first_id), - instructions: None, - tools: None, - tool_choice: None, - 
stream: None, store: Some(true), - temperature: None, - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let second = client @@ -300,27 +267,12 @@ async fn tool_calling_round_trip() { strict: None, }; - let opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: ResponseInput::Text("What is 6 times 7? Use the multiply tool.".into()), + let opts = foundry_local_sdk::ResponseCreateOptions { tools: Some(vec![multiply_tool]), tool_choice: Some(json!("required")), - instructions: None, - previous_response_id: None, - stream: None, store: Some(true), temperature: Some(0.0), - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let response = client @@ -363,27 +315,11 @@ async fn tool_calling_round_trip() { status: None, }]); - let final_opts = foundry_local_sdk::ResponseCreateRequest { - model: model.info().id.clone(), - input: tool_result_input.clone(), + let final_opts = foundry_local_sdk::ResponseCreateOptions { previous_response_id: Some(response.id.clone()), - instructions: None, - tools: None, - tool_choice: None, - stream: None, store: Some(true), temperature: Some(0.0), - top_p: None, - max_output_tokens: None, - frequency_penalty: None, - presence_penalty: None, - seed: None, - truncation: None, - parallel_tool_calls: None, - metadata: None, - user: None, - reasoning: None, - text: None, + ..Default::default() }; let final_response = client @@ -403,8 +339,16 @@ async fn tool_calling_round_trip() { #[tokio::test] async fn vision_image_base64() { - // This test requires a vision-capable model (phi-4-multimodal or similar). - // It is skipped if no such model is available. + // Skip unless an explicit vision-capable model is provided via the env var. + // This avoids accidentally treating a non-vision model failure as a pass. + let Ok(vision_model_id) = std::env::var("FOUNDRY_VISION_MODEL_ID") else { + eprintln!( + "vision_image_base64 skipped: set FOUNDRY_VISION_MODEL_ID to a vision-capable \ + model alias to run this test." 
+ ); + return; + }; + let manager = common::get_test_manager(); manager .start_web_service() @@ -414,10 +358,6 @@ async fn vision_image_base64() { // Small 1x1 red PNG, base64-encoded let tiny_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI6QAAAABJRU5ErkJggg=="; - // Try to use the test model (may not be vision-capable; test would then fail at API level) - let vision_model_id = - std::env::var("FOUNDRY_VISION_MODEL_ID").unwrap_or_else(|_| "phi-4-multimodal".to_string()); - let client = ResponsesClient::new( manager.urls().expect("urls").first().expect("url"), Some(&vision_model_id), @@ -440,15 +380,10 @@ async fn vision_image_base64() { status: None, }]); - let result = client.create(input, None).await; - match result { - Ok(resp) => { - println!("Vision response: {}", resp.output_text()); - assert_eq!(resp.status, "completed"); - } - Err(e) => { - // Model may not be loaded; skip gracefully - println!("Vision test skipped (model not available): {e}"); - } - } + let resp = client + .create(input, None) + .await + .expect("vision create failed (FOUNDRY_VISION_MODEL_ID is set but request failed)"); + println!("Vision response: {}", resp.output_text()); + assert_eq!(resp.status, "completed"); } diff --git a/sdk/rust/tests/unit/responses_test.rs b/sdk/rust/tests/unit/responses_test.rs index 26f7bec7c..86fdf7d06 100644 --- a/sdk/rust/tests/unit/responses_test.rs +++ b/sdk/rust/tests/unit/responses_test.rs @@ -377,56 +377,11 @@ fn streaming_event_error_deserializes() { } // ── SSE parser ─────────────────────────────────────────────────────────────── - -/// Build a minimal SSE block string from event type and JSON data. -fn sse_block(event_type: &str, data: &serde_json::Value) -> String { - format!("event: {event_type}\ndata: {data}\n\n") -} - -#[tokio::test] -async fn sse_parser_handles_complete_events() { - use bytes::Bytes; - - // Build a minimal SSE payload with one delta event followed by [DONE] - let delta_json = json!({ - "type": "response.output_text.delta", - "item_id": "item_1", - "output_index": 0, - "content_index": 0, - "delta": "Hi", - "sequence_number": 1 - }); - - let payload = format!( - "{}{}", - sse_block("response.output_text.delta", &delta_json), - "data: [DONE]\n\n" - ); - - let bytes = Bytes::from(payload); - - // Test the SSE logic by parsing the byte buffer as the SSE parser would. - let content = std::str::from_utf8(&bytes).unwrap().to_string(); - let blocks: Vec<&str> = content - .split("\n\n") - .filter(|b| !b.trim().is_empty()) - .collect(); - - for block in &blocks { - let trimmed = block.trim(); - if trimmed == "data: [DONE]" { - break; - } - let data_line = trimmed - .split('\n') - .find(|l| l.starts_with("data: ")) - .map(|l| &l[6..]); - if let Some(json_str) = data_line { - let event: StreamingEvent = serde_json::from_str(json_str).unwrap(); - assert!(matches!(event, StreamingEvent::OutputTextDelta { .. })); - } - } -} +// +// The SSE parser itself (`parse_sse_stream`) lives in `responses_client.rs` and +// is exercised by `#[cfg(test)] mod tests` in that file so the real +// implementation is covered. The check below just verifies the externally +// observable terminator string is what we expect. 
#[test] fn sse_done_signal_is_recognized() { From 84045bb996d80ea5bfe176db826bd9cf6dfff4ed Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 16:23:38 -0400 Subject: [PATCH 3/5] rust: use web service for Responses sample Pivot the Rust Responses work away from SDK-native client/types and demonstrate the intended pattern instead: use FoundryLocalManager for setup/model/web-service lifecycle, then call /v1/responses via raw HTTP. Adds a focused responses_web_service example and integration tests for non-streaming, streaming SSE, and function-calling flows against the local OpenAI-compatible web service. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/rust/Cargo.toml | 6 +- sdk/rust/examples/responses.rs | 198 ----- sdk/rust/examples/responses_web_service.rs | 266 +++++++ sdk/rust/src/foundry_local_manager.rs | 15 - sdk/rust/src/lib.rs | 10 - sdk/rust/src/openai/mod.rs | 10 - sdk/rust/src/openai/responses_client.rs | 722 ------------------- sdk/rust/src/openai/responses_types.rs | 695 ------------------ sdk/rust/tests/integration/responses_test.rs | 595 +++++++-------- sdk/rust/tests/unit/main.rs | 5 - sdk/rust/tests/unit/responses_test.rs | 442 ------------ 11 files changed, 528 insertions(+), 2436 deletions(-) delete mode 100644 sdk/rust/examples/responses.rs create mode 100644 sdk/rust/examples/responses_web_service.rs delete mode 100644 sdk/rust/src/openai/responses_client.rs delete mode 100644 sdk/rust/src/openai/responses_types.rs delete mode 100644 sdk/rust/tests/unit/main.rs delete mode 100644 sdk/rust/tests/unit/responses_test.rs diff --git a/sdk/rust/Cargo.toml b/sdk/rust/Cargo.toml index d11a15c2e..d5517137e 100644 --- a/sdk/rust/Cargo.toml +++ b/sdk/rust/Cargo.toml @@ -25,8 +25,6 @@ tokio-stream = "0.1" tokio-util = "0.7" futures-core = "0.3" reqwest = { version = "0.12", features = ["json", "stream"] } -bytes = "1" -async-stream = "0.3" urlencoding = "2" async-openai = { version = "0.33", default-features = false, features = ["chat-completion-types", "embedding-types"] } @@ -37,8 +35,8 @@ serde_json = "1" serde = { version = "1", features = ["derive"] } [[example]] -name = "responses" -path = "examples/responses.rs" +name = "responses_web_service" +path = "examples/responses_web_service.rs" [[example]] name = "chat_completion" diff --git a/sdk/rust/examples/responses.rs b/sdk/rust/examples/responses.rs deleted file mode 100644 index 8d697988a..000000000 --- a/sdk/rust/examples/responses.rs +++ /dev/null @@ -1,198 +0,0 @@ -//! Responses API example demonstrating non-streaming and streaming usage, -//! tool calling, and multi-turn conversations. - -use std::io::{self, Write}; - -use foundry_local_sdk::{ - FoundryLocalConfig, FoundryLocalError, FoundryLocalManager, FunctionToolDefinition, - ResponseInput, ResponseItem, StreamingEvent, -}; -use serde_json::json; -use tokio_stream::StreamExt; - -type Result = std::result::Result; - -#[tokio::main] -async fn main() -> Result<()> { - // ── 1. Initialise the manager ──────────────────────────────────────────── - let config = FoundryLocalConfig::new("foundry_local_responses_example"); - let manager = FoundryLocalManager::create(config)?; - - // ── 2. Start the web service ───────────────────────────────────────────── - println!("Starting web service…"); - manager.start_web_service().await?; - println!("Web service URLs: {:?}", manager.urls()?); - - // ── 3. 
Pick a model ────────────────────────────────────────────────────── - let models = manager.catalog().get_models().await?; - let model_alias = ["phi-4-mini", "phi-3.5-mini", "qwen2.5-0.5b"] - .iter() - .find(|alias| models.iter().any(|m| m.alias() == **alias)) - .map(|s| s.to_string()) - .or_else(|| models.first().map(|m| m.alias().to_string())) - .expect("No models available in the catalog"); - - println!("Using model: {model_alias}"); - let model = manager.catalog().get_model(&model_alias).await?; - - if !model.is_cached().await? { - println!("Downloading model {model_alias}…"); - model.download(None::).await?; - } - model.load().await?; - println!("Model loaded."); - - // ── 4. Create the Responses client ─────────────────────────────────────── - let mut client = manager.get_responses_client(Some(&model.info().id))?; - client.settings.store = Some(true); - - // ── 5. Non-streaming request ───────────────────────────────────────────── - println!("\n─── Non-streaming ───────────────────────────────────────────────"); - let response = client - .create( - ResponseInput::Text("What is the capital of France? Reply in one word.".into()), - None, - ) - .await?; - - println!("Status : {}", response.status); - println!("Answer : {}", response.output_text()); - if let Some(usage) = &response.usage { - println!( - "Tokens : {} in / {} out", - usage.input_tokens, usage.output_tokens - ); - } - - // ── 6. Streaming request ───────────────────────────────────────────────── - println!("\n─── Streaming ───────────────────────────────────────────────────"); - print!("Story : "); - io::stdout().flush().ok(); - - let mut stream = client - .create_streaming( - ResponseInput::Text( - "Tell me a two-sentence story about a robot that loves ice cream.".into(), - ), - None, - ) - .await?; - - let mut full_text = String::new(); - while let Some(event) = stream.next().await { - match event? { - StreamingEvent::OutputTextDelta { delta, .. } => { - print!("{delta}"); - io::stdout().flush().ok(); - full_text.push_str(&delta); - } - StreamingEvent::ResponseCompleted { response, .. } => { - if let Some(usage) = response.usage.as_ref() { - println!("\n[completed, {} output tokens]", usage.output_tokens); - } else { - println!("\n[completed]"); - } - } - _ => {} - } - } - - // ── 7. Multi-turn: follow-up using previous_response_id ───────────────── - println!("\n─── Multi-turn ──────────────────────────────────────────────────"); - let first = client - .create( - ResponseInput::Text("My favourite number is 42. Remember this.".into()), - None, - ) - .await?; - println!("Turn 1: {}", first.output_text()); - - let follow_up_opts = foundry_local_sdk::ResponseCreateOptions { - previous_response_id: Some(first.id.clone()), - store: Some(true), - temperature: Some(0.0), - ..Default::default() - }; - - let second = client - .create( - ResponseInput::Text("What is my favourite number?".into()), - Some(follow_up_opts), - ) - .await?; - println!("Turn 2: {}", second.output_text()); - - // ── 8. 
Tool calling ──────────────────────────────────────────────────
-    println!("\n─── Tool calling ────────────────────────────────────────────────");
-    let add_tool = FunctionToolDefinition {
-        tool_type: "function".into(),
-        name: "add".into(),
-        description: Some("Add two integers and return the sum.".into()),
-        parameters: Some(json!({
-            "type": "object",
-            "properties": {
-                "a": { "type": "integer", "description": "First addend" },
-                "b": { "type": "integer", "description": "Second addend" }
-            },
-            "required": ["a", "b"]
-        })),
-        strict: None,
-    };
-
-    let tool_opts = foundry_local_sdk::ResponseCreateOptions {
-        tools: Some(vec![add_tool]),
-        tool_choice: Some(json!("required")),
-        store: Some(true),
-        temperature: Some(0.0),
-        ..Default::default()
-    };
-
-    let tool_response = client
-        .create(
-            ResponseInput::Text("What is 123 + 456? Use the add tool.".into()),
-            Some(tool_opts),
-        )
-        .await?;
-
-    if let Some(ResponseItem::FunctionCall {
-        call_id,
-        name,
-        arguments,
-        ..
-    }) = tool_response
-        .output
-        .iter()
-        .find(|i| matches!(i, ResponseItem::FunctionCall { .. }))
-    {
-        println!("Model called tool: {name}({arguments})");
-        let args: serde_json::Value = serde_json::from_str(arguments)?;
-        let a = args["a"].as_i64().unwrap_or(0);
-        let b = args["b"].as_i64().unwrap_or(0);
-        let sum = a + b;
-
-        let result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
-            id: None,
-            call_id: call_id.clone(),
-            output: sum.to_string(),
-            status: None,
-        }]);
-
-        let final_opts = foundry_local_sdk::ResponseCreateOptions {
-            previous_response_id: Some(tool_response.id.clone()),
-            store: Some(true),
-            temperature: Some(0.0),
-            ..Default::default()
-        };
-
-        let final_response = client.create(result_input, Some(final_opts)).await?;
-        println!("Tool result: {}", final_response.output_text());
-    } else {
-        println!("No tool call in response (model may not support tool calling)");
-    }
-
-    // ── 9. Clean up ──────────────────────────────────────────────────────
-    model.unload().await?;
-    manager.stop_web_service().await?;
-    println!("\nDone.");
-    Ok(())
-}
diff --git a/sdk/rust/examples/responses_web_service.rs b/sdk/rust/examples/responses_web_service.rs
new file mode 100644
index 000000000..b8ef5d391
--- /dev/null
+++ b/sdk/rust/examples/responses_web_service.rs
@@ -0,0 +1,266 @@
+//! Responses API web-service sample.
+//!
+//! This sample uses the Rust SDK only for Foundry Local setup and lifecycle:
+//! manager initialization, model lookup/download/load, and local web-service
+//! start/stop. The actual `/v1/responses` calls use raw HTTP against the
+//! OpenAI-compatible local endpoint.
+
+use std::error::Error;
+use std::io::{self, Write};
+
+use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
+use serde_json::{json, Value};
+
+type SampleResult<T> = Result<T, Box<dyn Error>>;
+
+#[tokio::main]
+async fn main() -> SampleResult<()> {
+    let config = FoundryLocalConfig::new("foundry_local_responses_web_service_sample");
+    let manager = FoundryLocalManager::create(config)?;
+
+    let models = manager.catalog().get_models().await?;
+    let model_alias = ["qwen2.5-0.5b", "phi-4-mini", "phi-3.5-mini"]
+        .iter()
+        .find(|alias| models.iter().any(|m| m.alias() == **alias))
+        .map(|s| s.to_string())
+        .or_else(|| models.first().map(|m| m.alias().to_string()))
+        .expect("No models available in the catalog");
+
+    let model = manager.catalog().get_model(&model_alias).await?;
+    if !model.is_cached().await?
{
+        println!("Downloading model '{}'...", model.alias());
+        model
+            .download(Some(|progress: f64| println!("  {progress:.1}%")))
+            .await?;
+    }
+
+    println!("Loading model '{}'...", model.alias());
+    model.load().await?;
+
+    println!("Starting local OpenAI-compatible web service...");
+    manager.start_web_service().await?;
+    let base_url = format!(
+        "{}/v1",
+        manager
+            .urls()?
+            .first()
+            .expect("web service did not return a URL")
+            .trim_end_matches('/')
+    );
+    println!("Using base URL: {base_url}");
+
+    let result = run_responses_flow(&base_url, model.id()).await;
+
+    manager.stop_web_service().await.ok();
+    model.unload().await.ok();
+
+    result
+}
+
+async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> {
+    let http = reqwest::Client::new();
+
+    println!("\n--- Non-streaming response ---");
+    let response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "input": "What is 2 + 2? Respond with just the answer.",
+            "temperature": 0.0
+        }),
+    )
+    .await?;
+    println!("Assistant: {}", output_text(&response));
+
+    println!("\n--- Streaming response ---");
+    print!("Assistant: ");
+    io::stdout().flush().ok();
+    let streaming_response = http
+        .post(format!("{base_url}/responses"))
+        .json(&json!({
+            "model": model_id,
+            "input": "Count from 1 to 3.",
+            "temperature": 0.0,
+            "stream": true
+        }))
+        .send()
+        .await?;
+    let streamed = read_responses_sse(streaming_response).await?;
+    println!("\nSaw {} text delta event(s).", streamed.delta_count);
+    if streamed.delta_count == 0 || !streamed.completed {
+        return Err("stream did not include both text delta and completion events".into());
+    }
+
+    println!("\n--- Function calling response ---");
+    let weather_tool = get_weather_tool();
+    let tool_response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "input": "Use the get_weather tool for Seattle, then answer.",
+            "tools": [weather_tool.clone()],
+            "tool_choice": "required",
+            "temperature": 0.0,
+            "store": true
+        }),
+    )
+    .await?;
+    let (call_id, name) = find_function_call(&tool_response)
+        .ok_or("expected a function_call item in the tool response")?;
+    println!("Model requested tool call: {name} ({call_id})");
+
+    let final_response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "previous_response_id": tool_response["id"].clone(),
+            "input": [{
+                "type": "function_call_output",
+                "call_id": call_id,
+                "output": "Seattle weather is 72F and sunny."
+            }],
+            "tools": [weather_tool],
+            "temperature": 0.0
+        }),
+    )
+    .await?;
+    println!("Assistant: {}", output_text(&final_response));
+
+    Ok(())
+}
+
+async fn post_response_json(
+    http: &reqwest::Client,
+    base_url: &str,
+    body: Value,
+) -> SampleResult<Value> {
+    let response = http
+        .post(format!("{base_url}/responses"))
+        .json(&body)
+        .send()
+        .await?;
+    let status = response.status();
+    let text = response.text().await?;
+    if !status.is_success() {
+        return Err(format!("Responses API returned {status}: {text}").into());
+    }
+    Ok(serde_json::from_str(&text)?)
+}
+
+fn output_text(response: &Value) -> String {
+    response
+        .get("output")
+        .and_then(Value::as_array)
+        .into_iter()
+        .flatten()
+        .find_map(|item| {
+            if item.get("type").and_then(Value::as_str) != Some("message") {
+                return None;
+            }
+            match item.get("content") {
+                Some(Value::String(text)) => Some(text.clone()),
+                Some(Value::Array(parts)) => Some(
+                    parts
+                        .iter()
+                        .filter_map(|part| {
+                            (part.get("type").and_then(Value::as_str) == Some("output_text"))
+                                .then(|| part.get("text").and_then(Value::as_str))
+                                .flatten()
+                        })
+                        .collect::<String>(),
+                ),
+                _ => None,
+            }
+        })
+        .unwrap_or_default()
+}
+
+fn find_function_call(response: &Value) -> Option<(String, String)> {
+    response.get("output")?.as_array()?.iter().find_map(|item| {
+        if item.get("type").and_then(Value::as_str) != Some("function_call") {
+            return None;
+        }
+        let call_id = item.get("call_id")?.as_str()?.to_string();
+        let name = item.get("name")?.as_str()?.to_string();
+        Some((call_id, name))
+    })
+}
+
+fn get_weather_tool() -> Value {
+    json!({
+        "type": "function",
+        "name": "get_weather",
+        "description": "Get the current weather for a city.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": { "type": "string", "description": "City name" }
+            },
+            "required": ["city"]
+        }
+    })
+}
+
+#[derive(Default)]
+struct StreamSummary {
+    delta_count: usize,
+    completed: bool,
+}
+
+async fn read_responses_sse(mut response: reqwest::Response) -> SampleResult<StreamSummary> {
+    let status = response.status();
+    if !status.is_success() {
+        let text = response.text().await?;
+        return Err(format!("Responses API returned {status}: {text}").into());
+    }
+
+    let mut buffer = String::new();
+    let mut summary = StreamSummary::default();
+
+    while let Some(chunk) = response.chunk().await? {
+        buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n"));
+        while let Some(block_end) = buffer.find("\n\n") {
+            let block = buffer[..block_end].to_string();
+            buffer = buffer[block_end + 2..].to_string();
+            if handle_sse_block(&block, &mut summary) {
+                return Ok(summary);
+            }
+        }
+    }
+
+    Ok(summary)
+}
+
+fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool {
+    let data = block
+        .lines()
+        .filter_map(|line| line.trim().strip_prefix("data: "))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    if data.is_empty() {
+        return false;
+    }
+    if data == "[DONE]" {
+        return true;
+    }
+
+    if let Ok(event) = serde_json::from_str::<Value>(&data) {
+        match event.get("type").and_then(Value::as_str) {
+            Some("response.output_text.delta") => {
+                summary.delta_count += 1;
+                if let Some(delta) = event.get("delta").and_then(Value::as_str) {
+                    print!("{delta}");
+                    io::stdout().flush().ok();
+                }
+            }
+            Some("response.completed") => summary.completed = true,
+            _ => {}
+        }
+    }
+
+    false
+}
diff --git a/sdk/rust/src/foundry_local_manager.rs b/sdk/rust/src/foundry_local_manager.rs
index 95ba087ed..0c22ef154 100644
--- a/sdk/rust/src/foundry_local_manager.rs
+++ b/sdk/rust/src/foundry_local_manager.rs
@@ -13,7 +13,6 @@ use crate::configuration::{Configuration, FoundryLocalConfig, Logger};
 use crate::detail::core_interop::CoreInterop;
 use crate::detail::ModelLoadManager;
 use crate::error::{FoundryLocalError, Result};
-use crate::openai::ResponsesClient;
 use crate::types::{EpDownloadResult, EpInfo};
 
 /// Global singleton holder — only stores a successfully initialised manager.
@@ -136,20 +135,6 @@ impl FoundryLocalManager {
         Ok(())
     }
 
-    /// Get a [`ResponsesClient`] for the given model.
-    ///
-    /// The web service must be started before using the returned client.
-    /// Pass `model_id = None` to defer model selection to per-request options.
-    pub fn get_responses_client(&self, model_id: Option<&str>) -> Result<ResponsesClient> {
-        let urls = self.urls()?;
-        let base_url = urls.first().ok_or_else(|| FoundryLocalError::Validation {
-            reason:
-                "Web service not started. Call start_web_service() before getting a ResponsesClient."
-                    .into(),
-        })?;
-        Ok(ResponsesClient::new(base_url, model_id))
-    }
-
     /// Discover available execution providers and their registration status.
     pub fn discover_eps(&self) -> Result<Vec<EpInfo>> {
         let raw = self.core.execute_command("discover_eps", None)?;
diff --git a/sdk/rust/src/lib.rs b/sdk/rust/src/lib.rs
index cbf7800db..9fb4bb85b 100644
--- a/sdk/rust/src/lib.rs
+++ b/sdk/rust/src/lib.rs
@@ -43,13 +43,3 @@ pub use async_openai::types::chat::{
     CreateChatCompletionResponse, CreateChatCompletionStreamResponse, FinishReason, FunctionCall,
     FunctionCallStream,
 };
-
-// Re-export Responses API types.
-pub use crate::openai::{
-    Annotation, DeleteResponseResult, FunctionToolDefinition, IncompleteDetails,
-    InputItemsListResponse, InputTokensDetails, ListResponsesOptions, ListResponsesResult, LogProb,
-    MessageContent, OutputTokensDetails, ReasoningConfig, ResponseCreateOptions,
-    ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem, ResponseObject,
-    ResponseUsage, ResponsesClient, ResponsesClientSettings, ResponsesContentPart, SseStream,
-    StreamingEvent, TextConfig, TextFormat,
-};
diff --git a/sdk/rust/src/openai/mod.rs b/sdk/rust/src/openai/mod.rs
index 17f38e1d0..ae0f1996a 100644
--- a/sdk/rust/src/openai/mod.rs
+++ b/sdk/rust/src/openai/mod.rs
@@ -3,8 +3,6 @@ mod chat_client;
 mod embedding_client;
 mod json_stream;
 mod live_audio_client;
-mod responses_client;
-mod responses_types;
 
 pub use self::audio_client::{
     AudioClient, AudioClientSettings, AudioTranscriptionResponse, AudioTranscriptionStream,
@@ -17,11 +15,3 @@ pub use self::live_audio_client::{
     ContentPart, CoreErrorResponse, LiveAudioTranscriptionOptions, LiveAudioTranscriptionResponse,
     LiveAudioTranscriptionSession, LiveAudioTranscriptionStream,
 };
-pub use self::responses_client::{ResponsesClient, ResponsesClientSettings, SseStream};
-pub use self::responses_types::{
-    Annotation, ContentPart as ResponsesContentPart, DeleteResponseResult, FunctionToolDefinition,
-    IncompleteDetails, InputItemsListResponse, InputTokensDetails, ListResponsesOptions,
-    ListResponsesResult, LogProb, MessageContent, OutputTokensDetails, ReasoningConfig,
-    ResponseCreateOptions, ResponseCreateRequest, ResponseError, ResponseInput, ResponseItem,
-    ResponseObject, ResponseUsage, StreamingEvent, TextConfig, TextFormat,
-};
diff --git a/sdk/rust/src/openai/responses_client.rs b/sdk/rust/src/openai/responses_client.rs
deleted file mode 100644
index b88e88f34..000000000
--- a/sdk/rust/src/openai/responses_client.rs
+++ /dev/null
@@ -1,722 +0,0 @@
-//! HTTP client for the OpenAI Responses API.
-
-use std::collections::HashMap;
-use std::pin::Pin;
-use std::task::{Context, Poll};
-use std::time::Duration;
-
-use async_stream::try_stream;
-use bytes::Bytes;
-use futures_core::Stream;
-use reqwest::Client;
-use serde_json::Value;
-
-use crate::error::{FoundryLocalError, Result};
-
-use super::responses_types::{
-    DeleteResponseResult, FunctionToolDefinition, InputItemsListResponse, ListResponsesResult,
-    ReasoningConfig, ResponseCreateOptions, ResponseCreateRequest, ResponseInput, ResponseObject,
-    StreamingEvent, TextConfig,
-};
-
-// ============================================================================
-// Settings
-// ============================================================================
-
-/// Configuration applied to every request made by a [`ResponsesClient`].
-///
-/// Use the public fields to set defaults; individual calls can override them
-/// via the `options` parameter.
-#[derive(Debug, Clone)]
-pub struct ResponsesClientSettings {
-    pub instructions: Option<String>,
-    pub temperature: Option<f64>,
-    pub top_p: Option<f64>,
-    pub max_output_tokens: Option<u32>,
-    pub frequency_penalty: Option<f64>,
-    pub presence_penalty: Option<f64>,
-    /// Tool choice strategy (serialised as JSON).
-    pub tool_choice: Option<Value>,
-    /// Truncation strategy: `"auto"` or `"disabled"`.
-    pub truncation: Option<String>,
-    pub parallel_tool_calls: Option<bool>,
-    /// Whether to persist the response for later retrieval.
-    ///
-    /// Defaults to `None`, which omits the field and lets the server decide.
-    pub store: Option<bool>,
-    pub metadata: Option<HashMap<String, String>>,
-    pub reasoning: Option<ReasoningConfig>,
-    pub text: Option<TextConfig>,
-    pub seed: Option<i64>,
-    /// Request timeout used for non-streaming calls; streaming calls use this as
-    /// a connect timeout so long-running streams are not cut off mid-response.
-    pub timeout: Duration,
-}
-
-impl Default for ResponsesClientSettings {
-    fn default() -> Self {
-        Self {
-            store: None,
-            instructions: None,
-            temperature: None,
-            top_p: None,
-            max_output_tokens: None,
-            frequency_penalty: None,
-            presence_penalty: None,
-            tool_choice: None,
-            truncation: None,
-            parallel_tool_calls: None,
-            metadata: None,
-            reasoning: None,
-            text: None,
-            seed: None,
-            timeout: Duration::from_secs(60),
-        }
-    }
-}
-
-impl ResponsesClientSettings {
-    /// Create settings with sensible defaults.
-    pub fn new() -> Self {
-        Self::default()
-    }
-}
-
-// ============================================================================
-// SSE Stream
-// ============================================================================
-
-/// A stream of [`StreamingEvent`]s parsed from a Server-Sent Events response body.
-pub struct SseStream {
-    inner: Pin<Box<dyn Stream<Item = Result<StreamingEvent>> + Send>>,
-}
-
-impl Stream for SseStream {
-    type Item = Result<StreamingEvent>;
-
-    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
-        self.inner.as_mut().poll_next(cx)
-    }
-}
-
-// ============================================================================
-// Client
-// ============================================================================
-
-/// Client for the OpenAI Responses API served by Foundry Local's embedded web service.
-///
-/// Unlike the chat/audio/embedding clients (which use FFI via CoreInterop),
-/// this client is HTTP-only and communicates directly with the embedded web service.
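-///
-/// Per-call defaults (timeout, sampling, storage) are read from the public
-/// `settings` field; see [`ResponsesClientSettings`].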
-///
-/// # Example
-/// ```ignore
-/// let manager = FoundryLocalManager::create(config)?;
-/// manager.start_web_service().await?;
-/// let client = manager.get_responses_client(Some("my-model-id"))?;
-///
-/// // Non-streaming
-/// let response = client.create(ResponseInput::Text("Hello!".into()), None).await?;
-/// println!("{}", response.output_text());
-///
-/// // Streaming
-/// use tokio_stream::StreamExt;
-/// let mut stream = client.create_streaming(ResponseInput::Text("Tell me a story".into()), None).await?;
-/// while let Some(event) = stream.next().await {
-///     if let Ok(StreamingEvent::OutputTextDelta { delta, .. }) = event {
-///         print!("{delta}");
-///     }
-/// }
-/// ```
-pub struct ResponsesClient {
-    http: Client,
-    base_url: String,
-    model_id: Option<String>,
-    /// Shared settings applied to every request. Modify via `client.settings`.
-    pub settings: ResponsesClientSettings,
-}
-
-impl ResponsesClient {
-    /// Create a new [`ResponsesClient`].
-    ///
-    /// - `base_url` — base URL of the Foundry Local web service (e.g. `"http://127.0.0.1:5273"`).
-    ///   Trailing slashes are stripped.
-    /// - `model_id` — default model used when not specified per-request.
-    pub fn new(base_url: &str, model_id: Option<&str>) -> Self {
-        let base_url = base_url.trim_end_matches('/').to_owned();
-        Self {
-            http: Client::new(),
-            base_url,
-            model_id: model_id.map(str::to_owned),
-            settings: ResponsesClientSettings::default(),
-        }
-    }
-
-    // ── Public API ───────────────────────────────────────────────────────────
-
-    /// Create a model response (non-streaming).
-    ///
-    /// Settings are merged in order: `model + input` → `self.settings` → `options`.
-    pub async fn create(
-        &self,
-        input: ResponseInput,
-        options: Option<ResponseCreateOptions>,
-    ) -> Result<ResponseObject> {
-        self.validate_input(&input)?;
-        if let Some(ref opts) = options {
-            self.validate_tools(opts.tools.as_deref())?;
-        }
-
-        let body = self.build_request(input, options, false)?;
-        let resp = self
-            .http
-            .post(self.url("/v1/responses"))
-            .timeout(self.request_timeout()?)
-            .json(&body)
-            .send()
-            .await?;
-
-        self.parse_json_response(resp).await
-    }
-
-    /// Create a model response with streaming via Server-Sent Events.
-    ///
-    /// Returns an `impl Stream<Item = Result<StreamingEvent>>` that yields parsed
-    /// events as they arrive. Use `tokio_stream::StreamExt` to iterate:
-    ///
-    /// ```ignore
-    /// use tokio_stream::StreamExt;
-    /// let mut stream = client.create_streaming(input, None).await?;
-    /// while let Some(event) = stream.next().await {
-    ///     // …
-    /// }
-    /// ```
-    pub async fn create_streaming(
-        &self,
-        input: ResponseInput,
-        options: Option<ResponseCreateOptions>,
-    ) -> Result<SseStream> {
-        self.validate_input(&input)?;
-        if let Some(ref opts) = options {
-            self.validate_tools(opts.tools.as_deref())?;
-        }
-
-        let body = self.build_request(input, options, true)?;
-        let http = Client::builder()
-            .connect_timeout(self.request_timeout()?)
-            .build()?;
-        let resp = http
-            .post(self.url("/v1/responses"))
-            .header("Accept", "text/event-stream")
-            .json(&body)
-            .send()
-            .await?;
-
-        if !resp.status().is_success() {
-            let status = resp.status();
-            let text = resp.text().await.unwrap_or_else(|_| status.to_string());
-            return Err(FoundryLocalError::Validation {
-                reason: format!("Responses API error ({status}): {text}"),
-            });
-        }
-
-        let byte_stream = resp.bytes_stream();
-        let parsed = parse_sse_stream(byte_stream);
-        Ok(SseStream {
-            inner: Box::pin(parsed),
-        })
-    }
-
-    /// Retrieve a stored response by ID.
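-    ///
-    /// Stored responses are those persisted via the `store` option. A minimal
-    /// sketch (`resp_abc123` is a placeholder ID):
-    ///
-    /// ```ignore
-    /// let stored = client.get("resp_abc123").await?;
-    /// println!("{}", stored.output_text());
-    /// ```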
-    pub async fn get(&self, response_id: &str) -> Result<ResponseObject> {
-        self.validate_id(response_id, "response_id")?;
-        let url = self.url(&format!(
-            "/v1/responses/{}",
-            urlencoding::encode(response_id)
-        ));
-        let resp = self
-            .http
-            .get(url)
-            .timeout(self.request_timeout()?)
-            .send()
-            .await?;
-        self.parse_json_response(resp).await
-    }
-
-    /// Delete a stored response by ID.
-    pub async fn delete(&self, response_id: &str) -> Result<DeleteResponseResult> {
-        self.validate_id(response_id, "response_id")?;
-        let url = self.url(&format!(
-            "/v1/responses/{}",
-            urlencoding::encode(response_id)
-        ));
-        let resp = self
-            .http
-            .delete(url)
-            .timeout(self.request_timeout()?)
-            .send()
-            .await?;
-        self.parse_json_response(resp).await
-    }
-
-    /// Cancel an in-progress response.
-    pub async fn cancel(&self, response_id: &str) -> Result<ResponseObject> {
-        self.validate_id(response_id, "response_id")?;
-        let url = self.url(&format!(
-            "/v1/responses/{}/cancel",
-            urlencoding::encode(response_id)
-        ));
-        let resp = self
-            .http
-            .post(url)
-            .timeout(self.request_timeout()?)
-            .send()
-            .await?;
-        self.parse_json_response(resp).await
-    }
-
-    /// Retrieve the input items for a stored response.
-    pub async fn get_input_items(&self, response_id: &str) -> Result<InputItemsListResponse> {
-        self.validate_id(response_id, "response_id")?;
-        let url = self.url(&format!(
-            "/v1/responses/{}/input_items",
-            urlencoding::encode(response_id)
-        ));
-        let resp = self
-            .http
-            .get(url)
-            .timeout(self.request_timeout()?)
-            .send()
-            .await?;
-        self.parse_json_response(resp).await
-    }
-
-    /// List all stored responses (extension endpoint).
-    pub async fn list(&self) -> Result<ListResponsesResult> {
-        self.list_with_options(None).await
-    }
-
-    /// List stored responses with optional pagination controls.
-    pub async fn list_with_options(
-        &self,
-        options: Option<&super::responses_types::ListResponsesOptions>,
-    ) -> Result<ListResponsesResult> {
-        let mut req = self.http.get(self.url("/v1/responses"));
-        if let Some(options) = options {
-            let mut query = Vec::new();
-            if let Some(limit) = options.limit {
-                if limit == 0 {
-                    return Err(FoundryLocalError::Validation {
-                        reason: "list limit must be greater than zero.".into(),
-                    });
-                }
-                query.push(("limit", limit.to_string()));
-            }
-            if let Some(order) = &options.order {
-                if order != "asc" && order != "desc" {
-                    return Err(FoundryLocalError::Validation {
-                        reason: "list order must be either \"asc\" or \"desc\".".into(),
-                    });
-                }
-                query.push(("order", order.clone()));
-            }
-            if let Some(after) = &options.after {
-                self.validate_id(after, "after")?;
-                query.push(("after", after.clone()));
-            }
-            req = req.query(&query);
-        }
-        let resp = req.timeout(self.request_timeout()?).send().await?;
-        self.parse_json_response(resp).await
-    }
-
-    // ── Private helpers ──────────────────────────────────────────────────────
-
-    fn url(&self, path: &str) -> String {
-        format!("{}{}", self.base_url, path)
-    }
-
-    /// Merge `input`, `self.settings`, and caller `options` into a single
-    /// `ResponseCreateRequest`.
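-    ///
-    /// Precedence is lowest to highest: the constructor `model_id`, then
-    /// `self.settings`, then per-call `options`; for example, a temperature
-    /// set in `options` overrides one set in `settings`.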
-    fn build_request(
-        &self,
-        input: ResponseInput,
-        options: Option<ResponseCreateOptions>,
-        stream: bool,
-    ) -> Result<ResponseCreateRequest> {
-        // Determine model: options override self.model_id
-        let model = options
-            .as_ref()
-            .and_then(|o| o.model.clone())
-            .filter(|m| !m.trim().is_empty())
-            .or_else(|| self.model_id.clone())
-            .ok_or_else(|| FoundryLocalError::Validation {
-                reason: "Model must be specified in the constructor or via options.model.".into(),
-            })?;
-
-        // Start with settings defaults
-        let s = &self.settings;
-
-        let mut req = ResponseCreateRequest {
-            model,
-            input,
-            stream: Some(stream),
-            // Settings defaults
-            instructions: s.instructions.clone(),
-            temperature: s.temperature,
-            top_p: s.top_p,
-            max_output_tokens: s.max_output_tokens,
-            frequency_penalty: s.frequency_penalty,
-            presence_penalty: s.presence_penalty,
-            tool_choice: s.tool_choice.clone(),
-            truncation: s.truncation.clone(),
-            parallel_tool_calls: s.parallel_tool_calls,
-            store: s.store,
-            metadata: s.metadata.clone(),
-            reasoning: s.reasoning.clone(),
-            text: s.text.clone(),
-            seed: s.seed,
-            // Not in settings
-            previous_response_id: None,
-            tools: None,
-            user: None,
-        };
-
-        // Apply per-call overrides
-        if let Some(opts) = options {
-            if let Some(m) = opts.model.filter(|m| !m.trim().is_empty()) {
-                req.model = m;
-            }
-            if let Some(v) = opts.instructions {
-                req.instructions = Some(v);
-            }
-            if let Some(v) = opts.previous_response_id {
-                req.previous_response_id = Some(v);
-            }
-            if let Some(v) = opts.tools {
-                req.tools = Some(v);
-            }
-            if let Some(v) = opts.tool_choice {
-                req.tool_choice = Some(v);
-            }
-            if let Some(v) = opts.temperature {
-                req.temperature = Some(v);
-            }
-            if let Some(v) = opts.top_p {
-                req.top_p = Some(v);
-            }
-            if let Some(v) = opts.max_output_tokens {
-                req.max_output_tokens = Some(v);
-            }
-            if let Some(v) = opts.frequency_penalty {
-                req.frequency_penalty = Some(v);
-            }
-            if let Some(v) = opts.presence_penalty {
-                req.presence_penalty = Some(v);
-            }
-            if let Some(v) = opts.seed {
-                req.seed = Some(v);
-            }
-            if let Some(v) = opts.truncation {
-                req.truncation = Some(v);
-            }
-            if let Some(v) = opts.parallel_tool_calls {
-                req.parallel_tool_calls = Some(v);
-            }
-            if let Some(v) = opts.store {
-                req.store = Some(v);
-            }
-            if let Some(v) = opts.metadata {
-                req.metadata = Some(v);
-            }
-            if let Some(v) = opts.user {
-                req.user = Some(v);
-            }
-            if let Some(v) = opts.reasoning {
-                req.reasoning = Some(v);
-            }
-            if let Some(v) = opts.text {
-                req.text = Some(v);
-            }
-        }
-
-        Ok(req)
-    }
-
-    fn validate_input(&self, input: &ResponseInput) -> Result<()> {
-        match input {
-            ResponseInput::Text(s) if s.trim().is_empty() => Err(FoundryLocalError::Validation {
-                reason: "Input string cannot be empty.".into(),
-            }),
-            ResponseInput::Items(items) if items.is_empty() => Err(FoundryLocalError::Validation {
-                reason: "Input items array cannot be empty.".into(),
-            }),
-            ResponseInput::Items(items) => {
-                for item in items {
-                    Self::validate_response_item(item)?;
-                }
-                Ok(())
-            }
-            _ => Ok(()),
-        }
-    }
-
-    fn validate_response_item(item: &super::responses_types::ResponseItem) -> Result<()> {
-        match item {
-            super::responses_types::ResponseItem::Message {
-                content: super::responses_types::MessageContent::Parts(parts),
-                ..
-            } => {
-                for part in parts {
-                    Self::validate_content_part(part)?;
-                }
-            }
-            super::responses_types::ResponseItem::Reasoning {
-                content: Some(parts),
-                ..
-            } => {
-                for part in parts {
-                    Self::validate_content_part(part)?;
-                }
-            }
-            _ => {}
-        }
-        Ok(())
-    }
-
-    fn validate_content_part(part: &super::responses_types::ContentPart) -> Result<()> {
-        if let super::responses_types::ContentPart::InputImage {
-            image_url,
-            image_data,
-            ..
-        } = part
-        {
-            let has_image_url = image_url.as_ref().is_some_and(|v| !v.trim().is_empty());
-            let has_image_data = image_data.as_ref().is_some_and(|v| !v.trim().is_empty());
-            if has_image_url == has_image_data {
-                return Err(FoundryLocalError::Validation {
-                    reason:
-                        "Provide exactly one of image_url or image_data for input_image content."
-                            .into(),
-                });
-            }
-        }
-        Ok(())
-    }
-
-    fn validate_tools(&self, tools: Option<&[FunctionToolDefinition]>) -> Result<()> {
-        let Some(tools) = tools else {
-            return Ok(());
-        };
-        for tool in tools {
-            if tool.tool_type != "function" {
-                return Err(FoundryLocalError::Validation {
-                    reason: format!(
-                        "Each tool must have type \"function\", got \"{}\".",
-                        tool.tool_type
-                    ),
-                });
-            }
-            if tool.name.trim().is_empty() {
-                return Err(FoundryLocalError::Validation {
-                    reason: "Each tool must have a non-empty \"name\".".into(),
-                });
-            }
-        }
-        Ok(())
-    }
-
-    fn validate_id(&self, id: &str, param: &str) -> Result<()> {
-        if id.trim().is_empty() {
-            return Err(FoundryLocalError::Validation {
-                reason: format!("{param} must be a non-empty string."),
-            });
-        }
-        // OpenAI does not publish a max ID length; keep this aligned with the
-        // JS SDK to avoid surprising client-side rejections of valid server IDs.
-        if id.len() > 1024 {
-            return Err(FoundryLocalError::Validation {
-                reason: format!("{param} exceeds maximum length (1024)."),
-            });
-        }
-        Ok(())
-    }
-
-    fn request_timeout(&self) -> Result<Duration> {
-        if self.settings.timeout.is_zero() {
-            return Err(FoundryLocalError::Validation {
-                reason: "ResponsesClientSettings.timeout must be greater than zero.".into(),
-            });
-        }
-        Ok(self.settings.timeout)
-    }
-
-    async fn parse_json_response<T>(&self, resp: reqwest::Response) -> Result<T>
-    where
-        T: serde::de::DeserializeOwned,
-    {
-        let status = resp.status();
-        let text = resp.text().await?;
-        if !status.is_success() {
-            return Err(FoundryLocalError::Validation {
-                reason: format!("Responses API error ({status}): {text}"),
-            });
-        }
-        serde_json::from_str(&text).map_err(FoundryLocalError::from)
-    }
-}
-
-// ============================================================================
-// SSE parser
-// ============================================================================
-
-/// Parse a raw bytes stream (from `reqwest`) as Server-Sent Events.
-///
-/// Each complete SSE block (`\n\n`-separated) is parsed into a [`StreamingEvent`].
-/// The stream ends on `data: [DONE]` or when the source is exhausted.
-fn parse_sse_stream<S>(byte_stream: S) -> impl Stream<Item = Result<StreamingEvent>> + Send
-where
-    S: Stream<Item = reqwest::Result<Bytes>> + Send + 'static,
-{
-    try_stream! {
-        use tokio_stream::StreamExt as _;
-
-        let mut byte_stream = std::pin::pin!(byte_stream);
-        // Buffer accumulates bytes until we have complete SSE blocks.
-        let mut buf = String::new();
-
-        while let Some(chunk) = byte_stream.next().await {
-            let bytes: Bytes = chunk.map_err(FoundryLocalError::from)?;
-            // SSE is always UTF-8
-            let text = std::str::from_utf8(&bytes).map_err(|e| FoundryLocalError::Validation {
-                reason: format!("SSE stream contained invalid UTF-8: {e}"),
-            })?;
-            buf.push_str(text);
-
-            // Process all complete SSE blocks (separated by double newlines).
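-            // A block may carry an optional `event:` line plus one or more
-            // `data:` lines, for example:
-            //
-            //     event: response.output_text.delta
-            //     data: {"type":"response.output_text.delta","delta":"Hi",...}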
-            loop {
-                let Some(block_end) = buf.find("\n\n") else {
-                    break;
-                };
-                let block = buf[..block_end].to_owned();
-                buf = buf[block_end + 2..].to_owned();
-
-                let trimmed = block.trim();
-                if trimmed.is_empty() {
-                    continue;
-                }
-
-                // Terminal signal
-                if trimmed == "data: [DONE]" {
-                    return;
-                }
-
-                // Collect `data:` lines (per SSE spec, multiple are joined with \n)
-                let mut data_lines: Vec<&str> = Vec::new();
-                for line in trimmed.split('\n') {
-                    if let Some(rest) = line.strip_prefix("data: ") {
-                        data_lines.push(rest);
-                    } else if line == "data:" {
-                        data_lines.push("");
-                    }
-                    // `event:` lines are informational; the type lives inside the JSON.
-                }
-
-                if data_lines.is_empty() {
-                    continue;
-                }
-
-                let event_json = data_lines.join("\n");
-                let event: StreamingEvent =
-                    serde_json::from_str(&event_json).map_err(FoundryLocalError::from)?;
-                yield event;
-            }
-        }
-    }
-}
-
-// ============================================================================
-// Inline tests
-// ============================================================================
-//
-// These tests live alongside `parse_sse_stream` so they exercise the real
-// implementation rather than reimplementing SSE framing in an external test
-// crate. Anything that only depends on public APIs lives in `tests/unit/`.
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use async_stream::stream;
-
-    /// Drive `parse_sse_stream` from a hand-constructed byte stream and collect
-    /// its yielded events.
-    async fn collect_events(chunks: Vec<&'static str>) -> Vec<StreamingEvent> {
-        let byte_stream = stream! {
-            for chunk in chunks {
-                yield Ok::<Bytes, reqwest::Error>(Bytes::from_static(chunk.as_bytes()));
-            }
-        };
-
-        let parsed = parse_sse_stream(byte_stream);
-        let mut parsed = std::pin::pin!(parsed);
-
-        let mut events = Vec::new();
-        use tokio_stream::StreamExt as _;
-        while let Some(event) = parsed.next().await {
-            events.push(event.expect("SSE event failed to parse"));
-        }
-        events
-    }
-
-    #[tokio::test]
-    async fn parses_complete_event_block() {
-        let payload = "data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\
-            \"output_index\":0,\"content_index\":0,\"delta\":\"Hi\",\"sequence_number\":1}\n\n\
-            data: [DONE]\n\n";
-
-        let events = collect_events(vec![payload]).await;
-        assert_eq!(events.len(), 1);
-        assert!(matches!(
-            events[0],
-            StreamingEvent::OutputTextDelta { ref delta, .. } if delta == "Hi"
-        ));
-    }
-
-    #[tokio::test]
-    async fn done_signal_terminates_stream() {
-        let payload = "data: [DONE]\n\n\
-            data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\
-            \"output_index\":0,\"content_index\":0,\"delta\":\"after-done\",\
-            \"sequence_number\":2}\n\n";
-
-        let events = collect_events(vec![payload]).await;
-        assert!(events.is_empty(), "events after [DONE] must be ignored");
-    }
-
-    #[tokio::test]
-    async fn handles_event_split_across_chunks() {
-        // Split a single SSE block across two byte chunks to make sure the
-        // parser buffers correctly.
-        let part1 = "data: {\"type\":\"response.output_text.delta\",\
-            \"item_id\":\"i1\",\"output_index\":0,\"content_index\":0,";
-        let part2 = "\"delta\":\"split\",\"sequence_number\":3}\n\ndata: [DONE]\n\n";
-
-        let events = collect_events(vec![part1, part2]).await;
-        assert_eq!(events.len(), 1);
-        assert!(matches!(
-            events[0],
-            StreamingEvent::OutputTextDelta { ref delta, .. } if delta == "split"
-        ));
-    }
-
-    #[tokio::test]
-    async fn skips_event_lines_and_blank_blocks() {
-        let payload = "event: response.output_text.delta\n\
-            data: {\"type\":\"response.output_text.delta\",\"item_id\":\"i1\",\
-            \"output_index\":0,\"content_index\":0,\"delta\":\"ok\",\"sequence_number\":4}\n\n\
-            \n\n\
-            data: [DONE]\n\n";
-
-        let events = collect_events(vec![payload]).await;
-        assert_eq!(events.len(), 1);
-    }
-}
diff --git a/sdk/rust/src/openai/responses_types.rs b/sdk/rust/src/openai/responses_types.rs
deleted file mode 100644
index 59860c382..000000000
--- a/sdk/rust/src/openai/responses_types.rs
+++ /dev/null
@@ -1,695 +0,0 @@
-//! Type definitions for the OpenAI Responses API.
-
-use std::collections::HashMap;
-
-use serde::{Deserialize, Serialize};
-use serde_json::Value;
-
-// ============================================================================
-// Content Parts
-// ============================================================================
-
-/// An annotation attached to an output-text content part.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Annotation {
-    #[serde(rename = "type")]
-    pub annotation_type: String,
-    pub start_index: u32,
-    pub end_index: u32,
-    /// URL for url_citation annotations.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub url: Option<String>,
-    /// Title for url_citation annotations.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub title: Option<String>,
-}
-
-/// Log probability for a token.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct LogProb {
-    pub token: String,
-    pub logprob: f64,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub bytes: Option<Vec<u8>>,
-}
-
-/// A content part within a message or response.
-///
-/// Discriminated on the `"type"` field.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(tag = "type")]
-pub enum ContentPart {
-    /// Plain text input content.
-    #[serde(rename = "input_text")]
-    InputText { text: String },
-
-    /// Image input content (vision).
-    ///
-    /// This models Foundry Local's server contract. The server accepts either
-    /// `image_url` or `image_data`; when `image_data` is used, `media_type` lets
-    /// the server build the underlying data URI. If omitted, the server may infer
-    /// the media type.
-    #[serde(rename = "input_image")]
-    InputImage {
-        /// URL of the image (mutually exclusive with `image_data`).
-        #[serde(skip_serializing_if = "Option::is_none")]
-        image_url: Option<String>,
-        /// Base64-encoded image bytes (mutually exclusive with `image_url`).
-        #[serde(skip_serializing_if = "Option::is_none")]
-        image_data: Option<String>,
-        /// MIME type of the image, e.g. `"image/png"`.
-        #[serde(skip_serializing_if = "Option::is_none")]
-        media_type: Option<String>,
-        /// Detail level: `"low"`, `"high"`, or `"auto"`.
-        #[serde(skip_serializing_if = "Option::is_none")]
-        detail: Option<String>,
-    },
-
-    /// File input content.
-    #[serde(rename = "input_file")]
-    InputFile { filename: String, file_url: String },
-
-    /// Text produced by the model.
-    #[serde(rename = "output_text")]
-    OutputText {
-        text: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        annotations: Option<Vec<Annotation>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        logprobs: Option<Vec<LogProb>>,
-    },
-
-    /// Model refusal.
-    #[serde(rename = "refusal")]
-    Refusal { refusal: String },
-}
-
-// ============================================================================
-// Message Content (string | ContentPart[])
-// ============================================================================
-
-/// The content of a message item — either a plain string or a list of content parts.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(untagged)]
-pub enum MessageContent {
-    Text(String),
-    Parts(Vec<ContentPart>),
-}
-
-// ============================================================================
-// Response Items
-// ============================================================================
-
-/// An item in a request or response — discriminated on `"type"`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(tag = "type")]
-pub enum ResponseItem {
-    #[serde(rename = "message")]
-    Message {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        id: Option<String>,
-        role: String,
-        content: MessageContent,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        status: Option<String>,
-    },
-
-    #[serde(rename = "function_call")]
-    FunctionCall {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        id: Option<String>,
-        call_id: String,
-        name: String,
-        arguments: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        status: Option<String>,
-    },
-
-    #[serde(rename = "function_call_output")]
-    FunctionCallOutput {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        id: Option<String>,
-        call_id: String,
-        output: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        status: Option<String>,
-    },
-
-    #[serde(rename = "item_reference")]
-    ItemReference { id: String },
-
-    #[serde(rename = "reasoning")]
-    Reasoning {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        id: Option<String>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        content: Option<Vec<ContentPart>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        encrypted_content: Option<String>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        summary: Option<Vec<ContentPart>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        status: Option<String>,
-    },
-}
-
-// ============================================================================
-// Response Input
-// ============================================================================
-
-/// The `input` field of a [`ResponseCreateRequest`]: either a plain string prompt
-/// or a structured list of [`ResponseItem`]s.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(untagged)]
-pub enum ResponseInput {
-    Text(String),
-    Items(Vec<ResponseItem>),
-}
-
-// ============================================================================
-// Tool Definitions
-// ============================================================================
-
-/// A function tool definition passed to the model.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FunctionToolDefinition {
-    /// Always `"function"`.
-    #[serde(rename = "type")]
-    pub tool_type: String,
-    pub name: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-    /// JSON Schema for the function parameters.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parameters: Option<Value>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub strict: Option<bool>,
-}
-
-// ============================================================================
-// Text & Reasoning Config
-// ============================================================================
-
-/// Format constraints for model text output (constrained generation).
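-///
-/// A minimal `json_schema` sketch (names and schema are illustrative only):
-///
-/// ```ignore
-/// let format = TextFormat {
-///     format_type: "json_schema".into(),
-///     name: Some("answer".into()),
-///     description: None,
-///     schema: Some(serde_json::json!({
-///         "type": "object",
-///         "properties": { "answer": { "type": "string" } },
-///         "required": ["answer"]
-///     })),
-///     strict: Some(true),
-/// };
-/// ```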
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct TextFormat {
-    /// `"text"`, `"json_object"`, `"json_schema"`, `"lark_grammar"`, or `"regex"`.
-    #[serde(rename = "type")]
-    pub format_type: String,
-    /// Schema name (for `json_schema`).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub name: Option<String>,
-    /// Schema description (for `json_schema`).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-    /// JSON Schema object (for `json_schema`).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub schema: Option<Value>,
-    /// Strict mode (for `json_schema`).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub strict: Option<bool>,
-}
-
-/// Text output configuration.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct TextConfig {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub format: Option<TextFormat>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub verbosity: Option<String>,
-}
-
-/// Reasoning configuration for reasoning-capable models.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ReasoningConfig {
-    /// Effort level: `"low"`, `"medium"`, or `"high"`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub effort: Option<String>,
-    /// Summary style: `"auto"`, `"concise"`, or `"detailed"`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub summary: Option<String>,
-}
-
-// ============================================================================
-// Request
-// ============================================================================
-
-/// Request body for `POST /v1/responses`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ResponseCreateRequest {
-    pub model: String,
-    pub input: ResponseInput,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub instructions: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub previous_response_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<FunctionToolDefinition>>,
-    /// `"none"` | `"auto"` | `"required"` | `{ "type": "function", "name": "..." }`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_choice: Option<Value>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub stream: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_output_tokens: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub seed: Option<i64>,
-    /// `"auto"` or `"disabled"`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub truncation: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<HashMap<String, String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reasoning: Option<ReasoningConfig>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub text: Option<TextConfig>,
-}
-
-/// Per-call overrides for [`ResponsesClient::create`] and
-/// [`ResponsesClient::create_streaming`].
-///
-/// Every field is optional — the client merges these on top of
-/// [`ResponsesClientSettings`] and the constructor-supplied model. Unlike
-/// [`ResponseCreateRequest`] (the wire-serialised request body), this type is
-/// intended purely as caller-friendly input and never needs to be fully
-/// populated.
-///
-/// [`ResponsesClient::create`]: crate::ResponsesClient::create
-/// [`ResponsesClient::create_streaming`]: crate::ResponsesClient::create_streaming
-/// [`ResponsesClientSettings`]: crate::ResponsesClientSettings
-#[derive(Debug, Clone, Default)]
-pub struct ResponseCreateOptions {
-    pub model: Option<String>,
-    pub instructions: Option<String>,
-    pub previous_response_id: Option<String>,
-    pub tools: Option<Vec<FunctionToolDefinition>>,
-    pub tool_choice: Option<Value>,
-    pub store: Option<bool>,
-    pub temperature: Option<f64>,
-    pub top_p: Option<f64>,
-    pub max_output_tokens: Option<u32>,
-    pub frequency_penalty: Option<f64>,
-    pub presence_penalty: Option<f64>,
-    pub seed: Option<i64>,
-    pub truncation: Option<String>,
-    pub parallel_tool_calls: Option<bool>,
-    pub metadata: Option<HashMap<String, String>>,
-    pub user: Option<String>,
-    pub reasoning: Option<ReasoningConfig>,
-    pub text: Option<TextConfig>,
-}
-
-// ============================================================================
-// Response Object
-// ============================================================================
-
-/// Usage statistics attached to a completed response.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ResponseUsage {
-    pub input_tokens: u32,
-    pub output_tokens: u32,
-    pub total_tokens: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub input_tokens_details: Option<InputTokensDetails>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub output_tokens_details: Option<OutputTokensDetails>,
-}
-
-/// Details about input token counts.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct InputTokensDetails {
-    pub cached_tokens: u32,
-}
-
-/// Details about output token counts.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct OutputTokensDetails {
-    pub reasoning_tokens: u32,
-}
-
-/// An error payload inside a response object.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ResponseError {
-    pub code: String,
-    pub message: String,
-}
-
-/// Optional details about why a response is incomplete.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct IncompleteDetails {
-    pub reason: String,
-}
-
-/// A completed (or failed) response from the Responses API.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ResponseObject {
-    pub id: String,
-    pub object: String,
-    pub created_at: i64,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub completed_at: Option<i64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub failed_at: Option<i64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub cancelled_at: Option<i64>,
-    /// `"queued"`, `"in_progress"`, `"completed"`, `"failed"`, `"incomplete"`, or `"cancelled"`.
-    pub status: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub incomplete_details: Option<IncompleteDetails>,
-    pub model: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub previous_response_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub instructions: Option<String>,
-    pub output: Vec<ResponseItem>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<ResponseError>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<FunctionToolDefinition>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_choice: Option<Value>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub truncation: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub text: Option<TextConfig>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_output_tokens: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reasoning: Option<ReasoningConfig>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<HashMap<String, String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub usage: Option<ResponseUsage>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-}
-
-impl ResponseObject {
-    /// Concatenates text from the first assistant `message` item in `output`.
-    ///
-    /// Equivalent to the Python SDK's `response.output_text` property.
-    pub fn output_text(&self) -> String {
-        for item in &self.output {
-            if let ResponseItem::Message { role, content, .. } = item {
-                if role == "assistant" {
-                    return match content {
-                        MessageContent::Text(s) => s.clone(),
-                        MessageContent::Parts(parts) => parts
-                            .iter()
-                            .filter_map(|p| match p {
-                                ContentPart::OutputText { text, .. } => Some(text.as_str()),
-                                _ => None,
-                            })
-                            .collect::<Vec<&str>>()
-                            .join(""),
-                    };
-                }
-            }
-        }
-        String::new()
-    }
-}
-
-// ============================================================================
-// Streaming Events
-// ============================================================================
-
-/// A single Server-Sent Event emitted by the streaming Responses API.
-///
-/// Discriminated on the `"type"` field.
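-///
-/// Consumers usually match only the variants they care about; a minimal sketch:
-///
-/// ```ignore
-/// match event {
-///     StreamingEvent::OutputTextDelta { delta, .. } => print!("{delta}"),
-///     StreamingEvent::ResponseCompleted { .. } => println!("[done]"),
-///     _ => {}
-/// }
-/// ```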
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(tag = "type")]
-pub enum StreamingEvent {
-    // ── Response lifecycle ───────────────────────────────────────────────────
-    #[serde(rename = "response.created")]
-    ResponseCreated {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.queued")]
-    ResponseQueued {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.in_progress")]
-    ResponseInProgress {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.completed")]
-    ResponseCompleted {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.failed")]
-    ResponseFailed {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.incomplete")]
-    ResponseIncomplete {
-        response: ResponseObject,
-        sequence_number: u64,
-    },
-
-    // ── Output items ─────────────────────────────────────────────────────────
-    #[serde(rename = "response.output_item.added")]
-    OutputItemAdded {
-        item_id: String,
-        output_index: u32,
-        item: ResponseItem,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.output_item.done")]
-    OutputItemDone {
-        item_id: String,
-        output_index: u32,
-        item: ResponseItem,
-        sequence_number: u64,
-    },
-
-    // ── Content parts ────────────────────────────────────────────────────────
-    #[serde(rename = "response.content_part.added")]
-    ContentPartAdded {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        part: ContentPart,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.content_part.done")]
-    ContentPartDone {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        part: ContentPart,
-        sequence_number: u64,
-    },
-
-    // ── Text deltas ──────────────────────────────────────────────────────────
-    #[serde(rename = "response.output_text.delta")]
-    OutputTextDelta {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        delta: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        logprobs: Option<Vec<LogProb>>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        obfuscation: Option<String>,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.output_text.done")]
-    OutputTextDone {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        text: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        logprobs: Option<Vec<LogProb>>,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.output_text.annotation.added")]
-    OutputTextAnnotationAdded {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        annotation_index: u32,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        annotation: Option<Annotation>,
-        sequence_number: u64,
-    },
-
-    // ── Refusal ──────────────────────────────────────────────────────────────
-    #[serde(rename = "response.refusal.delta")]
-    RefusalDelta {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        delta: String,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.refusal.done")]
-    RefusalDone {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        refusal: String,
-        sequence_number: u64,
-    },
-
-    // ── Function calls ───────────────────────────────────────────────────────
-    #[serde(rename = "response.function_call_arguments.delta")]
-    FunctionCallArgumentsDelta {
-        item_id: String,
-        output_index: u32,
-        call_id: String,
-        delta: String,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.function_call_arguments.done")]
-    FunctionCallArgumentsDone {
-        item_id: String,
-        output_index: u32,
-        call_id: String,
-        arguments: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        name: Option<String>,
-        sequence_number: u64,
-    },
-
-    // ── Reasoning ────────────────────────────────────────────────────────────
-    #[serde(rename = "response.reasoning_summary_part.added")]
-    ReasoningSummaryPartAdded {
-        item_id: String,
-        output_index: u32,
-        summary_index: u32,
-        part: ContentPart,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.reasoning_summary_part.done")]
-    ReasoningSummaryPartDone {
-        item_id: String,
-        output_index: u32,
-        summary_index: u32,
-        part: ContentPart,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.reasoning.delta")]
-    ReasoningDelta {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        delta: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        obfuscation: Option<String>,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.reasoning.done")]
-    ReasoningDone {
-        item_id: String,
-        output_index: u32,
-        content_index: u32,
-        text: String,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.reasoning_summary_text.delta")]
-    ReasoningSummaryTextDelta {
-        item_id: String,
-        output_index: u32,
-        summary_index: u32,
-        delta: String,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        obfuscation: Option<String>,
-        sequence_number: u64,
-    },
-    #[serde(rename = "response.reasoning_summary_text.done")]
-    ReasoningSummaryTextDone {
-        item_id: String,
-        output_index: u32,
-        summary_index: u32,
-        text: String,
-        sequence_number: u64,
-    },
-
-    // ── Error ────────────────────────────────────────────────────────────────
-    #[serde(rename = "error")]
-    Error {
-        #[serde(skip_serializing_if = "Option::is_none")]
-        code: Option<String>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        message: Option<String>,
-        #[serde(skip_serializing_if = "Option::is_none")]
-        param: Option<String>,
-        sequence_number: u64,
-    },
-}
-
-// ============================================================================
-// List / Delete Results
-// ============================================================================
-
-/// Result of `DELETE /v1/responses/{id}`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct DeleteResponseResult {
-    pub id: String,
-    pub object: String,
-    pub deleted: bool,
-}
-
-/// Response from `GET /v1/responses/{id}/input_items`.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct InputItemsListResponse {
-    pub object: String,
-    pub data: Vec<ResponseItem>,
-}
-
-/// Response from `GET /v1/responses` (extension endpoint).
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct ListResponsesResult {
-    pub object: String,
-    pub data: Vec<ResponseObject>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub first_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub last_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub has_more: Option<bool>,
-}
-
-/// Optional query parameters for `GET /v1/responses`.
-#[derive(Debug, Clone, Default, Serialize, Deserialize)]
-pub struct ListResponsesOptions {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub limit: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub order: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub after: Option<String>,
-}
diff --git a/sdk/rust/tests/integration/responses_test.rs b/sdk/rust/tests/integration/responses_test.rs
index 7944e7319..91e0356c2 100644
--- a/sdk/rust/tests/integration/responses_test.rs
+++ b/sdk/rust/tests/integration/responses_test.rs
@@ -1,389 +1,314 @@
-//! Integration tests for the Responses API.
-//!
-//! These tests require a running Foundry Local web service with a loaded model.
-//! They are compiled only when the "integration" Cargo feature is enabled, and
-//! skipped automatically in CI when no model is available.
-
 use super::common;
-use foundry_local_sdk::{
-    FunctionToolDefinition, ListResponsesOptions, MessageContent, ResponseInput, ResponseItem,
-    ResponsesClient, ResponsesContentPart as ContentPart,
-};
-use serde_json::json;
-use tokio_stream::StreamExt;
-
-/// The model alias used for Responses API integration tests.
-const RESPONSES_MODEL_ALIAS: &str = common::TEST_MODEL_ALIAS;
-
-async fn setup_responses_client() -> (ResponsesClient, Arc<foundry_local_sdk::FoundryLocalModel>) {
-    let manager = common::get_test_manager();
-    manager
-        .start_web_service()
-        .await
-        .expect("start_web_service failed");
-    let catalog = manager.catalog();
-    let model = catalog
-        .get_model(RESPONSES_MODEL_ALIAS)
-        .await
-        .expect("get_model failed");
-    model.load().await.expect("model.load() failed");
-
-    let mut client = manager
-        .get_responses_client(Some(&model.info().id))
-        .expect("get_responses_client failed");
-    client.settings.store = Some(true);
-    (client, model)
-}
-
+use serde_json::{json, Value};
 use std::sync::Arc;
 
-#[tokio::test]
-async fn non_streaming_simple_string() {
-    let (client, model) = setup_responses_client().await;
-
-    let response = client
-        .create(
-            ResponseInput::Text("What is 2+2? Respond with just the number.".into()),
-            None,
-        )
-        .await
-        .expect("create failed");
+type TestResult<T> = Result<T, Box<dyn std::error::Error>>;
 
-    println!("Status: {}", response.status);
-    println!("Output: {}", response.output_text());
+static RESPONSES_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(());
 
-    assert_eq!(response.status, "completed");
-    assert!(
-        !response.output_text().is_empty(),
-        "output_text should be non-empty"
-    );
-    assert!(response.usage.is_some(), "usage should be present");
-
-    model.unload().await.expect("unload failed");
+struct ResponsesServiceContext {
+    manager: &'static foundry_local_sdk::FoundryLocalManager,
+    model: Arc<foundry_local_sdk::FoundryLocalModel>,
+    base_url: String,
+    http: reqwest::Client,
 }
 
-#[tokio::test]
-async fn non_streaming_with_options() {
-    let (client, model) = setup_responses_client().await;
-
-    let opts = foundry_local_sdk::ResponseCreateOptions {
-        temperature: Some(0.0),
-        max_output_tokens: Some(50),
-        store: Some(true),
-        ..Default::default()
-    };
+impl ResponsesServiceContext {
+    async fn start() -> Option<Self> {
+        let manager = common::get_test_manager();
+        let catalog = manager.catalog();
+        let model = match catalog.get_model(common::TEST_MODEL_ALIAS).await {
+            Ok(model) => model,
+            Err(e) => {
+                eprintln!(
+                    "Skipping Responses web-service test: model '{}' unavailable: {e}",
+                    common::TEST_MODEL_ALIAS
+                );
+                return None;
+            }
+        };
+
+        if !model.is_cached().await.unwrap_or(false) {
+            eprintln!(
+                "Skipping Responses web-service test: model '{}' is not cached",
+                common::TEST_MODEL_ALIAS
+            );
+            return None;
+        }
 
-    let response = client
-        .create(ResponseInput::Text("Say hello.".into()), Some(opts))
-        .await
-        .expect("create with options failed");
+        model.load().await.expect("model.load() failed");
+        manager
+            .start_web_service()
+            .await
+            .expect("start_web_service failed");
+
+        let base_url = format!(
+            "{}/v1",
+            manager
+                .urls()
+                .expect("urls() should succeed")
+                .first()
+                .expect("no URL returned")
+                .trim_end_matches('/')
+        );
 
-    assert_eq!(response.status, "completed");
-    assert!(!response.output_text().is_empty());
+        Some(Self {
+            manager,
+            model,
+            base_url,
+            http: reqwest::Client::new(),
+        })
+    }
 
-    model.unload().await.expect("unload failed");
+    async fn cleanup(&self) {
+        self.manager
+            .stop_web_service()
+            .await
+            .expect("stop_web_service failed");
+        self.model.unload().await.expect("model.unload() failed");
+    }
 }
 
 #[tokio::test]
-async fn streaming_receives_deltas() {
-    let (client, model) = setup_responses_client().await;
-
-    let mut stream = client
-        .create_streaming(ResponseInput::Text("Count from 1 to 5.".into()), None)
-        .await
-        .expect("create_streaming failed");
-
-    let mut delta_count = 0usize;
-    let mut full_text = String::new();
-    let mut completed = false;
-
-    while let Some(event) = stream.next().await {
-        let event = event.expect("stream event error");
-        match event {
-            foundry_local_sdk::StreamingEvent::OutputTextDelta { delta, .. } => {
-                full_text.push_str(&delta);
-                delta_count += 1;
-            }
-            foundry_local_sdk::StreamingEvent::ResponseCompleted { .. } => {
-                completed = true;
-            }
-            _ => {}
-        }
-    }
-
-    println!("Received {delta_count} deltas, text: {full_text}");
-    assert!(delta_count > 0, "Expected at least one delta event");
-    assert!(completed, "Expected a ResponseCompleted event");
-    assert!(!full_text.is_empty(), "Expected non-empty accumulated text");
+async fn should_create_non_streaming_response_via_rest_api() {
+    let _guard = RESPONSES_TEST_LOCK.lock().await;
+    let Some(ctx) = ResponsesServiceContext::start().await else {
+        return;
+    };
 
-    model.unload().await.expect("unload failed");
+    let result = post_response_json(
+        &ctx,
+        json!({
+            "model": ctx.model.id(),
+            "input": "What is 2 + 2? Respond with just the answer.",
+            "temperature": 0.0
+        }),
+    )
+    .await;
+
+    ctx.cleanup().await;
+
+    let body = result.expect("Responses non-streaming request failed");
+    let text = output_text(&body);
+    println!("Responses non-streaming text: {text}");
+    assert!(!text.trim().is_empty(), "response text should not be empty");
 }
 
 #[tokio::test]
-async fn multi_turn_previous_response_id() {
-    let (client, model) = setup_responses_client().await;
-
-    // First turn
-    let first = client
-        .create(
-            ResponseInput::Text("My favourite colour is blue. Remember this.".into()),
-            None,
-        )
-        .await
-        .expect("first create failed");
-    assert_eq!(first.status, "completed");
-    let first_id = first.id.clone();
-
-    // Second turn referencing the first
-    let opts = foundry_local_sdk::ResponseCreateOptions {
-        previous_response_id: Some(first_id),
-        store: Some(true),
-        ..Default::default()
+async fn should_stream_response_via_rest_api() {
+    let _guard = RESPONSES_TEST_LOCK.lock().await;
+    let Some(ctx) = ResponsesServiceContext::start().await else {
+        return;
     };
 
-    let second = client
-        .create(
-            ResponseInput::Text("What is my favourite colour?".into()),
-            Some(opts),
-        )
-        .await
-        .expect("second create failed");
+    let result = async {
+        let response = ctx
+            .http
+            .post(format!("{}/responses", ctx.base_url))
+            .json(&json!({
+                "model": ctx.model.id(),
+                "input": "Count from 1 to 3.",
+                "temperature": 0.0,
+                "stream": true
+            }))
+            .send()
+            .await?;
+
+        read_responses_sse(response).await
+    }
+    .await;
+
+    ctx.cleanup().await;
 
-    println!("Multi-turn response: {}", second.output_text());
-    assert_eq!(second.status, "completed");
-    let text = second.output_text().to_lowercase();
+    let summary = result.expect("Responses streaming request failed");
     assert!(
-        text.contains("blue"),
-        "Second response should reference 'blue', got: {text}"
+        summary.delta_count > 0,
+        "expected at least one response.output_text.delta event"
+    );
+    assert!(
+        summary.completed,
+        "expected a response.completed event in the stream"
     );
-
-    model.unload().await.expect("unload failed");
 }
 
 #[tokio::test]
-async fn get_stored_response() {
-    let (client, model) = setup_responses_client().await;
-
-    let created = client
-        .create(ResponseInput::Text("Hello.".into()), None)
-        .await
-        .expect("create failed");
-    let response_id = created.id.clone();
+async fn should_complete_tool_calling_response_via_rest_api() {
+    let _guard = RESPONSES_TEST_LOCK.lock().await;
+    let Some(ctx) = ResponsesServiceContext::start().await else {
+        return;
+    };
 
-    let fetched = client.get(&response_id).await.expect("get failed");
-    assert_eq!(fetched.id, response_id);
-    assert_eq!(fetched.status, "completed");
+    let result = async {
+        let weather_tool = get_weather_tool();
+        let tool_response = post_response_json(
+            &ctx,
+            json!({
+                "model": ctx.model.id(),
+                "input": "Use the get_weather tool for Seattle, then answer.",
+                "tools": [weather_tool.clone()],
+                "tool_choice": "required",
+                "temperature": 0.0,
+                "store": true
+            }),
+        )
+        .await?;
 
-    model.unload().await.expect("unload failed");
-}
+        let (call_id, name) = find_function_call(&tool_response)
+            .ok_or("expected a function_call item in the tool response")?;
+        if name != "get_weather" {
+            return Err(format!("expected get_weather function call, got {name}").into());
+        }
 
-#[tokio::test]
-async fn delete_response() {
-    let (client, model) = setup_responses_client().await;
+        let final_response = post_response_json(
+            &ctx,
+            json!({
+                "model": ctx.model.id(),
+                "previous_response_id": tool_response["id"].clone(),
+                "input": [{
+                    "type": "function_call_output",
+                    "call_id": call_id,
+                    "output": "Seattle weather is 72F and sunny."
+ }], + "tools": [weather_tool], + "temperature": 0.0 + }), + ) + .await?; - let created = client - .create(ResponseInput::Text("I will be deleted.".into()), None) - .await - .expect("create failed"); - let response_id = created.id.clone(); + Ok::>(output_text(&final_response)) + } + .await; - let result = client.delete(&response_id).await.expect("delete failed"); - assert_eq!(result.id, response_id); - assert!(result.deleted); + ctx.cleanup().await; - // Getting the deleted response should fail - let get_result = client.get(&response_id).await; + let text = result.expect("Responses tool-calling flow failed"); + println!("Responses tool final text: {text}"); assert!( - get_result.is_err(), - "Expected error after deleting response" + !text.trim().is_empty(), + "final response text should not be empty" ); - - model.unload().await.expect("unload failed"); } -#[tokio::test] -async fn list_responses() { - let (client, model) = setup_responses_client().await; - - // Create a response to ensure there is at least one - let _ = client - .create(ResponseInput::Text("List test.".into()), None) - .await - .expect("create failed"); - - let list_options = ListResponsesOptions { - limit: Some(10), - order: Some("desc".into()), - after: None, - }; - let list = client - .list_with_options(Some(&list_options)) - .await - .expect("list failed"); - assert_eq!(list.object, "list"); - assert!( - !list.data.is_empty(), - "Expected at least one response in list" - ); - - model.unload().await.expect("unload failed"); +async fn post_response_json(ctx: &ResponsesServiceContext, body: Value) -> TestResult { + let response = ctx + .http + .post(format!("{}/responses", ctx.base_url)) + .json(&body) + .send() + .await?; + let status = response.status(); + let text = response.text().await?; + if !status.is_success() { + return Err(format!("Responses API returned {status}: {text}").into()); + } + Ok(serde_json::from_str(&text)?) 
} -#[tokio::test] -async fn get_input_items() { - let (client, model) = setup_responses_client().await; - - let created = client - .create(ResponseInput::Text("Input items test.".into()), None) - .await - .expect("create failed"); - - let items = client - .get_input_items(&created.id) - .await - .expect("get_input_items failed"); - - assert_eq!(items.object, "list"); +fn output_text(response: &Value) -> String { + response + .get("output") + .and_then(Value::as_array) + .into_iter() + .flatten() + .find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("message") { + return None; + } + match item.get("content") { + Some(Value::String(text)) => Some(text.clone()), + Some(Value::Array(parts)) => Some( + parts + .iter() + .filter_map(|part| { + (part.get("type").and_then(Value::as_str) == Some("output_text")) + .then(|| part.get("text").and_then(Value::as_str)) + .flatten() + }) + .collect::(), + ), + _ => None, + } + }) + .unwrap_or_default() +} - model.unload().await.expect("unload failed"); +fn find_function_call(response: &Value) -> Option<(String, String)> { + response.get("output")?.as_array()?.iter().find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("function_call") { + return None; + } + let call_id = item.get("call_id")?.as_str()?.to_string(); + let name = item.get("name")?.as_str()?.to_string(); + Some((call_id, name)) + }) } -#[tokio::test] -async fn tool_calling_round_trip() { - let (client, model) = setup_responses_client().await; - - let multiply_tool = FunctionToolDefinition { - tool_type: "function".into(), - name: "multiply".into(), - description: Some("Multiply two numbers".into()), - parameters: Some(json!({ +fn get_weather_tool() -> Value { + json!({ + "type": "function", + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { "type": "object", "properties": { - "a": { "type": "number" }, - "b": { "type": "number" } + "city": { "type": "string", "description": "City name" } }, - "required": ["a", "b"] - })), - strict: None, - }; + "required": ["city"] + } + }) +} - let opts = foundry_local_sdk::ResponseCreateOptions { - tools: Some(vec![multiply_tool]), - tool_choice: Some(json!("required")), - store: Some(true), - temperature: Some(0.0), - ..Default::default() - }; +#[derive(Default)] +struct StreamSummary { + delta_count: usize, + completed: bool, +} - let response = client - .create( - ResponseInput::Text("What is 6 times 7? Use the multiply tool.".into()), - Some(opts), - ) - .await - .expect("create with tool failed"); - - // Find the function_call item - let func_call = response.output.iter().find_map(|item| { - if let ResponseItem::FunctionCall { - call_id, - name, - arguments, - .. 
- } = item - { - Some((call_id.clone(), name.clone(), arguments.clone())) - } else { - None - } - }); - - assert!(func_call.is_some(), "Expected a function_call output item"); - let (call_id, name, args_str) = func_call.unwrap(); - assert_eq!(name, "multiply"); - - let args: serde_json::Value = serde_json::from_str(&args_str).expect("failed to parse args"); - let a = args["a"].as_f64().unwrap_or(0.0); - let b = args["b"].as_f64().unwrap_or(0.0); - let product = (a * b) as i64; - - // Send back the tool result - let tool_result_input = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput { - id: None, - call_id, - output: product.to_string(), - status: None, - }]); - - let final_opts = foundry_local_sdk::ResponseCreateOptions { - previous_response_id: Some(response.id.clone()), - store: Some(true), - temperature: Some(0.0), - ..Default::default() - }; +async fn read_responses_sse(mut response: reqwest::Response) -> TestResult { + let status = response.status(); + if !status.is_success() { + let text = response.text().await?; + return Err(format!("Responses API returned {status}: {text}").into()); + } - let final_response = client - .create(tool_result_input, Some(final_opts)) - .await - .expect("tool result create failed"); + let mut buffer = String::new(); + let mut summary = StreamSummary::default(); - let result_text = final_response.output_text(); - println!("Tool call final answer: {result_text}"); - assert!( - result_text.contains("42"), - "Expected '42' in final answer, got: {result_text}" - ); + while let Some(chunk) = response.chunk().await? { + buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n")); + while let Some(block_end) = buffer.find("\n\n") { + let block = buffer[..block_end].to_string(); + buffer = buffer[block_end + 2..].to_string(); + if handle_sse_block(&block, &mut summary) { + return Ok(summary); + } + } + } - model.unload().await.expect("unload failed"); + Ok(summary) } -#[tokio::test] -async fn vision_image_base64() { - // Skip unless an explicit vision-capable model is provided via the env var. - // This avoids accidentally treating a non-vision model failure as a pass. - let Ok(vision_model_id) = std::env::var("FOUNDRY_VISION_MODEL_ID") else { - eprintln!( - "vision_image_base64 skipped: set FOUNDRY_VISION_MODEL_ID to a vision-capable \ - model alias to run this test." 
-        );
-        return;
-    };
-
-    let manager = common::get_test_manager();
-    manager
-        .start_web_service()
-        .await
-        .expect("start_web_service failed");
+fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool {
+    let data = block
+        .lines()
+        .filter_map(|line| line.trim().strip_prefix("data: "))
+        .collect::<Vec<_>>()
+        .join("\n");
 
-    // Small 1x1 red PNG, base64-encoded
-    let tiny_png_b64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI6QAAAABJRU5ErkJggg==";
+    if data.is_empty() {
+        return false;
+    }
+    if data == "[DONE]" {
+        return true;
+    }
 
-    let client = ResponsesClient::new(
-        manager.urls().expect("urls").first().expect("url"),
-        Some(&vision_model_id),
-    );
+    if let Ok(event) = serde_json::from_str::<Value>(&data) {
+        match event.get("type").and_then(Value::as_str) {
+            Some("response.output_text.delta") => summary.delta_count += 1,
+            Some("response.completed") => summary.completed = true,
+            _ => {}
+        }
+    }
 
-    let input = ResponseInput::Items(vec![ResponseItem::Message {
-        id: None,
-        role: "user".into(),
-        content: MessageContent::Parts(vec![
-            ContentPart::InputText {
-                text: "What colour is this image?".into(),
-            },
-            ContentPart::InputImage {
-                image_url: None,
-                image_data: Some(tiny_png_b64.into()),
-                media_type: Some("image/png".into()),
-                detail: Some("auto".into()),
-            },
-        ]),
-        status: None,
-    }]);
-
-    let resp = client
-        .create(input, None)
-        .await
-        .expect("vision create failed (FOUNDRY_VISION_MODEL_ID is set but request failed)");
-    println!("Vision response: {}", resp.output_text());
-    assert_eq!(resp.status, "completed");
+    false
 }
diff --git a/sdk/rust/tests/unit/main.rs b/sdk/rust/tests/unit/main.rs
deleted file mode 100644
index 25d6f8d23..000000000
--- a/sdk/rust/tests/unit/main.rs
+++ /dev/null
@@ -1,5 +0,0 @@
-//! Unit test binary for the Foundry Local Rust SDK — Responses API.
-//!
-//! These tests do not require a running server.
-
-mod responses_test;
diff --git a/sdk/rust/tests/unit/responses_test.rs b/sdk/rust/tests/unit/responses_test.rs
deleted file mode 100644
index 86fdf7d06..000000000
--- a/sdk/rust/tests/unit/responses_test.rs
+++ /dev/null
@@ -1,442 +0,0 @@
-//! Unit tests for the Responses API types and SSE parsing.
-//!
-//! All tests run without a server.
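
The integration test removed above drove the tool-calling loop end to end; stripped down, it is two `create` calls against the typed client. A minimal sketch, assuming `client: ResponsesClient` plus `opts`/`final_opts` values that carry the tool definition and the `previous_response_id` respectively (`call_id` is taken from the `function_call` item in the first response's `output`):

```rust
// Turn 1: with tool_choice "required" in `opts`, the model answers with a
// `function_call` output item instead of text.
let first = client
    .create(ResponseInput::Text("What is 6 times 7?".into()), Some(opts))
    .await?;

// Turn 2: echo the locally computed result back as a `function_call_output`
// item; `previous_response_id` in `final_opts` links the two turns.
let follow_up = ResponseInput::Items(vec![ResponseItem::FunctionCallOutput {
    id: None,
    call_id,
    output: "42".into(),
    status: None,
}]);
let answer = client.create(follow_up, Some(final_opts)).await?;
println!("{}", answer.output_text());
```
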
- -use foundry_local_sdk::{ - ListResponsesOptions, ListResponsesResult, MessageContent, ReasoningConfig, ResponseInput, - ResponseItem, ResponseObject, ResponsesClient, ResponsesClientSettings, - ResponsesContentPart as ContentPart, StreamingEvent, TextConfig, TextFormat, -}; -use serde_json::json; -use std::time::Duration; - -// ── Settings defaults ──────────────────────────────────────────────────────── - -#[test] -fn settings_defaults_omit_store() { - let settings = ResponsesClientSettings::new(); - assert_eq!( - settings.store, None, - "store should be omitted unless callers explicitly opt in" - ); -} - -#[test] -fn settings_default_trait_also_omits_store() { - let settings = ResponsesClientSettings::default(); - assert_eq!(settings.store, None); -} - -#[test] -fn settings_default_timeout_is_sixty_seconds() { - let settings = ResponsesClientSettings::default(); - assert_eq!(settings.timeout, Duration::from_secs(60)); -} - -#[test] -fn settings_all_other_fields_default_to_none() { - let s = ResponsesClientSettings::new(); - assert!(s.instructions.is_none()); - assert!(s.temperature.is_none()); - assert!(s.top_p.is_none()); - assert!(s.max_output_tokens.is_none()); - assert!(s.frequency_penalty.is_none()); - assert!(s.presence_penalty.is_none()); - assert!(s.tool_choice.is_none()); - assert!(s.truncation.is_none()); - assert!(s.parallel_tool_calls.is_none()); - assert!(s.metadata.is_none()); - assert!(s.reasoning.is_none()); - assert!(s.text.is_none()); - assert!(s.seed.is_none()); -} - -// ── output_text ────────────────────────────────────────────────────────────── - -fn make_response_with_text(role: &str, text: &str) -> ResponseObject { - serde_json::from_value(json!({ - "id": "resp_test", - "object": "response", - "created_at": 0, - "status": "completed", - "model": "test-model", - "output": [ - { - "type": "message", - "role": role, - "content": [{ "type": "output_text", "text": text }] - } - ] - })) - .expect("failed to deserialize test ResponseObject") -} - -fn make_response_with_string_content(text: &str) -> ResponseObject { - serde_json::from_value(json!({ - "id": "resp_test", - "object": "response", - "created_at": 0, - "status": "completed", - "model": "test-model", - "output": [ - { - "type": "message", - "role": "assistant", - "content": text - } - ] - })) - .expect("failed to deserialize test ResponseObject") -} - -#[test] -fn output_text_extracts_assistant_message_parts() { - let resp = make_response_with_text("assistant", "Hello, world!"); - assert_eq!(resp.output_text(), "Hello, world!"); -} - -#[test] -fn output_text_extracts_assistant_string_content() { - let resp = make_response_with_string_content("Direct string content"); - assert_eq!(resp.output_text(), "Direct string content"); -} - -#[test] -fn output_text_skips_non_assistant_messages() { - let resp = make_response_with_text("user", "I am the user"); - assert_eq!( - resp.output_text(), - "", - "user message should not be returned" - ); -} - -#[test] -fn output_text_returns_empty_for_no_output() { - let resp: ResponseObject = serde_json::from_value(json!({ - "id": "resp_test", - "object": "response", - "created_at": 0, - "status": "completed", - "model": "test-model", - "output": [] - })) - .unwrap(); - assert_eq!(resp.output_text(), ""); -} - -#[test] -fn output_text_concatenates_multiple_parts() { - let resp: ResponseObject = serde_json::from_value(json!({ - "id": "resp_test", - "object": "response", - "created_at": 0, - "status": "completed", - "model": "test-model", - "output": [{ - "type": "message", - 
"role": "assistant", - "content": [ - { "type": "output_text", "text": "Hello" }, - { "type": "output_text", "text": ", world!" } - ] - }] - })) - .unwrap(); - assert_eq!(resp.output_text(), "Hello, world!"); -} - -// ── Content part serialisation ─────────────────────────────────────────────── - -#[test] -fn content_part_input_text_serializes_correctly() { - let part = ContentPart::InputText { - text: "hello".into(), - }; - let json = serde_json::to_value(&part).unwrap(); - assert_eq!(json["type"], "input_text"); - assert_eq!(json["text"], "hello"); -} - -#[test] -fn content_part_output_text_serializes_correctly() { - let part = ContentPart::OutputText { - text: "hi".into(), - annotations: None, - logprobs: None, - }; - let json = serde_json::to_value(&part).unwrap(); - assert_eq!(json["type"], "output_text"); - assert_eq!(json["text"], "hi"); - // skip_serializing_if = None omits the field - assert!(json.get("annotations").is_none()); -} - -#[test] -fn content_part_refusal_roundtrips() { - let part = ContentPart::Refusal { - refusal: "I can't do that".into(), - }; - let json = serde_json::to_string(&part).unwrap(); - let back: ContentPart = serde_json::from_str(&json).unwrap(); - let ContentPart::Refusal { refusal } = back else { - panic!("Expected Refusal variant"); - }; - assert_eq!(refusal, "I can't do that"); -} - -#[test] -fn input_image_content_serializes_with_base64() { - let part = ContentPart::InputImage { - image_url: None, - image_data: Some("base64data==".into()), - media_type: Some("image/png".into()), - detail: Some("auto".into()), - }; - let json = serde_json::to_value(&part).unwrap(); - assert_eq!(json["type"], "input_image"); - assert_eq!(json["image_data"], "base64data=="); - assert_eq!(json["media_type"], "image/png"); - assert_eq!(json["detail"], "auto"); - // image_url should be omitted (None) - assert!(json.get("image_url").is_none()); -} - -#[test] -fn input_image_content_serializes_with_url() { - let part = ContentPart::InputImage { - image_url: Some("https://example.com/img.png".into()), - image_data: None, - media_type: None, - detail: None, - }; - let json = serde_json::to_value(&part).unwrap(); - assert_eq!(json["image_url"], "https://example.com/img.png"); - assert!(json.get("image_data").is_none()); - assert!(json.get("media_type").is_none()); - assert!(json.get("detail").is_none()); -} - -#[tokio::test] -async fn input_image_requires_exactly_one_source() { - let client = ResponsesClient::new("http://127.0.0.1:1", Some("test-model")); - let invalid_input = ResponseInput::Items(vec![ResponseItem::Message { - id: None, - role: "user".into(), - content: MessageContent::Parts(vec![ContentPart::InputImage { - image_url: Some("https://example.com/img.png".into()), - image_data: Some("base64data==".into()), - media_type: Some("image/png".into()), - detail: None, - }]), - status: None, - }]); - - let err = client - .create(invalid_input, None) - .await - .expect_err("invalid input_image should fail before network request"); - assert!(err - .to_string() - .contains("Provide exactly one of image_url or image_data")); -} - -#[tokio::test] -async fn timeout_must_be_positive() { - let mut client = ResponsesClient::new("http://127.0.0.1:1", Some("test-model")); - client.settings.timeout = Duration::ZERO; - - let err = client - .list() - .await - .expect_err("zero timeout should fail before network request"); - assert!(err - .to_string() - .contains("timeout must be greater than zero")); -} - -#[test] -fn list_response_result_deserializes_pagination_fields() { - let 
result: ListResponsesResult = serde_json::from_value(json!({ - "object": "list", - "data": [], - "first_id": "resp_first", - "last_id": "resp_last", - "has_more": true - })) - .unwrap(); - - assert_eq!(result.first_id.as_deref(), Some("resp_first")); - assert_eq!(result.last_id.as_deref(), Some("resp_last")); - assert_eq!(result.has_more, Some(true)); -} - -#[test] -fn list_options_serialize_query_fields() { - let options = ListResponsesOptions { - limit: Some(10), - order: Some("desc".into()), - after: Some("resp_123".into()), - }; - let json = serde_json::to_value(options).unwrap(); - assert_eq!(json["limit"], 10); - assert_eq!(json["order"], "desc"); - assert_eq!(json["after"], "resp_123"); -} - -// ── ResponseItem serialisation ─────────────────────────────────────────────── - -#[test] -fn response_item_function_call_roundtrips() { - let item = ResponseItem::FunctionCall { - id: Some("fc_1".into()), - call_id: "call_abc".into(), - name: "get_weather".into(), - arguments: r#"{"city":"London"}"#.into(), - status: Some("completed".into()), - }; - let json = serde_json::to_string(&item).unwrap(); - let back: ResponseItem = serde_json::from_str(&json).unwrap(); - let ResponseItem::FunctionCall { name, .. } = back else { - panic!("Expected FunctionCall variant"); - }; - assert_eq!(name, "get_weather"); -} - -#[test] -fn response_item_message_with_string_content_roundtrips() { - let json = json!({ - "type": "message", - "role": "user", - "content": "Hello" - }); - let item: ResponseItem = serde_json::from_value(json).unwrap(); - let ResponseItem::Message { content, .. } = &item else { - panic!("Expected Message variant"); - }; - assert!(matches!(content, MessageContent::Text(_))); -} - -// ── Streaming event deserialisation ───────────────────────────────────────── - -#[test] -fn streaming_event_output_text_delta_deserializes() { - let json = json!({ - "type": "response.output_text.delta", - "item_id": "item_1", - "output_index": 0, - "content_index": 0, - "delta": "Hello", - "sequence_number": 5 - }); - let event: StreamingEvent = serde_json::from_value(json).unwrap(); - let StreamingEvent::OutputTextDelta { - delta, - sequence_number, - .. - } = event - else { - panic!("Expected OutputTextDelta variant"); - }; - assert_eq!(delta, "Hello"); - assert_eq!(sequence_number, 5); -} - -#[test] -fn streaming_event_response_completed_deserializes() { - let json = json!({ - "type": "response.completed", - "sequence_number": 10, - "response": { - "id": "resp_1", - "object": "response", - "created_at": 1234567890_i64, - "status": "completed", - "model": "test", - "output": [] - } - }); - let event: StreamingEvent = serde_json::from_value(json).unwrap(); - assert!(matches!(event, StreamingEvent::ResponseCompleted { .. })); -} - -#[test] -fn streaming_event_error_deserializes() { - let json = json!({ - "type": "error", - "code": "model_error", - "message": "Something went wrong", - "sequence_number": 2 - }); - let event: StreamingEvent = serde_json::from_value(json).unwrap(); - let StreamingEvent::Error { code, message, .. } = event else { - panic!("Expected Error variant"); - }; - assert_eq!(code.as_deref(), Some("model_error")); - assert_eq!(message.as_deref(), Some("Something went wrong")); -} - -// ── SSE parser ─────────────────────────────────────────────────────────────── -// -// The SSE parser itself (`parse_sse_stream`) lives in `responses_client.rs` and -// is exercised by `#[cfg(test)] mod tests` in that file so the real -// implementation is covered. 
The check below just verifies the externally -// observable terminator string is what we expect. - -#[test] -fn sse_done_signal_is_recognized() { - let block = "data: [DONE]"; - assert!(block.trim() == "data: [DONE]"); -} - -// ── ResponseInput serde ────────────────────────────────────────────────────── - -#[test] -fn response_input_text_serializes_as_string() { - let input = ResponseInput::Text("what is 2+2?".into()); - let json = serde_json::to_value(&input).unwrap(); - assert_eq!(json, json!("what is 2+2?")); -} - -#[test] -fn response_input_items_serializes_as_array() { - let input = ResponseInput::Items(vec![ResponseItem::Message { - id: None, - role: "user".into(), - content: MessageContent::Text("hello".into()), - status: None, - }]); - let json = serde_json::to_value(&input).unwrap(); - assert!(json.is_array()); -} - -// ── TextConfig / ReasoningConfig ───────────────────────────────────────────── - -#[test] -fn text_config_with_json_schema_serializes() { - let cfg = TextConfig { - format: Some(TextFormat { - format_type: "json_schema".into(), - name: Some("MySchema".into()), - schema: Some(json!({"type": "object"})), - description: None, - strict: Some(true), - }), - verbosity: None, - }; - let json = serde_json::to_value(&cfg).unwrap(); - assert_eq!(json["format"]["type"], "json_schema"); - assert_eq!(json["format"]["name"], "MySchema"); - assert_eq!(json["format"]["strict"], true); -} - -#[test] -fn reasoning_config_serializes_correctly() { - let cfg = ReasoningConfig { - effort: Some("high".into()), - summary: Some("concise".into()), - }; - let json = serde_json::to_value(&cfg).unwrap(); - assert_eq!(json["effort"], "high"); - assert_eq!(json["summary"], "concise"); -} From 9f89490f8bfccabd963b30731404b39199e76af3 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 18:16:22 -0400 Subject: [PATCH 4/5] rust: align Responses web-service sample Mirror the JavaScript Responses web-service PR by adding a Rust samples/rust/web-server-responses package, registering it in the Rust samples workspace, and documenting it in the sample indexes. Tighten the Rust Responses integration tests to use cached variants, skip CI/local-missing prerequisites, assert response object/status and streaming event types, and use the same simple get_weather tool flow as the reference. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/README.md | 2 +- samples/rust/Cargo.toml | 1 + samples/rust/README.md | 1 + samples/rust/web-server-responses/Cargo.toml | 14 + samples/rust/web-server-responses/src/main.rs | 322 ++++++++++++++++++ sdk/rust/examples/responses_web_service.rs | 71 ++-- sdk/rust/tests/integration/responses_test.rs | 84 ++++- 7 files changed, 458 insertions(+), 37 deletions(-) create mode 100644 samples/rust/web-server-responses/Cargo.toml create mode 100644 samples/rust/web-server-responses/src/main.rs diff --git a/samples/README.md b/samples/README.md index bcac6bf3a..5439d6533 100644 --- a/samples/README.md +++ b/samples/README.md @@ -11,4 +11,4 @@ Explore complete working examples that demonstrate how to use Foundry Local — | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | | [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. 
| | [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. | -| [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | +| [**Rust**](rust/) | 10 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, Responses API, and tutorials. | diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml index 7be551ea0..ea0b7a311 100644 --- a/samples/rust/Cargo.toml +++ b/samples/rust/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "foundry-local-webserver", + "web-server-responses", "tool-calling-foundry-local", "native-chat-completions", "audio-transcription-example", diff --git a/samples/rust/README.md b/samples/rust/README.md index bc65306fa..5980dcbcc 100644 --- a/samples/rust/README.md +++ b/samples/rust/README.md @@ -14,6 +14,7 @@ These samples demonstrate how to use the Rust binding for Foundry Local. | [embeddings](embeddings/) | Generate single and batch text embeddings using the native embedding client. | | [audio-transcription-example](audio-transcription-example/) | Audio transcription (non-streaming and streaming) using the Whisper model. | | [foundry-local-webserver](foundry-local-webserver/) | Start a local OpenAI-compatible web server and call it with a standard HTTP client. | +| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. | | [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with streaming responses, multi-turn conversation, and local tool execution. | | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). | | [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). | diff --git a/samples/rust/web-server-responses/Cargo.toml b/samples/rust/web-server-responses/Cargo.toml new file mode 100644 index 000000000..8395637c7 --- /dev/null +++ b/samples/rust/web-server-responses/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "web-server-responses" +version = "0.1.0" +edition = "2021" +description = "Responses API sample using the Foundry Local OpenAI-compatible web service" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +serde_json = "1" +reqwest = { version = "0.12", features = ["json", "stream"] } + +[target.'cfg(windows)'.dependencies] +foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] } diff --git a/samples/rust/web-server-responses/src/main.rs b/samples/rust/web-server-responses/src/main.rs new file mode 100644 index 000000000..774ed199c --- /dev/null +++ b/samples/rust/web-server-responses/src/main.rs @@ -0,0 +1,322 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//! Responses API web-service sample. +//! +//! Demonstrates how to use the Rust SDK for Foundry Local setup, model +//! lifecycle, and local web-service lifecycle, then call `/v1/responses` with a +//! standard HTTP client. 
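+
+// The JSON bodies this sample POSTs to `/v1/responses` share one shape; as a
+// sketch, the non-streaming request assembled in `run_responses_flow` below
+// carries the model id plus this sample's explicit generation settings:
+//
+//   {
+//     "model": "<model id>",
+//     "input": "Reply with one short sentence about local AI.",
+//     "temperature": 0.0,
+//     "max_output_tokens": 64,
+//     "store": false
+//   }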
+
+// 
+use std::error::Error;
+use std::io::{self, Write};
+
+use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
+use serde_json::{json, Value};
+// 
+
+type SampleResult<T> = Result<T, Box<dyn Error>>;
+const MODEL_ALIAS: &str = "qwen2.5-0.5b";
+
+#[tokio::main]
+async fn main() -> SampleResult<()> {
+    println!("Responses Web Service");
+    println!("=====================\n");
+
+    // ── 1. Initialise the SDK ──────────────────────────────────────────
+    // 
+    println!("Initializing Foundry Local SDK...");
+    let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?;
+    println!("SDK initialized successfully");
+
+    manager
+        .download_and_register_eps_with_progress(None, {
+            let mut current_ep = String::new();
+            move |ep_name: &str, percent: f64| {
+                if ep_name != current_ep {
+                    if !current_ep.is_empty() {
+                        println!();
+                    }
+                    current_ep = ep_name.to_string();
+                }
+                print!("\r  {:<30} {:5.1}%", ep_name, percent);
+                io::stdout().flush().ok();
+            }
+        })
+        .await?;
+    println!();
+    // 
+
+    // ── 2. Download and load a model ───────────────────────────────────
+    // 
+    let model = manager.catalog().get_model(MODEL_ALIAS).await?;
+
+    if !model.is_cached().await? {
+        println!("Downloading model {MODEL_ALIAS}...");
+        model
+            .download(Some(|progress: f64| {
+                print!("\rDownloading model... {progress:.1}%");
+                io::stdout().flush().ok();
+            }))
+            .await?;
+        println!();
+    }
+
+    println!("Loading model {MODEL_ALIAS}...");
+    model.load().await?;
+    println!("Model loaded");
+    // 
+
+    // ── 3. Start the OpenAI-compatible web service ─────────────────────
+    // 
+    println!("Starting web service...");
+    manager.start_web_service().await?;
+    println!("Web service started");
+
+    let endpoint = manager
+        .urls()?
+        .first()
+        .expect("Web service did not return an endpoint")
+        .trim_end_matches('/')
+        .to_string();
+    let base_url = format!("{endpoint}/v1");
+    println!("Using base URL: {base_url}");
+    // 
+
+    let result = run_responses_flow(&base_url, model.id()).await;
+
+    // ── 4. Clean up ────────────────────────────────────────────────────
+    manager.stop_web_service().await.ok();
+    model.unload().await.ok();
+
+    result
+}
+
+async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> {
+    let http = reqwest::Client::new();
+
+    println!("\nTesting a non-streaming Responses call...");
+    let response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "input": "Reply with one short sentence about local AI.",
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false
+        }),
+    )
+    .await?;
+    println!("[ASSISTANT]: {}", output_text(&response));
+
+    println!("\nTesting a streaming Responses call...");
+    print!("[ASSISTANT STREAM]: ");
+    io::stdout().flush().ok();
+    let streaming_response = http
+        .post(format!("{base_url}/responses"))
+        .header(reqwest::header::ACCEPT, "text/event-stream")
+        .json(&json!({
+            "model": model_id,
+            "input": "Count from one to three.",
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false,
+            "stream": true
+        }))
+        .send()
+        .await?;
+    let streamed = read_responses_sse(streaming_response).await?;
+    println!();
+    if !streamed.created || streamed.delta_count == 0 || !streamed.completed {
+        return Err(
+            "stream did not include response.created, text delta, and completion events".into(),
+        );
+    }
+
+    println!("\nTesting Responses tool calling...");
+    let tools = [get_weather_tool()];
+    let tool_response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "input": "Use the get_weather tool and then answer with the weather.",
+            "tools": tools,
+            "tool_choice": "required",
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": true
+        }),
+    )
+    .await?;
+
+    let (call_id, name) =
+        find_function_call(&tool_response).ok_or("expected a function_call item")?;
+    println!("[TOOL CALL]: {name} ({call_id})");
+
+    let final_response = post_response_json(
+        &http,
+        base_url,
+        json!({
+            "model": model_id,
+            "previous_response_id": tool_response["id"].clone(),
+            "input": [{
+                "type": "function_call_output",
+                "call_id": call_id,
+                "output": "{\"location\":\"Seattle\",\"weather\":\"72 degrees F and sunny\"}"
+            }],
+            "tools": [get_weather_tool()],
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false
+        }),
+    )
+    .await?;
+    println!("[ASSISTANT FINAL]: {}", output_text(&final_response));
+
+    Ok(())
+}
+
+async fn post_response_json(
+    http: &reqwest::Client,
+    base_url: &str,
+    body: Value,
+) -> SampleResult<Value> {
+    let response = http
+        .post(format!("{base_url}/responses"))
+        .json(&body)
+        .send()
+        .await?;
+    let status = response.status();
+    let text = response.text().await?;
+    if !status.is_success() {
+        return Err(format!("Responses API returned {status}: {text}").into());
+    }
+    Ok(serde_json::from_str(&text)?)
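+    // Note: the body is read as text before the status check so that a
+    // non-2xx reply surfaces its payload in the returned error, and JSON
+    // parsing only happens once the call is known to have succeeded.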
+}
+
+fn output_text(response: &Value) -> String {
+    if let Some(text) = response.get("output_text").and_then(Value::as_str) {
+        return text.to_string();
+    }
+
+    response
+        .get("output")
+        .and_then(Value::as_array)
+        .into_iter()
+        .flatten()
+        .find_map(|item| {
+            if item.get("type").and_then(Value::as_str) != Some("message") {
+                return None;
+            }
+            match item.get("content") {
+                Some(Value::String(text)) => Some(text.clone()),
+                Some(Value::Array(parts)) => Some(
+                    parts
+                        .iter()
+                        .filter_map(|part| {
+                            (part.get("type").and_then(Value::as_str) == Some("output_text"))
+                                .then(|| part.get("text").and_then(Value::as_str))
+                                .flatten()
+                        })
+                        .collect::<String>(),
+                ),
+                _ => None,
+            }
+        })
+        .unwrap_or_default()
+}
+
+fn find_function_call(response: &Value) -> Option<(String, String)> {
+    response.get("output")?.as_array()?.iter().find_map(|item| {
+        if item.get("type").and_then(Value::as_str) != Some("function_call") {
+            return None;
+        }
+        let call_id = item.get("call_id")?.as_str()?.to_string();
+        let name = item.get("name")?.as_str()?.to_string();
+        Some((call_id, name))
+    })
+}
+
+fn get_weather_tool() -> Value {
+    json!({
+        "type": "function",
+        "name": "get_weather",
+        "description": "Get the current weather. This sample always returns Seattle weather.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+            "additionalProperties": false
+        }
+    })
+}
+
+#[derive(Default)]
+struct StreamSummary {
+    created: bool,
+    delta_count: usize,
+    completed: bool,
+}
+
+async fn read_responses_sse(mut response: reqwest::Response) -> SampleResult<StreamSummary> {
+    let status = response.status();
+    if !status.is_success() {
+        let text = response.text().await?;
+        return Err(format!("Responses API returned {status}: {text}").into());
+    }
+
+    let mut buffer = String::new();
+    let mut summary = StreamSummary::default();
+
+    while let Some(chunk) = response.chunk().await? {
+        buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n"));
+        while let Some(block_end) = buffer.find("\n\n") {
+            let block = buffer[..block_end].to_string();
+            buffer = buffer[block_end + 2..].to_string();
+            if handle_sse_block(&block, &mut summary) {
+                return Ok(summary);
+            }
+        }
+    }
+
+    if !buffer.trim().is_empty() {
+        handle_sse_block(&buffer, &mut summary);
+    }
+
+    Ok(summary)
+}
+
+fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool {
+    let data = block
+        .lines()
+        .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start))
+        .collect::<Vec<_>>()
+        .join("\n");
+
+    if data.is_empty() {
+        return false;
+    }
+    if data == "[DONE]" {
+        return true;
+    }
+
+    if let Ok(event) = serde_json::from_str::<Value>(&data) {
+        match event.get("type").and_then(Value::as_str) {
+            Some("response.created") => summary.created = true,
+            Some("response.output_text.delta") => {
+                summary.delta_count += 1;
+                if let Some(delta) = event.get("delta").and_then(Value::as_str) {
+                    print!("{delta}");
+                    io::stdout().flush().ok();
+                }
+            }
+            Some("response.completed") => summary.completed = true,
+            _ => {}
+        }
+    }
+
+    false
+}
+// 
diff --git a/sdk/rust/examples/responses_web_service.rs b/sdk/rust/examples/responses_web_service.rs
index b8ef5d391..06948f4a5 100644
--- a/sdk/rust/examples/responses_web_service.rs
+++ b/sdk/rust/examples/responses_web_service.rs
@@ -12,21 +12,34 @@ use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager};
 use serde_json::{json, Value};
 
 type SampleResult<T> = Result<T, Box<dyn Error>>;
+const MODEL_ALIAS: &str = "qwen2.5-0.5b";
 
 #[tokio::main]
 async fn main() -> SampleResult<()> {
+    println!("Responses Web Service");
+    println!("=====================\n");
+
     let config = FoundryLocalConfig::new("foundry_local_responses_web_service_sample");
     let manager = FoundryLocalManager::create(config)?;
 
-    let models = manager.catalog().get_models().await?;
-    let model_alias = ["qwen2.5-0.5b", "phi-4-mini", "phi-3.5-mini"]
-        .iter()
-        .find(|alias| models.iter().any(|m| m.alias() == **alias))
-        .map(|s| s.to_string())
-        .or_else(|| models.first().map(|m| m.alias().to_string()))
-        .expect("No models available in the catalog");
+    manager
+        .download_and_register_eps_with_progress(None, {
+            let mut current_ep = String::new();
+            move |ep_name: &str, percent: f64| {
+                if ep_name != current_ep {
+                    if !current_ep.is_empty() {
+                        println!();
+                    }
+                    current_ep = ep_name.to_string();
+                }
+                print!("\r  {:<30} {:5.1}%", ep_name, percent);
+                io::stdout().flush().ok();
+            }
+        })
+        .await?;
+    println!();
 
-    let model = manager.catalog().get_model(&model_alias).await?;
+    let model = manager.catalog().get_model(MODEL_ALIAS).await?;
     if !model.is_cached().await? {
         println!("Downloading model '{}'...", model.alias());
         model
@@ -67,7 +80,9 @@ async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()>
         json!({
             "model": model_id,
             "input": "What is 2 + 2? 
Respond with just the answer.", - "temperature": 0.0 + "temperature": 0.0, + "max_output_tokens": 64, + "store": false }), ) .await?; @@ -82,14 +97,19 @@ async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> "model": model_id, "input": "Count from 1 to 3.", "temperature": 0.0, + "max_output_tokens": 64, + "store": false, "stream": true })) + .header(reqwest::header::ACCEPT, "text/event-stream") .send() .await?; let streamed = read_responses_sse(streaming_response).await?; println!("\nSaw {} text delta event(s).", streamed.delta_count); - if streamed.delta_count == 0 || !streamed.completed { - return Err("stream did not include both text delta and completion events".into()); + if !streamed.created || streamed.delta_count == 0 || !streamed.completed { + return Err( + "stream did not include response.created, text delta, and completion events".into(), + ); } println!("\n--- Function calling response ---"); @@ -99,10 +119,11 @@ async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> base_url, json!({ "model": model_id, - "input": "Use the get_weather tool for Seattle, then answer.", + "input": "Use the get_weather tool and then answer with the weather.", "tools": [weather_tool.clone()], "tool_choice": "required", "temperature": 0.0, + "max_output_tokens": 64, "store": true }), ) @@ -120,10 +141,12 @@ async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> "input": [{ "type": "function_call_output", "call_id": call_id, - "output": "Seattle weather is 72F and sunny." + "output": "{\"location\":\"Seattle\",\"weather\":\"72 degrees F and sunny\"}" }], "tools": [weather_tool], - "temperature": 0.0 + "temperature": 0.0, + "max_output_tokens": 64, + "store": false }), ) .await?; @@ -151,6 +174,10 @@ async fn post_response_json( } fn output_text(response: &Value) -> String { + if let Some(text) = response.get("output_text").and_then(Value::as_str) { + return text.to_string(); + } + response .get("output") .and_then(Value::as_array) @@ -193,19 +220,18 @@ fn get_weather_tool() -> Value { json!({ "type": "function", "name": "get_weather", - "description": "Get the current weather for a city.", + "description": "Get the current weather. 
This sample always returns Seattle weather.", "parameters": { "type": "object", - "properties": { - "city": { "type": "string", "description": "City name" } - }, - "required": ["city"] + "properties": {}, + "additionalProperties": false } }) } #[derive(Default)] struct StreamSummary { + created: bool, delta_count: usize, completed: bool, } @@ -231,13 +257,17 @@ async fn read_responses_sse(mut response: reqwest::Response) -> SampleResult bool { let data = block .lines() - .filter_map(|line| line.trim().strip_prefix("data: ")) + .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start)) .collect::>() .join("\n"); @@ -250,6 +280,7 @@ fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool { if let Ok(event) = serde_json::from_str::(&data) { match event.get("type").and_then(Value::as_str) { + Some("response.created") => summary.created = true, Some("response.output_text.delta") => { summary.delta_count += 1; if let Some(delta) = event.get("delta").and_then(Value::as_str) { diff --git a/sdk/rust/tests/integration/responses_test.rs b/sdk/rust/tests/integration/responses_test.rs index 91e0356c2..cc5fae9c9 100644 --- a/sdk/rust/tests/integration/responses_test.rs +++ b/sdk/rust/tests/integration/responses_test.rs @@ -15,26 +15,46 @@ struct ResponsesServiceContext { impl ResponsesServiceContext { async fn start() -> Option { + if common::is_running_in_ci() { + eprintln!("Skipping Responses web-service test in CI"); + return None; + } + let manager = common::get_test_manager(); let catalog = manager.catalog(); - let model = match catalog.get_model(common::TEST_MODEL_ALIAS).await { - Ok(model) => model, + + let cached_models = match catalog.get_cached_models().await { + Ok(models) => models, Err(e) => { - eprintln!( - "Skipping Responses web-service test: model '{}' unavailable: {e}", - common::TEST_MODEL_ALIAS - ); + eprintln!("Skipping Responses web-service test: cached model lookup failed: {e}"); return None; } }; - if !model.is_cached().await.unwrap_or(false) { + let Some(cached_variant) = cached_models + .into_iter() + .find(|model| model.alias() == common::TEST_MODEL_ALIAS) + else { eprintln!( "Skipping Responses web-service test: model '{}' is not cached", common::TEST_MODEL_ALIAS ); return None; - } + }; + + let model = match catalog.get_model(common::TEST_MODEL_ALIAS).await { + Ok(model) => model, + Err(e) => { + eprintln!( + "Skipping Responses web-service test: model '{}' unavailable: {e}", + common::TEST_MODEL_ALIAS + ); + return None; + } + }; + model + .select_variant(cached_variant.as_ref()) + .expect("select cached model variant failed"); model.load().await.expect("model.load() failed"); manager @@ -81,7 +101,9 @@ async fn should_create_non_streaming_response_via_rest_api() { json!({ "model": ctx.model.id(), "input": "What is 2 + 2? 
Respond with just the answer.",
-            "temperature": 0.0
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false
         }),
     )
     .await;
@@ -89,6 +111,11 @@
     ctx.cleanup().await;
 
     let body = result.expect("Responses non-streaming request failed");
+    assert_eq!(body.get("object").and_then(Value::as_str), Some("response"));
+    assert_eq!(
+        body.get("status").and_then(Value::as_str),
+        Some("completed")
+    );
     let text = output_text(&body);
     println!("Responses non-streaming text: {text}");
     assert!(!text.trim().is_empty(), "response text should not be empty");
@@ -109,8 +136,11 @@
             "model": ctx.model.id(),
             "input": "Count from 1 to 3.",
             "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false,
             "stream": true
         }))
+        .header(reqwest::header::ACCEPT, "text/event-stream")
         .send()
         .await?;
@@ -121,6 +151,10 @@
     ctx.cleanup().await;
 
     let summary = result.expect("Responses streaming request failed");
+    assert!(
+        summary.created,
+        "expected a response.created event in the stream"
+    );
     assert!(
         summary.delta_count > 0,
         "expected at least one response.output_text.delta event"
@@ -148,6 +182,7 @@
             "tools": [weather_tool.clone()],
             "tool_choice": "required",
             "temperature": 0.0,
+            "max_output_tokens": 64,
             "store": true
         }),
    )
@@ -155,6 +190,9 @@
     let (call_id, name) = find_function_call(&tool_response)
         .ok_or("expected a function_call item in the tool response")?;
+    if call_id.is_empty() {
+        return Err("expected non-empty function call ID".into());
+    }
     if name != "get_weather" {
         return Err(format!("expected get_weather function call, got {name}").into());
     }
@@ -170,11 +208,17 @@
                 "output": "Seattle weather is 72F and sunny."
             }],
             "tools": [weather_tool],
-            "temperature": 0.0
+            "temperature": 0.0,
+            "max_output_tokens": 64,
+            "store": false
         }),
     )
     .await?;
 
+    if final_response.get("status").and_then(Value::as_str) != Some("completed") {
+        return Err(format!("expected completed final response, got {final_response}").into());
+    }
+
     Ok::<String, Box<dyn Error>>(output_text(&final_response))
     }
     .await;
@@ -205,6 +249,10 @@ async fn post_response_json(ctx: &ResponsesServiceContext, body: Value) -> TestResult<Value> {
 }
 
 fn output_text(response: &Value) -> String {
+    if let Some(text) = response.get("output_text").and_then(Value::as_str) {
+        return text.to_string();
+    }
+
     response
         .get("output")
         .and_then(Value::as_array)
@@ -247,19 +295,18 @@ fn get_weather_tool() -> Value {
     json!({
         "type": "function",
         "name": "get_weather",
-        "description": "Get the current weather for a city.",
+        "description": "Get the current weather. 
This test always returns Seattle weather.", "parameters": { "type": "object", - "properties": { - "city": { "type": "string", "description": "City name" } - }, - "required": ["city"] + "properties": {}, + "additionalProperties": false } }) } #[derive(Default)] struct StreamSummary { + created: bool, delta_count: usize, completed: bool, } @@ -285,13 +332,17 @@ async fn read_responses_sse(mut response: reqwest::Response) -> TestResult bool { let data = block .lines() - .filter_map(|line| line.trim().strip_prefix("data: ")) + .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start)) .collect::>() .join("\n"); @@ -304,6 +355,7 @@ fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool { if let Ok(event) = serde_json::from_str::(&data) { match event.get("type").and_then(Value::as_str) { + Some("response.created") => summary.created = true, Some("response.output_text.delta") => summary.delta_count += 1, Some("response.completed") => summary.completed = true, _ => {} From 45ee9dc0f65dca4e7b98226889d86805d755ef32 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 18:27:18 -0400 Subject: [PATCH 5/5] docs: add Rust Responses sample README Document the Rust web-server Responses sample with prerequisites, restored Cargo dependencies, runtime downloads, and commands for running from either the samples workspace or sample directory. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/rust/web-server-responses/README.md | 67 +++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 samples/rust/web-server-responses/README.md diff --git a/samples/rust/web-server-responses/README.md b/samples/rust/web-server-responses/README.md new file mode 100644 index 000000000..49d94dcd6 --- /dev/null +++ b/samples/rust/web-server-responses/README.md @@ -0,0 +1,67 @@ +# Responses API web-service sample + +This sample starts the Foundry Local OpenAI-compatible web service with the Rust SDK, then calls the Responses API through raw HTTP requests to `/v1/responses`. + +It demonstrates: + +- Non-streaming Responses API calls +- Streaming Server-Sent Events (SSE) responses +- Function/tool calling with `previous_response_id` +- Local model load/unload and web-service cleanup + +## Prerequisites + +- Rust 1.70 or later +- Foundry Local runtime prerequisites for your platform +- Internet access the first time dependencies, execution providers, or the sample model need to be downloaded + +No OpenAI API key is required. The sample talks to the local Foundry Local web service. + +## What gets installed + +Cargo restores the Rust crates declared in `Cargo.toml`: + +| Dependency | Purpose | +|------------|---------| +| `foundry-local-sdk` | Initializes Foundry Local, downloads/registers execution providers, manages the model, and starts/stops the local web service. | +| `tokio` | Runs the async sample. | +| `reqwest` | Sends JSON requests and reads streaming SSE chunks from `/v1/responses`. | +| `serde_json` | Builds request payloads and reads response JSON. | + +On Windows, the sample enables the SDK `winml` feature through the target-specific dependency in `Cargo.toml`. + +At runtime, the sample also: + +- Downloads and registers Foundry Local execution providers if needed. +- Downloads `qwen2.5-0.5b` if it is not already cached. +- Starts the local OpenAI-compatible web service and uses the dynamic URL returned by the SDK. 
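+
+The web-service port is dynamic, so requests cannot target a fixed URL; a sketch of how the sample resolves the endpoint (from `src/main.rs`) before calling `/v1/responses`:
+
+```rust
+let endpoint = manager.urls()?.first().expect("no endpoint").trim_end_matches('/').to_string();
+let base_url = format!("{endpoint}/v1"); // requests go to {base_url}/responses
+```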
+ +Downloaded models, native runtime files, and Cargo build outputs are local machine artifacts and should not be committed. + +## Run the sample + +From the Rust samples workspace: + +```powershell +cd samples\rust +cargo run -p web-server-responses +``` + +Or from this sample directory: + +```powershell +cd samples\rust\web-server-responses +cargo run +``` + +The sample prints progress for execution-provider/model setup, then runs: + +1. A non-streaming Responses request. +2. A streaming Responses request that consumes `response.output_text.delta` events. +3. A function-calling request that asks the model to call `get_weather`, submits a `function_call_output`, and prints the final assistant response. + +## Troubleshooting + +If setup fails while resolving native Foundry Local symbols, verify that your locally installed Foundry Local runtime packages are compatible with the SDK version in this repository. + +If model download is unavailable, pre-cache `qwen2.5-0.5b` with your normal Foundry Local workflow, then run the sample again.
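+
+To check the service independently of this sample, send one request by hand. A minimal sketch (substitute the base URL and model id the sample prints; `tokio`, `reqwest`, and `serde_json` as in this sample's `Cargo.toml`):
+
+```rust
+use serde_json::json;
+
+#[tokio::main]
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Substitute the base URL printed by the sample ("Using base URL: ...").
+    let base_url = "http://127.0.0.1:PORT/v1";
+    let text = reqwest::Client::new()
+        .post(format!("{base_url}/responses"))
+        .json(&json!({ "model": "<model id>", "input": "Say hello.", "store": false }))
+        .send()
+        .await?
+        .text()
+        .await?;
+    println!("{text}"); // raw Response JSON on success, error payload otherwise
+    Ok(())
+}
+```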