From 12e30b943660a43ba8ab0e137d0fd188307edf9d Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 12:45:26 -0400 Subject: [PATCH 01/12] feat(sdk/python): add Responses API client Implements the OpenAI Responses API client for the Foundry Local Python SDK. New files: - src/openai/responses_types.py: full type system (content parts, items, tools, config, ResponseObject with output_text property), all streaming event dataclasses, parse_streaming_event factory, and _to_dict serializer - src/openai/responses_client.py: HTTP-only sync client (ResponsesClient, ResponsesClientSettings, ResponsesAPIError) with create, create_streaming (SSE generator), get, delete, cancel, get_input_items, list - examples/responses.py: 5 end-to-end scenarios (basic, streaming, multi-turn, tool calling, vision) - test/openai/test_responses_client.py: 56 unit tests (mocked HTTP) - test/openai/test_responses_integration.py: 14 integration tests gated on FOUNDRY_INTEGRATION_TESTS=1 Modified files: - src/foundry_local_manager.py: create_responses_client factory method - src/imodel.py: abstract create_responses_client - src/detail/model.py: delegating create_responses_client - src/detail/model_variant.py: concrete create_responses_client - src/openai/__init__.py: export ResponsesClient, ResponsesClientSettings, ResponsesAPIError - src/__init__.py: export all public Responses API types Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/examples/responses.py | 154 +++ sdk/python/requirements.txt | 11 +- sdk/python/src/__init__.py | 62 +- sdk/python/src/detail/model.py | 5 + sdk/python/src/detail/model_variant.py | 9 + sdk/python/src/foundry_local_manager.py | 23 + sdk/python/src/imodel.py | 16 + sdk/python/src/openai/__init__.py | 13 +- sdk/python/src/openai/responses_client.py | 413 ++++++++ sdk/python/src/openai/responses_types.py | 885 ++++++++++++++++++ .../test/openai/test_responses_client.py | 603 ++++++++++++ .../test/openai/test_responses_integration.py | 288 ++++++ 12 files changed, 2473 insertions(+), 9 deletions(-) create mode 100644 sdk/python/examples/responses.py create mode 100644 sdk/python/src/openai/responses_client.py create mode 100644 sdk/python/src/openai/responses_types.py create mode 100644 sdk/python/test/openai/test_responses_client.py create mode 100644 sdk/python/test/openai/test_responses_integration.py diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py new file mode 100644 index 000000000..ce810e814 --- /dev/null +++ b/sdk/python/examples/responses.py @@ -0,0 +1,154 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""End-to-end example for the OpenAI Responses API client. + +Run with:: + + python examples/responses.py + +Requires a loaded model and a started web service. 
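+
+The examples assume the ``phi-4-mini`` alias is available in your catalog;
+change ``MODEL_ALIAS`` below to target a different model.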
+""" + +from __future__ import annotations + +import json + +from foundry_local_sdk import ( + Configuration, + FoundryLocalManager, + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, +) + +MODEL_ALIAS = "phi-4-mini" + + +def setup(): + config = Configuration(app_name="ResponsesExample") + FoundryLocalManager.initialize(config) + mgr = FoundryLocalManager.instance + + mgr.download_and_register_eps() + + model = mgr.catalog.get_model(MODEL_ALIAS) + if model is None: + raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") + if not model.is_cached: + print(f"Downloading {MODEL_ALIAS}...") + model.download(progress_callback=lambda p: print(f" {p:.1f}%", end="\r")) + print() + print(f"Loading {model.alias}...", end="") + model.load() + print("loaded!") + mgr.start_web_service() + + client = mgr.create_responses_client(model.id) + return mgr, model, client + + +def basic_create(client): + print("\n=== 1. Basic create ===") + resp = client.create("What is 2 + 2? Answer in one word.") + print(f"status={resp.status} text={resp.output_text!r}") + + +def streaming(client): + print("\n=== 2. Streaming ===") + print("assistant: ", end="", flush=True) + for event in client.create_streaming("Count from 1 to 5, separated by spaces."): + if event.type == "response.output_text.delta": + print(event.delta, end="", flush=True) + elif event.type == "response.completed": + print(f"\n(completed, {event.response.usage.total_tokens} tokens)") + + +def multi_turn(client): + print("\n=== 3. Multi-turn ===") + first = client.create("My favorite color is green. Remember that.", store=True) + print(f"first id={first.id!r}") + second = client.create( + "What is my favorite color?", + previous_response_id=first.id, + ) + print(f"second: {second.output_text!r}") + + +def tool_calling(client): + print("\n=== 4. Tool calling ===") + tools = [ + FunctionToolDefinition( + name="multiply_numbers", + description="Multiply two integers together.", + parameters={ + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + "required": ["a", "b"], + }, + ) + ] + resp = client.create("What is 7 times 6?", tools=tools) + + # Find a function_call item in the output (if the model produced one). + for item in resp.output: + if getattr(item, "type", None) == "function_call": + print(f"call {item.name}({item.arguments})") + args = json.loads(item.arguments) + answer = args["a"] * args["b"] + follow = client.create( + [ + MessageItem(role="user", content="What is 7 times 6?"), + item, + # The function_call_output is sent back keyed by call_id + {"type": "function_call_output", "call_id": item.call_id, "output": str(answer)}, + ], + tools=tools, + ) + print(f"final: {follow.output_text!r}") + return + print(f"no tool call — got text: {resp.output_text!r}") + + +def vision(client): + print("\n=== 5. Vision ===") + # Requires a vision-capable model. Replace with a real PNG to see real output. 
+ tiny_png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + msg = MessageItem( + role="user", + content=[ + InputTextContent(text="Describe this image in one sentence."), + InputImageContent.from_bytes(tiny_png, "image/png"), + ], + ) + try: + resp = client.create([msg]) + print(f"vision response: {resp.output_text!r}") + except Exception as e: + print(f"(skipped — model may not support vision: {e})") + + +def main(): + mgr, model, client = setup() + try: + basic_create(client) + streaming(client) + multi_turn(client) + tool_calling(client) + vision(client) + finally: + mgr.stop_web_service() + model.unload() + + +if __name__ == "__main__": + main() diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 666a37211..25d05c298 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -1,9 +1,8 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 -# Standard native binary packages from the ORT-Nightly PyPI feed. -foundry-local-core==1.0.0rc1 -onnxruntime-core==1.24.4; sys_platform != "linux" -onnxruntime-gpu==1.24.4; sys_platform == "linux" -onnxruntime-genai-core==0.13.1; sys_platform != "linux" -onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" +foundry-local-core==1.0.0 +onnxruntime-gpu==1.24.4; platform_system == "Linux" +onnxruntime-core==1.24.4; platform_system != "Linux" +onnxruntime-genai-cuda==0.13.1; platform_system == "Linux" +onnxruntime-genai-core==0.13.1; platform_system != "Linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 14534d196..273e3f9c4 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,6 +7,34 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager +from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings +from .openai.responses_types import ( + ContentPart, + DeleteResponseResult, + FunctionCallItem, + FunctionCallOutputItem, + FunctionToolDefinition, + InputFileContent, + InputImageContent, + InputItemsListResponse, + InputTextContent, + ItemReference, + ListResponsesResult, + MessageItem, + OutputTextContent, + ReasoningConfig, + ReasoningItem, + RefusalContent, + ResponseError, + ResponseInputItem, + ResponseObject, + ResponseOutputItem, + ResponseUsage, + StreamingEvent, + TextConfig, + TextFormat, + parse_streaming_event, +) from .version import __version__ _logger = logging.getLogger(__name__) @@ -20,4 +48,36 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "__version__"] +__all__ = [ + "Configuration", + "ContentPart", + "DeleteResponseResult", + "FoundryLocalManager", + "FunctionCallItem", + "FunctionCallOutputItem", + "FunctionToolDefinition", + "InputFileContent", + "InputImageContent", + "InputItemsListResponse", + "InputTextContent", + "ItemReference", + "ListResponsesResult", + "MessageItem", + "OutputTextContent", + "ReasoningConfig", + "ReasoningItem", + "RefusalContent", + "ResponseError", + "ResponseInputItem", + "ResponseObject", + "ResponseOutputItem", + "ResponseUsage", + "ResponsesAPIError", + "ResponsesClient", + "ResponsesClientSettings", + "StreamingEvent", + "TextConfig", + "TextFormat", + "__version__", + "parse_streaming_event", +] diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index 6d60b7a2f..01dcfb471 100644 --- a/sdk/python/src/detail/model.py +++ 
b/sdk/python/src/detail/model.py @@ -11,6 +11,7 @@ from ..openai.chat_client import ChatClient from ..openai.audio_client import AudioClient from ..openai.embedding_client import EmbeddingClient +from ..openai.responses_client import ResponsesClient from .model_variant import ModelVariant from ..exception import FoundryLocalException from .core_interop import CoreInterop @@ -146,3 +147,7 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() + + def create_responses_client(self, base_url: str) -> "ResponsesClient": + """Create a Responses API client for the currently selected variant.""" + return self._selected_variant.create_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 76efb05cd..2e19662d5 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -17,6 +17,7 @@ from ..openai.audio_client import AudioClient from ..openai.chat_client import ChatClient from ..openai.embedding_client import EmbeddingClient +from ..openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -175,3 +176,11 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) + + def create_responses_client(self, base_url: str) -> ResponsesClient: + """Create a Responses API client for this variant. + + :param base_url: Base URL of the running Foundry Local web service + (e.g. ``manager.urls[0]``). + """ + return ResponsesClient(base_url, self.id) diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index a649f8e56..b891d1b17 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,6 +20,7 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException +from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -194,3 +195,25 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None + + def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: + """Create a :class:`ResponsesClient` bound to the running web service. + + The Responses API is HTTP-only, so the web service must be started + before calling this. Use :meth:`start_web_service` first. + + Args: + model_id: Optional default model ID baked into the client. May also + be supplied per-call via ``options['model']``. + + Returns: + A new :class:`ResponsesClient`. + + Raises: + FoundryLocalException: If the web service has not been started. + """ + if not self.urls: + raise FoundryLocalException( + "Web service is not running. Call start_web_service() first." 
+ ) + return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index f723e514a..6bc0d3638 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -10,6 +10,7 @@ from .openai.chat_client import ChatClient from .openai.audio_client import AudioClient from .openai.embedding_client import EmbeddingClient +from .openai.responses_client import ResponsesClient from .detail.model_data_types import ModelInfo class IModel(ABC): @@ -136,6 +137,21 @@ def get_embedding_client(self) -> 'EmbeddingClient': """ pass + @abstractmethod + def create_responses_client(self, base_url: str) -> 'ResponsesClient': + """ + Create an OpenAI Responses API client bound to the running web service. + + Unlike the other clients, the Responses API is HTTP-only and requires + the Foundry Local web service to be started. Pass the base URL + returned by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``), + or use :meth:`FoundryLocalManager.create_responses_client` directly. + + :param base_url: Base URL of the running Foundry Local web service. + :return: ResponsesClient instance for this variant's model id. + """ + pass + @property @abstractmethod def variants(self) -> List['IModel']: diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index bec5d68bd..f8c8cefef 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,10 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions and audio transcription.""" +"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient from .embedding_client import EmbeddingClient +from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError -__all__ = ["AudioClient", "ChatClient", "ChatClientSettings", "EmbeddingClient"] +__all__ = [ + "AudioClient", + "ChatClient", + "ChatClientSettings", + "EmbeddingClient", + "ResponsesAPIError", + "ResponsesClient", + "ResponsesClientSettings", +] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py new file mode 100644 index 000000000..a0d9a7777 --- /dev/null +++ b/sdk/python/src/openai/responses_client.py @@ -0,0 +1,413 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""OpenAI Responses API client — HTTP-only against the Foundry Local web service. + +Unlike ``ChatClient`` / ``AudioClient`` which go through the native Core via FFI, +the Responses API is served exclusively by the embedded web service. The client +therefore uses ``requests`` for non-streaming calls and parses Server-Sent Events +inline for streaming. 
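+
+Transport and HTTP errors surface as :class:`ResponsesAPIError`, which carries
+the HTTP status code and response body when available.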
+ +Usage +----- +:: + + manager.start_web_service() + client = manager.create_responses_client("phi-4-mini") + + # Non-streaming + resp = client.create("What is 2+2?") + print(resp.output_text) + + # Streaming + for event in client.create_streaming("Tell me a story"): + if event.type == "response.output_text.delta": + print(event.delta, end="", flush=True) +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import is_dataclass +from typing import Any, Dict, Generator, List, Optional, Union +from urllib.parse import quote + +import requests + +from .responses_types import ( + DeleteResponseResult, + InputItemsListResponse, + ListResponsesResult, + ReasoningConfig, + ResponseObject, + StreamingEvent, + TextConfig, + _parse_delete_result, + _parse_input_items_list, + _parse_list_responses, + _parse_response_object, + _to_dict, + parse_streaming_event, +) + +logger = logging.getLogger(__name__) + +_MAX_ID_LEN = 1024 + + +class ResponsesClientSettings: + """Tunable settings applied to every Responses API request. + + Field names follow the OpenAI snake_case convention; serialization omits + any ``None`` values so the server applies its own defaults. + """ + + def __init__(self) -> None: + self.instructions: Optional[str] = None + self.temperature: Optional[float] = None + self.top_p: Optional[float] = None + self.max_output_tokens: Optional[int] = None + self.frequency_penalty: Optional[float] = None + self.presence_penalty: Optional[float] = None + self.tool_choice: Optional[Any] = None + self.truncation: Optional[str] = None + self.parallel_tool_calls: Optional[bool] = None + self.store: Optional[bool] = True # SDK default — matches OpenAI convention. + self.metadata: Optional[Dict[str, str]] = None + self.reasoning: Optional[ReasoningConfig] = None + self.text: Optional[TextConfig] = None + self.seed: Optional[int] = None + + def _serialize(self) -> Dict[str, Any]: + raw: Dict[str, Any] = { + "instructions": self.instructions, + "temperature": self.temperature, + "top_p": self.top_p, + "max_output_tokens": self.max_output_tokens, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "tool_choice": _to_dict(self.tool_choice) if is_dataclass(self.tool_choice) else self.tool_choice, + "truncation": self.truncation, + "parallel_tool_calls": self.parallel_tool_calls, + "store": self.store, + "metadata": self.metadata, + "reasoning": _to_dict(self.reasoning) if self.reasoning is not None else None, + "text": _to_dict(self.text) if self.text is not None else None, + "seed": self.seed, + } + return {k: v for k, v in raw.items() if v is not None} + + +class ResponsesAPIError(Exception): + """Raised for HTTP/transport errors against the Responses API.""" + + def __init__(self, message: str, status_code: Optional[int] = None, body: Optional[str] = None): + super().__init__(message) + self.status_code = status_code + self.body = body + + +class ResponsesClient: + """Client for the OpenAI Responses API served by Foundry Local. + + Construct via ``manager.create_responses_client(model_id)`` or + ``model.create_responses_client(base_url)``. 
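+
+    Per-request defaults live on :attr:`settings` (a
+    :class:`ResponsesClientSettings`); per-call keyword options override
+    them::
+
+        client.settings.temperature = 0.2
+        resp = client.create("hi", temperature=0.7)  # the per-call 0.7 wins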
+ """ + + def __init__(self, base_url: str, model_id: Optional[str] = None): + if not isinstance(base_url, str) or not base_url.strip(): + raise ValueError("base_url must be a non-empty string.") + self._base_url = base_url.rstrip("/") + self._model_id = model_id + self.settings = ResponsesClientSettings() + + # ------------------------------------------------------------------ public + + def create( + self, + input: Union[str, List[Any]], + **options: Any, + ) -> ResponseObject: + """Create a response (non-streaming).""" + body = self._build_request(input, options, stream=False) + raw = self._post_json("/v1/responses", body) + return _parse_response_object(raw) + + def create_streaming( + self, + input: Union[str, List[Any]], + **options: Any, + ) -> Generator[StreamingEvent, None, None]: + """Create a response with SSE streaming. + + Returns a generator yielding :class:`StreamingEvent` objects. The HTTP + connection is closed automatically when the generator is exhausted or + garbage-collected. + """ + body = self._build_request(input, options, stream=True) + return self._post_stream("/v1/responses", body) + + def get(self, response_id: str) -> ResponseObject: + self._validate_id(response_id, "response_id") + raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}") + return _parse_response_object(raw) + + def delete(self, response_id: str) -> DeleteResponseResult: + self._validate_id(response_id, "response_id") + raw = self._request_json("DELETE", f"/v1/responses/{quote(response_id, safe='')}") + return _parse_delete_result(raw) + + def cancel(self, response_id: str) -> ResponseObject: + self._validate_id(response_id, "response_id") + raw = self._request_json("POST", f"/v1/responses/{quote(response_id, safe='')}/cancel") + return _parse_response_object(raw) + + def get_input_items(self, response_id: str) -> InputItemsListResponse: + self._validate_id(response_id, "response_id") + raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}/input_items") + return _parse_input_items_list(raw) + + def list(self) -> ListResponsesResult: + raw = self._request_json("GET", "/v1/responses") + return _parse_list_responses(raw) + + # ---------------------------------------------------------------- internal + + def _build_request( + self, + input: Union[str, List[Any]], + options: Dict[str, Any], + stream: bool, + ) -> Dict[str, Any]: + self._validate_input(input) + if options.get("tools") is not None: + self._validate_tools(options["tools"]) + + model = options.pop("model", None) or self._model_id + if not isinstance(model, str) or not model.strip(): + raise ValueError( + "Model must be specified via create_responses_client(model_id) or options['model']." + ) + + # Normalize input: convert dataclasses to dicts for the wire format. + if isinstance(input, list): + wire_input = [_to_dict(i) if is_dataclass(i) else i for i in input] + else: + wire_input = input + + # Normalize other dataclass-shaped options (tools, reasoning, etc.). 
+ normalized_options: Dict[str, Any] = {} + for key, value in options.items(): + if value is None: + continue + if is_dataclass(value): + normalized_options[key] = _to_dict(value) + elif isinstance(value, list): + normalized_options[key] = [_to_dict(v) if is_dataclass(v) else v for v in value] + else: + normalized_options[key] = value + + body: Dict[str, Any] = {"model": model, "input": wire_input} + # Merge order: model+input → settings defaults → per-call overrides + body.update(self.settings._serialize()) + body.update(normalized_options) + if stream: + body["stream"] = True + return body + + @staticmethod + def _validate_input(input: Any) -> None: + if input is None: + raise ValueError("Input cannot be None.") + if isinstance(input, str): + if not input.strip(): + raise ValueError("Input string cannot be empty.") + return + if isinstance(input, list): + if len(input) == 0: + raise ValueError("Input items list cannot be empty.") + for i, item in enumerate(input): + if is_dataclass(item): + t = getattr(item, "type", None) + elif isinstance(item, dict): + t = item.get("type") + else: + raise ValueError(f"input[{i}] must be a dict or dataclass.") + if not isinstance(t, str) or not t.strip(): + raise ValueError(f"input[{i}] must have a non-empty 'type' field.") + return + raise ValueError("Input must be a string or a list of input items.") + + @staticmethod + def _validate_tools(tools: Any) -> None: + if not isinstance(tools, list): + raise ValueError("tools must be a list if provided.") + for i, tool in enumerate(tools): + if is_dataclass(tool): + t = getattr(tool, "type", None) + name = getattr(tool, "name", None) + elif isinstance(tool, dict): + t = tool.get("type") + name = tool.get("name") + else: + raise ValueError(f"tools[{i}] must be a dict or FunctionToolDefinition.") + if t != "function": + raise ValueError(f"tools[{i}] must have type 'function'.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"tools[{i}] must have a non-empty 'name'.") + + @staticmethod + def _validate_id(value: str, param: str) -> None: + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{param} must be a non-empty string.") + if len(value) > _MAX_ID_LEN: + raise ValueError(f"{param} exceeds maximum length ({_MAX_ID_LEN}).") + + # ----- HTTP plumbing ----- + + def _url(self, path: str) -> str: + return f"{self._base_url}{path}" + + def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + try: + if body is not None: + resp = requests.request( + method, + self._url(path), + headers={"Content-Type": "application/json", "Accept": "application/json"}, + data=json.dumps(body), + ) + else: + resp = requests.request(method, self._url(path), headers={"Accept": "application/json"}) + except requests.RequestException as e: + raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e + + return self._handle_json_response(resp, method, path) + + def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: + return self._request_json("POST", path, body) + + @staticmethod + def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: + text = resp.text + if not resp.ok: + raise ResponsesAPIError( + f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}", + status_code=resp.status_code, + body=text, + ) + try: + return json.loads(text) if text else {} + except json.JSONDecodeError as e: + raise ResponsesAPIError( + f"Failed to parse response 
JSON from {method} {path}: {text[:200]}" + ) from e + + def _post_stream( + self, path: str, body: Dict[str, Any] + ) -> Generator[StreamingEvent, None, None]: + try: + resp = requests.post( + self._url(path), + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + data=json.dumps(body), + stream=True, + ) + except requests.RequestException as e: + raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e + + if not resp.ok: + body_text = resp.text + resp.close() + raise ResponsesAPIError( + f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}", + status_code=resp.status_code, + body=body_text, + ) + + return _iter_sse_events(resp) + + +def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, None]: + """Parse an SSE response into a stream of :class:`StreamingEvent` objects. + + Closes the underlying HTTP connection when the generator ends for any + reason (completion, [DONE], exception, or GC). + """ + try: + buffer_parts: List[str] = [] + # iter_content yields bytes chunks; decode as UTF-8 and split on blank lines. + for chunk in resp.iter_content(chunk_size=1024, decode_unicode=False): + if not chunk: + continue + if isinstance(chunk, bytes): + text = chunk.decode("utf-8", errors="replace") + else: + text = chunk + buffer_parts.append(text) + buffer = "".join(buffer_parts) + # Normalize CRLF to LF so our split works on both styles. + buffer = buffer.replace("\r\n", "\n") + + blocks = buffer.split("\n\n") + incomplete = blocks.pop() if blocks else "" + buffer_parts = [incomplete] if incomplete else [] + + for block in blocks: + event = _parse_sse_block(block) + if event is _SSE_DONE: + return + if event is not None: + yield event + + # Flush any residual block that wasn't terminated by a blank line. + tail = "".join(buffer_parts).strip() + if tail: + event = _parse_sse_block(tail) + if event is not None and event is not _SSE_DONE: + yield event + finally: + resp.close() + + +_SSE_DONE = object() # sentinel returned for the `data: [DONE]` terminator + + +def _parse_sse_block(block: str) -> Any: + """Parse a single SSE block (already stripped of its trailing blank line).""" + trimmed = block.strip() + if not trimmed: + return None + if trimmed == "data: [DONE]": + return _SSE_DONE + + data_lines: List[str] = [] + for line in trimmed.split("\n"): + if line.startswith("data: "): + data_lines.append(line[6:]) + elif line == "data:": + data_lines.append("") + # `event:`, `id:`, `retry:` fields are ignored — the type lives in the JSON payload. + + if not data_lines: + return None + + data = "\n".join(data_lines) + if data == "[DONE]": + return _SSE_DONE + try: + parsed = json.loads(data) + except json.JSONDecodeError as e: + raise ResponsesAPIError(f"Failed to parse streaming event JSON: {e}") from e + if not isinstance(parsed, dict): + return None + return parse_streaming_event(parsed) + + +__all__ = [ + "ResponsesClient", + "ResponsesClientSettings", + "ResponsesAPIError", +] diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py new file mode 100644 index 000000000..09f9f4a60 --- /dev/null +++ b/sdk/python/src/openai/responses_types.py @@ -0,0 +1,885 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Types for the OpenAI Responses API served by Foundry Local. 
+ +These mirror the DTOs defined by neutron-server in +``src/FoundryLocalCore/Core/Responses/Contracts/``. Dataclasses are used so +callers can construct items with keyword arguments and we can serialize +discriminated unions by the ``type`` field. +""" + +from __future__ import annotations + +import base64 +import mimetypes +from dataclasses import dataclass, field, fields, is_dataclass +from typing import Any, Dict, List, Literal, Optional, Union + + +# --------------------------------------------------------------------------- +# Serialization helpers +# --------------------------------------------------------------------------- + +def _to_dict(obj: Any) -> Any: + """Recursively convert a dataclass (or list/dict of them) to a plain dict, + omitting ``None`` values so the wire format matches the OpenAI spec. + """ + if is_dataclass(obj) and not isinstance(obj, type): + result: Dict[str, Any] = {} + for f in fields(obj): + value = getattr(obj, f.name) + if value is None: + continue + result[f.name] = _to_dict(value) + return result + if isinstance(obj, list): + return [_to_dict(v) for v in obj] + if isinstance(obj, dict): + return {k: _to_dict(v) for k, v in obj.items() if v is not None} + return obj + + +# --------------------------------------------------------------------------- +# Content Parts +# --------------------------------------------------------------------------- + +@dataclass +class InputTextContent: + text: str = "" + type: Literal["input_text"] = "input_text" + + +@dataclass +class InputImageContent: + """Vision input. Provide either ``image_url`` or ``image_data`` (base64).""" + media_type: str = "" + image_url: Optional[str] = None + image_data: Optional[str] = None + detail: Optional[str] = None # "low" | "high" | "auto" + type: Literal["input_image"] = "input_image" + + @staticmethod + def from_file(path: str, detail: Optional[str] = None) -> "InputImageContent": + media_type, _ = mimetypes.guess_type(path) + if not media_type or not media_type.startswith("image/"): + raise ValueError(f"Unsupported image format: {path}") + with open(path, "rb") as fh: + data = base64.b64encode(fh.read()).decode("ascii") + return InputImageContent(image_data=data, media_type=media_type, detail=detail) + + @staticmethod + def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": + return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) + + @staticmethod + def from_bytes(data: bytes, media_type: str, detail: Optional[str] = None) -> "InputImageContent": + return InputImageContent( + image_data=base64.b64encode(data).decode("ascii"), + media_type=media_type, + detail=detail, + ) + + +@dataclass +class InputFileContent: + filename: str = "" + file_url: str = "" + type: Literal["input_file"] = "input_file" + + +@dataclass +class OutputTextContent: + text: str = "" + annotations: Optional[List[Any]] = None + logprobs: Optional[List[Any]] = None + type: Literal["output_text"] = "output_text" + + +@dataclass +class RefusalContent: + refusal: str = "" + type: Literal["refusal"] = "refusal" + + +ContentPart = Union[ + InputTextContent, InputImageContent, InputFileContent, OutputTextContent, RefusalContent +] + + +def _parse_content_part(data: Dict[str, Any]) -> ContentPart: + t = data.get("type") + if t == "input_text": + return InputTextContent(text=data.get("text", "")) + if t == "input_image": + return InputImageContent( + media_type=data.get("media_type", ""), + image_url=data.get("image_url"), + image_data=data.get("image_data"), + 
detail=data.get("detail"), + ) + if t == "input_file": + return InputFileContent(filename=data.get("filename", ""), file_url=data.get("file_url", "")) + if t == "output_text": + return OutputTextContent( + text=data.get("text", ""), + annotations=data.get("annotations"), + logprobs=data.get("logprobs"), + ) + if t == "refusal": + return RefusalContent(refusal=data.get("refusal", "")) + # Unknown content-part type — fall back to input_text so callers still get something + return InputTextContent(text=str(data.get("text", ""))) + + +def _parse_content(value: Any) -> Union[str, List[ContentPart]]: + if isinstance(value, str): + return value + if isinstance(value, list): + return [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + return value + + +# --------------------------------------------------------------------------- +# Response Items (input + output) +# --------------------------------------------------------------------------- + +@dataclass +class MessageItem: + role: str = "" + content: Union[str, List[ContentPart]] = "" + id: Optional[str] = None + status: Optional[str] = None + type: Literal["message"] = "message" + + +@dataclass +class FunctionCallItem: + call_id: str = "" + name: str = "" + arguments: str = "" + id: Optional[str] = None + status: Optional[str] = None + type: Literal["function_call"] = "function_call" + + +@dataclass +class FunctionCallOutputItem: + call_id: str = "" + output: Union[str, List[ContentPart]] = "" + id: Optional[str] = None + type: Literal["function_call_output"] = "function_call_output" + + +@dataclass +class ItemReference: + id: str = "" + type: Literal["item_reference"] = "item_reference" + + +@dataclass +class ReasoningItem: + id: Optional[str] = None + content: Optional[List[ContentPart]] = None + encrypted_content: Optional[str] = None + summary: Optional[str] = None + status: Optional[str] = None + type: Literal["reasoning"] = "reasoning" + + +ResponseInputItem = Union[ + MessageItem, FunctionCallItem, FunctionCallOutputItem, ItemReference, ReasoningItem +] +ResponseOutputItem = Union[MessageItem, FunctionCallItem, ReasoningItem] + + +def _parse_response_item(data: Dict[str, Any]) -> Any: + t = data.get("type") + if t == "message": + return MessageItem( + role=data.get("role", ""), + content=_parse_content(data.get("content", "")), + id=data.get("id"), + status=data.get("status"), + ) + if t == "function_call": + return FunctionCallItem( + call_id=data.get("call_id", ""), + name=data.get("name", ""), + arguments=data.get("arguments", ""), + id=data.get("id"), + status=data.get("status"), + ) + if t == "function_call_output": + return FunctionCallOutputItem( + call_id=data.get("call_id", ""), + output=_parse_content(data.get("output", "")), + id=data.get("id"), + ) + if t == "item_reference": + return ItemReference(id=data.get("id", "")) + if t == "reasoning": + content_raw = data.get("content") + return ReasoningItem( + id=data.get("id"), + content=[_parse_content_part(p) for p in content_raw] if isinstance(content_raw, list) else None, + encrypted_content=data.get("encrypted_content"), + summary=data.get("summary"), + status=data.get("status"), + ) + # Unknown item type — return the raw dict so callers can inspect + return data + + +# --------------------------------------------------------------------------- +# Tool Definitions & Config +# --------------------------------------------------------------------------- + +@dataclass +class FunctionToolDefinition: + name: str = "" + description: Optional[str] = None + 
parameters: Optional[Dict[str, Any]] = None + strict: Optional[bool] = None + type: Literal["function"] = "function" + + +@dataclass +class FunctionToolChoice: + name: str = "" + type: Literal["function"] = "function" + + +ToolChoice = Union[str, FunctionToolChoice] # "none" | "auto" | "required" | {type,name} + + +@dataclass +class TextFormat: + type: str = "text" # "text" | "json_object" | "json_schema" | "lark_grammar" | "regex" + name: Optional[str] = None + description: Optional[str] = None + schema: Optional[Dict[str, Any]] = None + strict: Optional[bool] = None + + +@dataclass +class TextConfig: + format: Optional[TextFormat] = None + + +@dataclass +class ReasoningConfig: + effort: Optional[str] = None + summary: Optional[str] = None + + +# --------------------------------------------------------------------------- +# Response Object +# --------------------------------------------------------------------------- + +@dataclass +class ResponseUsage: + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + input_tokens_details: Optional[Dict[str, Any]] = None + output_tokens_details: Optional[Dict[str, Any]] = None + + +@dataclass +class ResponseError: + code: str = "" + message: str = "" + + +@dataclass +class IncompleteDetails: + reason: str = "" + + +@dataclass +class ResponseObject: + id: str = "" + object: str = "response" + created_at: int = 0 + status: str = "" + model: str = "" + output: List[Any] = field(default_factory=list) + completed_at: Optional[int] = None + failed_at: Optional[int] = None + cancelled_at: Optional[int] = None + error: Optional[ResponseError] = None + usage: Optional[ResponseUsage] = None + instructions: Optional[str] = None + previous_response_id: Optional[str] = None + tools: Optional[List[FunctionToolDefinition]] = None + tool_choice: Optional[Any] = None + temperature: Optional[float] = None + top_p: Optional[float] = None + max_output_tokens: Optional[int] = None + frequency_penalty: Optional[float] = None + presence_penalty: Optional[float] = None + seed: Optional[int] = None + truncation: Optional[str] = None + parallel_tool_calls: Optional[bool] = None + store: Optional[bool] = None + metadata: Optional[Dict[str, str]] = None + reasoning: Optional[ReasoningConfig] = None + text: Optional[TextConfig] = None + user: Optional[str] = None + incomplete_details: Optional[IncompleteDetails] = None + # Retain anything the server returned that we don't model explicitly. 
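+    # Populated only when parsing server responses; not part of the wire format.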
+ _raw: Optional[Dict[str, Any]] = None + + @property + def output_text(self) -> str: + """Concatenated text from the first assistant ``message`` item in ``output``.""" + for item in self.output: + if isinstance(item, MessageItem) and item.role == "assistant": + content = item.content + if isinstance(content, str): + return content + if isinstance(content, list): + parts: List[str] = [] + for p in content: + text = getattr(p, "text", None) + if isinstance(text, str): + parts.append(text) + return "".join(parts) + return "" + + +def _parse_response_object(data: Dict[str, Any]) -> ResponseObject: + output = data.get("output") or [] + parsed_output = [_parse_response_item(i) if isinstance(i, dict) else i for i in output] + + tools_raw = data.get("tools") + tools = None + if isinstance(tools_raw, list): + tools = [ + FunctionToolDefinition( + name=t.get("name", ""), + description=t.get("description"), + parameters=t.get("parameters"), + strict=t.get("strict"), + ) if isinstance(t, dict) else t + for t in tools_raw + ] + + usage = None + if isinstance(data.get("usage"), dict): + u = data["usage"] + usage = ResponseUsage( + input_tokens=u.get("input_tokens", 0), + output_tokens=u.get("output_tokens", 0), + total_tokens=u.get("total_tokens", 0), + input_tokens_details=u.get("input_tokens_details"), + output_tokens_details=u.get("output_tokens_details"), + ) + + error = None + if isinstance(data.get("error"), dict): + error = ResponseError(code=data["error"].get("code", ""), message=data["error"].get("message", "")) + + incomplete = None + if isinstance(data.get("incomplete_details"), dict): + incomplete = IncompleteDetails(reason=data["incomplete_details"].get("reason", "")) + + reasoning = None + if isinstance(data.get("reasoning"), dict): + reasoning = ReasoningConfig( + effort=data["reasoning"].get("effort"), + summary=data["reasoning"].get("summary"), + ) + + text = None + if isinstance(data.get("text"), dict): + fmt_raw = data["text"].get("format") + fmt = None + if isinstance(fmt_raw, dict): + fmt = TextFormat( + type=fmt_raw.get("type", "text"), + name=fmt_raw.get("name"), + description=fmt_raw.get("description"), + schema=fmt_raw.get("schema"), + strict=fmt_raw.get("strict"), + ) + text = TextConfig(format=fmt) + + return ResponseObject( + id=data.get("id", ""), + object=data.get("object", "response"), + created_at=data.get("created_at", 0), + status=data.get("status", ""), + model=data.get("model", ""), + output=parsed_output, + completed_at=data.get("completed_at"), + failed_at=data.get("failed_at"), + cancelled_at=data.get("cancelled_at"), + error=error, + usage=usage, + instructions=data.get("instructions"), + previous_response_id=data.get("previous_response_id"), + tools=tools, + tool_choice=data.get("tool_choice"), + temperature=data.get("temperature"), + top_p=data.get("top_p"), + max_output_tokens=data.get("max_output_tokens"), + frequency_penalty=data.get("frequency_penalty"), + presence_penalty=data.get("presence_penalty"), + seed=data.get("seed"), + truncation=data.get("truncation"), + parallel_tool_calls=data.get("parallel_tool_calls"), + store=data.get("store"), + metadata=data.get("metadata"), + reasoning=reasoning, + text=text, + user=data.get("user"), + incomplete_details=incomplete, + _raw=data, + ) + + +# --------------------------------------------------------------------------- +# Delete / List helpers +# --------------------------------------------------------------------------- + +@dataclass +class DeleteResponseResult: + id: str = "" + object: str = "" + deleted: 
bool = False + + +@dataclass +class InputItemsListResponse: + object: str = "list" + data: List[Any] = field(default_factory=list) + + +@dataclass +class ListResponsesResult: + object: str = "list" + data: List[ResponseObject] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Streaming Events +# --------------------------------------------------------------------------- + +@dataclass +class ResponseLifecycleEvent: + """`response.created` / `queued` / `in_progress` / `completed` / `failed` / `incomplete`.""" + type: str = "" + response: Optional[ResponseObject] = None + sequence_number: int = 0 + + +@dataclass +class OutputItemAddedEvent: + item_id: str = "" + output_index: int = 0 + item: Any = None + sequence_number: int = 0 + type: Literal["response.output_item.added"] = "response.output_item.added" + + +@dataclass +class OutputItemDoneEvent: + item_id: str = "" + output_index: int = 0 + item: Any = None + sequence_number: int = 0 + type: Literal["response.output_item.done"] = "response.output_item.done" + + +@dataclass +class ContentPartAddedEvent: + item_id: str = "" + content_index: int = 0 + part: Any = None + sequence_number: int = 0 + type: Literal["response.content_part.added"] = "response.content_part.added" + + +@dataclass +class ContentPartDoneEvent: + item_id: str = "" + content_index: int = 0 + part: Any = None + sequence_number: int = 0 + type: Literal["response.content_part.done"] = "response.content_part.done" + + +@dataclass +class OutputTextDeltaEvent: + item_id: str = "" + output_index: int = 0 + content_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.output_text.delta"] = "response.output_text.delta" + + +@dataclass +class OutputTextDoneEvent: + item_id: str = "" + output_index: int = 0 + content_index: int = 0 + text: str = "" + sequence_number: int = 0 + type: Literal["response.output_text.done"] = "response.output_text.done" + + +@dataclass +class OutputTextAnnotationAddedEvent: + item_id: str = "" + annotation: Any = None + sequence_number: int = 0 + type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added" + + +@dataclass +class RefusalDeltaEvent: + item_id: str = "" + content_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.refusal.delta"] = "response.refusal.delta" + + +@dataclass +class RefusalDoneEvent: + item_id: str = "" + content_index: int = 0 + refusal: str = "" + sequence_number: int = 0 + type: Literal["response.refusal.done"] = "response.refusal.done" + + +@dataclass +class FunctionCallArgsDeltaEvent: + item_id: str = "" + output_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" + + +@dataclass +class FunctionCallArgsDoneEvent: + item_id: str = "" + output_index: int = 0 + arguments: str = "" + name: str = "" + sequence_number: int = 0 + type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" + + +@dataclass +class ReasoningSummaryPartAddedEvent: + item_id: str = "" + part: Any = None + sequence_number: int = 0 + type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" + + +@dataclass +class ReasoningSummaryPartDoneEvent: + item_id: str = "" + part: Any = None + sequence_number: int = 0 + type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" + + 
+@dataclass +class ReasoningDeltaEvent: + item_id: str = "" + delta: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning.delta"] = "response.reasoning.delta" + + +@dataclass +class ReasoningDoneEvent: + item_id: str = "" + text: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning.done"] = "response.reasoning.done" + + +@dataclass +class ReasoningSummaryTextDeltaEvent: + item_id: str = "" + delta: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" + + +@dataclass +class ReasoningSummaryTextDoneEvent: + item_id: str = "" + text: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" + + +@dataclass +class StreamingErrorEvent: + code: Optional[str] = None + message: Optional[str] = None + param: Optional[str] = None + sequence_number: int = 0 + type: Literal["error"] = "error" + + +@dataclass +class UnknownStreamingEvent: + """Fallback for event types that aren't yet modeled.""" + type: str = "" + sequence_number: int = 0 + data: Optional[Dict[str, Any]] = None + + +StreamingEvent = Union[ + ResponseLifecycleEvent, + OutputItemAddedEvent, + OutputItemDoneEvent, + ContentPartAddedEvent, + ContentPartDoneEvent, + OutputTextDeltaEvent, + OutputTextDoneEvent, + OutputTextAnnotationAddedEvent, + RefusalDeltaEvent, + RefusalDoneEvent, + FunctionCallArgsDeltaEvent, + FunctionCallArgsDoneEvent, + ReasoningSummaryPartAddedEvent, + ReasoningSummaryPartDoneEvent, + ReasoningDeltaEvent, + ReasoningDoneEvent, + ReasoningSummaryTextDeltaEvent, + ReasoningSummaryTextDoneEvent, + StreamingErrorEvent, + UnknownStreamingEvent, +] + + +_LIFECYCLE_TYPES = { + "response.created", + "response.queued", + "response.in_progress", + "response.completed", + "response.failed", + "response.incomplete", +} + + +def parse_streaming_event(data: Dict[str, Any]) -> StreamingEvent: + """Build a typed streaming-event dataclass from a server-sent JSON payload.""" + t = data.get("type", "") + seq = data.get("sequence_number", 0) + + if t in _LIFECYCLE_TYPES: + resp_raw = data.get("response") + resp = _parse_response_object(resp_raw) if isinstance(resp_raw, dict) else None + return ResponseLifecycleEvent(type=t, response=resp, sequence_number=seq) + + if t == "response.output_item.added": + item = data.get("item") + return OutputItemAddedEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + item=_parse_response_item(item) if isinstance(item, dict) else item, + sequence_number=seq, + ) + if t == "response.output_item.done": + item = data.get("item") + return OutputItemDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + item=_parse_response_item(item) if isinstance(item, dict) else item, + sequence_number=seq, + ) + if t == "response.content_part.added": + part = data.get("part") + return ContentPartAddedEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + part=_parse_content_part(part) if isinstance(part, dict) else part, + sequence_number=seq, + ) + if t == "response.content_part.done": + part = data.get("part") + return ContentPartDoneEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + part=_parse_content_part(part) if isinstance(part, dict) else part, + sequence_number=seq, + ) + if t == "response.output_text.delta": + return OutputTextDeltaEvent( + item_id=data.get("item_id", ""), + 
output_index=data.get("output_index", 0), + content_index=data.get("content_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.output_text.done": + return OutputTextDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + content_index=data.get("content_index", 0), + text=data.get("text", ""), + sequence_number=seq, + ) + if t == "response.output_text.annotation.added": + return OutputTextAnnotationAddedEvent( + item_id=data.get("item_id", ""), + annotation=data.get("annotation"), + sequence_number=seq, + ) + if t == "response.refusal.delta": + return RefusalDeltaEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.refusal.done": + return RefusalDoneEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + refusal=data.get("refusal", ""), + sequence_number=seq, + ) + if t == "response.function_call_arguments.delta": + return FunctionCallArgsDeltaEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.function_call_arguments.done": + return FunctionCallArgsDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + arguments=data.get("arguments", ""), + name=data.get("name", ""), + sequence_number=seq, + ) + if t == "response.reasoning_summary_part.added": + return ReasoningSummaryPartAddedEvent( + item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq + ) + if t == "response.reasoning_summary_part.done": + return ReasoningSummaryPartDoneEvent( + item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq + ) + if t == "response.reasoning.delta": + return ReasoningDeltaEvent( + item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq + ) + if t == "response.reasoning.done": + return ReasoningDoneEvent( + item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq + ) + if t == "response.reasoning_summary_text.delta": + return ReasoningSummaryTextDeltaEvent( + item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq + ) + if t == "response.reasoning_summary_text.done": + return ReasoningSummaryTextDoneEvent( + item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq + ) + if t == "error": + return StreamingErrorEvent( + code=data.get("code"), + message=data.get("message"), + param=data.get("param"), + sequence_number=seq, + ) + + return UnknownStreamingEvent(type=t, sequence_number=seq, data=data) + + +def _parse_delete_result(data: Dict[str, Any]) -> DeleteResponseResult: + return DeleteResponseResult( + id=data.get("id", ""), + object=data.get("object", ""), + deleted=bool(data.get("deleted", False)), + ) + + +def _parse_input_items_list(data: Dict[str, Any]) -> InputItemsListResponse: + raw = data.get("data") or [] + return InputItemsListResponse( + object=data.get("object", "list"), + data=[_parse_response_item(i) if isinstance(i, dict) else i for i in raw], + ) + + +def _parse_list_responses(data: Dict[str, Any]) -> ListResponsesResult: + raw = data.get("data") or [] + return ListResponsesResult( + object=data.get("object", "list"), + data=[_parse_response_object(r) if isinstance(r, dict) else r for r in raw], + ) + + +__all__ = [ + # Content parts + "InputTextContent", + "InputImageContent", + 
"InputFileContent", + "OutputTextContent", + "RefusalContent", + "ContentPart", + # Items + "MessageItem", + "FunctionCallItem", + "FunctionCallOutputItem", + "ItemReference", + "ReasoningItem", + "ResponseInputItem", + "ResponseOutputItem", + # Tools & config + "FunctionToolDefinition", + "FunctionToolChoice", + "ToolChoice", + "TextFormat", + "TextConfig", + "ReasoningConfig", + # Response + "ResponseObject", + "ResponseUsage", + "ResponseError", + "IncompleteDetails", + "DeleteResponseResult", + "InputItemsListResponse", + "ListResponsesResult", + # Streaming events + "StreamingEvent", + "ResponseLifecycleEvent", + "OutputItemAddedEvent", + "OutputItemDoneEvent", + "ContentPartAddedEvent", + "ContentPartDoneEvent", + "OutputTextDeltaEvent", + "OutputTextDoneEvent", + "OutputTextAnnotationAddedEvent", + "RefusalDeltaEvent", + "RefusalDoneEvent", + "FunctionCallArgsDeltaEvent", + "FunctionCallArgsDoneEvent", + "ReasoningSummaryPartAddedEvent", + "ReasoningSummaryPartDoneEvent", + "ReasoningDeltaEvent", + "ReasoningDoneEvent", + "ReasoningSummaryTextDeltaEvent", + "ReasoningSummaryTextDoneEvent", + "StreamingErrorEvent", + "UnknownStreamingEvent", + "parse_streaming_event", +] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py new file mode 100644 index 000000000..15c1f3b1e --- /dev/null +++ b/sdk/python/test/openai/test_responses_client.py @@ -0,0 +1,603 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Unit tests for the Responses API client (no live server required). + +Mirrors the scenarios covered by the JS SDK's ``responsesClient.test.ts`` and +the Python spec's §5. HTTP calls are intercepted via :mod:`unittest.mock`. +""" + +from __future__ import annotations + +import base64 +import io +import json +from typing import Any, Dict, List +from unittest.mock import MagicMock, patch + +import pytest + +from foundry_local_sdk.openai.responses_client import ( + ResponsesAPIError, + ResponsesClient, + ResponsesClientSettings, + _parse_sse_block, + _iter_sse_events, + _SSE_DONE, +) +from foundry_local_sdk.openai.responses_types import ( + FunctionCallItem, + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, + OutputTextContent, + ReasoningConfig, + ResponseObject, + TextConfig, + TextFormat, + _to_dict, + parse_streaming_event, + OutputTextDeltaEvent, + ResponseLifecycleEvent, + StreamingErrorEvent, + UnknownStreamingEvent, +) + +BASE_URL = "http://127.0.0.1:5273" +MODEL_ID = "test-model" + + +def _fake_json_response(payload: Dict[str, Any], status: int = 200): + resp = MagicMock() + resp.ok = 200 <= status < 300 + resp.status_code = status + resp.text = json.dumps(payload) + return resp + + +def _fake_stream_response(sse_payload: str, status: int = 200): + resp = MagicMock() + resp.ok = 200 <= status < 300 + resp.status_code = status + resp.text = sse_payload + # iter_content returns the full payload in one bytes chunk. 
+ resp.iter_content = MagicMock(return_value=iter([sse_payload.encode("utf-8")])) + resp.close = MagicMock() + return resp + + +# --------------------------------------------------------------------------- +# Settings +# --------------------------------------------------------------------------- + +class TestResponsesClientSettings: + def test_serialize_defaults_contains_store(self): + # store defaults to True — matches OpenAI convention + s = ResponsesClientSettings() + serialized = s._serialize() + assert serialized == {"store": True} + + def test_store_defaults_to_true(self): + assert ResponsesClientSettings().store is True + + def test_serialize_all_fields(self): + s = ResponsesClientSettings() + s.instructions = "Be concise." + s.temperature = 0.2 + s.top_p = 0.9 + s.max_output_tokens = 256 + s.frequency_penalty = 0.1 + s.presence_penalty = 0.2 + s.tool_choice = "auto" + s.truncation = "auto" + s.parallel_tool_calls = False + s.store = False + s.metadata = {"run": "1"} + s.reasoning = ReasoningConfig(effort="medium") + s.text = TextConfig(format=TextFormat(type="json_object")) + s.seed = 42 + + out = s._serialize() + assert out["instructions"] == "Be concise." + assert out["temperature"] == 0.2 + assert out["top_p"] == 0.9 + assert out["max_output_tokens"] == 256 + assert out["frequency_penalty"] == 0.1 + assert out["presence_penalty"] == 0.2 + assert out["tool_choice"] == "auto" + assert out["truncation"] == "auto" + assert out["parallel_tool_calls"] is False + assert out["store"] is False + assert out["metadata"] == {"run": "1"} + assert out["reasoning"] == {"effort": "medium"} + assert out["text"] == {"format": {"type": "json_object"}} + assert out["seed"] == 42 + + def test_serialize_omits_none(self): + s = ResponsesClientSettings() + s.temperature = None # explicit None is omitted + assert "temperature" not in s._serialize() + + +# --------------------------------------------------------------------------- +# Input / tool / id validation +# --------------------------------------------------------------------------- + +class TestInputValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_none(self): + with pytest.raises(ValueError, match="None"): + self.client._build_request(None, {}, stream=False) + + def test_rejects_empty_string(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request("", {}, stream=False) + + def test_rejects_whitespace_string(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request(" ", {}, stream=False) + + def test_rejects_empty_array(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request([], {}, stream=False) + + def test_rejects_item_without_type(self): + with pytest.raises(ValueError, match="type"): + self.client._build_request([{"role": "user"}], {}, stream=False) + + def test_accepts_string_input(self): + body = self.client._build_request("Hi", {}, stream=False) + assert body["input"] == "Hi" + assert body["model"] == MODEL_ID + + def test_accepts_dict_input_items(self): + body = self.client._build_request( + [{"type": "message", "role": "user", "content": "hi"}], {}, stream=False + ) + assert isinstance(body["input"], list) + assert body["input"][0]["type"] == "message" + + def test_accepts_dataclass_input_items(self): + item = MessageItem(role="user", content="hello") + body = self.client._build_request([item], {}, stream=False) + assert body["input"][0]["type"] == "message" + assert body["input"][0]["role"] == 
"user" + assert body["input"][0]["content"] == "hello" + + def test_stream_flag_set(self): + body = self.client._build_request("hi", {}, stream=True) + assert body["stream"] is True + + def test_requires_model(self): + c = ResponsesClient(BASE_URL) # no default model + with pytest.raises(ValueError, match="[Mm]odel"): + c._build_request("hi", {}, stream=False) + + def test_options_model_overrides_default(self): + body = self.client._build_request("hi", {"model": "override"}, stream=False) + assert body["model"] == "override" + + +class TestToolValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_non_function_type(self): + with pytest.raises(ValueError, match="function"): + self.client._build_request("hi", {"tools": [{"type": "retrieval", "name": "x"}]}, stream=False) + + def test_rejects_empty_name(self): + with pytest.raises(ValueError, match="name"): + self.client._build_request("hi", {"tools": [{"type": "function", "name": ""}]}, stream=False) + + def test_rejects_non_list(self): + with pytest.raises(ValueError, match="list"): + self.client._build_request("hi", {"tools": "nope"}, stream=False) + + def test_accepts_valid_dict_tool(self): + body = self.client._build_request( + "hi", + {"tools": [{"type": "function", "name": "multiply", "parameters": {}}]}, + stream=False, + ) + assert body["tools"][0]["name"] == "multiply" + + def test_accepts_dataclass_tool(self): + tool = FunctionToolDefinition(name="multiply", description="x*y") + body = self.client._build_request("hi", {"tools": [tool]}, stream=False) + assert body["tools"][0]["type"] == "function" + assert body["tools"][0]["name"] == "multiply" + assert body["tools"][0]["description"] == "x*y" + + +class TestIdValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_empty_id(self): + with pytest.raises(ValueError, match="non-empty"): + self.client.get("") + + def test_rejects_whitespace_id(self): + with pytest.raises(ValueError, match="non-empty"): + self.client.get(" ") + + def test_rejects_too_long_id(self): + with pytest.raises(ValueError, match="length"): + self.client.get("x" * 2000) + + +# --------------------------------------------------------------------------- +# output_text convenience +# --------------------------------------------------------------------------- + +class TestOutputText: + def test_extracts_from_string_content(self): + resp = ResponseObject(output=[MessageItem(role="assistant", content="hello world")]) + assert resp.output_text == "hello world" + + def test_extracts_from_content_parts(self): + resp = ResponseObject(output=[ + MessageItem( + role="assistant", + content=[OutputTextContent(text="foo "), OutputTextContent(text="bar")], + ) + ]) + assert resp.output_text == "foo bar" + + def test_returns_empty_when_no_assistant(self): + resp = ResponseObject(output=[MessageItem(role="user", content="hi")]) + assert resp.output_text == "" + + def test_returns_empty_for_empty_output(self): + assert ResponseObject().output_text == "" + + def test_skips_function_call_items(self): + resp = ResponseObject(output=[ + FunctionCallItem(call_id="c1", name="f", arguments="{}"), + MessageItem(role="assistant", content="done"), + ]) + assert resp.output_text == "done" + + +# --------------------------------------------------------------------------- +# SSE parsing +# --------------------------------------------------------------------------- + +class TestSSEParsing: + def test_parses_complete_event(self): + block 
= 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hi","sequence_number":3}' + evt = _parse_sse_block(block) + assert isinstance(evt, OutputTextDeltaEvent) + assert evt.delta == "hi" + assert evt.sequence_number == 3 + + def test_done_signal(self): + assert _parse_sse_block("data: [DONE]") is _SSE_DONE + + def test_multi_line_data(self): + # Per SSE spec, multiple data: lines join with \n into one JSON doc. + block = 'data: {"type":"error",\ndata: "message":"oops","sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, StreamingErrorEvent) + assert evt.message == "oops" + + def test_invalid_json_raises(self): + block = 'data: {not valid json' + with pytest.raises(ResponsesAPIError): + _parse_sse_block(block) + + def test_empty_block_returns_none(self): + assert _parse_sse_block("") is None + assert _parse_sse_block("\n\n") is None + + def test_ignores_non_data_lines(self): + block = 'id: 1\nretry: 1000\nevent: response.created\ndata: {"type":"response.created","response":{"id":"r1"},"sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, ResponseLifecycleEvent) + assert evt.type == "response.created" + + def test_error_event(self): + block = 'data: {"type":"error","code":"bad","message":"oops","sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, StreamingErrorEvent) + assert evt.code == "bad" + assert evt.message == "oops" + + def test_iter_sse_events_handles_partial_chunks(self): + payload_events = [ + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"Hel","sequence_number":1}\n\n', + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"lo","sequence_number":2}\n\n', + 'data: [DONE]\n\n', + ] + full = "".join(payload_events).encode("utf-8") + + # Split the bytes into irregular chunks to exercise buffering. 
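+        # With a 7-byte stride the first event arrives as b'event: ', b'respons',
+        # b'e.outpu', and so on; no chunk is guaranteed to end on an SSE line or
+        # blank-line block boundary, so the buffer must reassemble both.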
+ chunks = [full[i:i + 7] for i in range(0, len(full), 7)] + + resp = MagicMock() + resp.iter_content = MagicMock(return_value=iter(chunks)) + resp.close = MagicMock() + + events = list(_iter_sse_events(resp)) + assert len(events) == 2 + assert all(isinstance(e, OutputTextDeltaEvent) for e in events) + assert "".join(e.delta for e in events) == "Hello" + resp.close.assert_called() + + def test_iter_sse_handles_crlf(self): + payload = ( + 'event: response.output_text.delta\r\n' + 'data: {"type":"response.output_text.delta","delta":"x","sequence_number":0}\r\n' + '\r\n' + 'data: [DONE]\r\n\r\n' + ) + resp = MagicMock() + resp.iter_content = MagicMock(return_value=iter([payload.encode("utf-8")])) + resp.close = MagicMock() + + events = list(_iter_sse_events(resp)) + assert len(events) == 1 + assert events[0].delta == "x" + + def test_unknown_event_type(self): + block = 'data: {"type":"response.brand_new_event","sequence_number":7}' + evt = _parse_sse_block(block) + assert isinstance(evt, UnknownStreamingEvent) + assert evt.type == "response.brand_new_event" + + +# --------------------------------------------------------------------------- +# Vision types +# --------------------------------------------------------------------------- + +class TestVisionTypes: + def test_input_image_from_bytes(self): + data = b"\x89PNG\r\n\x1a\nfakedata" + img = InputImageContent.from_bytes(data, "image/png", detail="high") + assert img.media_type == "image/png" + assert img.detail == "high" + assert base64.b64decode(img.image_data) == data + + def test_input_image_from_url(self): + img = InputImageContent.from_url("https://example.com/x.png") + assert img.image_url == "https://example.com/x.png" + assert img.image_data is None + + def test_input_image_from_file(self, tmp_path): + data = b"\x89PNG\r\n\x1a\nfakedata" + p = tmp_path / "test.png" + p.write_bytes(data) + img = InputImageContent.from_file(str(p)) + assert img.media_type == "image/png" + assert base64.b64decode(img.image_data) == data + + def test_input_image_from_file_rejects_non_image(self, tmp_path): + p = tmp_path / "text.txt" + p.write_text("not an image") + with pytest.raises(ValueError, match="Unsupported"): + InputImageContent.from_file(str(p)) + + def test_input_image_serialization(self): + img = InputImageContent(media_type="image/png", image_data="abc", detail="low") + d = _to_dict(img) + assert d == {"media_type": "image/png", "image_data": "abc", "detail": "low", "type": "input_image"} + # image_url left unset should be omitted + assert "image_url" not in d + + +# --------------------------------------------------------------------------- +# Type serialization & parsing +# --------------------------------------------------------------------------- + +class TestTypeSerialization: + def test_message_item_to_dict(self): + msg = MessageItem( + role="user", + content=[InputTextContent(text="Hi"), InputImageContent(media_type="image/png", image_data="abc")], + ) + d = _to_dict(msg) + assert d["type"] == "message" + assert d["role"] == "user" + assert d["content"][0] == {"text": "Hi", "type": "input_text"} + assert d["content"][1]["type"] == "input_image" + assert "id" not in d # None omitted + + def test_function_tool_to_dict(self): + tool = FunctionToolDefinition( + name="multiply", + description="x*y", + parameters={"type": "object", "properties": {"a": {"type": "number"}}}, + strict=True, + ) + d = _to_dict(tool) + assert d == { + "name": "multiply", + "description": "x*y", + "parameters": {"type": "object", "properties": {"a": {"type": 
"number"}}}, + "strict": True, + "type": "function", + } + + def test_response_object_from_dict(self): + from foundry_local_sdk.openai.responses_types import _parse_response_object + + payload = { + "id": "resp_abc", + "object": "response", + "created_at": 1700000000, + "status": "completed", + "model": "phi-4-mini", + "output": [ + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "Hello!"}], + } + ], + "usage": {"input_tokens": 3, "output_tokens": 2, "total_tokens": 5}, + "store": True, + } + r = _parse_response_object(payload) + assert r.id == "resp_abc" + assert r.status == "completed" + assert r.usage.total_tokens == 5 + assert r.output_text == "Hello!" + + def test_streaming_event_parsing_lifecycle(self): + evt = parse_streaming_event( + { + "type": "response.completed", + "response": {"id": "resp_1", "status": "completed"}, + "sequence_number": 10, + } + ) + assert isinstance(evt, ResponseLifecycleEvent) + assert evt.type == "response.completed" + assert evt.response.id == "resp_1" + assert evt.sequence_number == 10 + + +# --------------------------------------------------------------------------- +# End-to-end (mocked HTTP) +# --------------------------------------------------------------------------- + +class TestClientHTTPFlow: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_create_posts_correct_body(self): + payload = { + "id": "resp_1", + "object": "response", + "status": "completed", + "model": MODEL_ID, + "output": [ + {"type": "message", "role": "assistant", "content": "ok"}, + ], + } + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response(payload) + result = self.client.create("hello", temperature=0.3) + + assert result.id == "resp_1" + assert result.output_text == "ok" + + _, kwargs = mock_req.call_args + assert mock_req.call_args.args[0] == "POST" + assert mock_req.call_args.args[1] == f"{BASE_URL}/v1/responses" + body = json.loads(kwargs["data"]) + assert body["model"] == MODEL_ID + assert body["input"] == "hello" + assert body["temperature"] == 0.3 + assert body["store"] is True # default + assert "stream" not in body + + def test_get_uses_url_encoded_path(self): + weird_id = "resp_with/slashes and spaces" + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": weird_id, "object": "response", "status": "completed", "model": MODEL_ID, "output": []} + ) + self.client.get(weird_id) + + path = mock_req.call_args.args[1] + assert "resp_with%2Fslashes%20and%20spaces" in path + assert mock_req.call_args.args[0] == "GET" + + def test_delete_parses_result(self): + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": "resp_1", "object": "response.deleted", "deleted": True} + ) + result = self.client.delete("resp_1") + assert result.deleted is True + assert result.id == "resp_1" + + def test_http_error_raises_responses_api_error(self): + resp = MagicMock() + resp.ok = False + resp.status_code = 400 + resp.text = '{"error":{"message":"bad"}}' + with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): + with pytest.raises(ResponsesAPIError) as excinfo: + self.client.create("hi") + assert excinfo.value.status_code == 400 + assert "bad" in str(excinfo.value) + + def test_create_streaming_yields_events(self): + sse = 
( + 'event: response.output_text.delta\n' + 'data: {"type":"response.output_text.delta","delta":"a","sequence_number":1}\n' + '\n' + 'event: response.output_text.delta\n' + 'data: {"type":"response.output_text.delta","delta":"b","sequence_number":2}\n' + '\n' + 'data: [DONE]\n\n' + ) + with patch("foundry_local_sdk.openai.responses_client.requests.post") as mock_post: + mock_post.return_value = _fake_stream_response(sse) + events = list(self.client.create_streaming("hi")) + + assert len(events) == 2 + assert "".join(e.delta for e in events) == "ab" + _, kwargs = mock_post.call_args + body = json.loads(kwargs["data"]) + assert body["stream"] is True + assert kwargs["headers"]["Accept"] == "text/event-stream" + + def test_streaming_http_error(self): + resp = MagicMock() + resp.ok = False + resp.status_code = 500 + resp.text = "boom" + resp.close = MagicMock() + with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): + with pytest.raises(ResponsesAPIError) as excinfo: + list(self.client.create_streaming("hi")) + assert excinfo.value.status_code == 500 + + def test_settings_merge_precedence(self): + self.client.settings.temperature = 0.1 + self.client.settings.max_output_tokens = 100 + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": "r", "object": "response", "status": "completed", "model": MODEL_ID, "output": []} + ) + # Per-call overrides client settings + self.client.create("hi", temperature=0.9) + + body = json.loads(mock_req.call_args.kwargs["data"]) + assert body["temperature"] == 0.9 # per-call wins + assert body["max_output_tokens"] == 100 # settings default preserved + + +class TestManagerFactory: + """Ensure the factory method wiring doesn't require a running server.""" + + def test_manager_raises_if_web_service_not_started(self): + from foundry_local_sdk.exception import FoundryLocalException + + # Build a stand-in manager without going through the constructor's + # heavy initialization path. + mgr = MagicMock() + mgr.urls = None + # Bind the real method to our MagicMock so we exercise actual logic. + from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M + + with pytest.raises(FoundryLocalException, match="[Ww]eb service"): + M.create_responses_client(mgr, "some-model") + + def test_manager_returns_client_when_urls_set(self): + mgr = MagicMock() + mgr.urls = [BASE_URL] + from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M + + client = M.create_responses_client(mgr, "phi") + assert isinstance(client, ResponsesClient) + assert client._model_id == "phi" + assert client._base_url == BASE_URL diff --git a/sdk/python/test/openai/test_responses_integration.py b/sdk/python/test/openai/test_responses_integration.py new file mode 100644 index 000000000..cb4eee456 --- /dev/null +++ b/sdk/python/test/openai/test_responses_integration.py @@ -0,0 +1,288 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Integration tests for the Responses API client. + +These require a real Foundry Local runtime + a cached model. They are only +run when ``FOUNDRY_INTEGRATION_TESTS=1`` is set in the environment. 
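+
+For example, from ``sdk/python`` (path assumed from this repository's layout)::
+
+    FOUNDRY_INTEGRATION_TESTS=1 pytest test/openai/test_responses_integration.py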
+""" + +from __future__ import annotations + +import json +import os + +import pytest + +from foundry_local_sdk import ( + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, +) + +from ..conftest import TEST_MODEL_ALIAS + +pytestmark = pytest.mark.skipif( + not os.environ.get("FOUNDRY_INTEGRATION_TESTS"), + reason="Set FOUNDRY_INTEGRATION_TESTS=1 to run Responses API integration tests.", +) + + +def _get_loaded_model(catalog): + cached = catalog.get_cached_models() + assert cached, "No cached models found" + variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" + + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + model.select_variant(variant) + model.load() + return model + + +@pytest.fixture(scope="module") +def responses_client(manager, catalog): + """Start the web service, return a ResponsesClient tied to the test model.""" + model = _get_loaded_model(catalog) + manager.start_web_service() + client = manager.create_responses_client(model.id) + try: + yield client + finally: + try: + manager.stop_web_service() + finally: + model.unload() + + +# --------------------------------------------------------------------------- +# Non-streaming +# --------------------------------------------------------------------------- + +class TestNonStreaming: + def test_simple_string_input(self, responses_client): + resp = responses_client.create("What is 2 + 2? Reply with just the number.") + assert resp.id + assert resp.status in {"completed", "incomplete"} + assert resp.output_text # Non-empty + + def test_with_options(self, responses_client): + resp = responses_client.create( + "Say hello.", + temperature=0.0, + max_output_tokens=32, + ) + assert resp.output_text + + def test_structured_input(self, responses_client): + # Validates that structured MessageItem input is accepted and produces + # a well-formed response. Not asserting content (too model-dependent). + resp = responses_client.create( + [ + MessageItem(role="user", content="Reply with the single word: ping"), + ], + temperature=0.0, + ) + assert resp.status in {"completed", "incomplete"} + assert resp.output_text.strip() + + def test_with_instructions(self, responses_client): + resp = responses_client.create( + "Who are you?", + instructions="You are a terse assistant. Answer in exactly three words.", + temperature=0.0, + ) + assert resp.output_text + + def test_multi_turn(self, responses_client): + # Validates previous_response_id wiring: the second response should + # link back to the first via previous_response_id. We don't assert on + # recall quality (too model-dependent for tiny test models). + first = responses_client.create( + "My favourite colour is green. Just acknowledge with 'ok'.", + temperature=0.0, + store=True, + ) + assert first.id + second = responses_client.create( + "What colour did I mention?", + previous_response_id=first.id, + temperature=0.0, + ) + assert second.previous_response_id == first.id + assert second.output_text.strip() + + +# --------------------------------------------------------------------------- +# Streaming +# --------------------------------------------------------------------------- + +class TestStreaming: + def test_basic_streaming(self, responses_client): + chunks = [] + completed = False + for event in responses_client.create_streaming( + "Count 1, 2, 3. 
Reply with just the digits separated by spaces.", + temperature=0.0, + ): + if event.type == "response.output_text.delta": + chunks.append(event.delta) + elif event.type == "response.completed": + completed = True + assert completed + assert "".join(chunks).strip() + + def test_streaming_with_options(self, responses_client): + saw_completed = False + for event in responses_client.create_streaming( + "Hello", + temperature=0.0, + max_output_tokens=16, + ): + if event.type == "response.completed": + saw_completed = True + assert saw_completed + + def test_streaming_events_sequence(self, responses_client): + # Expect created → in_progress → ... → completed + types_seen = [] + for event in responses_client.create_streaming("Say hi.", temperature=0.0): + types_seen.append(event.type) + assert "response.created" in types_seen + assert "response.completed" in types_seen + assert types_seen.index("response.created") < types_seen.index("response.completed") + + +# --------------------------------------------------------------------------- +# Storage: get / delete / list +# --------------------------------------------------------------------------- + +class TestStorage: + def test_get_stored_response(self, responses_client): + first = responses_client.create("Store this.", store=True, temperature=0.0) + fetched = responses_client.get(first.id) + assert fetched.id == first.id + assert fetched.output_text == first.output_text + + def test_delete_response(self, responses_client): + created = responses_client.create("Delete me.", store=True, temperature=0.0) + result = responses_client.delete(created.id) + assert result.id == created.id + assert result.deleted is True + + def test_list_responses(self, responses_client): + # Create one so the list is guaranteed non-empty. + responses_client.create("A listable response.", store=True, temperature=0.0) + result = responses_client.list() + assert result.object == "list" + assert len(result.data) >= 1 + + +# --------------------------------------------------------------------------- +# Tool calling +# --------------------------------------------------------------------------- + +class TestToolCalling: + def test_function_call_round_trip(self, responses_client): + tool = FunctionToolDefinition( + name="multiply_numbers", + description="Multiply two integers.", + parameters={ + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + "required": ["a", "b"], + }, + ) + first = responses_client.create( + "What is 7 times 6? Use the multiply_numbers tool.", + tools=[tool], + temperature=0.0, + ) + + # Find the function_call item. + call = next( + (item for item in first.output if getattr(item, "type", None) == "function_call"), + None, + ) + if call is None: + pytest.skip("Model did not emit a tool call for this prompt") + + args = json.loads(call.arguments) + # Model may use the declared parameter names or invent its own. + # Extract the two integer values robustly. + int_values = [int(v) for v in args.values() if isinstance(v, (int, str)) and str(v).lstrip("-").isdigit()] + if len(int_values) < 2: + pytest.skip(f"Model produced unusable tool args: {args!r}") + product = int_values[0] * int_values[1] + + follow = responses_client.create( + [ + MessageItem(role="user", content="What is 7 times 6? 
Use the multiply_numbers tool."), + call, + { + "type": "function_call_output", + "call_id": call.call_id, + "output": str(product), + }, + ], + tools=[tool], + temperature=0.0, + ) + # Validates the round-trip: the follow-up should produce a completed + # response that references the tool output. We don't assert content. + assert follow.status in {"completed", "incomplete"} + assert follow.output_text.strip() + + +# --------------------------------------------------------------------------- +# Vision +# --------------------------------------------------------------------------- + +class TestVision: + """These tests require a vision-capable model and will be skipped otherwise.""" + + def _run_or_skip(self, responses_client, content): + try: + return responses_client.create( + [MessageItem(role="user", content=content)], + temperature=0.0, + ) + except Exception as e: + pytest.skip(f"Model does not appear to support vision: {e}") + + def test_image_base64_input(self, responses_client): + # Minimal 1x1 PNG. + png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + resp = self._run_or_skip( + responses_client, + [ + InputTextContent(text="Describe this image briefly."), + InputImageContent.from_bytes(png, "image/png"), + ], + ) + assert resp.status in {"completed", "incomplete"} + + def test_image_with_text(self, responses_client): + png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + resp = self._run_or_skip( + responses_client, + [ + InputTextContent(text="What colour is this?"), + InputImageContent.from_bytes(png, "image/png"), + ], + ) + assert resp.status in {"completed", "incomplete"} From b6ad3ae5343eb7d1146badffd3c587cd1c515c22 Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 17:09:54 -0400 Subject: [PATCH 02/12] fix(sdk/python): address Responses API PR review comments - Add configurable timeout to ResponsesClientSettings (default 60s); non-streaming calls use it directly, streaming uses it as connect timeout with unbounded read (suitable for long responses) - Fix SSE buffer: replace O(n) list-join-per-chunk with a single string buffer and split on double-newline; use chunk_size=None for natural server chunk boundaries - Add InputImageContent.__post_init__ to enforce exactly one of image_url or image_data (raises ValueError if both or neither) - Add optional max_size=(w,h) to InputImageContent.from_file and from_bytes to resize images before base64-encoding (requires Pillow) - Raise ValueError for unknown content-part types instead of silently returning a fallback InputTextContent - Document _MAX_ID_LEN=256 with rationale; lower from 1024 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/src/openai/responses_client.py | 50 ++++++----- sdk/python/src/openai/responses_types.py | 87 +++++++++++++++++-- .../test/openai/test_responses_client.py | 18 +++- 3 files changed, 123 insertions(+), 32 deletions(-) diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index a0d9a7777..74544ad84 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -54,7 +54,9 @@ logger = logging.getLogger(__name__) -_MAX_ID_LEN = 1024 +# Practical guard against misuse (e.g. passing a full response JSON by mistake). 
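+# (For scale: IDs in this SDK's own tests look like "resp_abc", tens of characters at most.)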
+# OpenAI does not publish a max ID length; 256 chars is conservative and generous. +_MAX_ID_LEN = 256 class ResponsesClientSettings: @@ -79,6 +81,10 @@ def __init__(self) -> None: self.reasoning: Optional[ReasoningConfig] = None self.text: Optional[TextConfig] = None self.seed: Optional[int] = None + # Transport settings — not sent to the API. + self.timeout: float = 60.0 + """Seconds to wait for the server to connect and respond on non-streaming calls. + For streaming, this is used only as the connection timeout; reads are unbounded.""" def _serialize(self) -> Dict[str, Any]: raw: Dict[str, Any] = { @@ -271,6 +277,7 @@ def _url(self, path: str) -> str: return f"{self._base_url}{path}" def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + timeout = self.settings.timeout try: if body is not None: resp = requests.request( @@ -278,9 +285,15 @@ def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = self._url(path), headers={"Content-Type": "application/json", "Accept": "application/json"}, data=json.dumps(body), + timeout=timeout, ) else: - resp = requests.request(method, self._url(path), headers={"Accept": "application/json"}) + resp = requests.request( + method, + self._url(path), + headers={"Accept": "application/json"}, + timeout=timeout, + ) except requests.RequestException as e: raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e @@ -308,12 +321,16 @@ def _handle_json_response(resp: requests.Response, method: str, path: str) -> Di def _post_stream( self, path: str, body: Dict[str, Any] ) -> Generator[StreamingEvent, None, None]: + # Use (connect_timeout, None) so the connection attempt can time out but + # the read side is unbounded — streaming responses can be arbitrarily long. + connect_timeout = self.settings.timeout try: resp = requests.post( self._url(path), headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, data=json.dumps(body), stream=True, + timeout=(connect_timeout, None), ) except requests.RequestException as e: raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e @@ -335,35 +352,28 @@ def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, Closes the underlying HTTP connection when the generator ends for any reason (completion, [DONE], exception, or GC). + + Uses a single string buffer and splits on double-newline boundaries to + avoid the O(n) cost of joining a growing list on every chunk. """ try: - buffer_parts: List[str] = [] - # iter_content yields bytes chunks; decode as UTF-8 and split on blank lines. - for chunk in resp.iter_content(chunk_size=1024, decode_unicode=False): + buffer = "" + for chunk in resp.iter_content(chunk_size=None, decode_unicode=False): if not chunk: continue - if isinstance(chunk, bytes): - text = chunk.decode("utf-8", errors="replace") - else: - text = chunk - buffer_parts.append(text) - buffer = "".join(buffer_parts) - # Normalize CRLF to LF so our split works on both styles. 
- buffer = buffer.replace("\r\n", "\n") - - blocks = buffer.split("\n\n") - incomplete = blocks.pop() if blocks else "" - buffer_parts = [incomplete] if incomplete else [] + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") - for block in blocks: + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) event = _parse_sse_block(block) if event is _SSE_DONE: return if event is not None: yield event - # Flush any residual block that wasn't terminated by a blank line. - tail = "".join(buffer_parts).strip() + # Flush any residual block not terminated by a blank line. + tail = buffer.strip() if tail: event = _parse_sse_block(tail) if event is not None and event is not _SSE_DONE: diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py index 09f9f4a60..064d2ad6a 100644 --- a/sdk/python/src/openai/responses_types.py +++ b/sdk/python/src/openai/responses_types.py @@ -13,9 +13,39 @@ from __future__ import annotations import base64 +import io import mimetypes from dataclasses import dataclass, field, fields, is_dataclass -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + + +# --------------------------------------------------------------------------- +# Image resize helper (optional — requires Pillow) +# --------------------------------------------------------------------------- + +def _resize_image(data: bytes, media_type: str, max_size: Tuple[int, int]) -> Tuple[bytes, str]: + """Resize *data* so it fits within *max_size* (width, height) while preserving + aspect ratio. Returns the re-encoded bytes and MIME type. + + Requires ``Pillow`` (``pip install pillow``). Raises ``ImportError`` if it is + not installed. + """ + try: + from PIL import Image # type: ignore[import-untyped] + except ImportError as exc: + raise ImportError( + "Image resizing requires Pillow. Install it with: pip install pillow" + ) from exc + + img = Image.open(io.BytesIO(data)) + img.thumbnail(max_size, Image.LANCZOS) + buf = io.BytesIO() + fmt = media_type.split("/")[-1].upper().replace("JPG", "JPEG") + if fmt not in ("JPEG", "PNG", "WEBP", "GIF"): + fmt = "PNG" + media_type = "image/png" + img.save(buf, format=fmt) + return buf.getvalue(), media_type # --------------------------------------------------------------------------- @@ -53,28 +83,69 @@ class InputTextContent: @dataclass class InputImageContent: - """Vision input. Provide either ``image_url`` or ``image_data`` (base64).""" + """Vision input. Provide exactly one of ``image_url`` or ``image_data`` (base64).""" media_type: str = "" image_url: Optional[str] = None image_data: Optional[str] = None detail: Optional[str] = None # "low" | "high" | "auto" type: Literal["input_image"] = "input_image" + def __post_init__(self) -> None: + has_url = self.image_url is not None + has_data = self.image_data is not None + if has_url == has_data: + raise ValueError( + "Provide exactly one of image_url or image_data, not both (or neither)." + ) + @staticmethod - def from_file(path: str, detail: Optional[str] = None) -> "InputImageContent": + def from_file( + path: str, + detail: Optional[str] = None, + max_size: Optional[Tuple[int, int]] = None, + ) -> "InputImageContent": + """Load an image from *path*, base64-encode it, and return an :class:`InputImageContent`. + + Args: + path: Filesystem path to the image file. 
+ detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. + max_size: Optional ``(width, height)`` cap. If the image exceeds either + dimension it is resized proportionally (requires ``Pillow``). + """ media_type, _ = mimetypes.guess_type(path) if not media_type or not media_type.startswith("image/"): raise ValueError(f"Unsupported image format: {path}") with open(path, "rb") as fh: - data = base64.b64encode(fh.read()).decode("ascii") - return InputImageContent(image_data=data, media_type=media_type, detail=detail) + raw = fh.read() + if max_size is not None: + raw, media_type = _resize_image(raw, media_type, max_size) + return InputImageContent( + image_data=base64.b64encode(raw).decode("ascii"), + media_type=media_type, + detail=detail, + ) @staticmethod def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) @staticmethod - def from_bytes(data: bytes, media_type: str, detail: Optional[str] = None) -> "InputImageContent": + def from_bytes( + data: bytes, + media_type: str, + detail: Optional[str] = None, + max_size: Optional[Tuple[int, int]] = None, + ) -> "InputImageContent": + """Create an :class:`InputImageContent` from raw *data* bytes. + + Args: + data: Raw image bytes. + media_type: MIME type, e.g. ``"image/png"``. + detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. + max_size: Optional ``(width, height)`` cap. Requires ``Pillow``. + """ + if max_size is not None: + data, media_type = _resize_image(data, media_type, max_size) return InputImageContent( image_data=base64.b64encode(data).decode("ascii"), media_type=media_type, @@ -129,8 +200,8 @@ def _parse_content_part(data: Dict[str, Any]) -> ContentPart: ) if t == "refusal": return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — fall back to input_text so callers still get something - return InputTextContent(text=str(data.get("text", ""))) + # Unknown content-part type — raise so callers know the SDK needs updating + raise ValueError(f"Unknown content-part type: {t!r}") def _parse_content(value: Any) -> Union[str, List[ContentPart]]: diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 15c1f3b1e..6b20754ef 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -115,10 +115,14 @@ def test_serialize_all_fields(self): assert out["text"] == {"format": {"type": "json_object"}} assert out["seed"] == 42 - def test_serialize_omits_none(self): + def test_timeout_not_serialized(self): + # timeout is a transport setting and must NOT appear in the API payload s = ResponsesClientSettings() - s.temperature = None # explicit None is omitted - assert "temperature" not in s._serialize() + s.timeout = 30.0 + assert "timeout" not in s._serialize() + + def test_timeout_default(self): + assert ResponsesClientSettings().timeout == 60.0 # --------------------------------------------------------------------------- @@ -228,7 +232,7 @@ def test_rejects_whitespace_id(self): def test_rejects_too_long_id(self): with pytest.raises(ValueError, match="length"): - self.client.get("x" * 2000) + self.client.get("x" * 1000) # --------------------------------------------------------------------------- @@ -389,6 +393,12 @@ def test_input_image_serialization(self): # image_url left unset should be omitted assert "image_url" not in d + def test_input_image_mutual_exclusivity(self): + 
with pytest.raises(ValueError, match="exactly one"): + InputImageContent(media_type="image/png") # neither set + with pytest.raises(ValueError, match="exactly one"): + InputImageContent(media_type="image/png", image_url="http://x.com/a.png", image_data="abc") # both set + # --------------------------------------------------------------------------- # Type serialization & parsing From dbc3e93f1961b9fe9f3603473ee1c7c5ad58a36b Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 22:03:50 -0400 Subject: [PATCH 03/12] address review: store=None, MAX_ID_LEN=1024, unknown content-part returns None, example usage guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ResponsesClientSettings.store defaults to None (omitted from request body, server decides) — aligns with JS SDK which has store?: boolean - _MAX_ID_LEN reverted to 1024 to align with JS SDK constant - _parse_content_part returns None for unknown types (forward-compat, not ValueError); _parse_content filters out None entries - examples/responses.py: guard event.response.usage chain with getattr to avoid AttributeError if response or usage is absent - Tests updated: store default tests, too-long-id threshold (1025), request body assertions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/examples/responses.py | 5 ++++- sdk/python/src/openai/responses_client.py | 7 +++---- sdk/python/src/openai/responses_types.py | 9 +++++---- sdk/python/test/openai/test_responses_client.py | 15 +++++++-------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py index ce810e814..047ddbdeb 100644 --- a/sdk/python/examples/responses.py +++ b/sdk/python/examples/responses.py @@ -63,7 +63,10 @@ def streaming(client): if event.type == "response.output_text.delta": print(event.delta, end="", flush=True) elif event.type == "response.completed": - print(f"\n(completed, {event.response.usage.total_tokens} tokens)") + response = getattr(event, "response", None) + usage = getattr(response, "usage", None) if response is not None else None + total = getattr(usage, "total_tokens", None) if usage is not None else None + print(f"\n(completed{f', {total} tokens' if total is not None else ''})") def multi_turn(client): diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index 74544ad84..0cfbaaad9 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -54,9 +54,8 @@ logger = logging.getLogger(__name__) -# Practical guard against misuse (e.g. passing a full response JSON by mistake). -# OpenAI does not publish a max ID length; 256 chars is conservative and generous. -_MAX_ID_LEN = 256 +# Align with the JS SDK limit to avoid surprising client-side rejections of valid IDs. +_MAX_ID_LEN = 1024 class ResponsesClientSettings: @@ -76,7 +75,7 @@ def __init__(self) -> None: self.tool_choice: Optional[Any] = None self.truncation: Optional[str] = None self.parallel_tool_calls: Optional[bool] = None - self.store: Optional[bool] = True # SDK default — matches OpenAI convention. + self.store: Optional[bool] = None # Omitted by default; server applies its own default. 
self.metadata: Optional[Dict[str, str]] = None self.reasoning: Optional[ReasoningConfig] = None self.text: Optional[TextConfig] = None diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py index 064d2ad6a..ad1266a44 100644 --- a/sdk/python/src/openai/responses_types.py +++ b/sdk/python/src/openai/responses_types.py @@ -179,7 +179,7 @@ class RefusalContent: ] -def _parse_content_part(data: Dict[str, Any]) -> ContentPart: +def _parse_content_part(data: Dict[str, Any]) -> Optional[ContentPart]: t = data.get("type") if t == "input_text": return InputTextContent(text=data.get("text", "")) @@ -200,15 +200,16 @@ def _parse_content_part(data: Dict[str, Any]) -> ContentPart: ) if t == "refusal": return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — raise so callers know the SDK needs updating - raise ValueError(f"Unknown content-part type: {t!r}") + # Unknown content-part type — return None so callers can filter forward-compat parts. + return None def _parse_content(value: Any) -> Union[str, List[ContentPart]]: if isinstance(value, str): return value if isinstance(value, list): - return [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + parts = [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + return [p for p in parts if p is not None] return value diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 6b20754ef..32034d3c2 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -73,14 +73,13 @@ def _fake_stream_response(sse_payload: str, status: int = 200): # --------------------------------------------------------------------------- class TestResponsesClientSettings: - def test_serialize_defaults_contains_store(self): - # store defaults to True — matches OpenAI convention + def test_serialize_defaults_empty(self): + # No fields set by default — server applies its own defaults s = ResponsesClientSettings() - serialized = s._serialize() - assert serialized == {"store": True} + assert s._serialize() == {} - def test_store_defaults_to_true(self): - assert ResponsesClientSettings().store is True + def test_store_defaults_to_none(self): + assert ResponsesClientSettings().store is None def test_serialize_all_fields(self): s = ResponsesClientSettings() @@ -232,7 +231,7 @@ def test_rejects_whitespace_id(self): def test_rejects_too_long_id(self): with pytest.raises(ValueError, match="length"): - self.client.get("x" * 1000) + self.client.get("x" * 1025) # --------------------------------------------------------------------------- @@ -504,7 +503,7 @@ def test_create_posts_correct_body(self): assert body["model"] == MODEL_ID assert body["input"] == "hello" assert body["temperature"] == 0.3 - assert body["store"] is True # default + assert "store" not in body # store=None is omitted from request assert "stream" not in body def test_get_uses_url_encoded_path(self): From 7e8cd888fb946ff2d979776bdcf73151efccb0a0 Mon Sep 17 00:00:00 2001 From: maanavd Date: Mon, 27 Apr 2026 15:11:18 -0400 Subject: [PATCH 04/12] fix(sdk/python): address Responses API review feedback - Rename model and variant Responses client factories to get_responses_client to match existing get_*_client naming. - Use FoundryLocalException for Responses API transport and parsing errors instead of exporting a dedicated ResponsesAPIError. 
- Keep only the foundry-local-core version bump in requirements.txt and restore existing ORT dependency markers/order. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/requirements.txt | 9 ++--- sdk/python/src/__init__.py | 3 +- sdk/python/src/detail/model.py | 6 ++-- sdk/python/src/detail/model_variant.py | 4 +-- sdk/python/src/imodel.py | 4 +-- sdk/python/src/openai/__init__.py | 5 ++- sdk/python/src/openai/responses_client.py | 33 ++++++------------- .../test/openai/test_responses_client.py | 14 ++++---- 8 files changed, 32 insertions(+), 46 deletions(-) diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 25d05c298..ce84af748 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -1,8 +1,9 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 +# Standard native binary packages from the ORT-Nightly PyPI feed. foundry-local-core==1.0.0 -onnxruntime-gpu==1.24.4; platform_system == "Linux" -onnxruntime-core==1.24.4; platform_system != "Linux" -onnxruntime-genai-cuda==0.13.1; platform_system == "Linux" -onnxruntime-genai-core==0.13.1; platform_system != "Linux" +onnxruntime-core==1.24.4; sys_platform != "linux" +onnxruntime-gpu==1.24.4; sys_platform == "linux" +onnxruntime-genai-core==0.13.1; sys_platform != "linux" +onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 273e3f9c4..ae768e7c3 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,7 +7,7 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings +from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .openai.responses_types import ( ContentPart, DeleteResponseResult, @@ -72,7 +72,6 @@ "ResponseObject", "ResponseOutputItem", "ResponseUsage", - "ResponsesAPIError", "ResponsesClient", "ResponsesClientSettings", "StreamingEvent", diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index 01dcfb471..e15aa583f 100644 --- a/sdk/python/src/detail/model.py +++ b/sdk/python/src/detail/model.py @@ -148,6 +148,6 @@ def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() - def create_responses_client(self, base_url: str) -> "ResponsesClient": - """Create a Responses API client for the currently selected variant.""" - return self._selected_variant.create_responses_client(base_url) + def get_responses_client(self, base_url: str) -> "ResponsesClient": + """Get a Responses API client for the currently selected variant.""" + return self._selected_variant.get_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 2e19662d5..7e57b3a2e 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -177,8 +177,8 @@ def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) - def create_responses_client(self, base_url: str) -> ResponsesClient: - """Create a Responses API client for this variant. + def get_responses_client(self, base_url: str) -> ResponsesClient: + """Create an OpenAI-compatible ``ResponsesClient`` for this variant. 
:param base_url: Base URL of the running Foundry Local web service (e.g. ``manager.urls[0]``). diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index 6bc0d3638..f76ad1b03 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -138,9 +138,9 @@ def get_embedding_client(self) -> 'EmbeddingClient': pass @abstractmethod - def create_responses_client(self, base_url: str) -> 'ResponsesClient': + def get_responses_client(self, base_url: str) -> 'ResponsesClient': """ - Create an OpenAI Responses API client bound to the running web service. + Get an OpenAI Responses API client bound to the running web service. Unlike the other clients, the Responses API is HTTP-only and requires the Foundry Local web service to be started. Pass the base URL diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index 011ed15bd..ea97ca575 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -14,7 +14,7 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError +from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,8 +25,7 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesAPIError", "ResponsesClient", "ResponsesClientSettings", "TranscriptionContentPart", -] \ No newline at end of file +] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index 0cfbaaad9..ddcce586c 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -36,6 +36,7 @@ import requests +from ..exception import FoundryLocalException from .responses_types import ( DeleteResponseResult, InputItemsListResponse, @@ -105,20 +106,11 @@ def _serialize(self) -> Dict[str, Any]: return {k: v for k, v in raw.items() if v is not None} -class ResponsesAPIError(Exception): - """Raised for HTTP/transport errors against the Responses API.""" - - def __init__(self, message: str, status_code: Optional[int] = None, body: Optional[str] = None): - super().__init__(message) - self.status_code = status_code - self.body = body - - class ResponsesClient: """Client for the OpenAI Responses API served by Foundry Local. Construct via ``manager.create_responses_client(model_id)`` or - ``model.create_responses_client(base_url)``. + ``model.get_responses_client(base_url)``. 
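+
+    An illustrative streaming loop (event and attribute names as exercised by
+    this module's tests)::
+
+        for event in client.create_streaming("Count to 3"):
+            if event.type == "response.output_text.delta":
+                print(event.delta, end="", flush=True)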
""" def __init__(self, base_url: str, model_id: Optional[str] = None): @@ -294,7 +286,7 @@ def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = timeout=timeout, ) except requests.RequestException as e: - raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e + raise FoundryLocalException(f"Network error calling {method} {path}: {e}") from e return self._handle_json_response(resp, method, path) @@ -305,15 +297,13 @@ def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: text = resp.text if not resp.ok: - raise ResponsesAPIError( - f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}", - status_code=resp.status_code, - body=text, + raise FoundryLocalException( + f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}" ) try: return json.loads(text) if text else {} except json.JSONDecodeError as e: - raise ResponsesAPIError( + raise FoundryLocalException( f"Failed to parse response JSON from {method} {path}: {text[:200]}" ) from e @@ -332,15 +322,13 @@ def _post_stream( timeout=(connect_timeout, None), ) except requests.RequestException as e: - raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e + raise FoundryLocalException(f"Network error calling POST {path}: {e}") from e if not resp.ok: body_text = resp.text resp.close() - raise ResponsesAPIError( - f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}", - status_code=resp.status_code, - body=body_text, + raise FoundryLocalException( + f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}" ) return _iter_sse_events(resp) @@ -409,7 +397,7 @@ def _parse_sse_block(block: str) -> Any: try: parsed = json.loads(data) except json.JSONDecodeError as e: - raise ResponsesAPIError(f"Failed to parse streaming event JSON: {e}") from e + raise FoundryLocalException(f"Failed to parse streaming event JSON: {e}") from e if not isinstance(parsed, dict): return None return parse_streaming_event(parsed) @@ -418,5 +406,4 @@ def _parse_sse_block(block: str) -> Any: __all__ = [ "ResponsesClient", "ResponsesClientSettings", - "ResponsesAPIError", ] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 32034d3c2..871fe301c 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -18,8 +18,8 @@ import pytest +from foundry_local_sdk.exception import FoundryLocalException from foundry_local_sdk.openai.responses_client import ( - ResponsesAPIError, ResponsesClient, ResponsesClientSettings, _parse_sse_block, @@ -291,7 +291,7 @@ def test_multi_line_data(self): def test_invalid_json_raises(self): block = 'data: {not valid json' - with pytest.raises(ResponsesAPIError): + with pytest.raises(FoundryLocalException): _parse_sse_block(block) def test_empty_block_returns_none(self): @@ -527,15 +527,15 @@ def test_delete_parses_result(self): assert result.deleted is True assert result.id == "resp_1" - def test_http_error_raises_responses_api_error(self): + def test_http_error_raises_foundry_local_exception(self): resp = MagicMock() resp.ok = False resp.status_code = 400 resp.text = '{"error":{"message":"bad"}}' with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): - with pytest.raises(ResponsesAPIError) as excinfo: + with 
pytest.raises(FoundryLocalException) as excinfo: self.client.create("hi") - assert excinfo.value.status_code == 400 + assert "400" in str(excinfo.value) assert "bad" in str(excinfo.value) def test_create_streaming_yields_events(self): @@ -566,9 +566,9 @@ def test_streaming_http_error(self): resp.text = "boom" resp.close = MagicMock() with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): - with pytest.raises(ResponsesAPIError) as excinfo: + with pytest.raises(FoundryLocalException) as excinfo: list(self.client.create_streaming("hi")) - assert excinfo.value.status_code == 500 + assert "500" in str(excinfo.value) def test_settings_merge_precedence(self): self.client.settings.temperature = 0.1 From 9bc1606b8320330e8b565aa229f64a38f8575628 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 16:06:30 -0400 Subject: [PATCH 05/12] refactor(sdk/python): use web-service sample for Responses API Replace the SDK-native Responses client implementation with a focused Python sample and integration tests that use FoundryLocalManager for setup/model/server lifecycle and the official OpenAI Python client for /v1/responses calls. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/README.md | 3 +- sdk/python/examples/responses.py | 157 --- sdk/python/examples/responses_web_service.py | 176 ++++ sdk/python/requirements.txt | 10 +- sdk/python/src/__init__.py | 61 +- sdk/python/src/detail/model.py | 5 - sdk/python/src/detail/model_variant.py | 9 - sdk/python/src/foundry_local_manager.py | 23 - sdk/python/src/imodel.py | 16 - sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 409 -------- sdk/python/src/openai/responses_types.py | 957 ------------------ .../test/openai/test_responses_client.py | 612 ----------- .../test/openai/test_responses_integration.py | 288 ------ .../test/openai/test_responses_web_service.py | 194 ++++ 15 files changed, 379 insertions(+), 2546 deletions(-) delete mode 100644 sdk/python/examples/responses.py create mode 100644 sdk/python/examples/responses_web_service.py delete mode 100644 sdk/python/src/openai/responses_client.py delete mode 100644 sdk/python/src/openai/responses_types.py delete mode 100644 sdk/python/test/openai/test_responses_client.py delete mode 100644 sdk/python/test/openai/test_responses_integration.py create mode 100644 sdk/python/test/openai/test_responses_web_service.py diff --git a/sdk/python/README.md b/sdk/python/README.md index 2a121411e..0c065bc85 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -328,4 +328,5 @@ See [test/README.md](test/README.md) for detailed test setup and structure. ```bash python examples/chat_completion.py -``` \ No newline at end of file +python examples/responses_web_service.py +``` diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py deleted file mode 100644 index 047ddbdeb..000000000 --- a/sdk/python/examples/responses.py +++ /dev/null @@ -1,157 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""End-to-end example for the OpenAI Responses API client. - -Run with:: - - python examples/responses.py - -Requires a loaded model and a started web service. 
-""" - -from __future__ import annotations - -import json - -from foundry_local_sdk import ( - Configuration, - FoundryLocalManager, - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, -) - -MODEL_ALIAS = "phi-4-mini" - - -def setup(): - config = Configuration(app_name="ResponsesExample") - FoundryLocalManager.initialize(config) - mgr = FoundryLocalManager.instance - - mgr.download_and_register_eps() - - model = mgr.catalog.get_model(MODEL_ALIAS) - if model is None: - raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") - if not model.is_cached: - print(f"Downloading {MODEL_ALIAS}...") - model.download(progress_callback=lambda p: print(f" {p:.1f}%", end="\r")) - print() - print(f"Loading {model.alias}...", end="") - model.load() - print("loaded!") - mgr.start_web_service() - - client = mgr.create_responses_client(model.id) - return mgr, model, client - - -def basic_create(client): - print("\n=== 1. Basic create ===") - resp = client.create("What is 2 + 2? Answer in one word.") - print(f"status={resp.status} text={resp.output_text!r}") - - -def streaming(client): - print("\n=== 2. Streaming ===") - print("assistant: ", end="", flush=True) - for event in client.create_streaming("Count from 1 to 5, separated by spaces."): - if event.type == "response.output_text.delta": - print(event.delta, end="", flush=True) - elif event.type == "response.completed": - response = getattr(event, "response", None) - usage = getattr(response, "usage", None) if response is not None else None - total = getattr(usage, "total_tokens", None) if usage is not None else None - print(f"\n(completed{f', {total} tokens' if total is not None else ''})") - - -def multi_turn(client): - print("\n=== 3. Multi-turn ===") - first = client.create("My favorite color is green. Remember that.", store=True) - print(f"first id={first.id!r}") - second = client.create( - "What is my favorite color?", - previous_response_id=first.id, - ) - print(f"second: {second.output_text!r}") - - -def tool_calling(client): - print("\n=== 4. Tool calling ===") - tools = [ - FunctionToolDefinition( - name="multiply_numbers", - description="Multiply two integers together.", - parameters={ - "type": "object", - "properties": { - "a": {"type": "integer"}, - "b": {"type": "integer"}, - }, - "required": ["a", "b"], - }, - ) - ] - resp = client.create("What is 7 times 6?", tools=tools) - - # Find a function_call item in the output (if the model produced one). - for item in resp.output: - if getattr(item, "type", None) == "function_call": - print(f"call {item.name}({item.arguments})") - args = json.loads(item.arguments) - answer = args["a"] * args["b"] - follow = client.create( - [ - MessageItem(role="user", content="What is 7 times 6?"), - item, - # The function_call_output is sent back keyed by call_id - {"type": "function_call_output", "call_id": item.call_id, "output": str(answer)}, - ], - tools=tools, - ) - print(f"final: {follow.output_text!r}") - return - print(f"no tool call — got text: {resp.output_text!r}") - - -def vision(client): - print("\n=== 5. Vision ===") - # Requires a vision-capable model. Replace with a real PNG to see real output. 
- tiny_png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - msg = MessageItem( - role="user", - content=[ - InputTextContent(text="Describe this image in one sentence."), - InputImageContent.from_bytes(tiny_png, "image/png"), - ], - ) - try: - resp = client.create([msg]) - print(f"vision response: {resp.output_text!r}") - except Exception as e: - print(f"(skipped — model may not support vision: {e})") - - -def main(): - mgr, model, client = setup() - try: - basic_create(client) - streaming(client) - multi_turn(client) - tool_calling(client) - vision(client) - finally: - mgr.stop_web_service() - model.unload() - - -if __name__ == "__main__": - main() diff --git a/sdk/python/examples/responses_web_service.py b/sdk/python/examples/responses_web_service.py new file mode 100644 index 000000000..fe9517949 --- /dev/null +++ b/sdk/python/examples/responses_web_service.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +"""Example: Responses API through the Foundry Local web service. + +Foundry Local manages setup, model lifecycle, and the local OpenAI-compatible +web service. The official OpenAI Python client sends the actual /v1/responses +requests to that local service. +""" + +from __future__ import annotations + +import json +from typing import Any + +from openai import OpenAI + +from foundry_local_sdk import Configuration, FoundryLocalManager + + +MODEL_ALIAS = "qwen2.5-0.5b" + + +def _field(value: Any, name: str, default: Any = None) -> Any: + if isinstance(value, dict): + return value.get(name, default) + return getattr(value, name, default) + + +def _response_text(response: Any) -> str: + text = _field(response, "output_text") + if isinstance(text, str) and text: + return text + + for item in _field(response, "output", []) or []: + if _field(item, "type") != "message": + continue + for part in _field(item, "content", []) or []: + if _field(part, "type") == "output_text": + part_text = _field(part, "text", "") + if isinstance(part_text, str): + text = (text or "") + part_text + return text or "" + + +def _get_function_call(response: Any) -> Any: + for item in _field(response, "output", []) or []: + if _field(item, "type") == "function_call": + return item + return None + + +def _get_weather_tool() -> dict[str, Any]: + return { + "type": "function", + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and region, for example Seattle, WA.", + } + }, + "required": ["location"], + }, + } + + +def main() -> None: + config = Configuration(app_name="ResponsesWebServiceExample") + print("Initializing Foundry Local Manager") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + if manager is None: + raise RuntimeError("FoundryLocalManager.initialize did not set instance") + + print("Registering execution providers...") + ep_result = manager.download_and_register_eps() + print(f"EP registration success: {ep_result.success} ({ep_result.status})") + + model = manager.catalog.get_model(MODEL_ALIAS) + if model is None: + raise RuntimeError(f"Model '{MODEL_ALIAS}' 
not found in catalog") + + if not model.is_cached: + print(f"Downloading {model.alias}...") + model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r")) + print() + + print(f"Loading {model.alias}...", end="") + model.load() + print("loaded!") + + openai_client: OpenAI | None = None + try: + print("Starting OpenAI-compatible web service...", end="") + manager.start_web_service() + if not manager.urls: + raise RuntimeError("Web service started but did not return any URLs") + print("started!") + + base_url = manager.urls[0].rstrip("/") + "/v1" + openai_client = OpenAI(base_url=base_url, api_key="notneeded") + + print("\n--- Non-streaming Responses call ---") + response = openai_client.responses.create( + model=model.id, + input="What is 2 + 2? Reply briefly.", + ) + print(_response_text(response)) + + print("\n--- Streaming Responses call ---") + stream = openai_client.responses.create( + model=model.id, + input="Count from 1 to 3, separated by spaces.", + stream=True, + ) + for event in stream: + if _field(event, "type") == "response.output_text.delta": + print(_field(event, "delta", ""), end="", flush=True) + print() + + print("\n--- Function/tool calling Responses flow ---") + weather_tool = _get_weather_tool() + tool_response = openai_client.responses.create( + model=model.id, + input="Use get_weather to check the weather in Seattle, then answer.", + tools=[weather_tool], + tool_choice="required", + store=True, + ) + function_call = _get_function_call(tool_response) + if function_call is None: + raise RuntimeError("Model did not return a function_call item") + + print(f"Tool call: {_field(function_call, 'name')}") + print(f"Arguments: {_field(function_call, 'arguments')}") + + final_response = openai_client.responses.create( + model=model.id, + previous_response_id=_field(tool_response, "id"), + input=[ + { + "type": "function_call_output", + "call_id": _field(function_call, "call_id"), + "output": json.dumps( + { + "location": "Seattle, WA", + "temperature": "68 F", + "conditions": "sunny", + } + ), + } + ], + ) + print(_response_text(final_response)) + + finally: + if openai_client is not None: + openai_client.close() + try: + manager.stop_web_service() + print("Web service stopped.") + except Exception: + pass + model.unload() + print("Model unloaded.") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index ce84af748..92c98b540 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -2,8 +2,8 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 # Standard native binary packages from the ORT-Nightly PyPI feed. 
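 # One way to confirm which pinned versions actually resolved in the active
 # environment (standard-library importlib.metadata, Python 3.8+):
 #   python -c "from importlib.metadata import version; print(version('foundry-local-core'))"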
-foundry-local-core==1.0.0 -onnxruntime-core==1.24.4; sys_platform != "linux" -onnxruntime-gpu==1.24.4; sys_platform == "linux" -onnxruntime-genai-core==0.13.1; sys_platform != "linux" -onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" +foundry-local-core==1.0.0rc1 +onnxruntime-core==1.25.1; sys_platform != "linux" +onnxruntime-gpu==1.25.1; sys_platform == "linux" +onnxruntime-genai-core==0.13.2; sys_platform != "linux" +onnxruntime-genai-cuda==0.13.2; sys_platform == "linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index ae768e7c3..14534d196 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,34 +7,6 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesClient, ResponsesClientSettings -from .openai.responses_types import ( - ContentPart, - DeleteResponseResult, - FunctionCallItem, - FunctionCallOutputItem, - FunctionToolDefinition, - InputFileContent, - InputImageContent, - InputItemsListResponse, - InputTextContent, - ItemReference, - ListResponsesResult, - MessageItem, - OutputTextContent, - ReasoningConfig, - ReasoningItem, - RefusalContent, - ResponseError, - ResponseInputItem, - ResponseObject, - ResponseOutputItem, - ResponseUsage, - StreamingEvent, - TextConfig, - TextFormat, - parse_streaming_event, -) from .version import __version__ _logger = logging.getLogger(__name__) @@ -48,35 +20,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = [ - "Configuration", - "ContentPart", - "DeleteResponseResult", - "FoundryLocalManager", - "FunctionCallItem", - "FunctionCallOutputItem", - "FunctionToolDefinition", - "InputFileContent", - "InputImageContent", - "InputItemsListResponse", - "InputTextContent", - "ItemReference", - "ListResponsesResult", - "MessageItem", - "OutputTextContent", - "ReasoningConfig", - "ReasoningItem", - "RefusalContent", - "ResponseError", - "ResponseInputItem", - "ResponseObject", - "ResponseOutputItem", - "ResponseUsage", - "ResponsesClient", - "ResponsesClientSettings", - "StreamingEvent", - "TextConfig", - "TextFormat", - "__version__", - "parse_streaming_event", -] +__all__ = ["Configuration", "FoundryLocalManager", "__version__"] diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index e15aa583f..6d60b7a2f 100644 --- a/sdk/python/src/detail/model.py +++ b/sdk/python/src/detail/model.py @@ -11,7 +11,6 @@ from ..openai.chat_client import ChatClient from ..openai.audio_client import AudioClient from ..openai.embedding_client import EmbeddingClient -from ..openai.responses_client import ResponsesClient from .model_variant import ModelVariant from ..exception import FoundryLocalException from .core_interop import CoreInterop @@ -147,7 +146,3 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() - - def get_responses_client(self, base_url: str) -> "ResponsesClient": - """Get a Responses API client for the currently selected variant.""" - return self._selected_variant.get_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 7e57b3a2e..76efb05cd 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -17,7 +17,6 @@ from ..openai.audio_client import AudioClient from ..openai.chat_client import ChatClient 
from ..openai.embedding_client import EmbeddingClient -from ..openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -176,11 +175,3 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) - - def get_responses_client(self, base_url: str) -> ResponsesClient: - """Create an OpenAI-compatible ``ResponsesClient`` for this variant. - - :param base_url: Base URL of the running Foundry Local web service - (e.g. ``manager.urls[0]``). - """ - return ResponsesClient(base_url, self.id) diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index b891d1b17..a649f8e56 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,7 +20,6 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException -from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -195,25 +194,3 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None - - def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: - """Create a :class:`ResponsesClient` bound to the running web service. - - The Responses API is HTTP-only, so the web service must be started - before calling this. Use :meth:`start_web_service` first. - - Args: - model_id: Optional default model ID baked into the client. May also - be supplied per-call via ``options['model']``. - - Returns: - A new :class:`ResponsesClient`. - - Raises: - FoundryLocalException: If the web service has not been started. - """ - if not self.urls: - raise FoundryLocalException( - "Web service is not running. Call start_web_service() first." - ) - return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index f76ad1b03..f723e514a 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -10,7 +10,6 @@ from .openai.chat_client import ChatClient from .openai.audio_client import AudioClient from .openai.embedding_client import EmbeddingClient -from .openai.responses_client import ResponsesClient from .detail.model_data_types import ModelInfo class IModel(ABC): @@ -137,21 +136,6 @@ def get_embedding_client(self) -> 'EmbeddingClient': """ pass - @abstractmethod - def get_responses_client(self, base_url: str) -> 'ResponsesClient': - """ - Get an OpenAI Responses API client bound to the running web service. - - Unlike the other clients, the Responses API is HTTP-only and requires - the Foundry Local web service to be started. Pass the base URL - returned by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``), - or use :meth:`FoundryLocalManager.create_responses_client` directly. - - :param base_url: Base URL of the running Foundry Local web service. - :return: ResponsesClient instance for this variant's model id. - """ - pass - @property @abstractmethod def variants(self) -> List['IModel']: diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index ea97ca575..2fa51a6f6 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API.""" +"""OpenAI-compatible clients for chat completions and audio transcription.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,7 +14,6 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,7 +24,5 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesClient", - "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py deleted file mode 100644 index ddcce586c..000000000 --- a/sdk/python/src/openai/responses_client.py +++ /dev/null @@ -1,409 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""OpenAI Responses API client — HTTP-only against the Foundry Local web service. - -Unlike ``ChatClient`` / ``AudioClient`` which go through the native Core via FFI, -the Responses API is served exclusively by the embedded web service. The client -therefore uses ``requests`` for non-streaming calls and parses Server-Sent Events -inline for streaming. - -Usage ------ -:: - - manager.start_web_service() - client = manager.create_responses_client("phi-4-mini") - - # Non-streaming - resp = client.create("What is 2+2?") - print(resp.output_text) - - # Streaming - for event in client.create_streaming("Tell me a story"): - if event.type == "response.output_text.delta": - print(event.delta, end="", flush=True) -""" - -from __future__ import annotations - -import json -import logging -from dataclasses import is_dataclass -from typing import Any, Dict, Generator, List, Optional, Union -from urllib.parse import quote - -import requests - -from ..exception import FoundryLocalException -from .responses_types import ( - DeleteResponseResult, - InputItemsListResponse, - ListResponsesResult, - ReasoningConfig, - ResponseObject, - StreamingEvent, - TextConfig, - _parse_delete_result, - _parse_input_items_list, - _parse_list_responses, - _parse_response_object, - _to_dict, - parse_streaming_event, -) - -logger = logging.getLogger(__name__) - -# Align with the JS SDK limit to avoid surprising client-side rejections of valid IDs. -_MAX_ID_LEN = 1024 - - -class ResponsesClientSettings: - """Tunable settings applied to every Responses API request. - - Field names follow the OpenAI snake_case convention; serialization omits - any ``None`` values so the server applies its own defaults. - """ - - def __init__(self) -> None: - self.instructions: Optional[str] = None - self.temperature: Optional[float] = None - self.top_p: Optional[float] = None - self.max_output_tokens: Optional[int] = None - self.frequency_penalty: Optional[float] = None - self.presence_penalty: Optional[float] = None - self.tool_choice: Optional[Any] = None - self.truncation: Optional[str] = None - self.parallel_tool_calls: Optional[bool] = None - self.store: Optional[bool] = None # Omitted by default; server applies its own default. 
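-        # Example: ``client.settings.store = True`` becomes the default for every
-        # request, while a per-call ``client.create(..., store=False)`` still wins;
-        # _build_request merges per-call options over these settings.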
- self.metadata: Optional[Dict[str, str]] = None - self.reasoning: Optional[ReasoningConfig] = None - self.text: Optional[TextConfig] = None - self.seed: Optional[int] = None - # Transport settings — not sent to the API. - self.timeout: float = 60.0 - """Seconds to wait for the server to connect and respond on non-streaming calls. - For streaming, this is used only as the connection timeout; reads are unbounded.""" - - def _serialize(self) -> Dict[str, Any]: - raw: Dict[str, Any] = { - "instructions": self.instructions, - "temperature": self.temperature, - "top_p": self.top_p, - "max_output_tokens": self.max_output_tokens, - "frequency_penalty": self.frequency_penalty, - "presence_penalty": self.presence_penalty, - "tool_choice": _to_dict(self.tool_choice) if is_dataclass(self.tool_choice) else self.tool_choice, - "truncation": self.truncation, - "parallel_tool_calls": self.parallel_tool_calls, - "store": self.store, - "metadata": self.metadata, - "reasoning": _to_dict(self.reasoning) if self.reasoning is not None else None, - "text": _to_dict(self.text) if self.text is not None else None, - "seed": self.seed, - } - return {k: v for k, v in raw.items() if v is not None} - - -class ResponsesClient: - """Client for the OpenAI Responses API served by Foundry Local. - - Construct via ``manager.create_responses_client(model_id)`` or - ``model.get_responses_client(base_url)``. - """ - - def __init__(self, base_url: str, model_id: Optional[str] = None): - if not isinstance(base_url, str) or not base_url.strip(): - raise ValueError("base_url must be a non-empty string.") - self._base_url = base_url.rstrip("/") - self._model_id = model_id - self.settings = ResponsesClientSettings() - - # ------------------------------------------------------------------ public - - def create( - self, - input: Union[str, List[Any]], - **options: Any, - ) -> ResponseObject: - """Create a response (non-streaming).""" - body = self._build_request(input, options, stream=False) - raw = self._post_json("/v1/responses", body) - return _parse_response_object(raw) - - def create_streaming( - self, - input: Union[str, List[Any]], - **options: Any, - ) -> Generator[StreamingEvent, None, None]: - """Create a response with SSE streaming. - - Returns a generator yielding :class:`StreamingEvent` objects. The HTTP - connection is closed automatically when the generator is exhausted or - garbage-collected. 
- """ - body = self._build_request(input, options, stream=True) - return self._post_stream("/v1/responses", body) - - def get(self, response_id: str) -> ResponseObject: - self._validate_id(response_id, "response_id") - raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}") - return _parse_response_object(raw) - - def delete(self, response_id: str) -> DeleteResponseResult: - self._validate_id(response_id, "response_id") - raw = self._request_json("DELETE", f"/v1/responses/{quote(response_id, safe='')}") - return _parse_delete_result(raw) - - def cancel(self, response_id: str) -> ResponseObject: - self._validate_id(response_id, "response_id") - raw = self._request_json("POST", f"/v1/responses/{quote(response_id, safe='')}/cancel") - return _parse_response_object(raw) - - def get_input_items(self, response_id: str) -> InputItemsListResponse: - self._validate_id(response_id, "response_id") - raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}/input_items") - return _parse_input_items_list(raw) - - def list(self) -> ListResponsesResult: - raw = self._request_json("GET", "/v1/responses") - return _parse_list_responses(raw) - - # ---------------------------------------------------------------- internal - - def _build_request( - self, - input: Union[str, List[Any]], - options: Dict[str, Any], - stream: bool, - ) -> Dict[str, Any]: - self._validate_input(input) - if options.get("tools") is not None: - self._validate_tools(options["tools"]) - - model = options.pop("model", None) or self._model_id - if not isinstance(model, str) or not model.strip(): - raise ValueError( - "Model must be specified via create_responses_client(model_id) or options['model']." - ) - - # Normalize input: convert dataclasses to dicts for the wire format. - if isinstance(input, list): - wire_input = [_to_dict(i) if is_dataclass(i) else i for i in input] - else: - wire_input = input - - # Normalize other dataclass-shaped options (tools, reasoning, etc.). 
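-        # For example, reasoning=ReasoningConfig(effort="low") serializes to
-        # {"effort": "low"}, and FunctionToolDefinition(name="f") inside a tools
-        # list serializes to {"type": "function", "name": "f"} (None fields omitted).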
- normalized_options: Dict[str, Any] = {} - for key, value in options.items(): - if value is None: - continue - if is_dataclass(value): - normalized_options[key] = _to_dict(value) - elif isinstance(value, list): - normalized_options[key] = [_to_dict(v) if is_dataclass(v) else v for v in value] - else: - normalized_options[key] = value - - body: Dict[str, Any] = {"model": model, "input": wire_input} - # Merge order: model+input → settings defaults → per-call overrides - body.update(self.settings._serialize()) - body.update(normalized_options) - if stream: - body["stream"] = True - return body - - @staticmethod - def _validate_input(input: Any) -> None: - if input is None: - raise ValueError("Input cannot be None.") - if isinstance(input, str): - if not input.strip(): - raise ValueError("Input string cannot be empty.") - return - if isinstance(input, list): - if len(input) == 0: - raise ValueError("Input items list cannot be empty.") - for i, item in enumerate(input): - if is_dataclass(item): - t = getattr(item, "type", None) - elif isinstance(item, dict): - t = item.get("type") - else: - raise ValueError(f"input[{i}] must be a dict or dataclass.") - if not isinstance(t, str) or not t.strip(): - raise ValueError(f"input[{i}] must have a non-empty 'type' field.") - return - raise ValueError("Input must be a string or a list of input items.") - - @staticmethod - def _validate_tools(tools: Any) -> None: - if not isinstance(tools, list): - raise ValueError("tools must be a list if provided.") - for i, tool in enumerate(tools): - if is_dataclass(tool): - t = getattr(tool, "type", None) - name = getattr(tool, "name", None) - elif isinstance(tool, dict): - t = tool.get("type") - name = tool.get("name") - else: - raise ValueError(f"tools[{i}] must be a dict or FunctionToolDefinition.") - if t != "function": - raise ValueError(f"tools[{i}] must have type 'function'.") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"tools[{i}] must have a non-empty 'name'.") - - @staticmethod - def _validate_id(value: str, param: str) -> None: - if not isinstance(value, str) or not value.strip(): - raise ValueError(f"{param} must be a non-empty string.") - if len(value) > _MAX_ID_LEN: - raise ValueError(f"{param} exceeds maximum length ({_MAX_ID_LEN}).") - - # ----- HTTP plumbing ----- - - def _url(self, path: str) -> str: - return f"{self._base_url}{path}" - - def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - timeout = self.settings.timeout - try: - if body is not None: - resp = requests.request( - method, - self._url(path), - headers={"Content-Type": "application/json", "Accept": "application/json"}, - data=json.dumps(body), - timeout=timeout, - ) - else: - resp = requests.request( - method, - self._url(path), - headers={"Accept": "application/json"}, - timeout=timeout, - ) - except requests.RequestException as e: - raise FoundryLocalException(f"Network error calling {method} {path}: {e}") from e - - return self._handle_json_response(resp, method, path) - - def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: - return self._request_json("POST", path, body) - - @staticmethod - def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: - text = resp.text - if not resp.ok: - raise FoundryLocalException( - f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}" - ) - try: - return json.loads(text) if text else {} - except json.JSONDecodeError as e: - raise 
FoundryLocalException( - f"Failed to parse response JSON from {method} {path}: {text[:200]}" - ) from e - - def _post_stream( - self, path: str, body: Dict[str, Any] - ) -> Generator[StreamingEvent, None, None]: - # Use (connect_timeout, None) so the connection attempt can time out but - # the read side is unbounded — streaming responses can be arbitrarily long. - connect_timeout = self.settings.timeout - try: - resp = requests.post( - self._url(path), - headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, - data=json.dumps(body), - stream=True, - timeout=(connect_timeout, None), - ) - except requests.RequestException as e: - raise FoundryLocalException(f"Network error calling POST {path}: {e}") from e - - if not resp.ok: - body_text = resp.text - resp.close() - raise FoundryLocalException( - f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}" - ) - - return _iter_sse_events(resp) - - -def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, None]: - """Parse an SSE response into a stream of :class:`StreamingEvent` objects. - - Closes the underlying HTTP connection when the generator ends for any - reason (completion, [DONE], exception, or GC). - - Uses a single string buffer and splits on double-newline boundaries to - avoid the O(n) cost of joining a growing list on every chunk. - """ - try: - buffer = "" - for chunk in resp.iter_content(chunk_size=None, decode_unicode=False): - if not chunk: - continue - text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk - buffer += text.replace("\r\n", "\n") - - while "\n\n" in buffer: - block, buffer = buffer.split("\n\n", 1) - event = _parse_sse_block(block) - if event is _SSE_DONE: - return - if event is not None: - yield event - - # Flush any residual block not terminated by a blank line. - tail = buffer.strip() - if tail: - event = _parse_sse_block(tail) - if event is not None and event is not _SSE_DONE: - yield event - finally: - resp.close() - - -_SSE_DONE = object() # sentinel returned for the `data: [DONE]` terminator - - -def _parse_sse_block(block: str) -> Any: - """Parse a single SSE block (already stripped of its trailing blank line).""" - trimmed = block.strip() - if not trimmed: - return None - if trimmed == "data: [DONE]": - return _SSE_DONE - - data_lines: List[str] = [] - for line in trimmed.split("\n"): - if line.startswith("data: "): - data_lines.append(line[6:]) - elif line == "data:": - data_lines.append("") - # `event:`, `id:`, `retry:` fields are ignored — the type lives in the JSON payload. - - if not data_lines: - return None - - data = "\n".join(data_lines) - if data == "[DONE]": - return _SSE_DONE - try: - parsed = json.loads(data) - except json.JSONDecodeError as e: - raise FoundryLocalException(f"Failed to parse streaming event JSON: {e}") from e - if not isinstance(parsed, dict): - return None - return parse_streaming_event(parsed) - - -__all__ = [ - "ResponsesClient", - "ResponsesClientSettings", -] diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py deleted file mode 100644 index ad1266a44..000000000 --- a/sdk/python/src/openai/responses_types.py +++ /dev/null @@ -1,957 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -"""Types for the OpenAI Responses API served by Foundry Local. - -These mirror the DTOs defined by neutron-server in -``src/FoundryLocalCore/Core/Responses/Contracts/``. Dataclasses are used so -callers can construct items with keyword arguments and we can serialize -discriminated unions by the ``type`` field. -""" - -from __future__ import annotations - -import base64 -import io -import mimetypes -from dataclasses import dataclass, field, fields, is_dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Union - - -# --------------------------------------------------------------------------- -# Image resize helper (optional — requires Pillow) -# --------------------------------------------------------------------------- - -def _resize_image(data: bytes, media_type: str, max_size: Tuple[int, int]) -> Tuple[bytes, str]: - """Resize *data* so it fits within *max_size* (width, height) while preserving - aspect ratio. Returns the re-encoded bytes and MIME type. - - Requires ``Pillow`` (``pip install pillow``). Raises ``ImportError`` if it is - not installed. - """ - try: - from PIL import Image # type: ignore[import-untyped] - except ImportError as exc: - raise ImportError( - "Image resizing requires Pillow. Install it with: pip install pillow" - ) from exc - - img = Image.open(io.BytesIO(data)) - img.thumbnail(max_size, Image.LANCZOS) - buf = io.BytesIO() - fmt = media_type.split("/")[-1].upper().replace("JPG", "JPEG") - if fmt not in ("JPEG", "PNG", "WEBP", "GIF"): - fmt = "PNG" - media_type = "image/png" - img.save(buf, format=fmt) - return buf.getvalue(), media_type - - -# --------------------------------------------------------------------------- -# Serialization helpers -# --------------------------------------------------------------------------- - -def _to_dict(obj: Any) -> Any: - """Recursively convert a dataclass (or list/dict of them) to a plain dict, - omitting ``None`` values so the wire format matches the OpenAI spec. - """ - if is_dataclass(obj) and not isinstance(obj, type): - result: Dict[str, Any] = {} - for f in fields(obj): - value = getattr(obj, f.name) - if value is None: - continue - result[f.name] = _to_dict(value) - return result - if isinstance(obj, list): - return [_to_dict(v) for v in obj] - if isinstance(obj, dict): - return {k: _to_dict(v) for k, v in obj.items() if v is not None} - return obj - - -# --------------------------------------------------------------------------- -# Content Parts -# --------------------------------------------------------------------------- - -@dataclass -class InputTextContent: - text: str = "" - type: Literal["input_text"] = "input_text" - - -@dataclass -class InputImageContent: - """Vision input. Provide exactly one of ``image_url`` or ``image_data`` (base64).""" - media_type: str = "" - image_url: Optional[str] = None - image_data: Optional[str] = None - detail: Optional[str] = None # "low" | "high" | "auto" - type: Literal["input_image"] = "input_image" - - def __post_init__(self) -> None: - has_url = self.image_url is not None - has_data = self.image_data is not None - if has_url == has_data: - raise ValueError( - "Provide exactly one of image_url or image_data, not both (or neither)." - ) - - @staticmethod - def from_file( - path: str, - detail: Optional[str] = None, - max_size: Optional[Tuple[int, int]] = None, - ) -> "InputImageContent": - """Load an image from *path*, base64-encode it, and return an :class:`InputImageContent`. 
- - Args: - path: Filesystem path to the image file. - detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. - max_size: Optional ``(width, height)`` cap. If the image exceeds either - dimension it is resized proportionally (requires ``Pillow``). - """ - media_type, _ = mimetypes.guess_type(path) - if not media_type or not media_type.startswith("image/"): - raise ValueError(f"Unsupported image format: {path}") - with open(path, "rb") as fh: - raw = fh.read() - if max_size is not None: - raw, media_type = _resize_image(raw, media_type, max_size) - return InputImageContent( - image_data=base64.b64encode(raw).decode("ascii"), - media_type=media_type, - detail=detail, - ) - - @staticmethod - def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": - return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) - - @staticmethod - def from_bytes( - data: bytes, - media_type: str, - detail: Optional[str] = None, - max_size: Optional[Tuple[int, int]] = None, - ) -> "InputImageContent": - """Create an :class:`InputImageContent` from raw *data* bytes. - - Args: - data: Raw image bytes. - media_type: MIME type, e.g. ``"image/png"``. - detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. - max_size: Optional ``(width, height)`` cap. Requires ``Pillow``. - """ - if max_size is not None: - data, media_type = _resize_image(data, media_type, max_size) - return InputImageContent( - image_data=base64.b64encode(data).decode("ascii"), - media_type=media_type, - detail=detail, - ) - - -@dataclass -class InputFileContent: - filename: str = "" - file_url: str = "" - type: Literal["input_file"] = "input_file" - - -@dataclass -class OutputTextContent: - text: str = "" - annotations: Optional[List[Any]] = None - logprobs: Optional[List[Any]] = None - type: Literal["output_text"] = "output_text" - - -@dataclass -class RefusalContent: - refusal: str = "" - type: Literal["refusal"] = "refusal" - - -ContentPart = Union[ - InputTextContent, InputImageContent, InputFileContent, OutputTextContent, RefusalContent -] - - -def _parse_content_part(data: Dict[str, Any]) -> Optional[ContentPart]: - t = data.get("type") - if t == "input_text": - return InputTextContent(text=data.get("text", "")) - if t == "input_image": - return InputImageContent( - media_type=data.get("media_type", ""), - image_url=data.get("image_url"), - image_data=data.get("image_data"), - detail=data.get("detail"), - ) - if t == "input_file": - return InputFileContent(filename=data.get("filename", ""), file_url=data.get("file_url", "")) - if t == "output_text": - return OutputTextContent( - text=data.get("text", ""), - annotations=data.get("annotations"), - logprobs=data.get("logprobs"), - ) - if t == "refusal": - return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — return None so callers can filter forward-compat parts. 
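-    # (e.g. a hypothetical future {"type": "input_audio", ...} part is dropped
-    # here instead of raising, so older SDKs tolerate newer servers)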
- return None - - -def _parse_content(value: Any) -> Union[str, List[ContentPart]]: - if isinstance(value, str): - return value - if isinstance(value, list): - parts = [_parse_content_part(p) if isinstance(p, dict) else p for p in value] - return [p for p in parts if p is not None] - return value - - -# --------------------------------------------------------------------------- -# Response Items (input + output) -# --------------------------------------------------------------------------- - -@dataclass -class MessageItem: - role: str = "" - content: Union[str, List[ContentPart]] = "" - id: Optional[str] = None - status: Optional[str] = None - type: Literal["message"] = "message" - - -@dataclass -class FunctionCallItem: - call_id: str = "" - name: str = "" - arguments: str = "" - id: Optional[str] = None - status: Optional[str] = None - type: Literal["function_call"] = "function_call" - - -@dataclass -class FunctionCallOutputItem: - call_id: str = "" - output: Union[str, List[ContentPart]] = "" - id: Optional[str] = None - type: Literal["function_call_output"] = "function_call_output" - - -@dataclass -class ItemReference: - id: str = "" - type: Literal["item_reference"] = "item_reference" - - -@dataclass -class ReasoningItem: - id: Optional[str] = None - content: Optional[List[ContentPart]] = None - encrypted_content: Optional[str] = None - summary: Optional[str] = None - status: Optional[str] = None - type: Literal["reasoning"] = "reasoning" - - -ResponseInputItem = Union[ - MessageItem, FunctionCallItem, FunctionCallOutputItem, ItemReference, ReasoningItem -] -ResponseOutputItem = Union[MessageItem, FunctionCallItem, ReasoningItem] - - -def _parse_response_item(data: Dict[str, Any]) -> Any: - t = data.get("type") - if t == "message": - return MessageItem( - role=data.get("role", ""), - content=_parse_content(data.get("content", "")), - id=data.get("id"), - status=data.get("status"), - ) - if t == "function_call": - return FunctionCallItem( - call_id=data.get("call_id", ""), - name=data.get("name", ""), - arguments=data.get("arguments", ""), - id=data.get("id"), - status=data.get("status"), - ) - if t == "function_call_output": - return FunctionCallOutputItem( - call_id=data.get("call_id", ""), - output=_parse_content(data.get("output", "")), - id=data.get("id"), - ) - if t == "item_reference": - return ItemReference(id=data.get("id", "")) - if t == "reasoning": - content_raw = data.get("content") - return ReasoningItem( - id=data.get("id"), - content=[_parse_content_part(p) for p in content_raw] if isinstance(content_raw, list) else None, - encrypted_content=data.get("encrypted_content"), - summary=data.get("summary"), - status=data.get("status"), - ) - # Unknown item type — return the raw dict so callers can inspect - return data - - -# --------------------------------------------------------------------------- -# Tool Definitions & Config -# --------------------------------------------------------------------------- - -@dataclass -class FunctionToolDefinition: - name: str = "" - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None - strict: Optional[bool] = None - type: Literal["function"] = "function" - - -@dataclass -class FunctionToolChoice: - name: str = "" - type: Literal["function"] = "function" - - -ToolChoice = Union[str, FunctionToolChoice] # "none" | "auto" | "required" | {type,name} - - -@dataclass -class TextFormat: - type: str = "text" # "text" | "json_object" | "json_schema" | "lark_grammar" | "regex" - name: Optional[str] = None - description: 
Optional[str] = None - schema: Optional[Dict[str, Any]] = None - strict: Optional[bool] = None - - -@dataclass -class TextConfig: - format: Optional[TextFormat] = None - - -@dataclass -class ReasoningConfig: - effort: Optional[str] = None - summary: Optional[str] = None - - -# --------------------------------------------------------------------------- -# Response Object -# --------------------------------------------------------------------------- - -@dataclass -class ResponseUsage: - input_tokens: int = 0 - output_tokens: int = 0 - total_tokens: int = 0 - input_tokens_details: Optional[Dict[str, Any]] = None - output_tokens_details: Optional[Dict[str, Any]] = None - - -@dataclass -class ResponseError: - code: str = "" - message: str = "" - - -@dataclass -class IncompleteDetails: - reason: str = "" - - -@dataclass -class ResponseObject: - id: str = "" - object: str = "response" - created_at: int = 0 - status: str = "" - model: str = "" - output: List[Any] = field(default_factory=list) - completed_at: Optional[int] = None - failed_at: Optional[int] = None - cancelled_at: Optional[int] = None - error: Optional[ResponseError] = None - usage: Optional[ResponseUsage] = None - instructions: Optional[str] = None - previous_response_id: Optional[str] = None - tools: Optional[List[FunctionToolDefinition]] = None - tool_choice: Optional[Any] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - max_output_tokens: Optional[int] = None - frequency_penalty: Optional[float] = None - presence_penalty: Optional[float] = None - seed: Optional[int] = None - truncation: Optional[str] = None - parallel_tool_calls: Optional[bool] = None - store: Optional[bool] = None - metadata: Optional[Dict[str, str]] = None - reasoning: Optional[ReasoningConfig] = None - text: Optional[TextConfig] = None - user: Optional[str] = None - incomplete_details: Optional[IncompleteDetails] = None - # Retain anything the server returned that we don't model explicitly. 
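-    # Example: resp._raw.get("brand_new_field") still works if the server
-    # starts returning fields this dataclass does not model.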
- _raw: Optional[Dict[str, Any]] = None - - @property - def output_text(self) -> str: - """Concatenated text from the first assistant ``message`` item in ``output``.""" - for item in self.output: - if isinstance(item, MessageItem) and item.role == "assistant": - content = item.content - if isinstance(content, str): - return content - if isinstance(content, list): - parts: List[str] = [] - for p in content: - text = getattr(p, "text", None) - if isinstance(text, str): - parts.append(text) - return "".join(parts) - return "" - - -def _parse_response_object(data: Dict[str, Any]) -> ResponseObject: - output = data.get("output") or [] - parsed_output = [_parse_response_item(i) if isinstance(i, dict) else i for i in output] - - tools_raw = data.get("tools") - tools = None - if isinstance(tools_raw, list): - tools = [ - FunctionToolDefinition( - name=t.get("name", ""), - description=t.get("description"), - parameters=t.get("parameters"), - strict=t.get("strict"), - ) if isinstance(t, dict) else t - for t in tools_raw - ] - - usage = None - if isinstance(data.get("usage"), dict): - u = data["usage"] - usage = ResponseUsage( - input_tokens=u.get("input_tokens", 0), - output_tokens=u.get("output_tokens", 0), - total_tokens=u.get("total_tokens", 0), - input_tokens_details=u.get("input_tokens_details"), - output_tokens_details=u.get("output_tokens_details"), - ) - - error = None - if isinstance(data.get("error"), dict): - error = ResponseError(code=data["error"].get("code", ""), message=data["error"].get("message", "")) - - incomplete = None - if isinstance(data.get("incomplete_details"), dict): - incomplete = IncompleteDetails(reason=data["incomplete_details"].get("reason", "")) - - reasoning = None - if isinstance(data.get("reasoning"), dict): - reasoning = ReasoningConfig( - effort=data["reasoning"].get("effort"), - summary=data["reasoning"].get("summary"), - ) - - text = None - if isinstance(data.get("text"), dict): - fmt_raw = data["text"].get("format") - fmt = None - if isinstance(fmt_raw, dict): - fmt = TextFormat( - type=fmt_raw.get("type", "text"), - name=fmt_raw.get("name"), - description=fmt_raw.get("description"), - schema=fmt_raw.get("schema"), - strict=fmt_raw.get("strict"), - ) - text = TextConfig(format=fmt) - - return ResponseObject( - id=data.get("id", ""), - object=data.get("object", "response"), - created_at=data.get("created_at", 0), - status=data.get("status", ""), - model=data.get("model", ""), - output=parsed_output, - completed_at=data.get("completed_at"), - failed_at=data.get("failed_at"), - cancelled_at=data.get("cancelled_at"), - error=error, - usage=usage, - instructions=data.get("instructions"), - previous_response_id=data.get("previous_response_id"), - tools=tools, - tool_choice=data.get("tool_choice"), - temperature=data.get("temperature"), - top_p=data.get("top_p"), - max_output_tokens=data.get("max_output_tokens"), - frequency_penalty=data.get("frequency_penalty"), - presence_penalty=data.get("presence_penalty"), - seed=data.get("seed"), - truncation=data.get("truncation"), - parallel_tool_calls=data.get("parallel_tool_calls"), - store=data.get("store"), - metadata=data.get("metadata"), - reasoning=reasoning, - text=text, - user=data.get("user"), - incomplete_details=incomplete, - _raw=data, - ) - - -# --------------------------------------------------------------------------- -# Delete / List helpers -# --------------------------------------------------------------------------- - -@dataclass -class DeleteResponseResult: - id: str = "" - object: str = "" - deleted: 
bool = False - - -@dataclass -class InputItemsListResponse: - object: str = "list" - data: List[Any] = field(default_factory=list) - - -@dataclass -class ListResponsesResult: - object: str = "list" - data: List[ResponseObject] = field(default_factory=list) - - -# --------------------------------------------------------------------------- -# Streaming Events -# --------------------------------------------------------------------------- - -@dataclass -class ResponseLifecycleEvent: - """`response.created` / `queued` / `in_progress` / `completed` / `failed` / `incomplete`.""" - type: str = "" - response: Optional[ResponseObject] = None - sequence_number: int = 0 - - -@dataclass -class OutputItemAddedEvent: - item_id: str = "" - output_index: int = 0 - item: Any = None - sequence_number: int = 0 - type: Literal["response.output_item.added"] = "response.output_item.added" - - -@dataclass -class OutputItemDoneEvent: - item_id: str = "" - output_index: int = 0 - item: Any = None - sequence_number: int = 0 - type: Literal["response.output_item.done"] = "response.output_item.done" - - -@dataclass -class ContentPartAddedEvent: - item_id: str = "" - content_index: int = 0 - part: Any = None - sequence_number: int = 0 - type: Literal["response.content_part.added"] = "response.content_part.added" - - -@dataclass -class ContentPartDoneEvent: - item_id: str = "" - content_index: int = 0 - part: Any = None - sequence_number: int = 0 - type: Literal["response.content_part.done"] = "response.content_part.done" - - -@dataclass -class OutputTextDeltaEvent: - item_id: str = "" - output_index: int = 0 - content_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.output_text.delta"] = "response.output_text.delta" - - -@dataclass -class OutputTextDoneEvent: - item_id: str = "" - output_index: int = 0 - content_index: int = 0 - text: str = "" - sequence_number: int = 0 - type: Literal["response.output_text.done"] = "response.output_text.done" - - -@dataclass -class OutputTextAnnotationAddedEvent: - item_id: str = "" - annotation: Any = None - sequence_number: int = 0 - type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added" - - -@dataclass -class RefusalDeltaEvent: - item_id: str = "" - content_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.refusal.delta"] = "response.refusal.delta" - - -@dataclass -class RefusalDoneEvent: - item_id: str = "" - content_index: int = 0 - refusal: str = "" - sequence_number: int = 0 - type: Literal["response.refusal.done"] = "response.refusal.done" - - -@dataclass -class FunctionCallArgsDeltaEvent: - item_id: str = "" - output_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" - - -@dataclass -class FunctionCallArgsDoneEvent: - item_id: str = "" - output_index: int = 0 - arguments: str = "" - name: str = "" - sequence_number: int = 0 - type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" - - -@dataclass -class ReasoningSummaryPartAddedEvent: - item_id: str = "" - part: Any = None - sequence_number: int = 0 - type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" - - -@dataclass -class ReasoningSummaryPartDoneEvent: - item_id: str = "" - part: Any = None - sequence_number: int = 0 - type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" - - 
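-# A typical consumer switches on ``event.type``. Illustrative sketch (client
-# construction elided; event types as defined in this module):
-#
-#     for event in client.create_streaming("Why is the sky blue?"):
-#         if event.type == "response.reasoning.delta":
-#             print(event.delta, end="")              # streamed reasoning text
-#         elif event.type == "response.output_text.delta":
-#             print(event.delta, end="", flush=True)  # streamed answer text
-#         elif event.type == "error":
-#             raise RuntimeError(event.message)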
-@dataclass -class ReasoningDeltaEvent: - item_id: str = "" - delta: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning.delta"] = "response.reasoning.delta" - - -@dataclass -class ReasoningDoneEvent: - item_id: str = "" - text: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning.done"] = "response.reasoning.done" - - -@dataclass -class ReasoningSummaryTextDeltaEvent: - item_id: str = "" - delta: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" - - -@dataclass -class ReasoningSummaryTextDoneEvent: - item_id: str = "" - text: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" - - -@dataclass -class StreamingErrorEvent: - code: Optional[str] = None - message: Optional[str] = None - param: Optional[str] = None - sequence_number: int = 0 - type: Literal["error"] = "error" - - -@dataclass -class UnknownStreamingEvent: - """Fallback for event types that aren't yet modeled.""" - type: str = "" - sequence_number: int = 0 - data: Optional[Dict[str, Any]] = None - - -StreamingEvent = Union[ - ResponseLifecycleEvent, - OutputItemAddedEvent, - OutputItemDoneEvent, - ContentPartAddedEvent, - ContentPartDoneEvent, - OutputTextDeltaEvent, - OutputTextDoneEvent, - OutputTextAnnotationAddedEvent, - RefusalDeltaEvent, - RefusalDoneEvent, - FunctionCallArgsDeltaEvent, - FunctionCallArgsDoneEvent, - ReasoningSummaryPartAddedEvent, - ReasoningSummaryPartDoneEvent, - ReasoningDeltaEvent, - ReasoningDoneEvent, - ReasoningSummaryTextDeltaEvent, - ReasoningSummaryTextDoneEvent, - StreamingErrorEvent, - UnknownStreamingEvent, -] - - -_LIFECYCLE_TYPES = { - "response.created", - "response.queued", - "response.in_progress", - "response.completed", - "response.failed", - "response.incomplete", -} - - -def parse_streaming_event(data: Dict[str, Any]) -> StreamingEvent: - """Build a typed streaming-event dataclass from a server-sent JSON payload.""" - t = data.get("type", "") - seq = data.get("sequence_number", 0) - - if t in _LIFECYCLE_TYPES: - resp_raw = data.get("response") - resp = _parse_response_object(resp_raw) if isinstance(resp_raw, dict) else None - return ResponseLifecycleEvent(type=t, response=resp, sequence_number=seq) - - if t == "response.output_item.added": - item = data.get("item") - return OutputItemAddedEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - item=_parse_response_item(item) if isinstance(item, dict) else item, - sequence_number=seq, - ) - if t == "response.output_item.done": - item = data.get("item") - return OutputItemDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - item=_parse_response_item(item) if isinstance(item, dict) else item, - sequence_number=seq, - ) - if t == "response.content_part.added": - part = data.get("part") - return ContentPartAddedEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - part=_parse_content_part(part) if isinstance(part, dict) else part, - sequence_number=seq, - ) - if t == "response.content_part.done": - part = data.get("part") - return ContentPartDoneEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - part=_parse_content_part(part) if isinstance(part, dict) else part, - sequence_number=seq, - ) - if t == "response.output_text.delta": - return OutputTextDeltaEvent( - item_id=data.get("item_id", ""), - 
output_index=data.get("output_index", 0), - content_index=data.get("content_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.output_text.done": - return OutputTextDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - content_index=data.get("content_index", 0), - text=data.get("text", ""), - sequence_number=seq, - ) - if t == "response.output_text.annotation.added": - return OutputTextAnnotationAddedEvent( - item_id=data.get("item_id", ""), - annotation=data.get("annotation"), - sequence_number=seq, - ) - if t == "response.refusal.delta": - return RefusalDeltaEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.refusal.done": - return RefusalDoneEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - refusal=data.get("refusal", ""), - sequence_number=seq, - ) - if t == "response.function_call_arguments.delta": - return FunctionCallArgsDeltaEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.function_call_arguments.done": - return FunctionCallArgsDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - arguments=data.get("arguments", ""), - name=data.get("name", ""), - sequence_number=seq, - ) - if t == "response.reasoning_summary_part.added": - return ReasoningSummaryPartAddedEvent( - item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq - ) - if t == "response.reasoning_summary_part.done": - return ReasoningSummaryPartDoneEvent( - item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq - ) - if t == "response.reasoning.delta": - return ReasoningDeltaEvent( - item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq - ) - if t == "response.reasoning.done": - return ReasoningDoneEvent( - item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq - ) - if t == "response.reasoning_summary_text.delta": - return ReasoningSummaryTextDeltaEvent( - item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq - ) - if t == "response.reasoning_summary_text.done": - return ReasoningSummaryTextDoneEvent( - item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq - ) - if t == "error": - return StreamingErrorEvent( - code=data.get("code"), - message=data.get("message"), - param=data.get("param"), - sequence_number=seq, - ) - - return UnknownStreamingEvent(type=t, sequence_number=seq, data=data) - - -def _parse_delete_result(data: Dict[str, Any]) -> DeleteResponseResult: - return DeleteResponseResult( - id=data.get("id", ""), - object=data.get("object", ""), - deleted=bool(data.get("deleted", False)), - ) - - -def _parse_input_items_list(data: Dict[str, Any]) -> InputItemsListResponse: - raw = data.get("data") or [] - return InputItemsListResponse( - object=data.get("object", "list"), - data=[_parse_response_item(i) if isinstance(i, dict) else i for i in raw], - ) - - -def _parse_list_responses(data: Dict[str, Any]) -> ListResponsesResult: - raw = data.get("data") or [] - return ListResponsesResult( - object=data.get("object", "list"), - data=[_parse_response_object(r) if isinstance(r, dict) else r for r in raw], - ) - - -__all__ = [ - # Content parts - "InputTextContent", - "InputImageContent", - 
"InputFileContent", - "OutputTextContent", - "RefusalContent", - "ContentPart", - # Items - "MessageItem", - "FunctionCallItem", - "FunctionCallOutputItem", - "ItemReference", - "ReasoningItem", - "ResponseInputItem", - "ResponseOutputItem", - # Tools & config - "FunctionToolDefinition", - "FunctionToolChoice", - "ToolChoice", - "TextFormat", - "TextConfig", - "ReasoningConfig", - # Response - "ResponseObject", - "ResponseUsage", - "ResponseError", - "IncompleteDetails", - "DeleteResponseResult", - "InputItemsListResponse", - "ListResponsesResult", - # Streaming events - "StreamingEvent", - "ResponseLifecycleEvent", - "OutputItemAddedEvent", - "OutputItemDoneEvent", - "ContentPartAddedEvent", - "ContentPartDoneEvent", - "OutputTextDeltaEvent", - "OutputTextDoneEvent", - "OutputTextAnnotationAddedEvent", - "RefusalDeltaEvent", - "RefusalDoneEvent", - "FunctionCallArgsDeltaEvent", - "FunctionCallArgsDoneEvent", - "ReasoningSummaryPartAddedEvent", - "ReasoningSummaryPartDoneEvent", - "ReasoningDeltaEvent", - "ReasoningDoneEvent", - "ReasoningSummaryTextDeltaEvent", - "ReasoningSummaryTextDoneEvent", - "StreamingErrorEvent", - "UnknownStreamingEvent", - "parse_streaming_event", -] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py deleted file mode 100644 index 871fe301c..000000000 --- a/sdk/python/test/openai/test_responses_client.py +++ /dev/null @@ -1,612 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""Unit tests for the Responses API client (no live server required). - -Mirrors the scenarios covered by the JS SDK's ``responsesClient.test.ts`` and -the Python spec's §5. HTTP calls are intercepted via :mod:`unittest.mock`. -""" - -from __future__ import annotations - -import base64 -import io -import json -from typing import Any, Dict, List -from unittest.mock import MagicMock, patch - -import pytest - -from foundry_local_sdk.exception import FoundryLocalException -from foundry_local_sdk.openai.responses_client import ( - ResponsesClient, - ResponsesClientSettings, - _parse_sse_block, - _iter_sse_events, - _SSE_DONE, -) -from foundry_local_sdk.openai.responses_types import ( - FunctionCallItem, - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, - OutputTextContent, - ReasoningConfig, - ResponseObject, - TextConfig, - TextFormat, - _to_dict, - parse_streaming_event, - OutputTextDeltaEvent, - ResponseLifecycleEvent, - StreamingErrorEvent, - UnknownStreamingEvent, -) - -BASE_URL = "http://127.0.0.1:5273" -MODEL_ID = "test-model" - - -def _fake_json_response(payload: Dict[str, Any], status: int = 200): - resp = MagicMock() - resp.ok = 200 <= status < 300 - resp.status_code = status - resp.text = json.dumps(payload) - return resp - - -def _fake_stream_response(sse_payload: str, status: int = 200): - resp = MagicMock() - resp.ok = 200 <= status < 300 - resp.status_code = status - resp.text = sse_payload - # iter_content returns the full payload in one bytes chunk. 
- resp.iter_content = MagicMock(return_value=iter([sse_payload.encode("utf-8")])) - resp.close = MagicMock() - return resp - - -# --------------------------------------------------------------------------- -# Settings -# --------------------------------------------------------------------------- - -class TestResponsesClientSettings: - def test_serialize_defaults_empty(self): - # No fields set by default — server applies its own defaults - s = ResponsesClientSettings() - assert s._serialize() == {} - - def test_store_defaults_to_none(self): - assert ResponsesClientSettings().store is None - - def test_serialize_all_fields(self): - s = ResponsesClientSettings() - s.instructions = "Be concise." - s.temperature = 0.2 - s.top_p = 0.9 - s.max_output_tokens = 256 - s.frequency_penalty = 0.1 - s.presence_penalty = 0.2 - s.tool_choice = "auto" - s.truncation = "auto" - s.parallel_tool_calls = False - s.store = False - s.metadata = {"run": "1"} - s.reasoning = ReasoningConfig(effort="medium") - s.text = TextConfig(format=TextFormat(type="json_object")) - s.seed = 42 - - out = s._serialize() - assert out["instructions"] == "Be concise." - assert out["temperature"] == 0.2 - assert out["top_p"] == 0.9 - assert out["max_output_tokens"] == 256 - assert out["frequency_penalty"] == 0.1 - assert out["presence_penalty"] == 0.2 - assert out["tool_choice"] == "auto" - assert out["truncation"] == "auto" - assert out["parallel_tool_calls"] is False - assert out["store"] is False - assert out["metadata"] == {"run": "1"} - assert out["reasoning"] == {"effort": "medium"} - assert out["text"] == {"format": {"type": "json_object"}} - assert out["seed"] == 42 - - def test_timeout_not_serialized(self): - # timeout is a transport setting and must NOT appear in the API payload - s = ResponsesClientSettings() - s.timeout = 30.0 - assert "timeout" not in s._serialize() - - def test_timeout_default(self): - assert ResponsesClientSettings().timeout == 60.0 - - -# --------------------------------------------------------------------------- -# Input / tool / id validation -# --------------------------------------------------------------------------- - -class TestInputValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_none(self): - with pytest.raises(ValueError, match="None"): - self.client._build_request(None, {}, stream=False) - - def test_rejects_empty_string(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request("", {}, stream=False) - - def test_rejects_whitespace_string(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request(" ", {}, stream=False) - - def test_rejects_empty_array(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request([], {}, stream=False) - - def test_rejects_item_without_type(self): - with pytest.raises(ValueError, match="type"): - self.client._build_request([{"role": "user"}], {}, stream=False) - - def test_accepts_string_input(self): - body = self.client._build_request("Hi", {}, stream=False) - assert body["input"] == "Hi" - assert body["model"] == MODEL_ID - - def test_accepts_dict_input_items(self): - body = self.client._build_request( - [{"type": "message", "role": "user", "content": "hi"}], {}, stream=False - ) - assert isinstance(body["input"], list) - assert body["input"][0]["type"] == "message" - - def test_accepts_dataclass_input_items(self): - item = MessageItem(role="user", content="hello") - body = self.client._build_request([item], {}, 
stream=False) - assert body["input"][0]["type"] == "message" - assert body["input"][0]["role"] == "user" - assert body["input"][0]["content"] == "hello" - - def test_stream_flag_set(self): - body = self.client._build_request("hi", {}, stream=True) - assert body["stream"] is True - - def test_requires_model(self): - c = ResponsesClient(BASE_URL) # no default model - with pytest.raises(ValueError, match="[Mm]odel"): - c._build_request("hi", {}, stream=False) - - def test_options_model_overrides_default(self): - body = self.client._build_request("hi", {"model": "override"}, stream=False) - assert body["model"] == "override" - - -class TestToolValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_non_function_type(self): - with pytest.raises(ValueError, match="function"): - self.client._build_request("hi", {"tools": [{"type": "retrieval", "name": "x"}]}, stream=False) - - def test_rejects_empty_name(self): - with pytest.raises(ValueError, match="name"): - self.client._build_request("hi", {"tools": [{"type": "function", "name": ""}]}, stream=False) - - def test_rejects_non_list(self): - with pytest.raises(ValueError, match="list"): - self.client._build_request("hi", {"tools": "nope"}, stream=False) - - def test_accepts_valid_dict_tool(self): - body = self.client._build_request( - "hi", - {"tools": [{"type": "function", "name": "multiply", "parameters": {}}]}, - stream=False, - ) - assert body["tools"][0]["name"] == "multiply" - - def test_accepts_dataclass_tool(self): - tool = FunctionToolDefinition(name="multiply", description="x*y") - body = self.client._build_request("hi", {"tools": [tool]}, stream=False) - assert body["tools"][0]["type"] == "function" - assert body["tools"][0]["name"] == "multiply" - assert body["tools"][0]["description"] == "x*y" - - -class TestIdValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_empty_id(self): - with pytest.raises(ValueError, match="non-empty"): - self.client.get("") - - def test_rejects_whitespace_id(self): - with pytest.raises(ValueError, match="non-empty"): - self.client.get(" ") - - def test_rejects_too_long_id(self): - with pytest.raises(ValueError, match="length"): - self.client.get("x" * 1025) - - -# --------------------------------------------------------------------------- -# output_text convenience -# --------------------------------------------------------------------------- - -class TestOutputText: - def test_extracts_from_string_content(self): - resp = ResponseObject(output=[MessageItem(role="assistant", content="hello world")]) - assert resp.output_text == "hello world" - - def test_extracts_from_content_parts(self): - resp = ResponseObject(output=[ - MessageItem( - role="assistant", - content=[OutputTextContent(text="foo "), OutputTextContent(text="bar")], - ) - ]) - assert resp.output_text == "foo bar" - - def test_returns_empty_when_no_assistant(self): - resp = ResponseObject(output=[MessageItem(role="user", content="hi")]) - assert resp.output_text == "" - - def test_returns_empty_for_empty_output(self): - assert ResponseObject().output_text == "" - - def test_skips_function_call_items(self): - resp = ResponseObject(output=[ - FunctionCallItem(call_id="c1", name="f", arguments="{}"), - MessageItem(role="assistant", content="done"), - ]) - assert resp.output_text == "done" - - -# --------------------------------------------------------------------------- -# SSE parsing -# 
--------------------------------------------------------------------------- - -class TestSSEParsing: - def test_parses_complete_event(self): - block = 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hi","sequence_number":3}' - evt = _parse_sse_block(block) - assert isinstance(evt, OutputTextDeltaEvent) - assert evt.delta == "hi" - assert evt.sequence_number == 3 - - def test_done_signal(self): - assert _parse_sse_block("data: [DONE]") is _SSE_DONE - - def test_multi_line_data(self): - # Per SSE spec, multiple data: lines join with \n into one JSON doc. - block = 'data: {"type":"error",\ndata: "message":"oops","sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, StreamingErrorEvent) - assert evt.message == "oops" - - def test_invalid_json_raises(self): - block = 'data: {not valid json' - with pytest.raises(FoundryLocalException): - _parse_sse_block(block) - - def test_empty_block_returns_none(self): - assert _parse_sse_block("") is None - assert _parse_sse_block("\n\n") is None - - def test_ignores_non_data_lines(self): - block = 'id: 1\nretry: 1000\nevent: response.created\ndata: {"type":"response.created","response":{"id":"r1"},"sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, ResponseLifecycleEvent) - assert evt.type == "response.created" - - def test_error_event(self): - block = 'data: {"type":"error","code":"bad","message":"oops","sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, StreamingErrorEvent) - assert evt.code == "bad" - assert evt.message == "oops" - - def test_iter_sse_events_handles_partial_chunks(self): - payload_events = [ - 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"Hel","sequence_number":1}\n\n', - 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"lo","sequence_number":2}\n\n', - 'data: [DONE]\n\n', - ] - full = "".join(payload_events).encode("utf-8") - - # Split the bytes into irregular chunks to exercise buffering. 
- chunks = [full[i:i + 7] for i in range(0, len(full), 7)] - - resp = MagicMock() - resp.iter_content = MagicMock(return_value=iter(chunks)) - resp.close = MagicMock() - - events = list(_iter_sse_events(resp)) - assert len(events) == 2 - assert all(isinstance(e, OutputTextDeltaEvent) for e in events) - assert "".join(e.delta for e in events) == "Hello" - resp.close.assert_called() - - def test_iter_sse_handles_crlf(self): - payload = ( - 'event: response.output_text.delta\r\n' - 'data: {"type":"response.output_text.delta","delta":"x","sequence_number":0}\r\n' - '\r\n' - 'data: [DONE]\r\n\r\n' - ) - resp = MagicMock() - resp.iter_content = MagicMock(return_value=iter([payload.encode("utf-8")])) - resp.close = MagicMock() - - events = list(_iter_sse_events(resp)) - assert len(events) == 1 - assert events[0].delta == "x" - - def test_unknown_event_type(self): - block = 'data: {"type":"response.brand_new_event","sequence_number":7}' - evt = _parse_sse_block(block) - assert isinstance(evt, UnknownStreamingEvent) - assert evt.type == "response.brand_new_event" - - -# --------------------------------------------------------------------------- -# Vision types -# --------------------------------------------------------------------------- - -class TestVisionTypes: - def test_input_image_from_bytes(self): - data = b"\x89PNG\r\n\x1a\nfakedata" - img = InputImageContent.from_bytes(data, "image/png", detail="high") - assert img.media_type == "image/png" - assert img.detail == "high" - assert base64.b64decode(img.image_data) == data - - def test_input_image_from_url(self): - img = InputImageContent.from_url("https://example.com/x.png") - assert img.image_url == "https://example.com/x.png" - assert img.image_data is None - - def test_input_image_from_file(self, tmp_path): - data = b"\x89PNG\r\n\x1a\nfakedata" - p = tmp_path / "test.png" - p.write_bytes(data) - img = InputImageContent.from_file(str(p)) - assert img.media_type == "image/png" - assert base64.b64decode(img.image_data) == data - - def test_input_image_from_file_rejects_non_image(self, tmp_path): - p = tmp_path / "text.txt" - p.write_text("not an image") - with pytest.raises(ValueError, match="Unsupported"): - InputImageContent.from_file(str(p)) - - def test_input_image_serialization(self): - img = InputImageContent(media_type="image/png", image_data="abc", detail="low") - d = _to_dict(img) - assert d == {"media_type": "image/png", "image_data": "abc", "detail": "low", "type": "input_image"} - # image_url left unset should be omitted - assert "image_url" not in d - - def test_input_image_mutual_exclusivity(self): - with pytest.raises(ValueError, match="exactly one"): - InputImageContent(media_type="image/png") # neither set - with pytest.raises(ValueError, match="exactly one"): - InputImageContent(media_type="image/png", image_url="http://x.com/a.png", image_data="abc") # both set - - -# --------------------------------------------------------------------------- -# Type serialization & parsing -# --------------------------------------------------------------------------- - -class TestTypeSerialization: - def test_message_item_to_dict(self): - msg = MessageItem( - role="user", - content=[InputTextContent(text="Hi"), InputImageContent(media_type="image/png", image_data="abc")], - ) - d = _to_dict(msg) - assert d["type"] == "message" - assert d["role"] == "user" - assert d["content"][0] == {"text": "Hi", "type": "input_text"} - assert d["content"][1]["type"] == "input_image" - assert "id" not in d # None omitted - - def 
test_function_tool_to_dict(self): - tool = FunctionToolDefinition( - name="multiply", - description="x*y", - parameters={"type": "object", "properties": {"a": {"type": "number"}}}, - strict=True, - ) - d = _to_dict(tool) - assert d == { - "name": "multiply", - "description": "x*y", - "parameters": {"type": "object", "properties": {"a": {"type": "number"}}}, - "strict": True, - "type": "function", - } - - def test_response_object_from_dict(self): - from foundry_local_sdk.openai.responses_types import _parse_response_object - - payload = { - "id": "resp_abc", - "object": "response", - "created_at": 1700000000, - "status": "completed", - "model": "phi-4-mini", - "output": [ - { - "type": "message", - "role": "assistant", - "content": [{"type": "output_text", "text": "Hello!"}], - } - ], - "usage": {"input_tokens": 3, "output_tokens": 2, "total_tokens": 5}, - "store": True, - } - r = _parse_response_object(payload) - assert r.id == "resp_abc" - assert r.status == "completed" - assert r.usage.total_tokens == 5 - assert r.output_text == "Hello!" - - def test_streaming_event_parsing_lifecycle(self): - evt = parse_streaming_event( - { - "type": "response.completed", - "response": {"id": "resp_1", "status": "completed"}, - "sequence_number": 10, - } - ) - assert isinstance(evt, ResponseLifecycleEvent) - assert evt.type == "response.completed" - assert evt.response.id == "resp_1" - assert evt.sequence_number == 10 - - -# --------------------------------------------------------------------------- -# End-to-end (mocked HTTP) -# --------------------------------------------------------------------------- - -class TestClientHTTPFlow: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_create_posts_correct_body(self): - payload = { - "id": "resp_1", - "object": "response", - "status": "completed", - "model": MODEL_ID, - "output": [ - {"type": "message", "role": "assistant", "content": "ok"}, - ], - } - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response(payload) - result = self.client.create("hello", temperature=0.3) - - assert result.id == "resp_1" - assert result.output_text == "ok" - - _, kwargs = mock_req.call_args - assert mock_req.call_args.args[0] == "POST" - assert mock_req.call_args.args[1] == f"{BASE_URL}/v1/responses" - body = json.loads(kwargs["data"]) - assert body["model"] == MODEL_ID - assert body["input"] == "hello" - assert body["temperature"] == 0.3 - assert "store" not in body # store=None is omitted from request - assert "stream" not in body - - def test_get_uses_url_encoded_path(self): - weird_id = "resp_with/slashes and spaces" - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": weird_id, "object": "response", "status": "completed", "model": MODEL_ID, "output": []} - ) - self.client.get(weird_id) - - path = mock_req.call_args.args[1] - assert "resp_with%2Fslashes%20and%20spaces" in path - assert mock_req.call_args.args[0] == "GET" - - def test_delete_parses_result(self): - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": "resp_1", "object": "response.deleted", "deleted": True} - ) - result = self.client.delete("resp_1") - assert result.deleted is True - assert result.id == "resp_1" - - def test_http_error_raises_foundry_local_exception(self): - resp = MagicMock() - resp.ok = False - 
resp.status_code = 400 - resp.text = '{"error":{"message":"bad"}}' - with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): - with pytest.raises(FoundryLocalException) as excinfo: - self.client.create("hi") - assert "400" in str(excinfo.value) - assert "bad" in str(excinfo.value) - - def test_create_streaming_yields_events(self): - sse = ( - 'event: response.output_text.delta\n' - 'data: {"type":"response.output_text.delta","delta":"a","sequence_number":1}\n' - '\n' - 'event: response.output_text.delta\n' - 'data: {"type":"response.output_text.delta","delta":"b","sequence_number":2}\n' - '\n' - 'data: [DONE]\n\n' - ) - with patch("foundry_local_sdk.openai.responses_client.requests.post") as mock_post: - mock_post.return_value = _fake_stream_response(sse) - events = list(self.client.create_streaming("hi")) - - assert len(events) == 2 - assert "".join(e.delta for e in events) == "ab" - _, kwargs = mock_post.call_args - body = json.loads(kwargs["data"]) - assert body["stream"] is True - assert kwargs["headers"]["Accept"] == "text/event-stream" - - def test_streaming_http_error(self): - resp = MagicMock() - resp.ok = False - resp.status_code = 500 - resp.text = "boom" - resp.close = MagicMock() - with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): - with pytest.raises(FoundryLocalException) as excinfo: - list(self.client.create_streaming("hi")) - assert "500" in str(excinfo.value) - - def test_settings_merge_precedence(self): - self.client.settings.temperature = 0.1 - self.client.settings.max_output_tokens = 100 - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": "r", "object": "response", "status": "completed", "model": MODEL_ID, "output": []} - ) - # Per-call overrides client settings - self.client.create("hi", temperature=0.9) - - body = json.loads(mock_req.call_args.kwargs["data"]) - assert body["temperature"] == 0.9 # per-call wins - assert body["max_output_tokens"] == 100 # settings default preserved - - -class TestManagerFactory: - """Ensure the factory method wiring doesn't require a running server.""" - - def test_manager_raises_if_web_service_not_started(self): - from foundry_local_sdk.exception import FoundryLocalException - - # Build a stand-in manager without going through the constructor's - # heavy initialization path. - mgr = MagicMock() - mgr.urls = None - # Bind the real method to our MagicMock so we exercise actual logic. - from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M - - with pytest.raises(FoundryLocalException, match="[Ww]eb service"): - M.create_responses_client(mgr, "some-model") - - def test_manager_returns_client_when_urls_set(self): - mgr = MagicMock() - mgr.urls = [BASE_URL] - from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M - - client = M.create_responses_client(mgr, "phi") - assert isinstance(client, ResponsesClient) - assert client._model_id == "phi" - assert client._base_url == BASE_URL diff --git a/sdk/python/test/openai/test_responses_integration.py b/sdk/python/test/openai/test_responses_integration.py deleted file mode 100644 index cb4eee456..000000000 --- a/sdk/python/test/openai/test_responses_integration.py +++ /dev/null @@ -1,288 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -"""Integration tests for the Responses API client. - -These require a real Foundry Local runtime + a cached model. They are only -run when ``FOUNDRY_INTEGRATION_TESTS=1`` is set in the environment. -""" - -from __future__ import annotations - -import json -import os - -import pytest - -from foundry_local_sdk import ( - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, -) - -from ..conftest import TEST_MODEL_ALIAS - -pytestmark = pytest.mark.skipif( - not os.environ.get("FOUNDRY_INTEGRATION_TESTS"), - reason="Set FOUNDRY_INTEGRATION_TESTS=1 to run Responses API integration tests.", -) - - -def _get_loaded_model(catalog): - cached = catalog.get_cached_models() - assert cached, "No cached models found" - variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) - assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" - - model = catalog.get_model(TEST_MODEL_ALIAS) - assert model is not None - model.select_variant(variant) - model.load() - return model - - -@pytest.fixture(scope="module") -def responses_client(manager, catalog): - """Start the web service, return a ResponsesClient tied to the test model.""" - model = _get_loaded_model(catalog) - manager.start_web_service() - client = manager.create_responses_client(model.id) - try: - yield client - finally: - try: - manager.stop_web_service() - finally: - model.unload() - - -# --------------------------------------------------------------------------- -# Non-streaming -# --------------------------------------------------------------------------- - -class TestNonStreaming: - def test_simple_string_input(self, responses_client): - resp = responses_client.create("What is 2 + 2? Reply with just the number.") - assert resp.id - assert resp.status in {"completed", "incomplete"} - assert resp.output_text # Non-empty - - def test_with_options(self, responses_client): - resp = responses_client.create( - "Say hello.", - temperature=0.0, - max_output_tokens=32, - ) - assert resp.output_text - - def test_structured_input(self, responses_client): - # Validates that structured MessageItem input is accepted and produces - # a well-formed response. Not asserting content (too model-dependent). - resp = responses_client.create( - [ - MessageItem(role="user", content="Reply with the single word: ping"), - ], - temperature=0.0, - ) - assert resp.status in {"completed", "incomplete"} - assert resp.output_text.strip() - - def test_with_instructions(self, responses_client): - resp = responses_client.create( - "Who are you?", - instructions="You are a terse assistant. Answer in exactly three words.", - temperature=0.0, - ) - assert resp.output_text - - def test_multi_turn(self, responses_client): - # Validates previous_response_id wiring: the second response should - # link back to the first via previous_response_id. We don't assert on - # recall quality (too model-dependent for tiny test models). - first = responses_client.create( - "My favourite colour is green. 
Just acknowledge with 'ok'.", - temperature=0.0, - store=True, - ) - assert first.id - second = responses_client.create( - "What colour did I mention?", - previous_response_id=first.id, - temperature=0.0, - ) - assert second.previous_response_id == first.id - assert second.output_text.strip() - - -# --------------------------------------------------------------------------- -# Streaming -# --------------------------------------------------------------------------- - -class TestStreaming: - def test_basic_streaming(self, responses_client): - chunks = [] - completed = False - for event in responses_client.create_streaming( - "Count 1, 2, 3. Reply with just the digits separated by spaces.", - temperature=0.0, - ): - if event.type == "response.output_text.delta": - chunks.append(event.delta) - elif event.type == "response.completed": - completed = True - assert completed - assert "".join(chunks).strip() - - def test_streaming_with_options(self, responses_client): - saw_completed = False - for event in responses_client.create_streaming( - "Hello", - temperature=0.0, - max_output_tokens=16, - ): - if event.type == "response.completed": - saw_completed = True - assert saw_completed - - def test_streaming_events_sequence(self, responses_client): - # Expect created → in_progress → ... → completed - types_seen = [] - for event in responses_client.create_streaming("Say hi.", temperature=0.0): - types_seen.append(event.type) - assert "response.created" in types_seen - assert "response.completed" in types_seen - assert types_seen.index("response.created") < types_seen.index("response.completed") - - -# --------------------------------------------------------------------------- -# Storage: get / delete / list -# --------------------------------------------------------------------------- - -class TestStorage: - def test_get_stored_response(self, responses_client): - first = responses_client.create("Store this.", store=True, temperature=0.0) - fetched = responses_client.get(first.id) - assert fetched.id == first.id - assert fetched.output_text == first.output_text - - def test_delete_response(self, responses_client): - created = responses_client.create("Delete me.", store=True, temperature=0.0) - result = responses_client.delete(created.id) - assert result.id == created.id - assert result.deleted is True - - def test_list_responses(self, responses_client): - # Create one so the list is guaranteed non-empty. - responses_client.create("A listable response.", store=True, temperature=0.0) - result = responses_client.list() - assert result.object == "list" - assert len(result.data) >= 1 - - -# --------------------------------------------------------------------------- -# Tool calling -# --------------------------------------------------------------------------- - -class TestToolCalling: - def test_function_call_round_trip(self, responses_client): - tool = FunctionToolDefinition( - name="multiply_numbers", - description="Multiply two integers.", - parameters={ - "type": "object", - "properties": { - "a": {"type": "integer"}, - "b": {"type": "integer"}, - }, - "required": ["a", "b"], - }, - ) - first = responses_client.create( - "What is 7 times 6? Use the multiply_numbers tool.", - tools=[tool], - temperature=0.0, - ) - - # Find the function_call item. 
- call = next( - (item for item in first.output if getattr(item, "type", None) == "function_call"), - None, - ) - if call is None: - pytest.skip("Model did not emit a tool call for this prompt") - - args = json.loads(call.arguments) - # Model may use the declared parameter names or invent its own. - # Extract the two integer values robustly. - int_values = [int(v) for v in args.values() if isinstance(v, (int, str)) and str(v).lstrip("-").isdigit()] - if len(int_values) < 2: - pytest.skip(f"Model produced unusable tool args: {args!r}") - product = int_values[0] * int_values[1] - - follow = responses_client.create( - [ - MessageItem(role="user", content="What is 7 times 6? Use the multiply_numbers tool."), - call, - { - "type": "function_call_output", - "call_id": call.call_id, - "output": str(product), - }, - ], - tools=[tool], - temperature=0.0, - ) - # Validates the round-trip: the follow-up should produce a completed - # response that references the tool output. We don't assert content. - assert follow.status in {"completed", "incomplete"} - assert follow.output_text.strip() - - -# --------------------------------------------------------------------------- -# Vision -# --------------------------------------------------------------------------- - -class TestVision: - """These tests require a vision-capable model and will be skipped otherwise.""" - - def _run_or_skip(self, responses_client, content): - try: - return responses_client.create( - [MessageItem(role="user", content=content)], - temperature=0.0, - ) - except Exception as e: - pytest.skip(f"Model does not appear to support vision: {e}") - - def test_image_base64_input(self, responses_client): - # Minimal 1x1 PNG. - png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - resp = self._run_or_skip( - responses_client, - [ - InputTextContent(text="Describe this image briefly."), - InputImageContent.from_bytes(png, "image/png"), - ], - ) - assert resp.status in {"completed", "incomplete"} - - def test_image_with_text(self, responses_client): - png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - resp = self._run_or_skip( - responses_client, - [ - InputTextContent(text="What colour is this?"), - InputImageContent.from_bytes(png, "image/png"), - ], - ) - assert resp.status in {"completed", "incomplete"} diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py new file mode 100644 index 000000000..aee569034 --- /dev/null +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -0,0 +1,194 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Integration tests for /v1/responses through the local web service. + +These tests intentionally use FoundryLocalManager only for SDK setup, model +lifecycle, and web-service lifecycle. Actual Responses API calls go through the +official OpenAI Python client against the local OpenAI-compatible endpoint. 
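+
+The module is gated by the shared ``skip_in_ci`` marker from ``conftest``, so it
+only runs where a real Foundry Local runtime and a cached test model exist.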
+""" + +from __future__ import annotations + +import json +from typing import Any + +import pytest +from openai import OpenAI + +from ..conftest import TEST_MODEL_ALIAS, skip_in_ci + + +pytestmark = skip_in_ci + + +def _field(value: Any, name: str, default: Any = None) -> Any: + if isinstance(value, dict): + return value.get(name, default) + return getattr(value, name, default) + + +def _response_text(response: Any) -> str: + text = _field(response, "output_text") + if isinstance(text, str) and text: + return text + + output_text = "" + for item in _field(response, "output", []) or []: + if _field(item, "type") != "message": + continue + for part in _field(item, "content", []) or []: + if _field(part, "type") == "output_text": + part_text = _field(part, "text", "") + if isinstance(part_text, str): + output_text += part_text + return output_text + + +def _get_function_call(response: Any) -> Any: + for item in _field(response, "output", []) or []: + if _field(item, "type") == "function_call": + return item + return None + + +def _get_weather_tool() -> dict[str, Any]: + return { + "type": "function", + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and region, for example Seattle, WA.", + } + }, + "required": ["location"], + }, + } + + +@pytest.fixture(scope="module") +def responses_web_service(manager, catalog): + cached = catalog.get_cached_models() + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + if cached_variant is None: + pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") + + model = catalog.get_model(TEST_MODEL_ALIAS) + if model is None: + pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") + + model.select_variant(cached_variant) + client = None + service_started = False + model_loaded = False + + try: + try: + model.load() + model_loaded = True + manager.start_web_service() + service_started = True + except Exception as exc: + message = str(exc) + if "execute_command_with_binary" in message: + pytest.skip( + "Local Foundry Local Core/native runtime is stale: " + "failed to resolve execute_command_with_binary" + ) + pytest.skip(f"Failed to start Responses web-service test prerequisites: {exc}") + + if not manager.urls: + pytest.skip("Web service started but did not return any URLs") + + base_url = manager.urls[0].rstrip("/") + "/v1" + client = OpenAI(base_url=base_url, api_key="notneeded") + if not hasattr(client, "responses"): + pytest.skip("Installed openai package does not expose the Responses API") + yield client, model.id + finally: + if client is not None: + client.close() + if service_started: + try: + manager.stop_web_service() + except Exception: + pass + if model_loaded: + try: + model.unload() + except Exception: + pass + + +class TestResponsesWebService: + def test_should_create_non_streaming_response(self, responses_web_service): + client, model_id = responses_web_service + + response = client.responses.create( + model=model_id, + input="What is 2 + 2? 
Reply briefly.", + ) + + assert _response_text(response).strip() + + def test_should_stream_response_events(self, responses_web_service): + client, model_id = responses_web_service + saw_text_delta = False + saw_completion = False + + stream = client.responses.create( + model=model_id, + input="Count from 1 to 3, separated by spaces.", + stream=True, + ) + for event in stream: + event_type = _field(event, "type") + if event_type == "response.output_text.delta" and _field(event, "delta"): + saw_text_delta = True + if event_type == "response.completed": + saw_completion = True + + assert saw_text_delta + assert saw_completion + + def test_should_round_trip_function_call_output(self, responses_web_service): + client, model_id = responses_web_service + weather_tool = _get_weather_tool() + + tool_response = client.responses.create( + model=model_id, + input="Use get_weather to check the weather in Seattle, then answer.", + tools=[weather_tool], + tool_choice="required", + store=True, + ) + function_call = _get_function_call(tool_response) + + assert function_call is not None + assert _field(function_call, "name") == "get_weather" + assert _field(function_call, "call_id") + + final_response = client.responses.create( + model=model_id, + previous_response_id=_field(tool_response, "id"), + input=[ + { + "type": "function_call_output", + "call_id": _field(function_call, "call_id"), + "output": json.dumps( + { + "location": "Seattle, WA", + "temperature": "68 F", + "conditions": "sunny", + } + ), + } + ], + ) + + assert _response_text(final_response).strip() From 718e78cca24ccd97ce85e04c3c343a95da48805e Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 16:17:02 -0400 Subject: [PATCH 06/12] refactor(sdk/python): align responses web-service sample tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/README.md | 2 +- samples/python/README.md | 1 + .../web-server-responses/requirements.txt | 3 + .../python/web-server-responses/src/app.py | 152 ++++++++++++ sdk/python/README.md | 3 +- sdk/python/examples/responses_web_service.py | 176 ------------- .../test/openai/test_responses_web_service.py | 232 +++++++++++------- 7 files changed, 299 insertions(+), 270 deletions(-) create mode 100644 samples/python/web-server-responses/requirements.txt create mode 100644 samples/python/web-server-responses/src/app.py delete mode 100644 sdk/python/examples/responses_web_service.py diff --git a/samples/README.md b/samples/README.md index bcac6bf3a..bed7e41c1 100644 --- a/samples/README.md +++ b/samples/README.md @@ -10,5 +10,5 @@ Explore complete working examples that demonstrate how to use Foundry Local — |----------|---------|-------------| | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | | [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. | -| [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. | +| [**Python**](python/) | 11 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, and tutorials. 
| | [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | diff --git a/samples/python/README.md b/samples/python/README.md index 7262f012a..49e99c8a6 100644 --- a/samples/python/README.md +++ b/samples/python/README.md @@ -14,6 +14,7 @@ These samples demonstrate how to use Foundry Local with Python. | [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. | | [audio-transcription](audio-transcription/) | Transcribe audio files using the Whisper model. | | [web-server](web-server/) | Start a local OpenAI-compatible web server and call it with the OpenAI Python SDK. | +| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. | | [tool-calling](tool-calling/) | Tool calling with custom function definitions (get_weather, calculate). | | [langchain-integration](langchain-integration/) | LangChain integration for building translation and text generation chains. | | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). | diff --git a/samples/python/web-server-responses/requirements.txt b/samples/python/web-server-responses/requirements.txt new file mode 100644 index 000000000..db870f608 --- /dev/null +++ b/samples/python/web-server-responses/requirements.txt @@ -0,0 +1,3 @@ +foundry-local-sdk; sys_platform != "win32" +foundry-local-sdk-winml; sys_platform == "win32" +openai diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py new file mode 100644 index 000000000..6f186a2a6 --- /dev/null +++ b/samples/python/web-server-responses/src/app.py @@ -0,0 +1,152 @@ +# +# +import json +from typing import Any + +from openai import OpenAI + +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +def get_response_text(response: Any) -> str: + if isinstance(getattr(response, "output_text", None), str): + return response.output_text + return "".join( + getattr(part, "text", "") + for item in getattr(response, "output", []) or [] + for part in getattr(item, "content", []) or [] + if getattr(part, "type", None) == "output_text" + ) + + +# +# Initialize the Foundry Local SDK +config = Configuration(app_name="foundry_local_samples") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# Download and register all execution providers. 
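+# The progress callback receives the execution provider's name and a completion
+# percentage; _ep_progress below tracks the current EP so that each provider
+# renders its own updating progress line.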
+current_ep = "" + + +def _ep_progress(ep_name: str, percent: float): + global current_ep + if ep_name != current_ep: + if current_ep: + print() + current_ep = ep_name + print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True) + + +manager.download_and_register_eps(progress_callback=_ep_progress) +if current_ep: + print() +# + +# +model_alias = "qwen2.5-0.5b" +model = manager.catalog.get_model(model_alias) + +print(f"\nDownloading model {model_alias}...") +model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True, + ) +) +print("\nModel downloaded") + +print("\nLoading model...") +model.load() +print("Model loaded") +# + +# +print("\nStarting web service...") +manager.start_web_service() +base_url = manager.urls[0].rstrip("/") + "/v1" +print("Web service started") + +# <<<<<< OPENAI SDK USAGE >>>>>> +# Use the OpenAI SDK to call the local Foundry web service Responses API +openai = OpenAI( + base_url=base_url, + api_key="notneeded", +) +# + +try: + print("\nTesting a non-streaming Responses call...") + response = openai.responses.create( + model=model.id, + input="Reply with one short sentence about local AI.", + ) + print(f"[ASSISTANT]: {get_response_text(response)}") + + print("\nTesting a streaming Responses call...") + stream = openai.responses.create( + model=model.id, + input="Count from one to three.", + stream=True, + ) + + print("[ASSISTANT STREAM]: ", end="", flush=True) + for event in stream: + if getattr(event, "type", None) == "response.output_text.delta": + print(getattr(event, "delta", ""), end="", flush=True) + print() + + print("\nTesting Responses tool calling...") + tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get the current weather. This sample always returns Seattle weather.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": False, + }, + }, + ] + + tool_response = openai.responses.create( + model=model.id, + input="Use the get_weather tool and then answer with the weather.", + tools=tools, + tool_choice="required", + store=True, + ) + + function_call = next( + (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), + None, + ) + if function_call is None: + raise RuntimeError("Expected the model to call get_weather.") + + print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") + + final_response = openai.responses.create( + model=model.id, + previous_response_id=tool_response.id, + input=[ + { + "type": "function_call_output", + "call_id": function_call.call_id, + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + tools=tools, + ) + + print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") + # <<<<<< END OPENAI SDK USAGE >>>>>> +finally: + # Tidy up + openai.close() + manager.stop_web_service() + model.unload() +# diff --git a/sdk/python/README.md b/sdk/python/README.md index 0c065bc85..2a121411e 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -328,5 +328,4 @@ See [test/README.md](test/README.md) for detailed test setup and structure. 
```bash python examples/chat_completion.py -python examples/responses_web_service.py -``` +``` \ No newline at end of file diff --git a/sdk/python/examples/responses_web_service.py b/sdk/python/examples/responses_web_service.py deleted file mode 100644 index fe9517949..000000000 --- a/sdk/python/examples/responses_web_service.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- - -"""Example: Responses API through the Foundry Local web service. - -Foundry Local manages setup, model lifecycle, and the local OpenAI-compatible -web service. The official OpenAI Python client sends the actual /v1/responses -requests to that local service. -""" - -from __future__ import annotations - -import json -from typing import Any - -from openai import OpenAI - -from foundry_local_sdk import Configuration, FoundryLocalManager - - -MODEL_ALIAS = "qwen2.5-0.5b" - - -def _field(value: Any, name: str, default: Any = None) -> Any: - if isinstance(value, dict): - return value.get(name, default) - return getattr(value, name, default) - - -def _response_text(response: Any) -> str: - text = _field(response, "output_text") - if isinstance(text, str) and text: - return text - - for item in _field(response, "output", []) or []: - if _field(item, "type") != "message": - continue - for part in _field(item, "content", []) or []: - if _field(part, "type") == "output_text": - part_text = _field(part, "text", "") - if isinstance(part_text, str): - text = (text or "") + part_text - return text or "" - - -def _get_function_call(response: Any) -> Any: - for item in _field(response, "output", []) or []: - if _field(item, "type") == "function_call": - return item - return None - - -def _get_weather_tool() -> dict[str, Any]: - return { - "type": "function", - "name": "get_weather", - "description": "Get the current weather for a city.", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and region, for example Seattle, WA.", - } - }, - "required": ["location"], - }, - } - - -def main() -> None: - config = Configuration(app_name="ResponsesWebServiceExample") - print("Initializing Foundry Local Manager") - FoundryLocalManager.initialize(config) - manager = FoundryLocalManager.instance - if manager is None: - raise RuntimeError("FoundryLocalManager.initialize did not set instance") - - print("Registering execution providers...") - ep_result = manager.download_and_register_eps() - print(f"EP registration success: {ep_result.success} ({ep_result.status})") - - model = manager.catalog.get_model(MODEL_ALIAS) - if model is None: - raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") - - if not model.is_cached: - print(f"Downloading {model.alias}...") - model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r")) - print() - - print(f"Loading {model.alias}...", end="") - model.load() - print("loaded!") - - openai_client: OpenAI | None = None - try: - print("Starting OpenAI-compatible web service...", end="") - manager.start_web_service() - if not manager.urls: - raise RuntimeError("Web service started but did not return any URLs") - print("started!") - - base_url = manager.urls[0].rstrip("/") + "/v1" - openai_client = OpenAI(base_url=base_url, api_key="notneeded") - - print("\n--- 
Non-streaming Responses call ---") - response = openai_client.responses.create( - model=model.id, - input="What is 2 + 2? Reply briefly.", - ) - print(_response_text(response)) - - print("\n--- Streaming Responses call ---") - stream = openai_client.responses.create( - model=model.id, - input="Count from 1 to 3, separated by spaces.", - stream=True, - ) - for event in stream: - if _field(event, "type") == "response.output_text.delta": - print(_field(event, "delta", ""), end="", flush=True) - print() - - print("\n--- Function/tool calling Responses flow ---") - weather_tool = _get_weather_tool() - tool_response = openai_client.responses.create( - model=model.id, - input="Use get_weather to check the weather in Seattle, then answer.", - tools=[weather_tool], - tool_choice="required", - store=True, - ) - function_call = _get_function_call(tool_response) - if function_call is None: - raise RuntimeError("Model did not return a function_call item") - - print(f"Tool call: {_field(function_call, 'name')}") - print(f"Arguments: {_field(function_call, 'arguments')}") - - final_response = openai_client.responses.create( - model=model.id, - previous_response_id=_field(tool_response, "id"), - input=[ - { - "type": "function_call_output", - "call_id": _field(function_call, "call_id"), - "output": json.dumps( - { - "location": "Seattle, WA", - "temperature": "68 F", - "conditions": "sunny", - } - ), - } - ], - ) - print(_response_text(final_response)) - - finally: - if openai_client is not None: - openai_client.close() - try: - manager.stop_web_service() - print("Web service stopped.") - except Exception: - pass - model.unload() - print("Model unloaded.") - - -if __name__ == "__main__": - main() diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index aee569034..e323a892e 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -6,16 +6,15 @@ These tests intentionally use FoundryLocalManager only for SDK setup, model lifecycle, and web-service lifecycle. Actual Responses API calls go through the -official OpenAI Python client against the local OpenAI-compatible endpoint. +OpenAI-compatible HTTP endpoint directly. 
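+This keeps the suite exercising the raw HTTP surface rather than whatever the
+installed openai package version happens to support.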
""" from __future__ import annotations import json -from typing import Any import pytest -from openai import OpenAI +import requests from ..conftest import TEST_MODEL_ALIAS, skip_in_ci @@ -23,50 +22,95 @@ pytestmark = skip_in_ci -def _field(value: Any, name: str, default: Any = None) -> Any: - if isinstance(value, dict): - return value.get(name, default) - return getattr(value, name, default) - - -def _response_text(response: Any) -> str: - text = _field(response, "output_text") +def _response_text(response: dict) -> str: + text = response.get("output_text") if isinstance(text, str) and text: return text - output_text = "" - for item in _field(response, "output", []) or []: - if _field(item, "type") != "message": - continue - for part in _field(item, "content", []) or []: - if _field(part, "type") == "output_text": - part_text = _field(part, "text", "") - if isinstance(part_text, str): - output_text += part_text - return output_text - - -def _get_function_call(response: Any) -> Any: - for item in _field(response, "output", []) or []: - if _field(item, "type") == "function_call": + return "".join( + part.get("text", "") + for item in response.get("output", []) or [] + if item.get("type") == "message" + for part in item.get("content", []) or [] + if part.get("type") == "output_text" and isinstance(part.get("text"), str) + ) + + +def _post_response(base_url: str, body: dict) -> dict: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json"}, + json=body, + timeout=60, + ) + assert response.ok, response.text + return response.json() + + +def _post_streaming_response(base_url: str, body: dict) -> list[dict]: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + json={**body, "stream": True}, + stream=True, + timeout=(60, None), + ) + assert response.ok, response.text + + events: list[dict] = [] + buffer = "" + try: + for chunk in response.iter_content(chunk_size=None, decode_unicode=False): + if not chunk: + continue + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") + + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) + data = _sse_data(block) + if not data: + continue + if data == "[DONE]": + return events + events.append(json.loads(data)) + finally: + response.close() + + tail = buffer.strip() + if tail: + data = _sse_data(tail) + if data and data != "[DONE]": + events.append(json.loads(data)) + return events + + +def _sse_data(block: str) -> str: + lines: list[str] = [] + for line in block.strip().split("\n"): + if line.startswith("data: "): + lines.append(line[6:]) + elif line == "data:": + lines.append("") + return "\n".join(lines).strip() + + +def _get_function_call(response: dict) -> dict | None: + for item in response.get("output", []) or []: + if item.get("type") == "function_call": return item return None -def _get_weather_tool() -> dict[str, Any]: +def _get_weather_tool() -> dict: return { "type": "function", "name": "get_weather", - "description": "Get the current weather for a city.", + "description": "Get the current weather. 
This test always returns Seattle weather.", "parameters": { "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and region, for example Seattle, WA.", - } - }, - "required": ["location"], + "properties": {}, + "additionalProperties": False, }, } @@ -83,7 +127,6 @@ def responses_web_service(manager, catalog): pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") model.select_variant(cached_variant) - client = None service_started = False model_loaded = False @@ -105,14 +148,8 @@ def responses_web_service(manager, catalog): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - base_url = manager.urls[0].rstrip("/") + "/v1" - client = OpenAI(base_url=base_url, api_key="notneeded") - if not hasattr(client, "responses"): - pytest.skip("Installed openai package does not expose the Responses API") - yield client, model.id + yield manager.urls[0].rstrip("/"), model.id finally: - if client is not None: - client.close() if service_started: try: manager.stop_web_service() @@ -127,68 +164,81 @@ def responses_web_service(manager, catalog): class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - client, model_id = responses_web_service - - response = client.responses.create( - model=model_id, - input="What is 2 + 2? Reply briefly.", + base_url, model_id = responses_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": "What is 2 + 2? Answer with just the number.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) + assert response["object"] == "response" + assert response["status"] == "completed" assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): - client, model_id = responses_web_service - saw_text_delta = False - saw_completion = False - - stream = client.responses.create( - model=model_id, - input="Count from 1 to 3, separated by spaces.", - stream=True, + base_url, model_id = responses_web_service + + events = _post_streaming_response( + base_url, + { + "model": model_id, + "input": "Count from 1 to 3.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - for event in stream: - event_type = _field(event, "type") - if event_type == "response.output_text.delta" and _field(event, "delta"): - saw_text_delta = True - if event_type == "response.completed": - saw_completion = True - assert saw_text_delta - assert saw_completion + assert any(event.get("type") == "response.created" for event in events) + assert any(event.get("type") == "response.output_text.delta" for event in events) + assert any(event.get("type") == "response.completed" for event in events) def test_should_round_trip_function_call_output(self, responses_web_service): - client, model_id = responses_web_service + base_url, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = client.responses.create( - model=model_id, - input="Use get_weather to check the weather in Seattle, then answer.", - tools=[weather_tool], - tool_choice="required", - store=True, + tool_response = _post_response( + base_url, + { + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": [weather_tool], + "tool_choice": "required", + "temperature": 0, + "max_output_tokens": 64, + "store": True, + }, ) function_call = _get_function_call(tool_response) - assert function_call is not None - assert _field(function_call, 
"name") == "get_weather" - assert _field(function_call, "call_id") - - final_response = client.responses.create( - model=model_id, - previous_response_id=_field(tool_response, "id"), - input=[ - { - "type": "function_call_output", - "call_id": _field(function_call, "call_id"), - "output": json.dumps( - { - "location": "Seattle, WA", - "temperature": "68 F", - "conditions": "sunny", - } - ), - } - ], + assert function_call is not None, json.dumps(tool_response.get("output", [])) + assert function_call["name"] == "get_weather" + assert isinstance(function_call["call_id"], str) + + final_response = _post_response( + base_url, + { + "model": model_id, + "previous_response_id": tool_response["id"], + "input": [ + { + "type": "function_call_output", + "call_id": function_call["call_id"], + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + "tools": [weather_tool], + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) + assert final_response["status"] == "completed" assert _response_text(final_response).strip() From 0808187d82a8eb731583cb6896ea3ad5958bdd42 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 17:54:21 -0400 Subject: [PATCH 07/12] docs(samples): add Python responses web-service README Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/python/web-server-responses/README.md | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 samples/python/web-server-responses/README.md diff --git a/samples/python/web-server-responses/README.md b/samples/python/web-server-responses/README.md new file mode 100644 index 000000000..95666d910 --- /dev/null +++ b/samples/python/web-server-responses/README.md @@ -0,0 +1,44 @@ +# Foundry Local Python Responses Web-Service Sample + +This sample starts the Foundry Local OpenAI-compatible web service, then calls the Responses API with the official OpenAI Python client. + +It demonstrates: + +- A non-streaming `/v1/responses` call +- A streaming `/v1/responses` call +- A function/tool-calling round trip using `previous_response_id` + +## What gets installed + +Install the sample dependencies from `requirements.txt`: + +```bash +pip install -r requirements.txt +``` + +That installs: + +- `foundry-local-sdk` on non-Windows platforms +- `foundry-local-sdk-winml` on Windows +- `openai` + +The sample downloads/registers Foundry Local execution providers and downloads the `qwen2.5-0.5b` model the first time it runs. + +## Run the sample + +From this directory: + +```bash +python -m venv .venv +.\.venv\Scripts\activate +pip install -r requirements.txt +python src\app.py +``` + +On macOS or Linux, activate the virtual environment with: + +```bash +source .venv/bin/activate +``` + +The sample starts the local web service, sends Responses API requests to `http://localhost:/v1`, prints the model output, and then unloads the model and stops the web service. 
From a3e8a0f8001822dea29a997b04d7322d7bb4985a Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 21:49:04 +0200 Subject: [PATCH 08/12] test(sdk/python): add Responses image URL coverage Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../test/openai/test_responses_web_service.py | 75 +++++++++++++++++-- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index e323a892e..0b1a4cb45 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -21,6 +21,11 @@ pytestmark = skip_in_ci +VISION_MODEL_ALIAS = "qwen3-vl-2b-instruct" +VISION_IMAGE_URL = ( + "https://raw.githubusercontent.com/microsoft/fluentui-emoji/main/assets/Camera/3D/camera_3d.png" +) + def _response_text(response: dict) -> str: text = response.get("output_text") @@ -115,18 +120,21 @@ def _get_weather_tool() -> dict: } -@pytest.fixture(scope="module") -def responses_web_service(manager, catalog): +def _get_cached_model(catalog, model_alias: str): cached = catalog.get_cached_models() - cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + cached_variant = next((m for m in cached if m.alias == model_alias), None) if cached_variant is None: - pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") + pytest.skip(f"{model_alias} must be cached to run Responses web-service tests") - model = catalog.get_model(TEST_MODEL_ALIAS) + model = catalog.get_model(model_alias) if model is None: - pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") + pytest.skip(f"{model_alias} was not found in the catalog") model.select_variant(cached_variant) + return model + + +def _run_responses_web_service(manager, model): service_started = False model_loaded = False @@ -162,6 +170,22 @@ def responses_web_service(manager, catalog): pass +@pytest.fixture(scope="class") +def responses_web_service(manager, catalog): + model = _get_cached_model(catalog, TEST_MODEL_ALIAS) + yield from _run_responses_web_service(manager, model) + + +@pytest.fixture(scope="class") +def responses_vision_web_service(manager, catalog): + model = _get_cached_model(catalog, VISION_MODEL_ALIAS) + input_modalities = model.input_modalities or "" + if "image" not in input_modalities.split(","): + pytest.skip(f"{VISION_MODEL_ALIAS} does not advertise image input support") + + yield from _run_responses_web_service(manager, model) + + class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): base_url, model_id = responses_web_service @@ -178,7 +202,7 @@ def test_should_create_non_streaming_response(self, responses_web_service): ) assert response["object"] == "response" - assert response["status"] == "completed" + assert response["status"] == "completed", response.get("error") assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): @@ -242,3 +266,40 @@ def test_should_round_trip_function_call_output(self, responses_web_service): assert final_response["status"] == "completed" assert _response_text(final_response).strip() + + +class TestResponsesVisionWebService: + def test_should_create_response_with_image_url(self, responses_vision_web_service): + base_url, model_id = responses_vision_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": [ + { + "type": "message", + "role": 
"user", + "content": [ + { + "type": "input_text", + "text": "Describe this image in one short sentence.", + }, + { + "type": "input_image", + "image_url": VISION_IMAGE_URL, + "media_type": "image/png", + "detail": "low", + }, + ], + } + ], + "temperature": 0, + "max_output_tokens": 128, + "store": False, + }, + ) + + assert response["object"] == "response" + assert response["status"] == "completed", response.get("error") + assert _response_text(response).strip() From 3f0579ed3817486e924dcb593e9dda7d1fa38078 Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 22:00:11 +0200 Subject: [PATCH 09/12] feat(python): add ResponsesClient backed by native openai SDK Add ResponsesClient to the Python SDK, backed by the native openai SDK pointed at Foundry Local's OpenAI-compatible web service. Adds FoundryLocalManager.create_responses_client(model_id) factory. Usage: manager.start_web_service() client = manager.create_responses_client(model.id) response = client.create('What is 2 + 2?') print(response.output_text) - ResponsesClient: thin wrapper over openai.OpenAI targeting /v1 - ResponsesClientSettings: default settings (temperature, store, etc.) - create() / create_streaming() / stream() / get() / delete() / cancel() - context-manager support (close() releases underlying HTTP client) - Exported from foundry_local_sdk and foundry_local_sdk.openai - Integration tests updated to use ResponsesClient (3 pass, 1 skipped) - Sample updated to use manager.create_responses_client() Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/web-server-responses/src/app.py | 70 ++--- sdk/python/src/__init__.py | 3 +- sdk/python/src/foundry_local_manager.py | 32 ++ sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 267 +++++++++++++++++ .../test/openai/test_responses_web_service.py | 274 ++++++------------ 6 files changed, 420 insertions(+), 231 deletions(-) create mode 100644 sdk/python/src/openai/responses_client.py diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py index 6f186a2a6..e58df3e4e 100644 --- a/samples/python/web-server-responses/src/app.py +++ b/samples/python/web-server-responses/src/app.py @@ -1,25 +1,11 @@ # # import json -from typing import Any - -from openai import OpenAI from foundry_local_sdk import Configuration, FoundryLocalManager # -def get_response_text(response: Any) -> str: - if isinstance(getattr(response, "output_text", None), str): - return response.output_text - return "".join( - getattr(part, "text", "") - for item in getattr(response, "output", []) or [] - for part in getattr(item, "content", []) or [] - if getattr(part, "type", None) == "output_text" - ) - - # # Initialize the Foundry Local SDK config = Configuration(app_name="foundry_local_samples") @@ -66,36 +52,29 @@ def _ep_progress(ep_name: str, percent: float): # print("\nStarting web service...") manager.start_web_service() -base_url = manager.urls[0].rstrip("/") + "/v1" print("Web service started") - -# <<<<<< OPENAI SDK USAGE >>>>>> -# Use the OpenAI SDK to call the local Foundry web service Responses API -openai = OpenAI( - base_url=base_url, - api_key="notneeded", -) # +# +# Create a Responses API client via the SDK manager — no manual URL or API key needed. 
+client = manager.create_responses_client(model.id) +# + try: print("\nTesting a non-streaming Responses call...") - response = openai.responses.create( - model=model.id, - input="Reply with one short sentence about local AI.", - ) - print(f"[ASSISTANT]: {get_response_text(response)}") + response = client.create("Reply with one short sentence about local AI.") + print(f"[ASSISTANT]: {response.output_text}") print("\nTesting a streaming Responses call...") - stream = openai.responses.create( - model=model.id, - input="Count from one to three.", - stream=True, - ) - print("[ASSISTANT STREAM]: ", end="", flush=True) - for event in stream: - if getattr(event, "type", None) == "response.output_text.delta": - print(getattr(event, "delta", ""), end="", flush=True) + client.create_streaming( + "Count from one to three.", + callback=lambda event: print( + getattr(event, "delta", ""), + end="", + flush=True, + ) if getattr(event, "type", None) == "response.output_text.delta" else None, + ) print() print("\nTesting Responses tool calling...") @@ -112,16 +91,15 @@ def _ep_progress(ep_name: str, percent: float): }, ] - tool_response = openai.responses.create( - model=model.id, - input="Use the get_weather tool and then answer with the weather.", + tool_response = client.create( + "Use the get_weather tool and then answer with the weather.", tools=tools, tool_choice="required", store=True, ) function_call = next( - (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), + (item for item in tool_response.output if item.type == "function_call"), None, ) if function_call is None: @@ -129,24 +107,22 @@ def _ep_progress(ep_name: str, percent: float): print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") - final_response = openai.responses.create( - model=model.id, - previous_response_id=tool_response.id, - input=[ + final_response = client.create( + [ { "type": "function_call_output", "call_id": function_call.call_id, "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), } ], + previous_response_id=tool_response.id, tools=tools, ) - print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") - # <<<<<< END OPENAI SDK USAGE >>>>>> + print(f"[ASSISTANT FINAL]: {final_response.output_text}") finally: # Tidy up - openai.close() + client.close() manager.stop_web_service() model.unload() # diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 14534d196..9fecca997 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,6 +7,7 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager +from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .version import __version__ _logger = logging.getLogger(__name__) @@ -20,4 +21,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "__version__"] +__all__ = ["Configuration", "FoundryLocalManager", "ResponsesClient", "ResponsesClientSettings", "__version__"] diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index a649f8e56..902db6e18 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,6 +20,7 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException +from .openai.responses_client import ResponsesClient 
logger = logging.getLogger(__name__) @@ -194,3 +195,34 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None + + def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: + """Create a :class:`ResponsesClient` for the Responses API. + + The web service must be running before calling this method. Start it + with :meth:`start_web_service` first. + + Args: + model_id: Default model ID for requests. Can be overridden + per-request via the ``model`` keyword argument on + :meth:`~ResponsesClient.create`. + + Returns: + A :class:`ResponsesClient` pointed at the running web service. + + Raises: + FoundryLocalException: If the web service is not running. + + Example:: + + manager.start_web_service() + client = manager.create_responses_client(model.id) + response = client.create("What is 2 + 2?") + print(response.output_text) + """ + if not self.urls: + raise FoundryLocalException( + "Web service is not running. Call start_web_service() before " + "creating a ResponsesClient." + ) + return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index 2fa51a6f6..e7016799a 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions and audio transcription.""" +"""OpenAI-compatible clients for chat completions, audio transcription, and responses.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,6 +14,7 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) +from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -24,5 +25,7 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", + "ResponsesClient", + "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py new file mode 100644 index 000000000..55121e320 --- /dev/null +++ b/sdk/python/src/openai/responses_client.py @@ -0,0 +1,267 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Responses API client for Foundry Local's embedded web service. + +Uses the native ``openai`` SDK to call the Responses API on Foundry Local's +OpenAI-compatible web service. Create via +``FoundryLocalManager.create_responses_client()`` or +``model.create_responses_client(base_url)``. + +Example:: + + manager.start_web_service() + client = manager.create_responses_client(model.id) + + # Non-streaming + response = client.create("Hello, world!") + print(response.output_text) + + # Streaming + client.create_streaming("Tell me a story", lambda event: print(event)) +""" + +from __future__ import annotations + +import logging +from typing import Any, Callable, Iterator, Optional, Union + +from openai import OpenAI + +logger = logging.getLogger(__name__) + + +class ResponsesClientSettings: + """Default settings applied to every request made by a :class:`ResponsesClient`. 
+
+    Per-call keyword arguments passed to :meth:`ResponsesClient.create` override
+    these defaults. Attribute names match the OpenAI Responses API parameters
+    (snake_case).
+    """
+
+    def __init__(
+        self,
+        instructions: Optional[str] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        tool_choice: Optional[Any] = None,
+        truncation: Optional[str] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        store: Optional[bool] = None,
+        seed: Optional[int] = None,
+    ):
+        self.instructions = instructions
+        self.temperature = temperature
+        self.top_p = top_p
+        self.max_output_tokens = max_output_tokens
+        self.frequency_penalty = frequency_penalty
+        self.presence_penalty = presence_penalty
+        self.tool_choice = tool_choice
+        self.truncation = truncation
+        self.parallel_tool_calls = parallel_tool_calls
+        self.store = store
+        self.seed = seed
+
+    def _as_kwargs(self) -> dict[str, Any]:
+        """Return non-None settings as keyword arguments for the openai SDK."""
+        return {
+            k: v for k, v in {
+                "instructions": self.instructions,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "max_output_tokens": self.max_output_tokens,
+                "frequency_penalty": self.frequency_penalty,
+                "presence_penalty": self.presence_penalty,
+                "tool_choice": self.tool_choice,
+                "truncation": self.truncation,
+                "parallel_tool_calls": self.parallel_tool_calls,
+                "store": self.store,
+                "seed": self.seed,
+            }.items() if v is not None
+        }
+
+
+class ResponsesClient:
+    """Client for the OpenAI Responses API served by Foundry Local.
+
+    Backed by the native ``openai`` SDK pointed at the local web service.
+    Create via :meth:`FoundryLocalManager.create_responses_client` or
+    :meth:`model.create_responses_client`.
+
+    Args:
+        base_url: Base URL of the Foundry Local web service (e.g.
+            ``"http://127.0.0.1:5273"``). Do **not** include ``/v1`` — it is
+            appended automatically. Trailing slashes are stripped.
+        model_id: Default model ID. Can be overridden per-request via the
+            ``model`` keyword argument to :meth:`create`.
+    """
+
+    def __init__(self, base_url: str, model_id: Optional[str] = None):
+        if not base_url or not isinstance(base_url, str) or not base_url.strip():
+            raise ValueError("base_url must be a non-empty string.")
+        openai_base = base_url.rstrip("/") + "/v1"
+        self._client = OpenAI(base_url=openai_base, api_key="notneeded")
+        self._model_id = model_id
+        self.settings = ResponsesClientSettings()
+
+    # =========================================================================
+    # Public API
+    # =========================================================================
+
+    def create(self, input: Union[str, list], **options: Any) -> Any:  # noqa: A002
+        """Create a model response (non-streaming).
+
+        Args:
+            input: A string prompt or a list of Responses API input items.
+                Each dict item must have a ``"type"`` field (e.g.
+                ``{"type": "message", "role": "user", "content": [...]}``).
+            **options: Additional parameters forwarded to
+                ``openai.responses.create``. Pass ``model="..."`` to override
+                the constructor default.
+
+        Returns:
+            An ``openai.types.responses.Response`` object. Use
+            ``.output_text`` for the assistant text, ``.output`` for the full
+            item list, and ``.id`` for chaining with ``previous_response_id``.
+
+        Raises:
+            ValueError: If ``input`` is invalid or no model is specified.
+            openai.OpenAIError: On API or network errors.
+ """ + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + return self._client.responses.create(model=model, input=input, **kwargs) + + def create_streaming( + self, + input: Union[str, list], # noqa: A002 + callback: Callable[[Any], None], + **options: Any, + ) -> None: + """Create a model response with streaming. + + Each event object from the openai stream is delivered to *callback*. + + Args: + input: A string prompt or a list of Responses API input items. + callback: Called for each streaming event. Events are typed + ``openai`` SDK objects with a ``.type`` attribute. + **options: Additional parameters forwarded to + ``openai.responses.create``. + + Raises: + ValueError: If ``input`` is invalid or *callback* is not callable. + openai.OpenAIError: On API or network errors. + """ + if not callable(callback): + raise ValueError("callback must be a callable.") + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: + for event in stream: + callback(event) + + def stream(self, input: Union[str, list], **options: Any) -> Iterator[Any]: # noqa: A002 + """Create a model response and return an iterator of streaming events. + + This is a generator-style alternative to :meth:`create_streaming` that + yields each event instead of using a callback. + + Args: + input: A string prompt or a list of Responses API input items. + **options: Additional parameters forwarded to + ``openai.responses.create``. + + Yields: + Streaming event objects from the openai SDK. + + Raises: + ValueError: If no model is specified. + openai.OpenAIError: On API or network errors. + """ + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: + yield from stream + + def get(self, response_id: str) -> Any: + """Retrieve a stored response by ID. + + Args: + response_id: The ID of the response to retrieve. + + Returns: + An ``openai.types.responses.Response`` object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.retrieve(response_id) + + def delete(self, response_id: str) -> Any: + """Delete a stored response by ID. + + Args: + response_id: The ID of the response to delete. + + Returns: + The deletion result object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.delete(response_id) + + def cancel(self, response_id: str) -> Any: + """Cancel an in-progress response. + + Args: + response_id: The ID of the response to cancel. + + Returns: + The cancelled ``openai.types.responses.Response`` object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.cancel(response_id) + + def get_input_items(self, response_id: str) -> Any: + """Retrieve the input items for a stored response. + + Args: + response_id: The ID of the response. + + Returns: + A paginated list of input items. 
+ """ + self._validate_id(response_id, "response_id") + return self._client.responses.input_items.list(response_id) + + def close(self) -> None: + """Close the underlying OpenAI HTTP client and release resources.""" + self._client.close() + + def __enter__(self) -> "ResponsesClient": + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + # ========================================================================= + # Internal helpers + # ========================================================================= + + def _require_model(self, model: Optional[str]) -> None: + if not model or not isinstance(model, str) or not model.strip(): + raise ValueError( + "model must be specified either in the constructor via " + "create_responses_client(model_id) or as an options keyword argument." + ) + + def _validate_id(self, value: Any, param: str) -> None: + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{param} must be a non-empty string.") + if len(value) > 1024: + raise ValueError(f"{param} exceeds the maximum length of 1024 characters.") diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index 0b1a4cb45..7683fc768 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -4,9 +4,9 @@ # -------------------------------------------------------------------------- """Integration tests for /v1/responses through the local web service. -These tests intentionally use FoundryLocalManager only for SDK setup, model -lifecycle, and web-service lifecycle. Actual Responses API calls go through the -OpenAI-compatible HTTP endpoint directly. +These tests use FoundryLocalManager for SDK setup, model lifecycle, and web-service +lifecycle. Actual Responses API calls go through ResponsesClient, which is backed +by the native openai SDK pointed at the local web service. 
""" from __future__ import annotations @@ -14,7 +14,9 @@ import json import pytest -import requests + +from foundry_local_sdk import FoundryLocalManager +from foundry_local_sdk.openai import ResponsesClient from ..conftest import TEST_MODEL_ALIAS, skip_in_ci @@ -27,86 +29,6 @@ ) -def _response_text(response: dict) -> str: - text = response.get("output_text") - if isinstance(text, str) and text: - return text - - return "".join( - part.get("text", "") - for item in response.get("output", []) or [] - if item.get("type") == "message" - for part in item.get("content", []) or [] - if part.get("type") == "output_text" and isinstance(part.get("text"), str) - ) - - -def _post_response(base_url: str, body: dict) -> dict: - response = requests.post( - f"{base_url}/v1/responses", - headers={"Content-Type": "application/json"}, - json=body, - timeout=60, - ) - assert response.ok, response.text - return response.json() - - -def _post_streaming_response(base_url: str, body: dict) -> list[dict]: - response = requests.post( - f"{base_url}/v1/responses", - headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, - json={**body, "stream": True}, - stream=True, - timeout=(60, None), - ) - assert response.ok, response.text - - events: list[dict] = [] - buffer = "" - try: - for chunk in response.iter_content(chunk_size=None, decode_unicode=False): - if not chunk: - continue - text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk - buffer += text.replace("\r\n", "\n") - - while "\n\n" in buffer: - block, buffer = buffer.split("\n\n", 1) - data = _sse_data(block) - if not data: - continue - if data == "[DONE]": - return events - events.append(json.loads(data)) - finally: - response.close() - - tail = buffer.strip() - if tail: - data = _sse_data(tail) - if data and data != "[DONE]": - events.append(json.loads(data)) - return events - - -def _sse_data(block: str) -> str: - lines: list[str] = [] - for line in block.strip().split("\n"): - if line.startswith("data: "): - lines.append(line[6:]) - elif line == "data:": - lines.append("") - return "\n".join(lines).strip() - - -def _get_function_call(response: dict) -> dict | None: - for item in response.get("output", []) or []: - if item.get("type") == "function_call": - return item - return None - - def _get_weather_tool() -> dict: return { "type": "function", @@ -134,9 +56,10 @@ def _get_cached_model(catalog, model_alias: str): return model -def _run_responses_web_service(manager, model): +def _run_responses_web_service(manager: FoundryLocalManager, model): service_started = False model_loaded = False + client: ResponsesClient | None = None try: try: @@ -156,8 +79,11 @@ def _run_responses_web_service(manager, model): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - yield manager.urls[0].rstrip("/"), model.id + client = manager.create_responses_client(model.id) + yield client, model.id finally: + if client is not None: + client.close() if service_started: try: manager.stop_web_service() @@ -188,118 +114,102 @@ def responses_vision_web_service(manager, catalog): class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - base_url, model_id = responses_web_service - - response = _post_response( - base_url, - { - "model": model_id, - "input": "What is 2 + 2? 
Answer with just the number.", - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + client, model_id = responses_web_service + + response = client.create( + "What is 2 + 2? Answer with just the number.", + temperature=0, + max_output_tokens=64, + store=False, ) - assert response["object"] == "response" - assert response["status"] == "completed", response.get("error") - assert _response_text(response).strip() + assert response.status == "completed", response.error + assert response.output_text.strip(), "Expected non-empty assistant text" def test_should_stream_response_events(self, responses_web_service): - base_url, model_id = responses_web_service - - events = _post_streaming_response( - base_url, - { - "model": model_id, - "input": "Count from 1 to 3.", - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + client, model_id = responses_web_service + + event_types: list[str] = [] + client.create_streaming( + "Count from 1 to 3.", + callback=lambda e: event_types.append(getattr(e, "type", "")), + temperature=0, + max_output_tokens=64, + store=False, ) - assert any(event.get("type") == "response.created" for event in events) - assert any(event.get("type") == "response.output_text.delta" for event in events) - assert any(event.get("type") == "response.completed" for event in events) + assert "response.created" in event_types, f"Events seen: {event_types}" + assert "response.output_text.delta" in event_types, f"Events seen: {event_types}" + assert "response.completed" in event_types, f"Events seen: {event_types}" def test_should_round_trip_function_call_output(self, responses_web_service): - base_url, model_id = responses_web_service + client, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = _post_response( - base_url, - { - "model": model_id, - "input": "Use the get_weather tool and then answer with the weather.", - "tools": [weather_tool], - "tool_choice": "required", - "temperature": 0, - "max_output_tokens": 64, - "store": True, - }, + tool_response = client.create( + "Use the get_weather tool and then answer with the weather.", + tools=[weather_tool], + tool_choice="required", + temperature=0, + max_output_tokens=64, + store=True, + ) + + function_call = next( + (item for item in tool_response.output if item.type == "function_call"), + None, + ) + assert function_call is not None, ( + f"Expected a function_call item. 
Output: {tool_response.output}" ) - function_call = _get_function_call(tool_response) - - assert function_call is not None, json.dumps(tool_response.get("output", [])) - assert function_call["name"] == "get_weather" - assert isinstance(function_call["call_id"], str) - - final_response = _post_response( - base_url, - { - "model": model_id, - "previous_response_id": tool_response["id"], - "input": [ - { - "type": "function_call_output", - "call_id": function_call["call_id"], - "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), - } - ], - "tools": [weather_tool], - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + assert function_call.name == "get_weather" + + final_response = client.create( + [ + { + "type": "function_call_output", + "call_id": function_call.call_id, + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + previous_response_id=tool_response.id, + tools=[weather_tool], + temperature=0, + max_output_tokens=64, + store=False, ) - assert final_response["status"] == "completed" - assert _response_text(final_response).strip() + assert final_response.status == "completed" + assert final_response.output_text.strip(), "Expected non-empty final assistant text" class TestResponsesVisionWebService: def test_should_create_response_with_image_url(self, responses_vision_web_service): - base_url, model_id = responses_vision_web_service - - response = _post_response( - base_url, - { - "model": model_id, - "input": [ - { - "type": "message", - "role": "user", - "content": [ - { - "type": "input_text", - "text": "Describe this image in one short sentence.", - }, - { - "type": "input_image", - "image_url": VISION_IMAGE_URL, - "media_type": "image/png", - "detail": "low", - }, - ], - } - ], - "temperature": 0, - "max_output_tokens": 128, - "store": False, - }, + client, model_id = responses_vision_web_service + + response = client.create( + [ + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Describe this image in one short sentence.", + }, + { + "type": "input_image", + "image_url": VISION_IMAGE_URL, + "media_type": "image/png", + "detail": "low", + }, + ], + } + ], + temperature=0, + max_output_tokens=128, + store=False, ) - assert response["object"] == "response" - assert response["status"] == "completed", response.get("error") - assert _response_text(response).strip() + assert response.status == "completed", response.error + assert response.output_text.strip(), "Expected non-empty vision response text" From 3bde49aeca1b0551f4f211a22ea95f46002371f8 Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 22:11:29 +0200 Subject: [PATCH 10/12] revert(python): restore responses web-service sample baseline Revert the ResponsesClient wrapper and image URL coverage commits to return the PR to the text-only web-service sample/test baseline from 0808187. The sample still uses the native openai Python SDK against the local /v1 web-service endpoint. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/web-server-responses/src/app.py | 70 +++-- sdk/python/src/__init__.py | 3 +- sdk/python/src/foundry_local_manager.py | 32 -- sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 267 ----------------- .../test/openai/test_responses_web_service.py | 281 ++++++++++-------- 6 files changed, 204 insertions(+), 454 deletions(-) delete mode 100644 sdk/python/src/openai/responses_client.py diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py index e58df3e4e..6f186a2a6 100644 --- a/samples/python/web-server-responses/src/app.py +++ b/samples/python/web-server-responses/src/app.py @@ -1,11 +1,25 @@ # # import json +from typing import Any + +from openai import OpenAI from foundry_local_sdk import Configuration, FoundryLocalManager # +def get_response_text(response: Any) -> str: + if isinstance(getattr(response, "output_text", None), str): + return response.output_text + return "".join( + getattr(part, "text", "") + for item in getattr(response, "output", []) or [] + for part in getattr(item, "content", []) or [] + if getattr(part, "type", None) == "output_text" + ) + + # # Initialize the Foundry Local SDK config = Configuration(app_name="foundry_local_samples") @@ -52,29 +66,36 @@ def _ep_progress(ep_name: str, percent: float): # print("\nStarting web service...") manager.start_web_service() +base_url = manager.urls[0].rstrip("/") + "/v1" print("Web service started") -# -# -# Create a Responses API client via the SDK manager — no manual URL or API key needed. -client = manager.create_responses_client(model.id) -# +# <<<<<< OPENAI SDK USAGE >>>>>> +# Use the OpenAI SDK to call the local Foundry web service Responses API +openai = OpenAI( + base_url=base_url, + api_key="notneeded", +) +# try: print("\nTesting a non-streaming Responses call...") - response = client.create("Reply with one short sentence about local AI.") - print(f"[ASSISTANT]: {response.output_text}") + response = openai.responses.create( + model=model.id, + input="Reply with one short sentence about local AI.", + ) + print(f"[ASSISTANT]: {get_response_text(response)}") print("\nTesting a streaming Responses call...") - print("[ASSISTANT STREAM]: ", end="", flush=True) - client.create_streaming( - "Count from one to three.", - callback=lambda event: print( - getattr(event, "delta", ""), - end="", - flush=True, - ) if getattr(event, "type", None) == "response.output_text.delta" else None, + stream = openai.responses.create( + model=model.id, + input="Count from one to three.", + stream=True, ) + + print("[ASSISTANT STREAM]: ", end="", flush=True) + for event in stream: + if getattr(event, "type", None) == "response.output_text.delta": + print(getattr(event, "delta", ""), end="", flush=True) print() print("\nTesting Responses tool calling...") @@ -91,15 +112,16 @@ def _ep_progress(ep_name: str, percent: float): }, ] - tool_response = client.create( - "Use the get_weather tool and then answer with the weather.", + tool_response = openai.responses.create( + model=model.id, + input="Use the get_weather tool and then answer with the weather.", tools=tools, tool_choice="required", store=True, ) function_call = next( - (item for item in tool_response.output if item.type == "function_call"), + (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), None, ) if function_call is None: @@ -107,22 +129,24 @@ def 
_ep_progress(ep_name: str, percent: float): print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") - final_response = client.create( - [ + final_response = openai.responses.create( + model=model.id, + previous_response_id=tool_response.id, + input=[ { "type": "function_call_output", "call_id": function_call.call_id, "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), } ], - previous_response_id=tool_response.id, tools=tools, ) - print(f"[ASSISTANT FINAL]: {final_response.output_text}") + print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") + # <<<<<< END OPENAI SDK USAGE >>>>>> finally: # Tidy up - client.close() + openai.close() manager.stop_web_service() model.unload() # diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 9fecca997..14534d196 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,7 +7,6 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .version import __version__ _logger = logging.getLogger(__name__) @@ -21,4 +20,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "ResponsesClient", "ResponsesClientSettings", "__version__"] +__all__ = ["Configuration", "FoundryLocalManager", "__version__"] diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index 902db6e18..a649f8e56 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,7 +20,6 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException -from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -195,34 +194,3 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None - - def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: - """Create a :class:`ResponsesClient` for the Responses API. - - The web service must be running before calling this method. Start it - with :meth:`start_web_service` first. - - Args: - model_id: Default model ID for requests. Can be overridden - per-request via the ``model`` keyword argument on - :meth:`~ResponsesClient.create`. - - Returns: - A :class:`ResponsesClient` pointed at the running web service. - - Raises: - FoundryLocalException: If the web service is not running. - - Example:: - - manager.start_web_service() - client = manager.create_responses_client(model.id) - response = client.create("What is 2 + 2?") - print(response.output_text) - """ - if not self.urls: - raise FoundryLocalException( - "Web service is not running. Call start_web_service() before " - "creating a ResponsesClient." - ) - return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index e7016799a..2fa51a6f6 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions, audio transcription, and responses.""" +"""OpenAI-compatible clients for chat completions and audio transcription.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,7 +14,6 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,7 +24,5 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesClient", - "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py deleted file mode 100644 index 55121e320..000000000 --- a/sdk/python/src/openai/responses_client.py +++ /dev/null @@ -1,267 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""Responses API client for Foundry Local's embedded web service. - -Uses the native ``openai`` SDK to call the Responses API on Foundry Local's -OpenAI-compatible web service. Create via -``FoundryLocalManager.create_responses_client()`` or -``model.create_responses_client(base_url)``. - -Example:: - - manager.start_web_service() - client = manager.create_responses_client(model.id) - - # Non-streaming - response = client.create("Hello, world!") - print(response.output_text) - - # Streaming - client.create_streaming("Tell me a story", lambda event: print(event)) -""" - -from __future__ import annotations - -import logging -from typing import Any, Callable, Iterator, Optional, Union - -from openai import OpenAI - -logger = logging.getLogger(__name__) - - -class ResponsesClientSettings: - """Default settings applied to every request made by a :class:`ResponsesClient`. - - Per-call keyword arguments passed to :meth:`ResponsesClient.create` override - these defaults. Attribute names match the OpenAI Responses API parameters - (snake_case). 
-    """
-
-    def __init__(
-        self,
-        instructions: Optional[str] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_output_tokens: Optional[int] = None,
-        frequency_penalty: Optional[float] = None,
-        presence_penalty: Optional[float] = None,
-        tool_choice: Optional[Any] = None,
-        truncation: Optional[str] = None,
-        parallel_tool_calls: Optional[bool] = None,
-        store: Optional[bool] = None,
-        seed: Optional[int] = None,
-    ):
-        self.instructions = instructions
-        self.temperature = temperature
-        self.top_p = top_p
-        self.max_output_tokens = max_output_tokens
-        self.frequency_penalty = frequency_penalty
-        self.presence_penalty = presence_penalty
-        self.tool_choice = tool_choice
-        self.truncation = truncation
-        self.parallel_tool_calls = parallel_tool_calls
-        self.store = store
-        self.seed = seed
-
-    def _as_kwargs(self) -> dict[str, Any]:
-        """Return non-None settings as keyword arguments for the openai SDK."""
-        return {
-            k: v for k, v in {
-                "instructions": self.instructions,
-                "temperature": self.temperature,
-                "top_p": self.top_p,
-                "max_output_tokens": self.max_output_tokens,
-                "frequency_penalty": self.frequency_penalty,
-                "presence_penalty": self.presence_penalty,
-                "tool_choice": self.tool_choice,
-                "truncation": self.truncation,
-                "parallel_tool_calls": self.parallel_tool_calls,
-                "store": self.store,
-                "seed": self.seed,
-            }.items() if v is not None
-        }
-
-
-class ResponsesClient:
-    """Client for the OpenAI Responses API served by Foundry Local.
-
-    Backed by the native ``openai`` SDK pointed at the local web service.
-    Create via :meth:`FoundryLocalManager.create_responses_client` or
-    :meth:`model.create_responses_client`.
-
-    Args:
-        base_url: Base URL of the Foundry Local web service (e.g.
-            ``"http://127.0.0.1:5273"``). Do **not** include ``/v1`` — it is
-            appended automatically. Trailing slashes are stripped.
-        model_id: Default model ID. Can be overridden per-request via the
-            ``model`` keyword argument to :meth:`create`.
-    """
-
-    def __init__(self, base_url: str, model_id: Optional[str] = None):
-        if not base_url or not isinstance(base_url, str) or not base_url.strip():
-            raise ValueError("base_url must be a non-empty string.")
-        openai_base = base_url.rstrip("/") + "/v1"
-        self._client = OpenAI(base_url=openai_base, api_key="notneeded")
-        self._model_id = model_id
-        self.settings = ResponsesClientSettings()
-
-    # =========================================================================
-    # Public API
-    # =========================================================================
-
-    def create(self, input: Union[str, list], **options: Any) -> Any:  # noqa: A002
-        """Create a model response (non-streaming).
-
-        Args:
-            input: A string prompt or a list of Responses API input items.
-                Each dict item must have a ``"type"`` field (e.g.
-                ``{"type": "message", "role": "user", "content": [...]}``).
-            **options: Additional parameters forwarded to
-                ``openai.responses.create``. Pass ``model="..."`` to override
-                the constructor default.
-
-        Returns:
-            An ``openai.types.responses.Response`` object. Use
-            ``.output_text`` for the assistant text, ``.output`` for the full
-            item list, and ``.id`` for chaining with ``previous_response_id``.
-
-        Raises:
-            ValueError: If ``input`` is invalid or no model is specified.
-            openai.OpenAIError: On API or network errors.
- """ - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - return self._client.responses.create(model=model, input=input, **kwargs) - - def create_streaming( - self, - input: Union[str, list], # noqa: A002 - callback: Callable[[Any], None], - **options: Any, - ) -> None: - """Create a model response with streaming. - - Each event object from the openai stream is delivered to *callback*. - - Args: - input: A string prompt or a list of Responses API input items. - callback: Called for each streaming event. Events are typed - ``openai`` SDK objects with a ``.type`` attribute. - **options: Additional parameters forwarded to - ``openai.responses.create``. - - Raises: - ValueError: If ``input`` is invalid or *callback* is not callable. - openai.OpenAIError: On API or network errors. - """ - if not callable(callback): - raise ValueError("callback must be a callable.") - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: - for event in stream: - callback(event) - - def stream(self, input: Union[str, list], **options: Any) -> Iterator[Any]: # noqa: A002 - """Create a model response and return an iterator of streaming events. - - This is a generator-style alternative to :meth:`create_streaming` that - yields each event instead of using a callback. - - Args: - input: A string prompt or a list of Responses API input items. - **options: Additional parameters forwarded to - ``openai.responses.create``. - - Yields: - Streaming event objects from the openai SDK. - - Raises: - ValueError: If no model is specified. - openai.OpenAIError: On API or network errors. - """ - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: - yield from stream - - def get(self, response_id: str) -> Any: - """Retrieve a stored response by ID. - - Args: - response_id: The ID of the response to retrieve. - - Returns: - An ``openai.types.responses.Response`` object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.retrieve(response_id) - - def delete(self, response_id: str) -> Any: - """Delete a stored response by ID. - - Args: - response_id: The ID of the response to delete. - - Returns: - The deletion result object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.delete(response_id) - - def cancel(self, response_id: str) -> Any: - """Cancel an in-progress response. - - Args: - response_id: The ID of the response to cancel. - - Returns: - The cancelled ``openai.types.responses.Response`` object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.cancel(response_id) - - def get_input_items(self, response_id: str) -> Any: - """Retrieve the input items for a stored response. - - Args: - response_id: The ID of the response. - - Returns: - A paginated list of input items. 
- """ - self._validate_id(response_id, "response_id") - return self._client.responses.input_items.list(response_id) - - def close(self) -> None: - """Close the underlying OpenAI HTTP client and release resources.""" - self._client.close() - - def __enter__(self) -> "ResponsesClient": - return self - - def __exit__(self, *args: Any) -> None: - self.close() - - # ========================================================================= - # Internal helpers - # ========================================================================= - - def _require_model(self, model: Optional[str]) -> None: - if not model or not isinstance(model, str) or not model.strip(): - raise ValueError( - "model must be specified either in the constructor via " - "create_responses_client(model_id) or as an options keyword argument." - ) - - def _validate_id(self, value: Any, param: str) -> None: - if not isinstance(value, str) or not value.strip(): - raise ValueError(f"{param} must be a non-empty string.") - if len(value) > 1024: - raise ValueError(f"{param} exceeds the maximum length of 1024 characters.") diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index 7683fc768..e323a892e 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -4,9 +4,9 @@ # -------------------------------------------------------------------------- """Integration tests for /v1/responses through the local web service. -These tests use FoundryLocalManager for SDK setup, model lifecycle, and web-service -lifecycle. Actual Responses API calls go through ResponsesClient, which is backed -by the native openai SDK pointed at the local web service. +These tests intentionally use FoundryLocalManager only for SDK setup, model +lifecycle, and web-service lifecycle. Actual Responses API calls go through the +OpenAI-compatible HTTP endpoint directly. 
""" from __future__ import annotations @@ -14,19 +14,92 @@ import json import pytest - -from foundry_local_sdk import FoundryLocalManager -from foundry_local_sdk.openai import ResponsesClient +import requests from ..conftest import TEST_MODEL_ALIAS, skip_in_ci pytestmark = skip_in_ci -VISION_MODEL_ALIAS = "qwen3-vl-2b-instruct" -VISION_IMAGE_URL = ( - "https://raw.githubusercontent.com/microsoft/fluentui-emoji/main/assets/Camera/3D/camera_3d.png" -) + +def _response_text(response: dict) -> str: + text = response.get("output_text") + if isinstance(text, str) and text: + return text + + return "".join( + part.get("text", "") + for item in response.get("output", []) or [] + if item.get("type") == "message" + for part in item.get("content", []) or [] + if part.get("type") == "output_text" and isinstance(part.get("text"), str) + ) + + +def _post_response(base_url: str, body: dict) -> dict: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json"}, + json=body, + timeout=60, + ) + assert response.ok, response.text + return response.json() + + +def _post_streaming_response(base_url: str, body: dict) -> list[dict]: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + json={**body, "stream": True}, + stream=True, + timeout=(60, None), + ) + assert response.ok, response.text + + events: list[dict] = [] + buffer = "" + try: + for chunk in response.iter_content(chunk_size=None, decode_unicode=False): + if not chunk: + continue + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") + + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) + data = _sse_data(block) + if not data: + continue + if data == "[DONE]": + return events + events.append(json.loads(data)) + finally: + response.close() + + tail = buffer.strip() + if tail: + data = _sse_data(tail) + if data and data != "[DONE]": + events.append(json.loads(data)) + return events + + +def _sse_data(block: str) -> str: + lines: list[str] = [] + for line in block.strip().split("\n"): + if line.startswith("data: "): + lines.append(line[6:]) + elif line == "data:": + lines.append("") + return "\n".join(lines).strip() + + +def _get_function_call(response: dict) -> dict | None: + for item in response.get("output", []) or []: + if item.get("type") == "function_call": + return item + return None def _get_weather_tool() -> dict: @@ -42,24 +115,20 @@ def _get_weather_tool() -> dict: } -def _get_cached_model(catalog, model_alias: str): +@pytest.fixture(scope="module") +def responses_web_service(manager, catalog): cached = catalog.get_cached_models() - cached_variant = next((m for m in cached if m.alias == model_alias), None) + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) if cached_variant is None: - pytest.skip(f"{model_alias} must be cached to run Responses web-service tests") + pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") - model = catalog.get_model(model_alias) + model = catalog.get_model(TEST_MODEL_ALIAS) if model is None: - pytest.skip(f"{model_alias} was not found in the catalog") + pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") model.select_variant(cached_variant) - return model - - -def _run_responses_web_service(manager: FoundryLocalManager, model): service_started = False model_loaded = False - client: ResponsesClient | None = None try: try: @@ 
-79,11 +148,8 @@ def _run_responses_web_service(manager: FoundryLocalManager, model): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - client = manager.create_responses_client(model.id) - yield client, model.id + yield manager.urls[0].rstrip("/"), model.id finally: - if client is not None: - client.close() if service_started: try: manager.stop_web_service() @@ -96,120 +162,83 @@ def _run_responses_web_service(manager: FoundryLocalManager, model): pass -@pytest.fixture(scope="class") -def responses_web_service(manager, catalog): - model = _get_cached_model(catalog, TEST_MODEL_ALIAS) - yield from _run_responses_web_service(manager, model) - - -@pytest.fixture(scope="class") -def responses_vision_web_service(manager, catalog): - model = _get_cached_model(catalog, VISION_MODEL_ALIAS) - input_modalities = model.input_modalities or "" - if "image" not in input_modalities.split(","): - pytest.skip(f"{VISION_MODEL_ALIAS} does not advertise image input support") - - yield from _run_responses_web_service(manager, model) - - class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - client, model_id = responses_web_service - - response = client.create( - "What is 2 + 2? Answer with just the number.", - temperature=0, - max_output_tokens=64, - store=False, + base_url, model_id = responses_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": "What is 2 + 2? Answer with just the number.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert response.status == "completed", response.error - assert response.output_text.strip(), "Expected non-empty assistant text" + assert response["object"] == "response" + assert response["status"] == "completed" + assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): - client, model_id = responses_web_service - - event_types: list[str] = [] - client.create_streaming( - "Count from 1 to 3.", - callback=lambda e: event_types.append(getattr(e, "type", "")), - temperature=0, - max_output_tokens=64, - store=False, + base_url, model_id = responses_web_service + + events = _post_streaming_response( + base_url, + { + "model": model_id, + "input": "Count from 1 to 3.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert "response.created" in event_types, f"Events seen: {event_types}" - assert "response.output_text.delta" in event_types, f"Events seen: {event_types}" - assert "response.completed" in event_types, f"Events seen: {event_types}" + assert any(event.get("type") == "response.created" for event in events) + assert any(event.get("type") == "response.output_text.delta" for event in events) + assert any(event.get("type") == "response.completed" for event in events) def test_should_round_trip_function_call_output(self, responses_web_service): - client, model_id = responses_web_service + base_url, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = client.create( - "Use the get_weather tool and then answer with the weather.", - tools=[weather_tool], - tool_choice="required", - temperature=0, - max_output_tokens=64, - store=True, - ) - - function_call = next( - (item for item in tool_response.output if item.type == "function_call"), - None, + tool_response = _post_response( + base_url, + { + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": [weather_tool], + 
"tool_choice": "required", + "temperature": 0, + "max_output_tokens": 64, + "store": True, + }, ) - assert function_call is not None, ( - f"Expected a function_call item. Output: {tool_response.output}" - ) - assert function_call.name == "get_weather" - - final_response = client.create( - [ - { - "type": "function_call_output", - "call_id": function_call.call_id, - "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), - } - ], - previous_response_id=tool_response.id, - tools=[weather_tool], - temperature=0, - max_output_tokens=64, - store=False, - ) - - assert final_response.status == "completed" - assert final_response.output_text.strip(), "Expected non-empty final assistant text" - - -class TestResponsesVisionWebService: - def test_should_create_response_with_image_url(self, responses_vision_web_service): - client, model_id = responses_vision_web_service - - response = client.create( - [ - { - "type": "message", - "role": "user", - "content": [ - { - "type": "input_text", - "text": "Describe this image in one short sentence.", - }, - { - "type": "input_image", - "image_url": VISION_IMAGE_URL, - "media_type": "image/png", - "detail": "low", - }, - ], - } - ], - temperature=0, - max_output_tokens=128, - store=False, + function_call = _get_function_call(tool_response) + + assert function_call is not None, json.dumps(tool_response.get("output", [])) + assert function_call["name"] == "get_weather" + assert isinstance(function_call["call_id"], str) + + final_response = _post_response( + base_url, + { + "model": model_id, + "previous_response_id": tool_response["id"], + "input": [ + { + "type": "function_call_output", + "call_id": function_call["call_id"], + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + "tools": [weather_tool], + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert response.status == "completed", response.error - assert response.output_text.strip(), "Expected non-empty vision response text" + assert final_response["status"] == "completed" + assert _response_text(final_response).strip() From 84861db45f98d1265d0541f4eaf5e27836dd3931 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Tue, 5 May 2026 17:14:40 -0700 Subject: [PATCH 11/12] Add ResponseAPI vision sample --- .../web-server-responses-vision/README.md | 53 ++++++++ .../requirements.txt | 3 + .../web-server-responses-vision/src/app.py | 115 ++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 samples/python/web-server-responses-vision/README.md create mode 100644 samples/python/web-server-responses-vision/requirements.txt create mode 100644 samples/python/web-server-responses-vision/src/app.py diff --git a/samples/python/web-server-responses-vision/README.md b/samples/python/web-server-responses-vision/README.md new file mode 100644 index 000000000..fc7fff702 --- /dev/null +++ b/samples/python/web-server-responses-vision/README.md @@ -0,0 +1,53 @@ +# Foundry Local Python Vision Sample (Responses API) + +This sample demonstrates vision (image understanding) capabilities using the Foundry Local web service and the OpenAI Responses API. 
+
+It demonstrates:
+
+- Streaming a vision response with a local image via the Responses API
+- Streaming a text-only response (when no image is provided)
+
+## What gets installed
+
+Install the sample dependencies from `requirements.txt`:
+
+```bash
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk`
+- `openai`
+- `Pillow` (for image resizing)
+
+The sample downloads the specified model the first time it runs (skips if already cached).
+
+## Run the sample
+
+From this directory:
+
+```bash
+python -m venv .venv
+.\.venv\Scripts\activate
+pip install -r requirements.txt
+python src\app.py <model_alias> [image_path]
+```
+
+Examples:
+
+```bash
+# Vision with an image
+python src\app.py qwen3.5-0.8b path\to\image.jpg
+
+# Text only
+python src\app.py qwen3.5-0.8b
+```
+
+On macOS or Linux, activate the virtual environment with:
+
+```bash
+source .venv/bin/activate
+```
+
+The sample starts the local web service, sends vision requests via the Responses API to `http://localhost:<port>/v1`, prints the model output, and then stops the web service.
diff --git a/samples/python/web-server-responses-vision/requirements.txt b/samples/python/web-server-responses-vision/requirements.txt
new file mode 100644
index 000000000..d948ff7bb
--- /dev/null
+++ b/samples/python/web-server-responses-vision/requirements.txt
@@ -0,0 +1,3 @@
+foundry-local-sdk
+openai
+Pillow
diff --git a/samples/python/web-server-responses-vision/src/app.py b/samples/python/web-server-responses-vision/src/app.py
new file mode 100644
index 000000000..02a70889e
--- /dev/null
+++ b/samples/python/web-server-responses-vision/src/app.py
@@ -0,0 +1,115 @@
+#
+#
+import base64
+import io
+import sys
+
+from PIL import Image
+from openai import OpenAI
+
+from foundry_local_sdk import Configuration, FoundryLocalManager
+#
+
+if len(sys.argv) < 2:
+    print("Usage: python src/app.py <model_alias> [image_path]")
+    print(" Example: python src/app.py qwen3.5-0.8b path/to/image.jpg")
+    print(" Text only: python src/app.py qwen3.5-0.8b")
+    sys.exit(1)
+
+model_alias = sys.argv[1]
+image_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+
+def resize_and_encode(path, max_dim=512):
+    """Load and resize a local image, returning (base64_str, media_type)."""
+    img = Image.open(path)
+    if max(img.size) > max_dim:
+        img.thumbnail((max_dim, max_dim))
+        print(f" (resized to {img.size[0]}x{img.size[1]})")
+    if img.mode != "RGB":
+        img = img.convert("RGB")  # JPEG cannot encode alpha or palette modes
+    buf = io.BytesIO()
+    img.save(buf, format="JPEG")
+    return base64.b64encode(buf.getvalue()).decode(), "image/jpeg"
+
+
+#
+config = Configuration(app_name="foundry_local_samples")
+FoundryLocalManager.initialize(config)
+manager = FoundryLocalManager.instance
+#
+
+#
+model = manager.catalog.get_model(model_alias)
+if model is None:
+    available = [m.alias for m in manager.catalog.list_models()]
+    print(f"\nModel '{model_alias}' not found in catalog.")
+    print(f"Available models: {available}")
+    sys.exit(1)
+
+if not model.is_cached:
+    print(f"\nDownloading model {model_alias}...")
+    model.download(
+        lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True)
+    )
+    print("\nModel downloaded")
+
+print("\nLoading model...")
+model.load()
+print("Model loaded")
+#
+
+#
+print("\nStarting web service...")
+manager.start_web_service()
+base_url = manager.urls[0].rstrip("/") + "/v1"
+print("Web service started")
+
+# <<<<<< OPENAI SDK USAGE >>>>>>
+# Use the OpenAI SDK to call the local Foundry web service Responses API
+openai = OpenAI(base_url=base_url, api_key="notneeded")
+#
+
+#
+if image_path:
+    print(f"\nPreparing image: {image_path}")
+    image_b64, media_type = resize_and_encode(image_path)
+
+    vision_input = [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [
+                {"type": "input_text", "text": "Describe this image."},
+                {
+                    "type": "input_image",
+                    "image_data": image_b64,
+                    "media_type": media_type,
+                },
+            ],
+        }
+    ]
+
+    print("\nStreaming vision response...")
+    # The SDK requires an "input" argument; the real vision input is sent via extra_body.
+    stream = openai.responses.create(
+        model=model.id,
+        input="placeholder",
+        extra_body={"input": vision_input},
+        stream=True,
+    )
+else:
+    print("\nStreaming text response...")
+    stream = openai.responses.create(
+        model=model.id,
+        input="Reply with one short sentence about local AI.",
+        stream=True,
+    )
+
+print("[ASSISTANT]: ", end="", flush=True)
+for event in stream:
+    if getattr(event, "type", None) == "response.output_text.delta":
+        print(getattr(event, "delta", ""), end="", flush=True)
+print()
+#
+
+openai.close()
+manager.stop_web_service()
+model.unload()

From 6d73032772caaf21baf53663f99ea81104825319 Mon Sep 17 00:00:00 2001
From: Akshay Sonawane
Date: Tue, 5 May 2026 17:24:48 -0700
Subject: [PATCH 12/12] Add default image

---
 .../web-server-responses-vision/README.md     | 16 ++---
 .../web-server-responses-vision/src/app.py    | 66 ++++++++----------
 .../src/test_image.jpg                        | Bin 0 -> 6828 bytes
 3 files changed, 33 insertions(+), 49 deletions(-)
 create mode 100644 samples/python/web-server-responses-vision/src/test_image.jpg

diff --git a/samples/python/web-server-responses-vision/README.md b/samples/python/web-server-responses-vision/README.md
index fc7fff702..75e16950a 100644
--- a/samples/python/web-server-responses-vision/README.md
+++ b/samples/python/web-server-responses-vision/README.md
@@ -4,8 +4,8 @@ This sample demonstrates vision (image understanding) capabilities using the Fou
 
 It demonstrates:
 
-- Streaming a vision response with a local image via the Responses API
-- Streaming a text-only response (when no image is provided)
+- Streaming a vision response via the Responses API
+- Using a default test image (`src/test_image.jpg`) when no image path is provided
 
 ## What gets installed
 
@@ -31,19 +31,11 @@ From this directory:
 python -m venv .venv
 .\.venv\Scripts\activate
 pip install -r requirements.txt
-python src\app.py <model_alias> [image_path]
-```
-
-Examples:
-
-```bash
-# Vision with an image
-python src\app.py qwen3.5-0.8b path\to\image.jpg
-
-# Text only
 python src\app.py qwen3.5-0.8b
 ```
 
+You can also pass a custom image path as the second argument.
+
 
 On macOS or Linux, activate the virtual environment with:
 
 ```bash
diff --git a/samples/python/web-server-responses-vision/src/app.py b/samples/python/web-server-responses-vision/src/app.py
index 02a70889e..d77170a89 100644
--- a/samples/python/web-server-responses-vision/src/app.py
+++ b/samples/python/web-server-responses-vision/src/app.py
@@ -9,16 +9,16 @@
 
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
+import os
 
 if len(sys.argv) < 2:
     print("Usage: python src/app.py <model_alias> [image_path]")
-    print(" Example: python src/app.py qwen3.5-0.8b path/to/image.jpg")
-    print(" Text only: python src/app.py qwen3.5-0.8b")
+    print(" Example: python src/app.py qwen3.5-0.8b")
     sys.exit(1)
 
 model_alias = sys.argv[1]
-image_path = sys.argv[2] if len(sys.argv) > 2 else None
-
+default_image = os.path.join(os.path.dirname(__file__), "test_image.jpg")
+image_path = sys.argv[2] if len(sys.argv) > 2 else default_image
 
 def resize_and_encode(path, max_dim=512):
     """Load and resize a local image, returning (base64_str, media_type)."""
@@ -69,39 +69,31 @@ def resize_and_encode(path, max_dim=512):
 #
 
 #
-if image_path:
-    print(f"\nPreparing image: {image_path}")
-    image_b64, media_type = resize_and_encode(image_path)
-
-    vision_input = [
-        {
-            "type": "message",
-            "role": "user",
-            "content": [
-                {"type": "input_text", "text": "Describe this image."},
-                {
-                    "type": "input_image",
-                    "image_data": image_b64,
-                    "media_type": media_type,
-                },
-            ],
-        }
-    ]
-
-    print("\nStreaming vision response...")
-    # The SDK requires an "input" argument; the real vision input is sent via extra_body.
-    stream = openai.responses.create(
-        model=model.id,
-        input="placeholder",
-        extra_body={"input": vision_input},
-        stream=True,
-    )
-else:
-    print("\nStreaming text response...")
-    stream = openai.responses.create(
-        model=model.id,
-        input="Reply with one short sentence about local AI.",
-        stream=True,
-    )
+print(f"\nPreparing image: {image_path}")
+image_b64, media_type = resize_and_encode(image_path)
+
+vision_input = [
+    {
+        "type": "message",
+        "role": "user",
+        "content": [
+            {"type": "input_text", "text": "Describe this image."},
+            {
+                "type": "input_image",
+                "image_data": image_b64,
+                "media_type": media_type,
+            },
+        ],
+    }
+]
+
+print("\nStreaming vision response...")
+# The SDK requires an "input" argument; the real vision input is sent via extra_body.
+stream = openai.responses.create(
+    model=model.id,
+    input="placeholder",
+    extra_body={"input": vision_input},
+    stream=True,
+)
 
 print("[ASSISTANT]: ", end="", flush=True)
 for event in stream:
diff --git a/samples/python/web-server-responses-vision/src/test_image.jpg b/samples/python/web-server-responses-vision/src/test_image.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..73a4e8004db0fd82a2913bd14ad8b97672097ac5
GIT binary patch
literal 6828
[6828 bytes of base64-encoded JPEG data omitted]