From 12e30b943660a43ba8ab0e137d0fd188307edf9d Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 12:45:26 -0400 Subject: [PATCH 01/12] feat(sdk/python): add Responses API client Implements the OpenAI Responses API client for the Foundry Local Python SDK. New files: - src/openai/responses_types.py: full type system (content parts, items, tools, config, ResponseObject with output_text property), all streaming event dataclasses, parse_streaming_event factory, and _to_dict serializer - src/openai/responses_client.py: HTTP-only sync client (ResponsesClient, ResponsesClientSettings, ResponsesAPIError) with create, create_streaming (SSE generator), get, delete, cancel, get_input_items, list - examples/responses.py: 5 end-to-end scenarios (basic, streaming, multi-turn, tool calling, vision) - test/openai/test_responses_client.py: 56 unit tests (mocked HTTP) - test/openai/test_responses_integration.py: 14 integration tests gated on FOUNDRY_INTEGRATION_TESTS=1 Modified files: - src/foundry_local_manager.py: create_responses_client factory method - src/imodel.py: abstract create_responses_client - src/detail/model.py: delegating create_responses_client - src/detail/model_variant.py: concrete create_responses_client - src/openai/__init__.py: export ResponsesClient, ResponsesClientSettings, ResponsesAPIError - src/__init__.py: export all public Responses API types Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/examples/responses.py | 154 +++ sdk/python/requirements.txt | 11 +- sdk/python/src/__init__.py | 62 +- sdk/python/src/detail/model.py | 5 + sdk/python/src/detail/model_variant.py | 9 + sdk/python/src/foundry_local_manager.py | 23 + sdk/python/src/imodel.py | 16 + sdk/python/src/openai/__init__.py | 13 +- sdk/python/src/openai/responses_client.py | 413 ++++++++ sdk/python/src/openai/responses_types.py | 885 ++++++++++++++++++ .../test/openai/test_responses_client.py | 603 ++++++++++++ .../test/openai/test_responses_integration.py | 288 ++++++ 12 files changed, 2473 insertions(+), 9 deletions(-) create mode 100644 sdk/python/examples/responses.py create mode 100644 sdk/python/src/openai/responses_client.py create mode 100644 sdk/python/src/openai/responses_types.py create mode 100644 sdk/python/test/openai/test_responses_client.py create mode 100644 sdk/python/test/openai/test_responses_integration.py diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py new file mode 100644 index 000000000..ce810e814 --- /dev/null +++ b/sdk/python/examples/responses.py @@ -0,0 +1,154 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""End-to-end example for the OpenAI Responses API client. + +Run with:: + + python examples/responses.py + +Requires a loaded model and a started web service. 
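+
+The examples assume the ``phi-4-mini`` alias is available in your catalog;
+change ``MODEL_ALIAS`` below to target a different model.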
+""" + +from __future__ import annotations + +import json + +from foundry_local_sdk import ( + Configuration, + FoundryLocalManager, + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, +) + +MODEL_ALIAS = "phi-4-mini" + + +def setup(): + config = Configuration(app_name="ResponsesExample") + FoundryLocalManager.initialize(config) + mgr = FoundryLocalManager.instance + + mgr.download_and_register_eps() + + model = mgr.catalog.get_model(MODEL_ALIAS) + if model is None: + raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") + if not model.is_cached: + print(f"Downloading {MODEL_ALIAS}...") + model.download(progress_callback=lambda p: print(f" {p:.1f}%", end="\r")) + print() + print(f"Loading {model.alias}...", end="") + model.load() + print("loaded!") + mgr.start_web_service() + + client = mgr.create_responses_client(model.id) + return mgr, model, client + + +def basic_create(client): + print("\n=== 1. Basic create ===") + resp = client.create("What is 2 + 2? Answer in one word.") + print(f"status={resp.status} text={resp.output_text!r}") + + +def streaming(client): + print("\n=== 2. Streaming ===") + print("assistant: ", end="", flush=True) + for event in client.create_streaming("Count from 1 to 5, separated by spaces."): + if event.type == "response.output_text.delta": + print(event.delta, end="", flush=True) + elif event.type == "response.completed": + print(f"\n(completed, {event.response.usage.total_tokens} tokens)") + + +def multi_turn(client): + print("\n=== 3. Multi-turn ===") + first = client.create("My favorite color is green. Remember that.", store=True) + print(f"first id={first.id!r}") + second = client.create( + "What is my favorite color?", + previous_response_id=first.id, + ) + print(f"second: {second.output_text!r}") + + +def tool_calling(client): + print("\n=== 4. Tool calling ===") + tools = [ + FunctionToolDefinition( + name="multiply_numbers", + description="Multiply two integers together.", + parameters={ + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + "required": ["a", "b"], + }, + ) + ] + resp = client.create("What is 7 times 6?", tools=tools) + + # Find a function_call item in the output (if the model produced one). + for item in resp.output: + if getattr(item, "type", None) == "function_call": + print(f"call {item.name}({item.arguments})") + args = json.loads(item.arguments) + answer = args["a"] * args["b"] + follow = client.create( + [ + MessageItem(role="user", content="What is 7 times 6?"), + item, + # The function_call_output is sent back keyed by call_id + {"type": "function_call_output", "call_id": item.call_id, "output": str(answer)}, + ], + tools=tools, + ) + print(f"final: {follow.output_text!r}") + return + print(f"no tool call — got text: {resp.output_text!r}") + + +def vision(client): + print("\n=== 5. Vision ===") + # Requires a vision-capable model. Replace with a real PNG to see real output. 
+ tiny_png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + msg = MessageItem( + role="user", + content=[ + InputTextContent(text="Describe this image in one sentence."), + InputImageContent.from_bytes(tiny_png, "image/png"), + ], + ) + try: + resp = client.create([msg]) + print(f"vision response: {resp.output_text!r}") + except Exception as e: + print(f"(skipped — model may not support vision: {e})") + + +def main(): + mgr, model, client = setup() + try: + basic_create(client) + streaming(client) + multi_turn(client) + tool_calling(client) + vision(client) + finally: + mgr.stop_web_service() + model.unload() + + +if __name__ == "__main__": + main() diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 666a37211..25d05c298 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -1,9 +1,8 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 -# Standard native binary packages from the ORT-Nightly PyPI feed. -foundry-local-core==1.0.0rc1 -onnxruntime-core==1.24.4; sys_platform != "linux" -onnxruntime-gpu==1.24.4; sys_platform == "linux" -onnxruntime-genai-core==0.13.1; sys_platform != "linux" -onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" +foundry-local-core==1.0.0 +onnxruntime-gpu==1.24.4; platform_system == "Linux" +onnxruntime-core==1.24.4; platform_system != "Linux" +onnxruntime-genai-cuda==0.13.1; platform_system == "Linux" +onnxruntime-genai-core==0.13.1; platform_system != "Linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 14534d196..273e3f9c4 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,6 +7,34 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager +from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings +from .openai.responses_types import ( + ContentPart, + DeleteResponseResult, + FunctionCallItem, + FunctionCallOutputItem, + FunctionToolDefinition, + InputFileContent, + InputImageContent, + InputItemsListResponse, + InputTextContent, + ItemReference, + ListResponsesResult, + MessageItem, + OutputTextContent, + ReasoningConfig, + ReasoningItem, + RefusalContent, + ResponseError, + ResponseInputItem, + ResponseObject, + ResponseOutputItem, + ResponseUsage, + StreamingEvent, + TextConfig, + TextFormat, + parse_streaming_event, +) from .version import __version__ _logger = logging.getLogger(__name__) @@ -20,4 +48,36 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "__version__"] +__all__ = [ + "Configuration", + "ContentPart", + "DeleteResponseResult", + "FoundryLocalManager", + "FunctionCallItem", + "FunctionCallOutputItem", + "FunctionToolDefinition", + "InputFileContent", + "InputImageContent", + "InputItemsListResponse", + "InputTextContent", + "ItemReference", + "ListResponsesResult", + "MessageItem", + "OutputTextContent", + "ReasoningConfig", + "ReasoningItem", + "RefusalContent", + "ResponseError", + "ResponseInputItem", + "ResponseObject", + "ResponseOutputItem", + "ResponseUsage", + "ResponsesAPIError", + "ResponsesClient", + "ResponsesClientSettings", + "StreamingEvent", + "TextConfig", + "TextFormat", + "__version__", + "parse_streaming_event", +] diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index 6d60b7a2f..01dcfb471 100644 --- a/sdk/python/src/detail/model.py +++ 
b/sdk/python/src/detail/model.py @@ -11,6 +11,7 @@ from ..openai.chat_client import ChatClient from ..openai.audio_client import AudioClient from ..openai.embedding_client import EmbeddingClient +from ..openai.responses_client import ResponsesClient from .model_variant import ModelVariant from ..exception import FoundryLocalException from .core_interop import CoreInterop @@ -146,3 +147,7 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() + + def create_responses_client(self, base_url: str) -> "ResponsesClient": + """Create a Responses API client for the currently selected variant.""" + return self._selected_variant.create_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 76efb05cd..2e19662d5 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -17,6 +17,7 @@ from ..openai.audio_client import AudioClient from ..openai.chat_client import ChatClient from ..openai.embedding_client import EmbeddingClient +from ..openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -175,3 +176,11 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) + + def create_responses_client(self, base_url: str) -> ResponsesClient: + """Create a Responses API client for this variant. + + :param base_url: Base URL of the running Foundry Local web service + (e.g. ``manager.urls[0]``). + """ + return ResponsesClient(base_url, self.id) diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index a649f8e56..b891d1b17 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,6 +20,7 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException +from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -194,3 +195,25 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None + + def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: + """Create a :class:`ResponsesClient` bound to the running web service. + + The Responses API is HTTP-only, so the web service must be started + before calling this. Use :meth:`start_web_service` first. + + Args: + model_id: Optional default model ID baked into the client. May also + be supplied per-call via ``options['model']``. + + Returns: + A new :class:`ResponsesClient`. + + Raises: + FoundryLocalException: If the web service has not been started. + """ + if not self.urls: + raise FoundryLocalException( + "Web service is not running. Call start_web_service() first." 
+ ) + return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index f723e514a..6bc0d3638 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -10,6 +10,7 @@ from .openai.chat_client import ChatClient from .openai.audio_client import AudioClient from .openai.embedding_client import EmbeddingClient +from .openai.responses_client import ResponsesClient from .detail.model_data_types import ModelInfo class IModel(ABC): @@ -136,6 +137,21 @@ def get_embedding_client(self) -> 'EmbeddingClient': """ pass + @abstractmethod + def create_responses_client(self, base_url: str) -> 'ResponsesClient': + """ + Create an OpenAI Responses API client bound to the running web service. + + Unlike the other clients, the Responses API is HTTP-only and requires + the Foundry Local web service to be started. Pass the base URL + returned by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``), + or use :meth:`FoundryLocalManager.create_responses_client` directly. + + :param base_url: Base URL of the running Foundry Local web service. + :return: ResponsesClient instance for this variant's model id. + """ + pass + @property @abstractmethod def variants(self) -> List['IModel']: diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index bec5d68bd..f8c8cefef 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,10 +2,19 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions and audio transcription.""" +"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient from .embedding_client import EmbeddingClient +from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError -__all__ = ["AudioClient", "ChatClient", "ChatClientSettings", "EmbeddingClient"] +__all__ = [ + "AudioClient", + "ChatClient", + "ChatClientSettings", + "EmbeddingClient", + "ResponsesAPIError", + "ResponsesClient", + "ResponsesClientSettings", +] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py new file mode 100644 index 000000000..a0d9a7777 --- /dev/null +++ b/sdk/python/src/openai/responses_client.py @@ -0,0 +1,413 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""OpenAI Responses API client — HTTP-only against the Foundry Local web service. + +Unlike ``ChatClient`` / ``AudioClient`` which go through the native Core via FFI, +the Responses API is served exclusively by the embedded web service. The client +therefore uses ``requests`` for non-streaming calls and parses Server-Sent Events +inline for streaming. 
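+
+Transport and HTTP errors surface as :class:`ResponsesAPIError`, which carries
+the HTTP status code and response body when available.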
+ +Usage +----- +:: + + manager.start_web_service() + client = manager.create_responses_client("phi-4-mini") + + # Non-streaming + resp = client.create("What is 2+2?") + print(resp.output_text) + + # Streaming + for event in client.create_streaming("Tell me a story"): + if event.type == "response.output_text.delta": + print(event.delta, end="", flush=True) +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import is_dataclass +from typing import Any, Dict, Generator, List, Optional, Union +from urllib.parse import quote + +import requests + +from .responses_types import ( + DeleteResponseResult, + InputItemsListResponse, + ListResponsesResult, + ReasoningConfig, + ResponseObject, + StreamingEvent, + TextConfig, + _parse_delete_result, + _parse_input_items_list, + _parse_list_responses, + _parse_response_object, + _to_dict, + parse_streaming_event, +) + +logger = logging.getLogger(__name__) + +_MAX_ID_LEN = 1024 + + +class ResponsesClientSettings: + """Tunable settings applied to every Responses API request. + + Field names follow the OpenAI snake_case convention; serialization omits + any ``None`` values so the server applies its own defaults. + """ + + def __init__(self) -> None: + self.instructions: Optional[str] = None + self.temperature: Optional[float] = None + self.top_p: Optional[float] = None + self.max_output_tokens: Optional[int] = None + self.frequency_penalty: Optional[float] = None + self.presence_penalty: Optional[float] = None + self.tool_choice: Optional[Any] = None + self.truncation: Optional[str] = None + self.parallel_tool_calls: Optional[bool] = None + self.store: Optional[bool] = True # SDK default — matches OpenAI convention. + self.metadata: Optional[Dict[str, str]] = None + self.reasoning: Optional[ReasoningConfig] = None + self.text: Optional[TextConfig] = None + self.seed: Optional[int] = None + + def _serialize(self) -> Dict[str, Any]: + raw: Dict[str, Any] = { + "instructions": self.instructions, + "temperature": self.temperature, + "top_p": self.top_p, + "max_output_tokens": self.max_output_tokens, + "frequency_penalty": self.frequency_penalty, + "presence_penalty": self.presence_penalty, + "tool_choice": _to_dict(self.tool_choice) if is_dataclass(self.tool_choice) else self.tool_choice, + "truncation": self.truncation, + "parallel_tool_calls": self.parallel_tool_calls, + "store": self.store, + "metadata": self.metadata, + "reasoning": _to_dict(self.reasoning) if self.reasoning is not None else None, + "text": _to_dict(self.text) if self.text is not None else None, + "seed": self.seed, + } + return {k: v for k, v in raw.items() if v is not None} + + +class ResponsesAPIError(Exception): + """Raised for HTTP/transport errors against the Responses API.""" + + def __init__(self, message: str, status_code: Optional[int] = None, body: Optional[str] = None): + super().__init__(message) + self.status_code = status_code + self.body = body + + +class ResponsesClient: + """Client for the OpenAI Responses API served by Foundry Local. + + Construct via ``manager.create_responses_client(model_id)`` or + ``model.create_responses_client(base_url)``. 
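+
+    Per-request defaults live on :attr:`settings` (a
+    :class:`ResponsesClientSettings`); per-call keyword options override
+    them::
+
+        client.settings.temperature = 0.2
+        resp = client.create("hi", temperature=0.7)  # the per-call 0.7 wins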
+ """ + + def __init__(self, base_url: str, model_id: Optional[str] = None): + if not isinstance(base_url, str) or not base_url.strip(): + raise ValueError("base_url must be a non-empty string.") + self._base_url = base_url.rstrip("/") + self._model_id = model_id + self.settings = ResponsesClientSettings() + + # ------------------------------------------------------------------ public + + def create( + self, + input: Union[str, List[Any]], + **options: Any, + ) -> ResponseObject: + """Create a response (non-streaming).""" + body = self._build_request(input, options, stream=False) + raw = self._post_json("/v1/responses", body) + return _parse_response_object(raw) + + def create_streaming( + self, + input: Union[str, List[Any]], + **options: Any, + ) -> Generator[StreamingEvent, None, None]: + """Create a response with SSE streaming. + + Returns a generator yielding :class:`StreamingEvent` objects. The HTTP + connection is closed automatically when the generator is exhausted or + garbage-collected. + """ + body = self._build_request(input, options, stream=True) + return self._post_stream("/v1/responses", body) + + def get(self, response_id: str) -> ResponseObject: + self._validate_id(response_id, "response_id") + raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}") + return _parse_response_object(raw) + + def delete(self, response_id: str) -> DeleteResponseResult: + self._validate_id(response_id, "response_id") + raw = self._request_json("DELETE", f"/v1/responses/{quote(response_id, safe='')}") + return _parse_delete_result(raw) + + def cancel(self, response_id: str) -> ResponseObject: + self._validate_id(response_id, "response_id") + raw = self._request_json("POST", f"/v1/responses/{quote(response_id, safe='')}/cancel") + return _parse_response_object(raw) + + def get_input_items(self, response_id: str) -> InputItemsListResponse: + self._validate_id(response_id, "response_id") + raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}/input_items") + return _parse_input_items_list(raw) + + def list(self) -> ListResponsesResult: + raw = self._request_json("GET", "/v1/responses") + return _parse_list_responses(raw) + + # ---------------------------------------------------------------- internal + + def _build_request( + self, + input: Union[str, List[Any]], + options: Dict[str, Any], + stream: bool, + ) -> Dict[str, Any]: + self._validate_input(input) + if options.get("tools") is not None: + self._validate_tools(options["tools"]) + + model = options.pop("model", None) or self._model_id + if not isinstance(model, str) or not model.strip(): + raise ValueError( + "Model must be specified via create_responses_client(model_id) or options['model']." + ) + + # Normalize input: convert dataclasses to dicts for the wire format. + if isinstance(input, list): + wire_input = [_to_dict(i) if is_dataclass(i) else i for i in input] + else: + wire_input = input + + # Normalize other dataclass-shaped options (tools, reasoning, etc.). 
+ normalized_options: Dict[str, Any] = {} + for key, value in options.items(): + if value is None: + continue + if is_dataclass(value): + normalized_options[key] = _to_dict(value) + elif isinstance(value, list): + normalized_options[key] = [_to_dict(v) if is_dataclass(v) else v for v in value] + else: + normalized_options[key] = value + + body: Dict[str, Any] = {"model": model, "input": wire_input} + # Merge order: model+input → settings defaults → per-call overrides + body.update(self.settings._serialize()) + body.update(normalized_options) + if stream: + body["stream"] = True + return body + + @staticmethod + def _validate_input(input: Any) -> None: + if input is None: + raise ValueError("Input cannot be None.") + if isinstance(input, str): + if not input.strip(): + raise ValueError("Input string cannot be empty.") + return + if isinstance(input, list): + if len(input) == 0: + raise ValueError("Input items list cannot be empty.") + for i, item in enumerate(input): + if is_dataclass(item): + t = getattr(item, "type", None) + elif isinstance(item, dict): + t = item.get("type") + else: + raise ValueError(f"input[{i}] must be a dict or dataclass.") + if not isinstance(t, str) or not t.strip(): + raise ValueError(f"input[{i}] must have a non-empty 'type' field.") + return + raise ValueError("Input must be a string or a list of input items.") + + @staticmethod + def _validate_tools(tools: Any) -> None: + if not isinstance(tools, list): + raise ValueError("tools must be a list if provided.") + for i, tool in enumerate(tools): + if is_dataclass(tool): + t = getattr(tool, "type", None) + name = getattr(tool, "name", None) + elif isinstance(tool, dict): + t = tool.get("type") + name = tool.get("name") + else: + raise ValueError(f"tools[{i}] must be a dict or FunctionToolDefinition.") + if t != "function": + raise ValueError(f"tools[{i}] must have type 'function'.") + if not isinstance(name, str) or not name.strip(): + raise ValueError(f"tools[{i}] must have a non-empty 'name'.") + + @staticmethod + def _validate_id(value: str, param: str) -> None: + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{param} must be a non-empty string.") + if len(value) > _MAX_ID_LEN: + raise ValueError(f"{param} exceeds maximum length ({_MAX_ID_LEN}).") + + # ----- HTTP plumbing ----- + + def _url(self, path: str) -> str: + return f"{self._base_url}{path}" + + def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + try: + if body is not None: + resp = requests.request( + method, + self._url(path), + headers={"Content-Type": "application/json", "Accept": "application/json"}, + data=json.dumps(body), + ) + else: + resp = requests.request(method, self._url(path), headers={"Accept": "application/json"}) + except requests.RequestException as e: + raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e + + return self._handle_json_response(resp, method, path) + + def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: + return self._request_json("POST", path, body) + + @staticmethod + def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: + text = resp.text + if not resp.ok: + raise ResponsesAPIError( + f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}", + status_code=resp.status_code, + body=text, + ) + try: + return json.loads(text) if text else {} + except json.JSONDecodeError as e: + raise ResponsesAPIError( + f"Failed to parse response 
JSON from {method} {path}: {text[:200]}" + ) from e + + def _post_stream( + self, path: str, body: Dict[str, Any] + ) -> Generator[StreamingEvent, None, None]: + try: + resp = requests.post( + self._url(path), + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + data=json.dumps(body), + stream=True, + ) + except requests.RequestException as e: + raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e + + if not resp.ok: + body_text = resp.text + resp.close() + raise ResponsesAPIError( + f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}", + status_code=resp.status_code, + body=body_text, + ) + + return _iter_sse_events(resp) + + +def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, None]: + """Parse an SSE response into a stream of :class:`StreamingEvent` objects. + + Closes the underlying HTTP connection when the generator ends for any + reason (completion, [DONE], exception, or GC). + """ + try: + buffer_parts: List[str] = [] + # iter_content yields bytes chunks; decode as UTF-8 and split on blank lines. + for chunk in resp.iter_content(chunk_size=1024, decode_unicode=False): + if not chunk: + continue + if isinstance(chunk, bytes): + text = chunk.decode("utf-8", errors="replace") + else: + text = chunk + buffer_parts.append(text) + buffer = "".join(buffer_parts) + # Normalize CRLF to LF so our split works on both styles. + buffer = buffer.replace("\r\n", "\n") + + blocks = buffer.split("\n\n") + incomplete = blocks.pop() if blocks else "" + buffer_parts = [incomplete] if incomplete else [] + + for block in blocks: + event = _parse_sse_block(block) + if event is _SSE_DONE: + return + if event is not None: + yield event + + # Flush any residual block that wasn't terminated by a blank line. + tail = "".join(buffer_parts).strip() + if tail: + event = _parse_sse_block(tail) + if event is not None and event is not _SSE_DONE: + yield event + finally: + resp.close() + + +_SSE_DONE = object() # sentinel returned for the `data: [DONE]` terminator + + +def _parse_sse_block(block: str) -> Any: + """Parse a single SSE block (already stripped of its trailing blank line).""" + trimmed = block.strip() + if not trimmed: + return None + if trimmed == "data: [DONE]": + return _SSE_DONE + + data_lines: List[str] = [] + for line in trimmed.split("\n"): + if line.startswith("data: "): + data_lines.append(line[6:]) + elif line == "data:": + data_lines.append("") + # `event:`, `id:`, `retry:` fields are ignored — the type lives in the JSON payload. + + if not data_lines: + return None + + data = "\n".join(data_lines) + if data == "[DONE]": + return _SSE_DONE + try: + parsed = json.loads(data) + except json.JSONDecodeError as e: + raise ResponsesAPIError(f"Failed to parse streaming event JSON: {e}") from e + if not isinstance(parsed, dict): + return None + return parse_streaming_event(parsed) + + +__all__ = [ + "ResponsesClient", + "ResponsesClientSettings", + "ResponsesAPIError", +] diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py new file mode 100644 index 000000000..09f9f4a60 --- /dev/null +++ b/sdk/python/src/openai/responses_types.py @@ -0,0 +1,885 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Types for the OpenAI Responses API served by Foundry Local. 
+ +These mirror the DTOs defined by neutron-server in +``src/FoundryLocalCore/Core/Responses/Contracts/``. Dataclasses are used so +callers can construct items with keyword arguments and we can serialize +discriminated unions by the ``type`` field. +""" + +from __future__ import annotations + +import base64 +import mimetypes +from dataclasses import dataclass, field, fields, is_dataclass +from typing import Any, Dict, List, Literal, Optional, Union + + +# --------------------------------------------------------------------------- +# Serialization helpers +# --------------------------------------------------------------------------- + +def _to_dict(obj: Any) -> Any: + """Recursively convert a dataclass (or list/dict of them) to a plain dict, + omitting ``None`` values so the wire format matches the OpenAI spec. + """ + if is_dataclass(obj) and not isinstance(obj, type): + result: Dict[str, Any] = {} + for f in fields(obj): + value = getattr(obj, f.name) + if value is None: + continue + result[f.name] = _to_dict(value) + return result + if isinstance(obj, list): + return [_to_dict(v) for v in obj] + if isinstance(obj, dict): + return {k: _to_dict(v) for k, v in obj.items() if v is not None} + return obj + + +# --------------------------------------------------------------------------- +# Content Parts +# --------------------------------------------------------------------------- + +@dataclass +class InputTextContent: + text: str = "" + type: Literal["input_text"] = "input_text" + + +@dataclass +class InputImageContent: + """Vision input. Provide either ``image_url`` or ``image_data`` (base64).""" + media_type: str = "" + image_url: Optional[str] = None + image_data: Optional[str] = None + detail: Optional[str] = None # "low" | "high" | "auto" + type: Literal["input_image"] = "input_image" + + @staticmethod + def from_file(path: str, detail: Optional[str] = None) -> "InputImageContent": + media_type, _ = mimetypes.guess_type(path) + if not media_type or not media_type.startswith("image/"): + raise ValueError(f"Unsupported image format: {path}") + with open(path, "rb") as fh: + data = base64.b64encode(fh.read()).decode("ascii") + return InputImageContent(image_data=data, media_type=media_type, detail=detail) + + @staticmethod + def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": + return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) + + @staticmethod + def from_bytes(data: bytes, media_type: str, detail: Optional[str] = None) -> "InputImageContent": + return InputImageContent( + image_data=base64.b64encode(data).decode("ascii"), + media_type=media_type, + detail=detail, + ) + + +@dataclass +class InputFileContent: + filename: str = "" + file_url: str = "" + type: Literal["input_file"] = "input_file" + + +@dataclass +class OutputTextContent: + text: str = "" + annotations: Optional[List[Any]] = None + logprobs: Optional[List[Any]] = None + type: Literal["output_text"] = "output_text" + + +@dataclass +class RefusalContent: + refusal: str = "" + type: Literal["refusal"] = "refusal" + + +ContentPart = Union[ + InputTextContent, InputImageContent, InputFileContent, OutputTextContent, RefusalContent +] + + +def _parse_content_part(data: Dict[str, Any]) -> ContentPart: + t = data.get("type") + if t == "input_text": + return InputTextContent(text=data.get("text", "")) + if t == "input_image": + return InputImageContent( + media_type=data.get("media_type", ""), + image_url=data.get("image_url"), + image_data=data.get("image_data"), + 
detail=data.get("detail"), + ) + if t == "input_file": + return InputFileContent(filename=data.get("filename", ""), file_url=data.get("file_url", "")) + if t == "output_text": + return OutputTextContent( + text=data.get("text", ""), + annotations=data.get("annotations"), + logprobs=data.get("logprobs"), + ) + if t == "refusal": + return RefusalContent(refusal=data.get("refusal", "")) + # Unknown content-part type — fall back to input_text so callers still get something + return InputTextContent(text=str(data.get("text", ""))) + + +def _parse_content(value: Any) -> Union[str, List[ContentPart]]: + if isinstance(value, str): + return value + if isinstance(value, list): + return [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + return value + + +# --------------------------------------------------------------------------- +# Response Items (input + output) +# --------------------------------------------------------------------------- + +@dataclass +class MessageItem: + role: str = "" + content: Union[str, List[ContentPart]] = "" + id: Optional[str] = None + status: Optional[str] = None + type: Literal["message"] = "message" + + +@dataclass +class FunctionCallItem: + call_id: str = "" + name: str = "" + arguments: str = "" + id: Optional[str] = None + status: Optional[str] = None + type: Literal["function_call"] = "function_call" + + +@dataclass +class FunctionCallOutputItem: + call_id: str = "" + output: Union[str, List[ContentPart]] = "" + id: Optional[str] = None + type: Literal["function_call_output"] = "function_call_output" + + +@dataclass +class ItemReference: + id: str = "" + type: Literal["item_reference"] = "item_reference" + + +@dataclass +class ReasoningItem: + id: Optional[str] = None + content: Optional[List[ContentPart]] = None + encrypted_content: Optional[str] = None + summary: Optional[str] = None + status: Optional[str] = None + type: Literal["reasoning"] = "reasoning" + + +ResponseInputItem = Union[ + MessageItem, FunctionCallItem, FunctionCallOutputItem, ItemReference, ReasoningItem +] +ResponseOutputItem = Union[MessageItem, FunctionCallItem, ReasoningItem] + + +def _parse_response_item(data: Dict[str, Any]) -> Any: + t = data.get("type") + if t == "message": + return MessageItem( + role=data.get("role", ""), + content=_parse_content(data.get("content", "")), + id=data.get("id"), + status=data.get("status"), + ) + if t == "function_call": + return FunctionCallItem( + call_id=data.get("call_id", ""), + name=data.get("name", ""), + arguments=data.get("arguments", ""), + id=data.get("id"), + status=data.get("status"), + ) + if t == "function_call_output": + return FunctionCallOutputItem( + call_id=data.get("call_id", ""), + output=_parse_content(data.get("output", "")), + id=data.get("id"), + ) + if t == "item_reference": + return ItemReference(id=data.get("id", "")) + if t == "reasoning": + content_raw = data.get("content") + return ReasoningItem( + id=data.get("id"), + content=[_parse_content_part(p) for p in content_raw] if isinstance(content_raw, list) else None, + encrypted_content=data.get("encrypted_content"), + summary=data.get("summary"), + status=data.get("status"), + ) + # Unknown item type — return the raw dict so callers can inspect + return data + + +# --------------------------------------------------------------------------- +# Tool Definitions & Config +# --------------------------------------------------------------------------- + +@dataclass +class FunctionToolDefinition: + name: str = "" + description: Optional[str] = None + 
parameters: Optional[Dict[str, Any]] = None + strict: Optional[bool] = None + type: Literal["function"] = "function" + + +@dataclass +class FunctionToolChoice: + name: str = "" + type: Literal["function"] = "function" + + +ToolChoice = Union[str, FunctionToolChoice] # "none" | "auto" | "required" | {type,name} + + +@dataclass +class TextFormat: + type: str = "text" # "text" | "json_object" | "json_schema" | "lark_grammar" | "regex" + name: Optional[str] = None + description: Optional[str] = None + schema: Optional[Dict[str, Any]] = None + strict: Optional[bool] = None + + +@dataclass +class TextConfig: + format: Optional[TextFormat] = None + + +@dataclass +class ReasoningConfig: + effort: Optional[str] = None + summary: Optional[str] = None + + +# --------------------------------------------------------------------------- +# Response Object +# --------------------------------------------------------------------------- + +@dataclass +class ResponseUsage: + input_tokens: int = 0 + output_tokens: int = 0 + total_tokens: int = 0 + input_tokens_details: Optional[Dict[str, Any]] = None + output_tokens_details: Optional[Dict[str, Any]] = None + + +@dataclass +class ResponseError: + code: str = "" + message: str = "" + + +@dataclass +class IncompleteDetails: + reason: str = "" + + +@dataclass +class ResponseObject: + id: str = "" + object: str = "response" + created_at: int = 0 + status: str = "" + model: str = "" + output: List[Any] = field(default_factory=list) + completed_at: Optional[int] = None + failed_at: Optional[int] = None + cancelled_at: Optional[int] = None + error: Optional[ResponseError] = None + usage: Optional[ResponseUsage] = None + instructions: Optional[str] = None + previous_response_id: Optional[str] = None + tools: Optional[List[FunctionToolDefinition]] = None + tool_choice: Optional[Any] = None + temperature: Optional[float] = None + top_p: Optional[float] = None + max_output_tokens: Optional[int] = None + frequency_penalty: Optional[float] = None + presence_penalty: Optional[float] = None + seed: Optional[int] = None + truncation: Optional[str] = None + parallel_tool_calls: Optional[bool] = None + store: Optional[bool] = None + metadata: Optional[Dict[str, str]] = None + reasoning: Optional[ReasoningConfig] = None + text: Optional[TextConfig] = None + user: Optional[str] = None + incomplete_details: Optional[IncompleteDetails] = None + # Retain anything the server returned that we don't model explicitly. 
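+    # Populated only when parsing server responses; not part of the wire format.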
+ _raw: Optional[Dict[str, Any]] = None + + @property + def output_text(self) -> str: + """Concatenated text from the first assistant ``message`` item in ``output``.""" + for item in self.output: + if isinstance(item, MessageItem) and item.role == "assistant": + content = item.content + if isinstance(content, str): + return content + if isinstance(content, list): + parts: List[str] = [] + for p in content: + text = getattr(p, "text", None) + if isinstance(text, str): + parts.append(text) + return "".join(parts) + return "" + + +def _parse_response_object(data: Dict[str, Any]) -> ResponseObject: + output = data.get("output") or [] + parsed_output = [_parse_response_item(i) if isinstance(i, dict) else i for i in output] + + tools_raw = data.get("tools") + tools = None + if isinstance(tools_raw, list): + tools = [ + FunctionToolDefinition( + name=t.get("name", ""), + description=t.get("description"), + parameters=t.get("parameters"), + strict=t.get("strict"), + ) if isinstance(t, dict) else t + for t in tools_raw + ] + + usage = None + if isinstance(data.get("usage"), dict): + u = data["usage"] + usage = ResponseUsage( + input_tokens=u.get("input_tokens", 0), + output_tokens=u.get("output_tokens", 0), + total_tokens=u.get("total_tokens", 0), + input_tokens_details=u.get("input_tokens_details"), + output_tokens_details=u.get("output_tokens_details"), + ) + + error = None + if isinstance(data.get("error"), dict): + error = ResponseError(code=data["error"].get("code", ""), message=data["error"].get("message", "")) + + incomplete = None + if isinstance(data.get("incomplete_details"), dict): + incomplete = IncompleteDetails(reason=data["incomplete_details"].get("reason", "")) + + reasoning = None + if isinstance(data.get("reasoning"), dict): + reasoning = ReasoningConfig( + effort=data["reasoning"].get("effort"), + summary=data["reasoning"].get("summary"), + ) + + text = None + if isinstance(data.get("text"), dict): + fmt_raw = data["text"].get("format") + fmt = None + if isinstance(fmt_raw, dict): + fmt = TextFormat( + type=fmt_raw.get("type", "text"), + name=fmt_raw.get("name"), + description=fmt_raw.get("description"), + schema=fmt_raw.get("schema"), + strict=fmt_raw.get("strict"), + ) + text = TextConfig(format=fmt) + + return ResponseObject( + id=data.get("id", ""), + object=data.get("object", "response"), + created_at=data.get("created_at", 0), + status=data.get("status", ""), + model=data.get("model", ""), + output=parsed_output, + completed_at=data.get("completed_at"), + failed_at=data.get("failed_at"), + cancelled_at=data.get("cancelled_at"), + error=error, + usage=usage, + instructions=data.get("instructions"), + previous_response_id=data.get("previous_response_id"), + tools=tools, + tool_choice=data.get("tool_choice"), + temperature=data.get("temperature"), + top_p=data.get("top_p"), + max_output_tokens=data.get("max_output_tokens"), + frequency_penalty=data.get("frequency_penalty"), + presence_penalty=data.get("presence_penalty"), + seed=data.get("seed"), + truncation=data.get("truncation"), + parallel_tool_calls=data.get("parallel_tool_calls"), + store=data.get("store"), + metadata=data.get("metadata"), + reasoning=reasoning, + text=text, + user=data.get("user"), + incomplete_details=incomplete, + _raw=data, + ) + + +# --------------------------------------------------------------------------- +# Delete / List helpers +# --------------------------------------------------------------------------- + +@dataclass +class DeleteResponseResult: + id: str = "" + object: str = "" + deleted: 
bool = False + + +@dataclass +class InputItemsListResponse: + object: str = "list" + data: List[Any] = field(default_factory=list) + + +@dataclass +class ListResponsesResult: + object: str = "list" + data: List[ResponseObject] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Streaming Events +# --------------------------------------------------------------------------- + +@dataclass +class ResponseLifecycleEvent: + """`response.created` / `queued` / `in_progress` / `completed` / `failed` / `incomplete`.""" + type: str = "" + response: Optional[ResponseObject] = None + sequence_number: int = 0 + + +@dataclass +class OutputItemAddedEvent: + item_id: str = "" + output_index: int = 0 + item: Any = None + sequence_number: int = 0 + type: Literal["response.output_item.added"] = "response.output_item.added" + + +@dataclass +class OutputItemDoneEvent: + item_id: str = "" + output_index: int = 0 + item: Any = None + sequence_number: int = 0 + type: Literal["response.output_item.done"] = "response.output_item.done" + + +@dataclass +class ContentPartAddedEvent: + item_id: str = "" + content_index: int = 0 + part: Any = None + sequence_number: int = 0 + type: Literal["response.content_part.added"] = "response.content_part.added" + + +@dataclass +class ContentPartDoneEvent: + item_id: str = "" + content_index: int = 0 + part: Any = None + sequence_number: int = 0 + type: Literal["response.content_part.done"] = "response.content_part.done" + + +@dataclass +class OutputTextDeltaEvent: + item_id: str = "" + output_index: int = 0 + content_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.output_text.delta"] = "response.output_text.delta" + + +@dataclass +class OutputTextDoneEvent: + item_id: str = "" + output_index: int = 0 + content_index: int = 0 + text: str = "" + sequence_number: int = 0 + type: Literal["response.output_text.done"] = "response.output_text.done" + + +@dataclass +class OutputTextAnnotationAddedEvent: + item_id: str = "" + annotation: Any = None + sequence_number: int = 0 + type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added" + + +@dataclass +class RefusalDeltaEvent: + item_id: str = "" + content_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.refusal.delta"] = "response.refusal.delta" + + +@dataclass +class RefusalDoneEvent: + item_id: str = "" + content_index: int = 0 + refusal: str = "" + sequence_number: int = 0 + type: Literal["response.refusal.done"] = "response.refusal.done" + + +@dataclass +class FunctionCallArgsDeltaEvent: + item_id: str = "" + output_index: int = 0 + delta: str = "" + sequence_number: int = 0 + type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" + + +@dataclass +class FunctionCallArgsDoneEvent: + item_id: str = "" + output_index: int = 0 + arguments: str = "" + name: str = "" + sequence_number: int = 0 + type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" + + +@dataclass +class ReasoningSummaryPartAddedEvent: + item_id: str = "" + part: Any = None + sequence_number: int = 0 + type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" + + +@dataclass +class ReasoningSummaryPartDoneEvent: + item_id: str = "" + part: Any = None + sequence_number: int = 0 + type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" + + 
+@dataclass +class ReasoningDeltaEvent: + item_id: str = "" + delta: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning.delta"] = "response.reasoning.delta" + + +@dataclass +class ReasoningDoneEvent: + item_id: str = "" + text: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning.done"] = "response.reasoning.done" + + +@dataclass +class ReasoningSummaryTextDeltaEvent: + item_id: str = "" + delta: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" + + +@dataclass +class ReasoningSummaryTextDoneEvent: + item_id: str = "" + text: str = "" + sequence_number: int = 0 + type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" + + +@dataclass +class StreamingErrorEvent: + code: Optional[str] = None + message: Optional[str] = None + param: Optional[str] = None + sequence_number: int = 0 + type: Literal["error"] = "error" + + +@dataclass +class UnknownStreamingEvent: + """Fallback for event types that aren't yet modeled.""" + type: str = "" + sequence_number: int = 0 + data: Optional[Dict[str, Any]] = None + + +StreamingEvent = Union[ + ResponseLifecycleEvent, + OutputItemAddedEvent, + OutputItemDoneEvent, + ContentPartAddedEvent, + ContentPartDoneEvent, + OutputTextDeltaEvent, + OutputTextDoneEvent, + OutputTextAnnotationAddedEvent, + RefusalDeltaEvent, + RefusalDoneEvent, + FunctionCallArgsDeltaEvent, + FunctionCallArgsDoneEvent, + ReasoningSummaryPartAddedEvent, + ReasoningSummaryPartDoneEvent, + ReasoningDeltaEvent, + ReasoningDoneEvent, + ReasoningSummaryTextDeltaEvent, + ReasoningSummaryTextDoneEvent, + StreamingErrorEvent, + UnknownStreamingEvent, +] + + +_LIFECYCLE_TYPES = { + "response.created", + "response.queued", + "response.in_progress", + "response.completed", + "response.failed", + "response.incomplete", +} + + +def parse_streaming_event(data: Dict[str, Any]) -> StreamingEvent: + """Build a typed streaming-event dataclass from a server-sent JSON payload.""" + t = data.get("type", "") + seq = data.get("sequence_number", 0) + + if t in _LIFECYCLE_TYPES: + resp_raw = data.get("response") + resp = _parse_response_object(resp_raw) if isinstance(resp_raw, dict) else None + return ResponseLifecycleEvent(type=t, response=resp, sequence_number=seq) + + if t == "response.output_item.added": + item = data.get("item") + return OutputItemAddedEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + item=_parse_response_item(item) if isinstance(item, dict) else item, + sequence_number=seq, + ) + if t == "response.output_item.done": + item = data.get("item") + return OutputItemDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + item=_parse_response_item(item) if isinstance(item, dict) else item, + sequence_number=seq, + ) + if t == "response.content_part.added": + part = data.get("part") + return ContentPartAddedEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + part=_parse_content_part(part) if isinstance(part, dict) else part, + sequence_number=seq, + ) + if t == "response.content_part.done": + part = data.get("part") + return ContentPartDoneEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + part=_parse_content_part(part) if isinstance(part, dict) else part, + sequence_number=seq, + ) + if t == "response.output_text.delta": + return OutputTextDeltaEvent( + item_id=data.get("item_id", ""), + 
output_index=data.get("output_index", 0), + content_index=data.get("content_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.output_text.done": + return OutputTextDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + content_index=data.get("content_index", 0), + text=data.get("text", ""), + sequence_number=seq, + ) + if t == "response.output_text.annotation.added": + return OutputTextAnnotationAddedEvent( + item_id=data.get("item_id", ""), + annotation=data.get("annotation"), + sequence_number=seq, + ) + if t == "response.refusal.delta": + return RefusalDeltaEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.refusal.done": + return RefusalDoneEvent( + item_id=data.get("item_id", ""), + content_index=data.get("content_index", 0), + refusal=data.get("refusal", ""), + sequence_number=seq, + ) + if t == "response.function_call_arguments.delta": + return FunctionCallArgsDeltaEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + delta=data.get("delta", ""), + sequence_number=seq, + ) + if t == "response.function_call_arguments.done": + return FunctionCallArgsDoneEvent( + item_id=data.get("item_id", ""), + output_index=data.get("output_index", 0), + arguments=data.get("arguments", ""), + name=data.get("name", ""), + sequence_number=seq, + ) + if t == "response.reasoning_summary_part.added": + return ReasoningSummaryPartAddedEvent( + item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq + ) + if t == "response.reasoning_summary_part.done": + return ReasoningSummaryPartDoneEvent( + item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq + ) + if t == "response.reasoning.delta": + return ReasoningDeltaEvent( + item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq + ) + if t == "response.reasoning.done": + return ReasoningDoneEvent( + item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq + ) + if t == "response.reasoning_summary_text.delta": + return ReasoningSummaryTextDeltaEvent( + item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq + ) + if t == "response.reasoning_summary_text.done": + return ReasoningSummaryTextDoneEvent( + item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq + ) + if t == "error": + return StreamingErrorEvent( + code=data.get("code"), + message=data.get("message"), + param=data.get("param"), + sequence_number=seq, + ) + + return UnknownStreamingEvent(type=t, sequence_number=seq, data=data) + + +def _parse_delete_result(data: Dict[str, Any]) -> DeleteResponseResult: + return DeleteResponseResult( + id=data.get("id", ""), + object=data.get("object", ""), + deleted=bool(data.get("deleted", False)), + ) + + +def _parse_input_items_list(data: Dict[str, Any]) -> InputItemsListResponse: + raw = data.get("data") or [] + return InputItemsListResponse( + object=data.get("object", "list"), + data=[_parse_response_item(i) if isinstance(i, dict) else i for i in raw], + ) + + +def _parse_list_responses(data: Dict[str, Any]) -> ListResponsesResult: + raw = data.get("data") or [] + return ListResponsesResult( + object=data.get("object", "list"), + data=[_parse_response_object(r) if isinstance(r, dict) else r for r in raw], + ) + + +__all__ = [ + # Content parts + "InputTextContent", + "InputImageContent", + 
"InputFileContent", + "OutputTextContent", + "RefusalContent", + "ContentPart", + # Items + "MessageItem", + "FunctionCallItem", + "FunctionCallOutputItem", + "ItemReference", + "ReasoningItem", + "ResponseInputItem", + "ResponseOutputItem", + # Tools & config + "FunctionToolDefinition", + "FunctionToolChoice", + "ToolChoice", + "TextFormat", + "TextConfig", + "ReasoningConfig", + # Response + "ResponseObject", + "ResponseUsage", + "ResponseError", + "IncompleteDetails", + "DeleteResponseResult", + "InputItemsListResponse", + "ListResponsesResult", + # Streaming events + "StreamingEvent", + "ResponseLifecycleEvent", + "OutputItemAddedEvent", + "OutputItemDoneEvent", + "ContentPartAddedEvent", + "ContentPartDoneEvent", + "OutputTextDeltaEvent", + "OutputTextDoneEvent", + "OutputTextAnnotationAddedEvent", + "RefusalDeltaEvent", + "RefusalDoneEvent", + "FunctionCallArgsDeltaEvent", + "FunctionCallArgsDoneEvent", + "ReasoningSummaryPartAddedEvent", + "ReasoningSummaryPartDoneEvent", + "ReasoningDeltaEvent", + "ReasoningDoneEvent", + "ReasoningSummaryTextDeltaEvent", + "ReasoningSummaryTextDoneEvent", + "StreamingErrorEvent", + "UnknownStreamingEvent", + "parse_streaming_event", +] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py new file mode 100644 index 000000000..15c1f3b1e --- /dev/null +++ b/sdk/python/test/openai/test_responses_client.py @@ -0,0 +1,603 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Unit tests for the Responses API client (no live server required). + +Mirrors the scenarios covered by the JS SDK's ``responsesClient.test.ts`` and +the Python spec's §5. HTTP calls are intercepted via :mod:`unittest.mock`. +""" + +from __future__ import annotations + +import base64 +import io +import json +from typing import Any, Dict, List +from unittest.mock import MagicMock, patch + +import pytest + +from foundry_local_sdk.openai.responses_client import ( + ResponsesAPIError, + ResponsesClient, + ResponsesClientSettings, + _parse_sse_block, + _iter_sse_events, + _SSE_DONE, +) +from foundry_local_sdk.openai.responses_types import ( + FunctionCallItem, + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, + OutputTextContent, + ReasoningConfig, + ResponseObject, + TextConfig, + TextFormat, + _to_dict, + parse_streaming_event, + OutputTextDeltaEvent, + ResponseLifecycleEvent, + StreamingErrorEvent, + UnknownStreamingEvent, +) + +BASE_URL = "http://127.0.0.1:5273" +MODEL_ID = "test-model" + + +def _fake_json_response(payload: Dict[str, Any], status: int = 200): + resp = MagicMock() + resp.ok = 200 <= status < 300 + resp.status_code = status + resp.text = json.dumps(payload) + return resp + + +def _fake_stream_response(sse_payload: str, status: int = 200): + resp = MagicMock() + resp.ok = 200 <= status < 300 + resp.status_code = status + resp.text = sse_payload + # iter_content returns the full payload in one bytes chunk. 
+ resp.iter_content = MagicMock(return_value=iter([sse_payload.encode("utf-8")])) + resp.close = MagicMock() + return resp + + +# --------------------------------------------------------------------------- +# Settings +# --------------------------------------------------------------------------- + +class TestResponsesClientSettings: + def test_serialize_defaults_contains_store(self): + # store defaults to True — matches OpenAI convention + s = ResponsesClientSettings() + serialized = s._serialize() + assert serialized == {"store": True} + + def test_store_defaults_to_true(self): + assert ResponsesClientSettings().store is True + + def test_serialize_all_fields(self): + s = ResponsesClientSettings() + s.instructions = "Be concise." + s.temperature = 0.2 + s.top_p = 0.9 + s.max_output_tokens = 256 + s.frequency_penalty = 0.1 + s.presence_penalty = 0.2 + s.tool_choice = "auto" + s.truncation = "auto" + s.parallel_tool_calls = False + s.store = False + s.metadata = {"run": "1"} + s.reasoning = ReasoningConfig(effort="medium") + s.text = TextConfig(format=TextFormat(type="json_object")) + s.seed = 42 + + out = s._serialize() + assert out["instructions"] == "Be concise." + assert out["temperature"] == 0.2 + assert out["top_p"] == 0.9 + assert out["max_output_tokens"] == 256 + assert out["frequency_penalty"] == 0.1 + assert out["presence_penalty"] == 0.2 + assert out["tool_choice"] == "auto" + assert out["truncation"] == "auto" + assert out["parallel_tool_calls"] is False + assert out["store"] is False + assert out["metadata"] == {"run": "1"} + assert out["reasoning"] == {"effort": "medium"} + assert out["text"] == {"format": {"type": "json_object"}} + assert out["seed"] == 42 + + def test_serialize_omits_none(self): + s = ResponsesClientSettings() + s.temperature = None # explicit None is omitted + assert "temperature" not in s._serialize() + + +# --------------------------------------------------------------------------- +# Input / tool / id validation +# --------------------------------------------------------------------------- + +class TestInputValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_none(self): + with pytest.raises(ValueError, match="None"): + self.client._build_request(None, {}, stream=False) + + def test_rejects_empty_string(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request("", {}, stream=False) + + def test_rejects_whitespace_string(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request(" ", {}, stream=False) + + def test_rejects_empty_array(self): + with pytest.raises(ValueError, match="empty"): + self.client._build_request([], {}, stream=False) + + def test_rejects_item_without_type(self): + with pytest.raises(ValueError, match="type"): + self.client._build_request([{"role": "user"}], {}, stream=False) + + def test_accepts_string_input(self): + body = self.client._build_request("Hi", {}, stream=False) + assert body["input"] == "Hi" + assert body["model"] == MODEL_ID + + def test_accepts_dict_input_items(self): + body = self.client._build_request( + [{"type": "message", "role": "user", "content": "hi"}], {}, stream=False + ) + assert isinstance(body["input"], list) + assert body["input"][0]["type"] == "message" + + def test_accepts_dataclass_input_items(self): + item = MessageItem(role="user", content="hello") + body = self.client._build_request([item], {}, stream=False) + assert body["input"][0]["type"] == "message" + assert body["input"][0]["role"] == 
"user" + assert body["input"][0]["content"] == "hello" + + def test_stream_flag_set(self): + body = self.client._build_request("hi", {}, stream=True) + assert body["stream"] is True + + def test_requires_model(self): + c = ResponsesClient(BASE_URL) # no default model + with pytest.raises(ValueError, match="[Mm]odel"): + c._build_request("hi", {}, stream=False) + + def test_options_model_overrides_default(self): + body = self.client._build_request("hi", {"model": "override"}, stream=False) + assert body["model"] == "override" + + +class TestToolValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_non_function_type(self): + with pytest.raises(ValueError, match="function"): + self.client._build_request("hi", {"tools": [{"type": "retrieval", "name": "x"}]}, stream=False) + + def test_rejects_empty_name(self): + with pytest.raises(ValueError, match="name"): + self.client._build_request("hi", {"tools": [{"type": "function", "name": ""}]}, stream=False) + + def test_rejects_non_list(self): + with pytest.raises(ValueError, match="list"): + self.client._build_request("hi", {"tools": "nope"}, stream=False) + + def test_accepts_valid_dict_tool(self): + body = self.client._build_request( + "hi", + {"tools": [{"type": "function", "name": "multiply", "parameters": {}}]}, + stream=False, + ) + assert body["tools"][0]["name"] == "multiply" + + def test_accepts_dataclass_tool(self): + tool = FunctionToolDefinition(name="multiply", description="x*y") + body = self.client._build_request("hi", {"tools": [tool]}, stream=False) + assert body["tools"][0]["type"] == "function" + assert body["tools"][0]["name"] == "multiply" + assert body["tools"][0]["description"] == "x*y" + + +class TestIdValidation: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_rejects_empty_id(self): + with pytest.raises(ValueError, match="non-empty"): + self.client.get("") + + def test_rejects_whitespace_id(self): + with pytest.raises(ValueError, match="non-empty"): + self.client.get(" ") + + def test_rejects_too_long_id(self): + with pytest.raises(ValueError, match="length"): + self.client.get("x" * 2000) + + +# --------------------------------------------------------------------------- +# output_text convenience +# --------------------------------------------------------------------------- + +class TestOutputText: + def test_extracts_from_string_content(self): + resp = ResponseObject(output=[MessageItem(role="assistant", content="hello world")]) + assert resp.output_text == "hello world" + + def test_extracts_from_content_parts(self): + resp = ResponseObject(output=[ + MessageItem( + role="assistant", + content=[OutputTextContent(text="foo "), OutputTextContent(text="bar")], + ) + ]) + assert resp.output_text == "foo bar" + + def test_returns_empty_when_no_assistant(self): + resp = ResponseObject(output=[MessageItem(role="user", content="hi")]) + assert resp.output_text == "" + + def test_returns_empty_for_empty_output(self): + assert ResponseObject().output_text == "" + + def test_skips_function_call_items(self): + resp = ResponseObject(output=[ + FunctionCallItem(call_id="c1", name="f", arguments="{}"), + MessageItem(role="assistant", content="done"), + ]) + assert resp.output_text == "done" + + +# --------------------------------------------------------------------------- +# SSE parsing +# --------------------------------------------------------------------------- + +class TestSSEParsing: + def test_parses_complete_event(self): + block 
= 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hi","sequence_number":3}' + evt = _parse_sse_block(block) + assert isinstance(evt, OutputTextDeltaEvent) + assert evt.delta == "hi" + assert evt.sequence_number == 3 + + def test_done_signal(self): + assert _parse_sse_block("data: [DONE]") is _SSE_DONE + + def test_multi_line_data(self): + # Per SSE spec, multiple data: lines join with \n into one JSON doc. + block = 'data: {"type":"error",\ndata: "message":"oops","sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, StreamingErrorEvent) + assert evt.message == "oops" + + def test_invalid_json_raises(self): + block = 'data: {not valid json' + with pytest.raises(ResponsesAPIError): + _parse_sse_block(block) + + def test_empty_block_returns_none(self): + assert _parse_sse_block("") is None + assert _parse_sse_block("\n\n") is None + + def test_ignores_non_data_lines(self): + block = 'id: 1\nretry: 1000\nevent: response.created\ndata: {"type":"response.created","response":{"id":"r1"},"sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, ResponseLifecycleEvent) + assert evt.type == "response.created" + + def test_error_event(self): + block = 'data: {"type":"error","code":"bad","message":"oops","sequence_number":0}' + evt = _parse_sse_block(block) + assert isinstance(evt, StreamingErrorEvent) + assert evt.code == "bad" + assert evt.message == "oops" + + def test_iter_sse_events_handles_partial_chunks(self): + payload_events = [ + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"Hel","sequence_number":1}\n\n', + 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"lo","sequence_number":2}\n\n', + 'data: [DONE]\n\n', + ] + full = "".join(payload_events).encode("utf-8") + + # Split the bytes into irregular chunks to exercise buffering. 
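+        # With a 7-byte stride the first event arrives as b'event: ', b'respons',
+        # b'e.outpu', and so on; no chunk is guaranteed to end on an SSE line or
+        # blank-line block boundary, so the buffer must reassemble both.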
+ chunks = [full[i:i + 7] for i in range(0, len(full), 7)] + + resp = MagicMock() + resp.iter_content = MagicMock(return_value=iter(chunks)) + resp.close = MagicMock() + + events = list(_iter_sse_events(resp)) + assert len(events) == 2 + assert all(isinstance(e, OutputTextDeltaEvent) for e in events) + assert "".join(e.delta for e in events) == "Hello" + resp.close.assert_called() + + def test_iter_sse_handles_crlf(self): + payload = ( + 'event: response.output_text.delta\r\n' + 'data: {"type":"response.output_text.delta","delta":"x","sequence_number":0}\r\n' + '\r\n' + 'data: [DONE]\r\n\r\n' + ) + resp = MagicMock() + resp.iter_content = MagicMock(return_value=iter([payload.encode("utf-8")])) + resp.close = MagicMock() + + events = list(_iter_sse_events(resp)) + assert len(events) == 1 + assert events[0].delta == "x" + + def test_unknown_event_type(self): + block = 'data: {"type":"response.brand_new_event","sequence_number":7}' + evt = _parse_sse_block(block) + assert isinstance(evt, UnknownStreamingEvent) + assert evt.type == "response.brand_new_event" + + +# --------------------------------------------------------------------------- +# Vision types +# --------------------------------------------------------------------------- + +class TestVisionTypes: + def test_input_image_from_bytes(self): + data = b"\x89PNG\r\n\x1a\nfakedata" + img = InputImageContent.from_bytes(data, "image/png", detail="high") + assert img.media_type == "image/png" + assert img.detail == "high" + assert base64.b64decode(img.image_data) == data + + def test_input_image_from_url(self): + img = InputImageContent.from_url("https://example.com/x.png") + assert img.image_url == "https://example.com/x.png" + assert img.image_data is None + + def test_input_image_from_file(self, tmp_path): + data = b"\x89PNG\r\n\x1a\nfakedata" + p = tmp_path / "test.png" + p.write_bytes(data) + img = InputImageContent.from_file(str(p)) + assert img.media_type == "image/png" + assert base64.b64decode(img.image_data) == data + + def test_input_image_from_file_rejects_non_image(self, tmp_path): + p = tmp_path / "text.txt" + p.write_text("not an image") + with pytest.raises(ValueError, match="Unsupported"): + InputImageContent.from_file(str(p)) + + def test_input_image_serialization(self): + img = InputImageContent(media_type="image/png", image_data="abc", detail="low") + d = _to_dict(img) + assert d == {"media_type": "image/png", "image_data": "abc", "detail": "low", "type": "input_image"} + # image_url left unset should be omitted + assert "image_url" not in d + + +# --------------------------------------------------------------------------- +# Type serialization & parsing +# --------------------------------------------------------------------------- + +class TestTypeSerialization: + def test_message_item_to_dict(self): + msg = MessageItem( + role="user", + content=[InputTextContent(text="Hi"), InputImageContent(media_type="image/png", image_data="abc")], + ) + d = _to_dict(msg) + assert d["type"] == "message" + assert d["role"] == "user" + assert d["content"][0] == {"text": "Hi", "type": "input_text"} + assert d["content"][1]["type"] == "input_image" + assert "id" not in d # None omitted + + def test_function_tool_to_dict(self): + tool = FunctionToolDefinition( + name="multiply", + description="x*y", + parameters={"type": "object", "properties": {"a": {"type": "number"}}}, + strict=True, + ) + d = _to_dict(tool) + assert d == { + "name": "multiply", + "description": "x*y", + "parameters": {"type": "object", "properties": {"a": {"type": 
"number"}}}, + "strict": True, + "type": "function", + } + + def test_response_object_from_dict(self): + from foundry_local_sdk.openai.responses_types import _parse_response_object + + payload = { + "id": "resp_abc", + "object": "response", + "created_at": 1700000000, + "status": "completed", + "model": "phi-4-mini", + "output": [ + { + "type": "message", + "role": "assistant", + "content": [{"type": "output_text", "text": "Hello!"}], + } + ], + "usage": {"input_tokens": 3, "output_tokens": 2, "total_tokens": 5}, + "store": True, + } + r = _parse_response_object(payload) + assert r.id == "resp_abc" + assert r.status == "completed" + assert r.usage.total_tokens == 5 + assert r.output_text == "Hello!" + + def test_streaming_event_parsing_lifecycle(self): + evt = parse_streaming_event( + { + "type": "response.completed", + "response": {"id": "resp_1", "status": "completed"}, + "sequence_number": 10, + } + ) + assert isinstance(evt, ResponseLifecycleEvent) + assert evt.type == "response.completed" + assert evt.response.id == "resp_1" + assert evt.sequence_number == 10 + + +# --------------------------------------------------------------------------- +# End-to-end (mocked HTTP) +# --------------------------------------------------------------------------- + +class TestClientHTTPFlow: + def setup_method(self): + self.client = ResponsesClient(BASE_URL, MODEL_ID) + + def test_create_posts_correct_body(self): + payload = { + "id": "resp_1", + "object": "response", + "status": "completed", + "model": MODEL_ID, + "output": [ + {"type": "message", "role": "assistant", "content": "ok"}, + ], + } + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response(payload) + result = self.client.create("hello", temperature=0.3) + + assert result.id == "resp_1" + assert result.output_text == "ok" + + _, kwargs = mock_req.call_args + assert mock_req.call_args.args[0] == "POST" + assert mock_req.call_args.args[1] == f"{BASE_URL}/v1/responses" + body = json.loads(kwargs["data"]) + assert body["model"] == MODEL_ID + assert body["input"] == "hello" + assert body["temperature"] == 0.3 + assert body["store"] is True # default + assert "stream" not in body + + def test_get_uses_url_encoded_path(self): + weird_id = "resp_with/slashes and spaces" + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": weird_id, "object": "response", "status": "completed", "model": MODEL_ID, "output": []} + ) + self.client.get(weird_id) + + path = mock_req.call_args.args[1] + assert "resp_with%2Fslashes%20and%20spaces" in path + assert mock_req.call_args.args[0] == "GET" + + def test_delete_parses_result(self): + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": "resp_1", "object": "response.deleted", "deleted": True} + ) + result = self.client.delete("resp_1") + assert result.deleted is True + assert result.id == "resp_1" + + def test_http_error_raises_responses_api_error(self): + resp = MagicMock() + resp.ok = False + resp.status_code = 400 + resp.text = '{"error":{"message":"bad"}}' + with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): + with pytest.raises(ResponsesAPIError) as excinfo: + self.client.create("hi") + assert excinfo.value.status_code == 400 + assert "bad" in str(excinfo.value) + + def test_create_streaming_yields_events(self): + sse = 
( + 'event: response.output_text.delta\n' + 'data: {"type":"response.output_text.delta","delta":"a","sequence_number":1}\n' + '\n' + 'event: response.output_text.delta\n' + 'data: {"type":"response.output_text.delta","delta":"b","sequence_number":2}\n' + '\n' + 'data: [DONE]\n\n' + ) + with patch("foundry_local_sdk.openai.responses_client.requests.post") as mock_post: + mock_post.return_value = _fake_stream_response(sse) + events = list(self.client.create_streaming("hi")) + + assert len(events) == 2 + assert "".join(e.delta for e in events) == "ab" + _, kwargs = mock_post.call_args + body = json.loads(kwargs["data"]) + assert body["stream"] is True + assert kwargs["headers"]["Accept"] == "text/event-stream" + + def test_streaming_http_error(self): + resp = MagicMock() + resp.ok = False + resp.status_code = 500 + resp.text = "boom" + resp.close = MagicMock() + with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): + with pytest.raises(ResponsesAPIError) as excinfo: + list(self.client.create_streaming("hi")) + assert excinfo.value.status_code == 500 + + def test_settings_merge_precedence(self): + self.client.settings.temperature = 0.1 + self.client.settings.max_output_tokens = 100 + with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: + mock_req.return_value = _fake_json_response( + {"id": "r", "object": "response", "status": "completed", "model": MODEL_ID, "output": []} + ) + # Per-call overrides client settings + self.client.create("hi", temperature=0.9) + + body = json.loads(mock_req.call_args.kwargs["data"]) + assert body["temperature"] == 0.9 # per-call wins + assert body["max_output_tokens"] == 100 # settings default preserved + + +class TestManagerFactory: + """Ensure the factory method wiring doesn't require a running server.""" + + def test_manager_raises_if_web_service_not_started(self): + from foundry_local_sdk.exception import FoundryLocalException + + # Build a stand-in manager without going through the constructor's + # heavy initialization path. + mgr = MagicMock() + mgr.urls = None + # Bind the real method to our MagicMock so we exercise actual logic. + from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M + + with pytest.raises(FoundryLocalException, match="[Ww]eb service"): + M.create_responses_client(mgr, "some-model") + + def test_manager_returns_client_when_urls_set(self): + mgr = MagicMock() + mgr.urls = [BASE_URL] + from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M + + client = M.create_responses_client(mgr, "phi") + assert isinstance(client, ResponsesClient) + assert client._model_id == "phi" + assert client._base_url == BASE_URL diff --git a/sdk/python/test/openai/test_responses_integration.py b/sdk/python/test/openai/test_responses_integration.py new file mode 100644 index 000000000..cb4eee456 --- /dev/null +++ b/sdk/python/test/openai/test_responses_integration.py @@ -0,0 +1,288 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Integration tests for the Responses API client. + +These require a real Foundry Local runtime + a cached model. They are only +run when ``FOUNDRY_INTEGRATION_TESTS=1`` is set in the environment. 
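+
+For example, from ``sdk/python`` (path assumed from this repository's layout)::
+
+    FOUNDRY_INTEGRATION_TESTS=1 pytest test/openai/test_responses_integration.py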
+""" + +from __future__ import annotations + +import json +import os + +import pytest + +from foundry_local_sdk import ( + FunctionToolDefinition, + InputImageContent, + InputTextContent, + MessageItem, +) + +from ..conftest import TEST_MODEL_ALIAS + +pytestmark = pytest.mark.skipif( + not os.environ.get("FOUNDRY_INTEGRATION_TESTS"), + reason="Set FOUNDRY_INTEGRATION_TESTS=1 to run Responses API integration tests.", +) + + +def _get_loaded_model(catalog): + cached = catalog.get_cached_models() + assert cached, "No cached models found" + variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" + + model = catalog.get_model(TEST_MODEL_ALIAS) + assert model is not None + model.select_variant(variant) + model.load() + return model + + +@pytest.fixture(scope="module") +def responses_client(manager, catalog): + """Start the web service, return a ResponsesClient tied to the test model.""" + model = _get_loaded_model(catalog) + manager.start_web_service() + client = manager.create_responses_client(model.id) + try: + yield client + finally: + try: + manager.stop_web_service() + finally: + model.unload() + + +# --------------------------------------------------------------------------- +# Non-streaming +# --------------------------------------------------------------------------- + +class TestNonStreaming: + def test_simple_string_input(self, responses_client): + resp = responses_client.create("What is 2 + 2? Reply with just the number.") + assert resp.id + assert resp.status in {"completed", "incomplete"} + assert resp.output_text # Non-empty + + def test_with_options(self, responses_client): + resp = responses_client.create( + "Say hello.", + temperature=0.0, + max_output_tokens=32, + ) + assert resp.output_text + + def test_structured_input(self, responses_client): + # Validates that structured MessageItem input is accepted and produces + # a well-formed response. Not asserting content (too model-dependent). + resp = responses_client.create( + [ + MessageItem(role="user", content="Reply with the single word: ping"), + ], + temperature=0.0, + ) + assert resp.status in {"completed", "incomplete"} + assert resp.output_text.strip() + + def test_with_instructions(self, responses_client): + resp = responses_client.create( + "Who are you?", + instructions="You are a terse assistant. Answer in exactly three words.", + temperature=0.0, + ) + assert resp.output_text + + def test_multi_turn(self, responses_client): + # Validates previous_response_id wiring: the second response should + # link back to the first via previous_response_id. We don't assert on + # recall quality (too model-dependent for tiny test models). + first = responses_client.create( + "My favourite colour is green. Just acknowledge with 'ok'.", + temperature=0.0, + store=True, + ) + assert first.id + second = responses_client.create( + "What colour did I mention?", + previous_response_id=first.id, + temperature=0.0, + ) + assert second.previous_response_id == first.id + assert second.output_text.strip() + + +# --------------------------------------------------------------------------- +# Streaming +# --------------------------------------------------------------------------- + +class TestStreaming: + def test_basic_streaming(self, responses_client): + chunks = [] + completed = False + for event in responses_client.create_streaming( + "Count 1, 2, 3. 
Reply with just the digits separated by spaces.", + temperature=0.0, + ): + if event.type == "response.output_text.delta": + chunks.append(event.delta) + elif event.type == "response.completed": + completed = True + assert completed + assert "".join(chunks).strip() + + def test_streaming_with_options(self, responses_client): + saw_completed = False + for event in responses_client.create_streaming( + "Hello", + temperature=0.0, + max_output_tokens=16, + ): + if event.type == "response.completed": + saw_completed = True + assert saw_completed + + def test_streaming_events_sequence(self, responses_client): + # Expect created → in_progress → ... → completed + types_seen = [] + for event in responses_client.create_streaming("Say hi.", temperature=0.0): + types_seen.append(event.type) + assert "response.created" in types_seen + assert "response.completed" in types_seen + assert types_seen.index("response.created") < types_seen.index("response.completed") + + +# --------------------------------------------------------------------------- +# Storage: get / delete / list +# --------------------------------------------------------------------------- + +class TestStorage: + def test_get_stored_response(self, responses_client): + first = responses_client.create("Store this.", store=True, temperature=0.0) + fetched = responses_client.get(first.id) + assert fetched.id == first.id + assert fetched.output_text == first.output_text + + def test_delete_response(self, responses_client): + created = responses_client.create("Delete me.", store=True, temperature=0.0) + result = responses_client.delete(created.id) + assert result.id == created.id + assert result.deleted is True + + def test_list_responses(self, responses_client): + # Create one so the list is guaranteed non-empty. + responses_client.create("A listable response.", store=True, temperature=0.0) + result = responses_client.list() + assert result.object == "list" + assert len(result.data) >= 1 + + +# --------------------------------------------------------------------------- +# Tool calling +# --------------------------------------------------------------------------- + +class TestToolCalling: + def test_function_call_round_trip(self, responses_client): + tool = FunctionToolDefinition( + name="multiply_numbers", + description="Multiply two integers.", + parameters={ + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + "required": ["a", "b"], + }, + ) + first = responses_client.create( + "What is 7 times 6? Use the multiply_numbers tool.", + tools=[tool], + temperature=0.0, + ) + + # Find the function_call item. + call = next( + (item for item in first.output if getattr(item, "type", None) == "function_call"), + None, + ) + if call is None: + pytest.skip("Model did not emit a tool call for this prompt") + + args = json.loads(call.arguments) + # Model may use the declared parameter names or invent its own. + # Extract the two integer values robustly. + int_values = [int(v) for v in args.values() if isinstance(v, (int, str)) and str(v).lstrip("-").isdigit()] + if len(int_values) < 2: + pytest.skip(f"Model produced unusable tool args: {args!r}") + product = int_values[0] * int_values[1] + + follow = responses_client.create( + [ + MessageItem(role="user", content="What is 7 times 6? 
Use the multiply_numbers tool."), + call, + { + "type": "function_call_output", + "call_id": call.call_id, + "output": str(product), + }, + ], + tools=[tool], + temperature=0.0, + ) + # Validates the round-trip: the follow-up should produce a completed + # response that references the tool output. We don't assert content. + assert follow.status in {"completed", "incomplete"} + assert follow.output_text.strip() + + +# --------------------------------------------------------------------------- +# Vision +# --------------------------------------------------------------------------- + +class TestVision: + """These tests require a vision-capable model and will be skipped otherwise.""" + + def _run_or_skip(self, responses_client, content): + try: + return responses_client.create( + [MessageItem(role="user", content=content)], + temperature=0.0, + ) + except Exception as e: + pytest.skip(f"Model does not appear to support vision: {e}") + + def test_image_base64_input(self, responses_client): + # Minimal 1x1 PNG. + png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + resp = self._run_or_skip( + responses_client, + [ + InputTextContent(text="Describe this image briefly."), + InputImageContent.from_bytes(png, "image/png"), + ], + ) + assert resp.status in {"completed", "incomplete"} + + def test_image_with_text(self, responses_client): + png = bytes.fromhex( + "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" + "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" + "ae426082" + ) + resp = self._run_or_skip( + responses_client, + [ + InputTextContent(text="What colour is this?"), + InputImageContent.from_bytes(png, "image/png"), + ], + ) + assert resp.status in {"completed", "incomplete"} From b6ad3ae5343eb7d1146badffd3c587cd1c515c22 Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 17:09:54 -0400 Subject: [PATCH 02/12] fix(sdk/python): address Responses API PR review comments - Add configurable timeout to ResponsesClientSettings (default 60s); non-streaming calls use it directly, streaming uses it as connect timeout with unbounded read (suitable for long responses) - Fix SSE buffer: replace O(n) list-join-per-chunk with a single string buffer and split on double-newline; use chunk_size=None for natural server chunk boundaries - Add InputImageContent.__post_init__ to enforce exactly one of image_url or image_data (raises ValueError if both or neither) - Add optional max_size=(w,h) to InputImageContent.from_file and from_bytes to resize images before base64-encoding (requires Pillow) - Raise ValueError for unknown content-part types instead of silently returning a fallback InputTextContent - Document _MAX_ID_LEN=256 with rationale; lower from 1024 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/src/openai/responses_client.py | 50 ++++++----- sdk/python/src/openai/responses_types.py | 87 +++++++++++++++++-- .../test/openai/test_responses_client.py | 18 +++- 3 files changed, 123 insertions(+), 32 deletions(-) diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index a0d9a7777..74544ad84 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -54,7 +54,9 @@ logger = logging.getLogger(__name__) -_MAX_ID_LEN = 1024 +# Practical guard against misuse (e.g. passing a full response JSON by mistake). 
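+# (For scale: IDs in this SDK's own tests look like "resp_abc", tens of characters at most.)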
+# OpenAI does not publish a max ID length; 256 chars is conservative and generous. +_MAX_ID_LEN = 256 class ResponsesClientSettings: @@ -79,6 +81,10 @@ def __init__(self) -> None: self.reasoning: Optional[ReasoningConfig] = None self.text: Optional[TextConfig] = None self.seed: Optional[int] = None + # Transport settings — not sent to the API. + self.timeout: float = 60.0 + """Seconds to wait for the server to connect and respond on non-streaming calls. + For streaming, this is used only as the connection timeout; reads are unbounded.""" def _serialize(self) -> Dict[str, Any]: raw: Dict[str, Any] = { @@ -271,6 +277,7 @@ def _url(self, path: str) -> str: return f"{self._base_url}{path}" def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + timeout = self.settings.timeout try: if body is not None: resp = requests.request( @@ -278,9 +285,15 @@ def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = self._url(path), headers={"Content-Type": "application/json", "Accept": "application/json"}, data=json.dumps(body), + timeout=timeout, ) else: - resp = requests.request(method, self._url(path), headers={"Accept": "application/json"}) + resp = requests.request( + method, + self._url(path), + headers={"Accept": "application/json"}, + timeout=timeout, + ) except requests.RequestException as e: raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e @@ -308,12 +321,16 @@ def _handle_json_response(resp: requests.Response, method: str, path: str) -> Di def _post_stream( self, path: str, body: Dict[str, Any] ) -> Generator[StreamingEvent, None, None]: + # Use (connect_timeout, None) so the connection attempt can time out but + # the read side is unbounded — streaming responses can be arbitrarily long. + connect_timeout = self.settings.timeout try: resp = requests.post( self._url(path), headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, data=json.dumps(body), stream=True, + timeout=(connect_timeout, None), ) except requests.RequestException as e: raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e @@ -335,35 +352,28 @@ def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, Closes the underlying HTTP connection when the generator ends for any reason (completion, [DONE], exception, or GC). + + Uses a single string buffer and splits on double-newline boundaries to + avoid the O(n) cost of joining a growing list on every chunk. """ try: - buffer_parts: List[str] = [] - # iter_content yields bytes chunks; decode as UTF-8 and split on blank lines. - for chunk in resp.iter_content(chunk_size=1024, decode_unicode=False): + buffer = "" + for chunk in resp.iter_content(chunk_size=None, decode_unicode=False): if not chunk: continue - if isinstance(chunk, bytes): - text = chunk.decode("utf-8", errors="replace") - else: - text = chunk - buffer_parts.append(text) - buffer = "".join(buffer_parts) - # Normalize CRLF to LF so our split works on both styles. 
- buffer = buffer.replace("\r\n", "\n") - - blocks = buffer.split("\n\n") - incomplete = blocks.pop() if blocks else "" - buffer_parts = [incomplete] if incomplete else [] + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") - for block in blocks: + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) event = _parse_sse_block(block) if event is _SSE_DONE: return if event is not None: yield event - # Flush any residual block that wasn't terminated by a blank line. - tail = "".join(buffer_parts).strip() + # Flush any residual block not terminated by a blank line. + tail = buffer.strip() if tail: event = _parse_sse_block(tail) if event is not None and event is not _SSE_DONE: diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py index 09f9f4a60..064d2ad6a 100644 --- a/sdk/python/src/openai/responses_types.py +++ b/sdk/python/src/openai/responses_types.py @@ -13,9 +13,39 @@ from __future__ import annotations import base64 +import io import mimetypes from dataclasses import dataclass, field, fields, is_dataclass -from typing import Any, Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + + +# --------------------------------------------------------------------------- +# Image resize helper (optional — requires Pillow) +# --------------------------------------------------------------------------- + +def _resize_image(data: bytes, media_type: str, max_size: Tuple[int, int]) -> Tuple[bytes, str]: + """Resize *data* so it fits within *max_size* (width, height) while preserving + aspect ratio. Returns the re-encoded bytes and MIME type. + + Requires ``Pillow`` (``pip install pillow``). Raises ``ImportError`` if it is + not installed. + """ + try: + from PIL import Image # type: ignore[import-untyped] + except ImportError as exc: + raise ImportError( + "Image resizing requires Pillow. Install it with: pip install pillow" + ) from exc + + img = Image.open(io.BytesIO(data)) + img.thumbnail(max_size, Image.LANCZOS) + buf = io.BytesIO() + fmt = media_type.split("/")[-1].upper().replace("JPG", "JPEG") + if fmt not in ("JPEG", "PNG", "WEBP", "GIF"): + fmt = "PNG" + media_type = "image/png" + img.save(buf, format=fmt) + return buf.getvalue(), media_type # --------------------------------------------------------------------------- @@ -53,28 +83,69 @@ class InputTextContent: @dataclass class InputImageContent: - """Vision input. Provide either ``image_url`` or ``image_data`` (base64).""" + """Vision input. Provide exactly one of ``image_url`` or ``image_data`` (base64).""" media_type: str = "" image_url: Optional[str] = None image_data: Optional[str] = None detail: Optional[str] = None # "low" | "high" | "auto" type: Literal["input_image"] = "input_image" + def __post_init__(self) -> None: + has_url = self.image_url is not None + has_data = self.image_data is not None + if has_url == has_data: + raise ValueError( + "Provide exactly one of image_url or image_data, not both (or neither)." + ) + @staticmethod - def from_file(path: str, detail: Optional[str] = None) -> "InputImageContent": + def from_file( + path: str, + detail: Optional[str] = None, + max_size: Optional[Tuple[int, int]] = None, + ) -> "InputImageContent": + """Load an image from *path*, base64-encode it, and return an :class:`InputImageContent`. + + Args: + path: Filesystem path to the image file. 
+ detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. + max_size: Optional ``(width, height)`` cap. If the image exceeds either + dimension it is resized proportionally (requires ``Pillow``). + """ media_type, _ = mimetypes.guess_type(path) if not media_type or not media_type.startswith("image/"): raise ValueError(f"Unsupported image format: {path}") with open(path, "rb") as fh: - data = base64.b64encode(fh.read()).decode("ascii") - return InputImageContent(image_data=data, media_type=media_type, detail=detail) + raw = fh.read() + if max_size is not None: + raw, media_type = _resize_image(raw, media_type, max_size) + return InputImageContent( + image_data=base64.b64encode(raw).decode("ascii"), + media_type=media_type, + detail=detail, + ) @staticmethod def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) @staticmethod - def from_bytes(data: bytes, media_type: str, detail: Optional[str] = None) -> "InputImageContent": + def from_bytes( + data: bytes, + media_type: str, + detail: Optional[str] = None, + max_size: Optional[Tuple[int, int]] = None, + ) -> "InputImageContent": + """Create an :class:`InputImageContent` from raw *data* bytes. + + Args: + data: Raw image bytes. + media_type: MIME type, e.g. ``"image/png"``. + detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. + max_size: Optional ``(width, height)`` cap. Requires ``Pillow``. + """ + if max_size is not None: + data, media_type = _resize_image(data, media_type, max_size) return InputImageContent( image_data=base64.b64encode(data).decode("ascii"), media_type=media_type, @@ -129,8 +200,8 @@ def _parse_content_part(data: Dict[str, Any]) -> ContentPart: ) if t == "refusal": return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — fall back to input_text so callers still get something - return InputTextContent(text=str(data.get("text", ""))) + # Unknown content-part type — raise so callers know the SDK needs updating + raise ValueError(f"Unknown content-part type: {t!r}") def _parse_content(value: Any) -> Union[str, List[ContentPart]]: diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 15c1f3b1e..6b20754ef 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -115,10 +115,14 @@ def test_serialize_all_fields(self): assert out["text"] == {"format": {"type": "json_object"}} assert out["seed"] == 42 - def test_serialize_omits_none(self): + def test_timeout_not_serialized(self): + # timeout is a transport setting and must NOT appear in the API payload s = ResponsesClientSettings() - s.temperature = None # explicit None is omitted - assert "temperature" not in s._serialize() + s.timeout = 30.0 + assert "timeout" not in s._serialize() + + def test_timeout_default(self): + assert ResponsesClientSettings().timeout == 60.0 # --------------------------------------------------------------------------- @@ -228,7 +232,7 @@ def test_rejects_whitespace_id(self): def test_rejects_too_long_id(self): with pytest.raises(ValueError, match="length"): - self.client.get("x" * 2000) + self.client.get("x" * 1000) # --------------------------------------------------------------------------- @@ -389,6 +393,12 @@ def test_input_image_serialization(self): # image_url left unset should be omitted assert "image_url" not in d + def test_input_image_mutual_exclusivity(self): + 
with pytest.raises(ValueError, match="exactly one"): + InputImageContent(media_type="image/png") # neither set + with pytest.raises(ValueError, match="exactly one"): + InputImageContent(media_type="image/png", image_url="http://x.com/a.png", image_data="abc") # both set + # --------------------------------------------------------------------------- # Type serialization & parsing From dbc3e93f1961b9fe9f3603473ee1c7c5ad58a36b Mon Sep 17 00:00:00 2001 From: maanavd Date: Thu, 23 Apr 2026 22:03:50 -0400 Subject: [PATCH 03/12] address review: store=None, MAX_ID_LEN=1024, unknown content-part returns None, example usage guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ResponsesClientSettings.store defaults to None (omitted from request body, server decides) — aligns with JS SDK which has store?: boolean - _MAX_ID_LEN reverted to 1024 to align with JS SDK constant - _parse_content_part returns None for unknown types (forward-compat, not ValueError); _parse_content filters out None entries - examples/responses.py: guard event.response.usage chain with getattr to avoid AttributeError if response or usage is absent - Tests updated: store default tests, too-long-id threshold (1025), request body assertions Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/examples/responses.py | 5 ++++- sdk/python/src/openai/responses_client.py | 7 +++---- sdk/python/src/openai/responses_types.py | 9 +++++---- sdk/python/test/openai/test_responses_client.py | 15 +++++++-------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py index ce810e814..047ddbdeb 100644 --- a/sdk/python/examples/responses.py +++ b/sdk/python/examples/responses.py @@ -63,7 +63,10 @@ def streaming(client): if event.type == "response.output_text.delta": print(event.delta, end="", flush=True) elif event.type == "response.completed": - print(f"\n(completed, {event.response.usage.total_tokens} tokens)") + response = getattr(event, "response", None) + usage = getattr(response, "usage", None) if response is not None else None + total = getattr(usage, "total_tokens", None) if usage is not None else None + print(f"\n(completed{f', {total} tokens' if total is not None else ''})") def multi_turn(client): diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index 74544ad84..0cfbaaad9 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -54,9 +54,8 @@ logger = logging.getLogger(__name__) -# Practical guard against misuse (e.g. passing a full response JSON by mistake). -# OpenAI does not publish a max ID length; 256 chars is conservative and generous. -_MAX_ID_LEN = 256 +# Align with the JS SDK limit to avoid surprising client-side rejections of valid IDs. +_MAX_ID_LEN = 1024 class ResponsesClientSettings: @@ -76,7 +75,7 @@ def __init__(self) -> None: self.tool_choice: Optional[Any] = None self.truncation: Optional[str] = None self.parallel_tool_calls: Optional[bool] = None - self.store: Optional[bool] = True # SDK default — matches OpenAI convention. + self.store: Optional[bool] = None # Omitted by default; server applies its own default. 
self.metadata: Optional[Dict[str, str]] = None self.reasoning: Optional[ReasoningConfig] = None self.text: Optional[TextConfig] = None diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py index 064d2ad6a..ad1266a44 100644 --- a/sdk/python/src/openai/responses_types.py +++ b/sdk/python/src/openai/responses_types.py @@ -179,7 +179,7 @@ class RefusalContent: ] -def _parse_content_part(data: Dict[str, Any]) -> ContentPart: +def _parse_content_part(data: Dict[str, Any]) -> Optional[ContentPart]: t = data.get("type") if t == "input_text": return InputTextContent(text=data.get("text", "")) @@ -200,15 +200,16 @@ def _parse_content_part(data: Dict[str, Any]) -> ContentPart: ) if t == "refusal": return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — raise so callers know the SDK needs updating - raise ValueError(f"Unknown content-part type: {t!r}") + # Unknown content-part type — return None so callers can filter forward-compat parts. + return None def _parse_content(value: Any) -> Union[str, List[ContentPart]]: if isinstance(value, str): return value if isinstance(value, list): - return [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + parts = [_parse_content_part(p) if isinstance(p, dict) else p for p in value] + return [p for p in parts if p is not None] return value diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 6b20754ef..32034d3c2 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -73,14 +73,13 @@ def _fake_stream_response(sse_payload: str, status: int = 200): # --------------------------------------------------------------------------- class TestResponsesClientSettings: - def test_serialize_defaults_contains_store(self): - # store defaults to True — matches OpenAI convention + def test_serialize_defaults_empty(self): + # No fields set by default — server applies its own defaults s = ResponsesClientSettings() - serialized = s._serialize() - assert serialized == {"store": True} + assert s._serialize() == {} - def test_store_defaults_to_true(self): - assert ResponsesClientSettings().store is True + def test_store_defaults_to_none(self): + assert ResponsesClientSettings().store is None def test_serialize_all_fields(self): s = ResponsesClientSettings() @@ -232,7 +231,7 @@ def test_rejects_whitespace_id(self): def test_rejects_too_long_id(self): with pytest.raises(ValueError, match="length"): - self.client.get("x" * 1000) + self.client.get("x" * 1025) # --------------------------------------------------------------------------- @@ -504,7 +503,7 @@ def test_create_posts_correct_body(self): assert body["model"] == MODEL_ID assert body["input"] == "hello" assert body["temperature"] == 0.3 - assert body["store"] is True # default + assert "store" not in body # store=None is omitted from request assert "stream" not in body def test_get_uses_url_encoded_path(self): From 7e8cd888fb946ff2d979776bdcf73151efccb0a0 Mon Sep 17 00:00:00 2001 From: maanavd Date: Mon, 27 Apr 2026 15:11:18 -0400 Subject: [PATCH 04/12] fix(sdk/python): address Responses API review feedback - Rename model and variant Responses client factories to get_responses_client to match existing get_*_client naming. - Use FoundryLocalException for Responses API transport and parsing errors instead of exporting a dedicated ResponsesAPIError. 
- Keep only the foundry-local-core version bump in requirements.txt and restore existing ORT dependency markers/order. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/requirements.txt | 9 ++--- sdk/python/src/__init__.py | 3 +- sdk/python/src/detail/model.py | 6 ++-- sdk/python/src/detail/model_variant.py | 4 +-- sdk/python/src/imodel.py | 4 +-- sdk/python/src/openai/__init__.py | 5 ++- sdk/python/src/openai/responses_client.py | 33 ++++++------------- .../test/openai/test_responses_client.py | 14 ++++---- 8 files changed, 32 insertions(+), 46 deletions(-) diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index 25d05c298..ce84af748 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -1,8 +1,9 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 +# Standard native binary packages from the ORT-Nightly PyPI feed. foundry-local-core==1.0.0 -onnxruntime-gpu==1.24.4; platform_system == "Linux" -onnxruntime-core==1.24.4; platform_system != "Linux" -onnxruntime-genai-cuda==0.13.1; platform_system == "Linux" -onnxruntime-genai-core==0.13.1; platform_system != "Linux" +onnxruntime-core==1.24.4; sys_platform != "linux" +onnxruntime-gpu==1.24.4; sys_platform == "linux" +onnxruntime-genai-core==0.13.1; sys_platform != "linux" +onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 273e3f9c4..ae768e7c3 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,7 +7,7 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings +from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .openai.responses_types import ( ContentPart, DeleteResponseResult, @@ -72,7 +72,6 @@ "ResponseObject", "ResponseOutputItem", "ResponseUsage", - "ResponsesAPIError", "ResponsesClient", "ResponsesClientSettings", "StreamingEvent", diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index 01dcfb471..e15aa583f 100644 --- a/sdk/python/src/detail/model.py +++ b/sdk/python/src/detail/model.py @@ -148,6 +148,6 @@ def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() - def create_responses_client(self, base_url: str) -> "ResponsesClient": - """Create a Responses API client for the currently selected variant.""" - return self._selected_variant.create_responses_client(base_url) + def get_responses_client(self, base_url: str) -> "ResponsesClient": + """Get a Responses API client for the currently selected variant.""" + return self._selected_variant.get_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 2e19662d5..7e57b3a2e 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -177,8 +177,8 @@ def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) - def create_responses_client(self, base_url: str) -> ResponsesClient: - """Create a Responses API client for this variant. + def get_responses_client(self, base_url: str) -> ResponsesClient: + """Create an OpenAI-compatible ``ResponsesClient`` for this variant. 
:param base_url: Base URL of the running Foundry Local web service (e.g. ``manager.urls[0]``). diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index 6bc0d3638..f76ad1b03 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -138,9 +138,9 @@ def get_embedding_client(self) -> 'EmbeddingClient': pass @abstractmethod - def create_responses_client(self, base_url: str) -> 'ResponsesClient': + def get_responses_client(self, base_url: str) -> 'ResponsesClient': """ - Create an OpenAI Responses API client bound to the running web service. + Get an OpenAI Responses API client bound to the running web service. Unlike the other clients, the Responses API is HTTP-only and requires the Foundry Local web service to be started. Pass the base URL diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index 011ed15bd..ea97ca575 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -14,7 +14,7 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError +from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,8 +25,7 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesAPIError", "ResponsesClient", "ResponsesClientSettings", "TranscriptionContentPart", -] \ No newline at end of file +] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py index 0cfbaaad9..ddcce586c 100644 --- a/sdk/python/src/openai/responses_client.py +++ b/sdk/python/src/openai/responses_client.py @@ -36,6 +36,7 @@ import requests +from ..exception import FoundryLocalException from .responses_types import ( DeleteResponseResult, InputItemsListResponse, @@ -105,20 +106,11 @@ def _serialize(self) -> Dict[str, Any]: return {k: v for k, v in raw.items() if v is not None} -class ResponsesAPIError(Exception): - """Raised for HTTP/transport errors against the Responses API.""" - - def __init__(self, message: str, status_code: Optional[int] = None, body: Optional[str] = None): - super().__init__(message) - self.status_code = status_code - self.body = body - - class ResponsesClient: """Client for the OpenAI Responses API served by Foundry Local. Construct via ``manager.create_responses_client(model_id)`` or - ``model.create_responses_client(base_url)``. + ``model.get_responses_client(base_url)``. 
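+
+    An illustrative streaming loop (event and attribute names as exercised by
+    this module's tests)::
+
+        for event in client.create_streaming("Count to 3"):
+            if event.type == "response.output_text.delta":
+                print(event.delta, end="", flush=True)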
""" def __init__(self, base_url: str, model_id: Optional[str] = None): @@ -294,7 +286,7 @@ def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = timeout=timeout, ) except requests.RequestException as e: - raise ResponsesAPIError(f"Network error calling {method} {path}: {e}") from e + raise FoundryLocalException(f"Network error calling {method} {path}: {e}") from e return self._handle_json_response(resp, method, path) @@ -305,15 +297,13 @@ def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: text = resp.text if not resp.ok: - raise ResponsesAPIError( - f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}", - status_code=resp.status_code, - body=text, + raise FoundryLocalException( + f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}" ) try: return json.loads(text) if text else {} except json.JSONDecodeError as e: - raise ResponsesAPIError( + raise FoundryLocalException( f"Failed to parse response JSON from {method} {path}: {text[:200]}" ) from e @@ -332,15 +322,13 @@ def _post_stream( timeout=(connect_timeout, None), ) except requests.RequestException as e: - raise ResponsesAPIError(f"Network error calling POST {path}: {e}") from e + raise FoundryLocalException(f"Network error calling POST {path}: {e}") from e if not resp.ok: body_text = resp.text resp.close() - raise ResponsesAPIError( - f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}", - status_code=resp.status_code, - body=body_text, + raise FoundryLocalException( + f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}" ) return _iter_sse_events(resp) @@ -409,7 +397,7 @@ def _parse_sse_block(block: str) -> Any: try: parsed = json.loads(data) except json.JSONDecodeError as e: - raise ResponsesAPIError(f"Failed to parse streaming event JSON: {e}") from e + raise FoundryLocalException(f"Failed to parse streaming event JSON: {e}") from e if not isinstance(parsed, dict): return None return parse_streaming_event(parsed) @@ -418,5 +406,4 @@ def _parse_sse_block(block: str) -> Any: __all__ = [ "ResponsesClient", "ResponsesClientSettings", - "ResponsesAPIError", ] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py index 32034d3c2..871fe301c 100644 --- a/sdk/python/test/openai/test_responses_client.py +++ b/sdk/python/test/openai/test_responses_client.py @@ -18,8 +18,8 @@ import pytest +from foundry_local_sdk.exception import FoundryLocalException from foundry_local_sdk.openai.responses_client import ( - ResponsesAPIError, ResponsesClient, ResponsesClientSettings, _parse_sse_block, @@ -291,7 +291,7 @@ def test_multi_line_data(self): def test_invalid_json_raises(self): block = 'data: {not valid json' - with pytest.raises(ResponsesAPIError): + with pytest.raises(FoundryLocalException): _parse_sse_block(block) def test_empty_block_returns_none(self): @@ -527,15 +527,15 @@ def test_delete_parses_result(self): assert result.deleted is True assert result.id == "resp_1" - def test_http_error_raises_responses_api_error(self): + def test_http_error_raises_foundry_local_exception(self): resp = MagicMock() resp.ok = False resp.status_code = 400 resp.text = '{"error":{"message":"bad"}}' with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): - with pytest.raises(ResponsesAPIError) as excinfo: + with 
pytest.raises(FoundryLocalException) as excinfo: self.client.create("hi") - assert excinfo.value.status_code == 400 + assert "400" in str(excinfo.value) assert "bad" in str(excinfo.value) def test_create_streaming_yields_events(self): @@ -566,9 +566,9 @@ def test_streaming_http_error(self): resp.text = "boom" resp.close = MagicMock() with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): - with pytest.raises(ResponsesAPIError) as excinfo: + with pytest.raises(FoundryLocalException) as excinfo: list(self.client.create_streaming("hi")) - assert excinfo.value.status_code == 500 + assert "500" in str(excinfo.value) def test_settings_merge_precedence(self): self.client.settings.temperature = 0.1 From 9bc1606b8320330e8b565aa229f64a38f8575628 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 16:06:30 -0400 Subject: [PATCH 05/12] refactor(sdk/python): use web-service sample for Responses API Replace the SDK-native Responses client implementation with a focused Python sample and integration tests that use FoundryLocalManager for setup/model/server lifecycle and the official OpenAI Python client for /v1/responses calls. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- sdk/python/README.md | 3 +- sdk/python/examples/responses.py | 157 --- sdk/python/examples/responses_web_service.py | 176 ++++ sdk/python/requirements.txt | 10 +- sdk/python/src/__init__.py | 61 +- sdk/python/src/detail/model.py | 5 - sdk/python/src/detail/model_variant.py | 9 - sdk/python/src/foundry_local_manager.py | 23 - sdk/python/src/imodel.py | 16 - sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 409 -------- sdk/python/src/openai/responses_types.py | 957 ------------------ .../test/openai/test_responses_client.py | 612 ----------- .../test/openai/test_responses_integration.py | 288 ------ .../test/openai/test_responses_web_service.py | 194 ++++ 15 files changed, 379 insertions(+), 2546 deletions(-) delete mode 100644 sdk/python/examples/responses.py create mode 100644 sdk/python/examples/responses_web_service.py delete mode 100644 sdk/python/src/openai/responses_client.py delete mode 100644 sdk/python/src/openai/responses_types.py delete mode 100644 sdk/python/test/openai/test_responses_client.py delete mode 100644 sdk/python/test/openai/test_responses_integration.py create mode 100644 sdk/python/test/openai/test_responses_web_service.py diff --git a/sdk/python/README.md b/sdk/python/README.md index 2a121411e..0c065bc85 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -328,4 +328,5 @@ See [test/README.md](test/README.md) for detailed test setup and structure. ```bash python examples/chat_completion.py -``` \ No newline at end of file +python examples/responses_web_service.py +``` diff --git a/sdk/python/examples/responses.py b/sdk/python/examples/responses.py deleted file mode 100644 index 047ddbdeb..000000000 --- a/sdk/python/examples/responses.py +++ /dev/null @@ -1,157 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""End-to-end example for the OpenAI Responses API client. - -Run with:: - - python examples/responses.py - -Requires a loaded model and a started web service. 
-""" - -from __future__ import annotations - -import json - -from foundry_local_sdk import ( - Configuration, - FoundryLocalManager, - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, -) - -MODEL_ALIAS = "phi-4-mini" - - -def setup(): - config = Configuration(app_name="ResponsesExample") - FoundryLocalManager.initialize(config) - mgr = FoundryLocalManager.instance - - mgr.download_and_register_eps() - - model = mgr.catalog.get_model(MODEL_ALIAS) - if model is None: - raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") - if not model.is_cached: - print(f"Downloading {MODEL_ALIAS}...") - model.download(progress_callback=lambda p: print(f" {p:.1f}%", end="\r")) - print() - print(f"Loading {model.alias}...", end="") - model.load() - print("loaded!") - mgr.start_web_service() - - client = mgr.create_responses_client(model.id) - return mgr, model, client - - -def basic_create(client): - print("\n=== 1. Basic create ===") - resp = client.create("What is 2 + 2? Answer in one word.") - print(f"status={resp.status} text={resp.output_text!r}") - - -def streaming(client): - print("\n=== 2. Streaming ===") - print("assistant: ", end="", flush=True) - for event in client.create_streaming("Count from 1 to 5, separated by spaces."): - if event.type == "response.output_text.delta": - print(event.delta, end="", flush=True) - elif event.type == "response.completed": - response = getattr(event, "response", None) - usage = getattr(response, "usage", None) if response is not None else None - total = getattr(usage, "total_tokens", None) if usage is not None else None - print(f"\n(completed{f', {total} tokens' if total is not None else ''})") - - -def multi_turn(client): - print("\n=== 3. Multi-turn ===") - first = client.create("My favorite color is green. Remember that.", store=True) - print(f"first id={first.id!r}") - second = client.create( - "What is my favorite color?", - previous_response_id=first.id, - ) - print(f"second: {second.output_text!r}") - - -def tool_calling(client): - print("\n=== 4. Tool calling ===") - tools = [ - FunctionToolDefinition( - name="multiply_numbers", - description="Multiply two integers together.", - parameters={ - "type": "object", - "properties": { - "a": {"type": "integer"}, - "b": {"type": "integer"}, - }, - "required": ["a", "b"], - }, - ) - ] - resp = client.create("What is 7 times 6?", tools=tools) - - # Find a function_call item in the output (if the model produced one). - for item in resp.output: - if getattr(item, "type", None) == "function_call": - print(f"call {item.name}({item.arguments})") - args = json.loads(item.arguments) - answer = args["a"] * args["b"] - follow = client.create( - [ - MessageItem(role="user", content="What is 7 times 6?"), - item, - # The function_call_output is sent back keyed by call_id - {"type": "function_call_output", "call_id": item.call_id, "output": str(answer)}, - ], - tools=tools, - ) - print(f"final: {follow.output_text!r}") - return - print(f"no tool call — got text: {resp.output_text!r}") - - -def vision(client): - print("\n=== 5. Vision ===") - # Requires a vision-capable model. Replace with a real PNG to see real output. 
- tiny_png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - msg = MessageItem( - role="user", - content=[ - InputTextContent(text="Describe this image in one sentence."), - InputImageContent.from_bytes(tiny_png, "image/png"), - ], - ) - try: - resp = client.create([msg]) - print(f"vision response: {resp.output_text!r}") - except Exception as e: - print(f"(skipped — model may not support vision: {e})") - - -def main(): - mgr, model, client = setup() - try: - basic_create(client) - streaming(client) - multi_turn(client) - tool_calling(client) - vision(client) - finally: - mgr.stop_web_service() - model.unload() - - -if __name__ == "__main__": - main() diff --git a/sdk/python/examples/responses_web_service.py b/sdk/python/examples/responses_web_service.py new file mode 100644 index 000000000..fe9517949 --- /dev/null +++ b/sdk/python/examples/responses_web_service.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- + +"""Example: Responses API through the Foundry Local web service. + +Foundry Local manages setup, model lifecycle, and the local OpenAI-compatible +web service. The official OpenAI Python client sends the actual /v1/responses +requests to that local service. +""" + +from __future__ import annotations + +import json +from typing import Any + +from openai import OpenAI + +from foundry_local_sdk import Configuration, FoundryLocalManager + + +MODEL_ALIAS = "qwen2.5-0.5b" + + +def _field(value: Any, name: str, default: Any = None) -> Any: + if isinstance(value, dict): + return value.get(name, default) + return getattr(value, name, default) + + +def _response_text(response: Any) -> str: + text = _field(response, "output_text") + if isinstance(text, str) and text: + return text + + for item in _field(response, "output", []) or []: + if _field(item, "type") != "message": + continue + for part in _field(item, "content", []) or []: + if _field(part, "type") == "output_text": + part_text = _field(part, "text", "") + if isinstance(part_text, str): + text = (text or "") + part_text + return text or "" + + +def _get_function_call(response: Any) -> Any: + for item in _field(response, "output", []) or []: + if _field(item, "type") == "function_call": + return item + return None + + +def _get_weather_tool() -> dict[str, Any]: + return { + "type": "function", + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and region, for example Seattle, WA.", + } + }, + "required": ["location"], + }, + } + + +def main() -> None: + config = Configuration(app_name="ResponsesWebServiceExample") + print("Initializing Foundry Local Manager") + FoundryLocalManager.initialize(config) + manager = FoundryLocalManager.instance + if manager is None: + raise RuntimeError("FoundryLocalManager.initialize did not set instance") + + print("Registering execution providers...") + ep_result = manager.download_and_register_eps() + print(f"EP registration success: {ep_result.success} ({ep_result.status})") + + model = manager.catalog.get_model(MODEL_ALIAS) + if model is None: + raise RuntimeError(f"Model '{MODEL_ALIAS}' 
not found in catalog") + + if not model.is_cached: + print(f"Downloading {model.alias}...") + model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r")) + print() + + print(f"Loading {model.alias}...", end="") + model.load() + print("loaded!") + + openai_client: OpenAI | None = None + try: + print("Starting OpenAI-compatible web service...", end="") + manager.start_web_service() + if not manager.urls: + raise RuntimeError("Web service started but did not return any URLs") + print("started!") + + base_url = manager.urls[0].rstrip("/") + "/v1" + openai_client = OpenAI(base_url=base_url, api_key="notneeded") + + print("\n--- Non-streaming Responses call ---") + response = openai_client.responses.create( + model=model.id, + input="What is 2 + 2? Reply briefly.", + ) + print(_response_text(response)) + + print("\n--- Streaming Responses call ---") + stream = openai_client.responses.create( + model=model.id, + input="Count from 1 to 3, separated by spaces.", + stream=True, + ) + for event in stream: + if _field(event, "type") == "response.output_text.delta": + print(_field(event, "delta", ""), end="", flush=True) + print() + + print("\n--- Function/tool calling Responses flow ---") + weather_tool = _get_weather_tool() + tool_response = openai_client.responses.create( + model=model.id, + input="Use get_weather to check the weather in Seattle, then answer.", + tools=[weather_tool], + tool_choice="required", + store=True, + ) + function_call = _get_function_call(tool_response) + if function_call is None: + raise RuntimeError("Model did not return a function_call item") + + print(f"Tool call: {_field(function_call, 'name')}") + print(f"Arguments: {_field(function_call, 'arguments')}") + + final_response = openai_client.responses.create( + model=model.id, + previous_response_id=_field(tool_response, "id"), + input=[ + { + "type": "function_call_output", + "call_id": _field(function_call, "call_id"), + "output": json.dumps( + { + "location": "Seattle, WA", + "temperature": "68 F", + "conditions": "sunny", + } + ), + } + ], + ) + print(_response_text(final_response)) + + finally: + if openai_client is not None: + openai_client.close() + try: + manager.stop_web_service() + print("Web service stopped.") + except Exception: + pass + model.unload() + print("Model unloaded.") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt index ce84af748..92c98b540 100644 --- a/sdk/python/requirements.txt +++ b/sdk/python/requirements.txt @@ -2,8 +2,8 @@ pydantic>=2.0.0 requests>=2.32.4 openai>=2.24.0 # Standard native binary packages from the ORT-Nightly PyPI feed. 
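 # One way to confirm which pinned versions actually resolved in the active
 # environment (standard-library importlib.metadata, Python 3.8+):
 #   python -c "from importlib.metadata import version; print(version('foundry-local-core'))"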
-foundry-local-core==1.0.0 -onnxruntime-core==1.24.4; sys_platform != "linux" -onnxruntime-gpu==1.24.4; sys_platform == "linux" -onnxruntime-genai-core==0.13.1; sys_platform != "linux" -onnxruntime-genai-cuda==0.13.1; sys_platform == "linux" +foundry-local-core==1.0.0rc1 +onnxruntime-core==1.25.1; sys_platform != "linux" +onnxruntime-gpu==1.25.1; sys_platform == "linux" +onnxruntime-genai-core==0.13.2; sys_platform != "linux" +onnxruntime-genai-cuda==0.13.2; sys_platform == "linux" diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index ae768e7c3..14534d196 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,34 +7,6 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesClient, ResponsesClientSettings -from .openai.responses_types import ( - ContentPart, - DeleteResponseResult, - FunctionCallItem, - FunctionCallOutputItem, - FunctionToolDefinition, - InputFileContent, - InputImageContent, - InputItemsListResponse, - InputTextContent, - ItemReference, - ListResponsesResult, - MessageItem, - OutputTextContent, - ReasoningConfig, - ReasoningItem, - RefusalContent, - ResponseError, - ResponseInputItem, - ResponseObject, - ResponseOutputItem, - ResponseUsage, - StreamingEvent, - TextConfig, - TextFormat, - parse_streaming_event, -) from .version import __version__ _logger = logging.getLogger(__name__) @@ -48,35 +20,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = [ - "Configuration", - "ContentPart", - "DeleteResponseResult", - "FoundryLocalManager", - "FunctionCallItem", - "FunctionCallOutputItem", - "FunctionToolDefinition", - "InputFileContent", - "InputImageContent", - "InputItemsListResponse", - "InputTextContent", - "ItemReference", - "ListResponsesResult", - "MessageItem", - "OutputTextContent", - "ReasoningConfig", - "ReasoningItem", - "RefusalContent", - "ResponseError", - "ResponseInputItem", - "ResponseObject", - "ResponseOutputItem", - "ResponseUsage", - "ResponsesClient", - "ResponsesClientSettings", - "StreamingEvent", - "TextConfig", - "TextFormat", - "__version__", - "parse_streaming_event", -] +__all__ = ["Configuration", "FoundryLocalManager", "__version__"] diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py index e15aa583f..6d60b7a2f 100644 --- a/sdk/python/src/detail/model.py +++ b/sdk/python/src/detail/model.py @@ -11,7 +11,6 @@ from ..openai.chat_client import ChatClient from ..openai.audio_client import AudioClient from ..openai.embedding_client import EmbeddingClient -from ..openai.responses_client import ResponsesClient from .model_variant import ModelVariant from ..exception import FoundryLocalException from .core_interop import CoreInterop @@ -147,7 +146,3 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Get an embedding client for the currently selected variant.""" return self._selected_variant.get_embedding_client() - - def get_responses_client(self, base_url: str) -> "ResponsesClient": - """Get a Responses API client for the currently selected variant.""" - return self._selected_variant.get_responses_client(base_url) diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py index 7e57b3a2e..76efb05cd 100644 --- a/sdk/python/src/detail/model_variant.py +++ b/sdk/python/src/detail/model_variant.py @@ -17,7 +17,6 @@ from ..openai.audio_client import AudioClient from ..openai.chat_client import ChatClient 
from ..openai.embedding_client import EmbeddingClient -from ..openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -176,11 +175,3 @@ def get_audio_client(self) -> AudioClient: def get_embedding_client(self) -> EmbeddingClient: """Create an OpenAI-compatible ``EmbeddingClient`` for this variant.""" return EmbeddingClient(self.id, self._core_interop) - - def get_responses_client(self, base_url: str) -> ResponsesClient: - """Create an OpenAI-compatible ``ResponsesClient`` for this variant. - - :param base_url: Base URL of the running Foundry Local web service - (e.g. ``manager.urls[0]``). - """ - return ResponsesClient(base_url, self.id) diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index b891d1b17..a649f8e56 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,7 +20,6 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException -from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -195,25 +194,3 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None - - def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: - """Create a :class:`ResponsesClient` bound to the running web service. - - The Responses API is HTTP-only, so the web service must be started - before calling this. Use :meth:`start_web_service` first. - - Args: - model_id: Optional default model ID baked into the client. May also - be supplied per-call via ``options['model']``. - - Returns: - A new :class:`ResponsesClient`. - - Raises: - FoundryLocalException: If the web service has not been started. - """ - if not self.urls: - raise FoundryLocalException( - "Web service is not running. Call start_web_service() first." - ) - return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py index f76ad1b03..f723e514a 100644 --- a/sdk/python/src/imodel.py +++ b/sdk/python/src/imodel.py @@ -10,7 +10,6 @@ from .openai.chat_client import ChatClient from .openai.audio_client import AudioClient from .openai.embedding_client import EmbeddingClient -from .openai.responses_client import ResponsesClient from .detail.model_data_types import ModelInfo class IModel(ABC): @@ -137,21 +136,6 @@ def get_embedding_client(self) -> 'EmbeddingClient': """ pass - @abstractmethod - def get_responses_client(self, base_url: str) -> 'ResponsesClient': - """ - Get an OpenAI Responses API client bound to the running web service. - - Unlike the other clients, the Responses API is HTTP-only and requires - the Foundry Local web service to be started. Pass the base URL - returned by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``), - or use :meth:`FoundryLocalManager.create_responses_client` directly. - - :param base_url: Base URL of the running Foundry Local web service. - :return: ResponsesClient instance for this variant's model id. - """ - pass - @property @abstractmethod def variants(self) -> List['IModel']: diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index ea97ca575..2fa51a6f6 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API.""" +"""OpenAI-compatible clients for chat completions and audio transcription.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,7 +14,6 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,7 +24,5 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesClient", - "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py deleted file mode 100644 index ddcce586c..000000000 --- a/sdk/python/src/openai/responses_client.py +++ /dev/null @@ -1,409 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""OpenAI Responses API client — HTTP-only against the Foundry Local web service. - -Unlike ``ChatClient`` / ``AudioClient`` which go through the native Core via FFI, -the Responses API is served exclusively by the embedded web service. The client -therefore uses ``requests`` for non-streaming calls and parses Server-Sent Events -inline for streaming. - -Usage ------ -:: - - manager.start_web_service() - client = manager.create_responses_client("phi-4-mini") - - # Non-streaming - resp = client.create("What is 2+2?") - print(resp.output_text) - - # Streaming - for event in client.create_streaming("Tell me a story"): - if event.type == "response.output_text.delta": - print(event.delta, end="", flush=True) -""" - -from __future__ import annotations - -import json -import logging -from dataclasses import is_dataclass -from typing import Any, Dict, Generator, List, Optional, Union -from urllib.parse import quote - -import requests - -from ..exception import FoundryLocalException -from .responses_types import ( - DeleteResponseResult, - InputItemsListResponse, - ListResponsesResult, - ReasoningConfig, - ResponseObject, - StreamingEvent, - TextConfig, - _parse_delete_result, - _parse_input_items_list, - _parse_list_responses, - _parse_response_object, - _to_dict, - parse_streaming_event, -) - -logger = logging.getLogger(__name__) - -# Align with the JS SDK limit to avoid surprising client-side rejections of valid IDs. -_MAX_ID_LEN = 1024 - - -class ResponsesClientSettings: - """Tunable settings applied to every Responses API request. - - Field names follow the OpenAI snake_case convention; serialization omits - any ``None`` values so the server applies its own defaults. - """ - - def __init__(self) -> None: - self.instructions: Optional[str] = None - self.temperature: Optional[float] = None - self.top_p: Optional[float] = None - self.max_output_tokens: Optional[int] = None - self.frequency_penalty: Optional[float] = None - self.presence_penalty: Optional[float] = None - self.tool_choice: Optional[Any] = None - self.truncation: Optional[str] = None - self.parallel_tool_calls: Optional[bool] = None - self.store: Optional[bool] = None # Omitted by default; server applies its own default. 
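-        # Example: ``client.settings.store = True`` becomes the default for every
-        # request, while a per-call ``client.create(..., store=False)`` still wins;
-        # _build_request merges per-call options over these settings.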
- self.metadata: Optional[Dict[str, str]] = None - self.reasoning: Optional[ReasoningConfig] = None - self.text: Optional[TextConfig] = None - self.seed: Optional[int] = None - # Transport settings — not sent to the API. - self.timeout: float = 60.0 - """Seconds to wait for the server to connect and respond on non-streaming calls. - For streaming, this is used only as the connection timeout; reads are unbounded.""" - - def _serialize(self) -> Dict[str, Any]: - raw: Dict[str, Any] = { - "instructions": self.instructions, - "temperature": self.temperature, - "top_p": self.top_p, - "max_output_tokens": self.max_output_tokens, - "frequency_penalty": self.frequency_penalty, - "presence_penalty": self.presence_penalty, - "tool_choice": _to_dict(self.tool_choice) if is_dataclass(self.tool_choice) else self.tool_choice, - "truncation": self.truncation, - "parallel_tool_calls": self.parallel_tool_calls, - "store": self.store, - "metadata": self.metadata, - "reasoning": _to_dict(self.reasoning) if self.reasoning is not None else None, - "text": _to_dict(self.text) if self.text is not None else None, - "seed": self.seed, - } - return {k: v for k, v in raw.items() if v is not None} - - -class ResponsesClient: - """Client for the OpenAI Responses API served by Foundry Local. - - Construct via ``manager.create_responses_client(model_id)`` or - ``model.get_responses_client(base_url)``. - """ - - def __init__(self, base_url: str, model_id: Optional[str] = None): - if not isinstance(base_url, str) or not base_url.strip(): - raise ValueError("base_url must be a non-empty string.") - self._base_url = base_url.rstrip("/") - self._model_id = model_id - self.settings = ResponsesClientSettings() - - # ------------------------------------------------------------------ public - - def create( - self, - input: Union[str, List[Any]], - **options: Any, - ) -> ResponseObject: - """Create a response (non-streaming).""" - body = self._build_request(input, options, stream=False) - raw = self._post_json("/v1/responses", body) - return _parse_response_object(raw) - - def create_streaming( - self, - input: Union[str, List[Any]], - **options: Any, - ) -> Generator[StreamingEvent, None, None]: - """Create a response with SSE streaming. - - Returns a generator yielding :class:`StreamingEvent` objects. The HTTP - connection is closed automatically when the generator is exhausted or - garbage-collected. 
- """ - body = self._build_request(input, options, stream=True) - return self._post_stream("/v1/responses", body) - - def get(self, response_id: str) -> ResponseObject: - self._validate_id(response_id, "response_id") - raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}") - return _parse_response_object(raw) - - def delete(self, response_id: str) -> DeleteResponseResult: - self._validate_id(response_id, "response_id") - raw = self._request_json("DELETE", f"/v1/responses/{quote(response_id, safe='')}") - return _parse_delete_result(raw) - - def cancel(self, response_id: str) -> ResponseObject: - self._validate_id(response_id, "response_id") - raw = self._request_json("POST", f"/v1/responses/{quote(response_id, safe='')}/cancel") - return _parse_response_object(raw) - - def get_input_items(self, response_id: str) -> InputItemsListResponse: - self._validate_id(response_id, "response_id") - raw = self._request_json("GET", f"/v1/responses/{quote(response_id, safe='')}/input_items") - return _parse_input_items_list(raw) - - def list(self) -> ListResponsesResult: - raw = self._request_json("GET", "/v1/responses") - return _parse_list_responses(raw) - - # ---------------------------------------------------------------- internal - - def _build_request( - self, - input: Union[str, List[Any]], - options: Dict[str, Any], - stream: bool, - ) -> Dict[str, Any]: - self._validate_input(input) - if options.get("tools") is not None: - self._validate_tools(options["tools"]) - - model = options.pop("model", None) or self._model_id - if not isinstance(model, str) or not model.strip(): - raise ValueError( - "Model must be specified via create_responses_client(model_id) or options['model']." - ) - - # Normalize input: convert dataclasses to dicts for the wire format. - if isinstance(input, list): - wire_input = [_to_dict(i) if is_dataclass(i) else i for i in input] - else: - wire_input = input - - # Normalize other dataclass-shaped options (tools, reasoning, etc.). 
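-        # For example, reasoning=ReasoningConfig(effort="low") serializes to
-        # {"effort": "low"}, and FunctionToolDefinition(name="f") inside a tools
-        # list serializes to {"type": "function", "name": "f"} (None fields omitted).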
- normalized_options: Dict[str, Any] = {} - for key, value in options.items(): - if value is None: - continue - if is_dataclass(value): - normalized_options[key] = _to_dict(value) - elif isinstance(value, list): - normalized_options[key] = [_to_dict(v) if is_dataclass(v) else v for v in value] - else: - normalized_options[key] = value - - body: Dict[str, Any] = {"model": model, "input": wire_input} - # Merge order: model+input → settings defaults → per-call overrides - body.update(self.settings._serialize()) - body.update(normalized_options) - if stream: - body["stream"] = True - return body - - @staticmethod - def _validate_input(input: Any) -> None: - if input is None: - raise ValueError("Input cannot be None.") - if isinstance(input, str): - if not input.strip(): - raise ValueError("Input string cannot be empty.") - return - if isinstance(input, list): - if len(input) == 0: - raise ValueError("Input items list cannot be empty.") - for i, item in enumerate(input): - if is_dataclass(item): - t = getattr(item, "type", None) - elif isinstance(item, dict): - t = item.get("type") - else: - raise ValueError(f"input[{i}] must be a dict or dataclass.") - if not isinstance(t, str) or not t.strip(): - raise ValueError(f"input[{i}] must have a non-empty 'type' field.") - return - raise ValueError("Input must be a string or a list of input items.") - - @staticmethod - def _validate_tools(tools: Any) -> None: - if not isinstance(tools, list): - raise ValueError("tools must be a list if provided.") - for i, tool in enumerate(tools): - if is_dataclass(tool): - t = getattr(tool, "type", None) - name = getattr(tool, "name", None) - elif isinstance(tool, dict): - t = tool.get("type") - name = tool.get("name") - else: - raise ValueError(f"tools[{i}] must be a dict or FunctionToolDefinition.") - if t != "function": - raise ValueError(f"tools[{i}] must have type 'function'.") - if not isinstance(name, str) or not name.strip(): - raise ValueError(f"tools[{i}] must have a non-empty 'name'.") - - @staticmethod - def _validate_id(value: str, param: str) -> None: - if not isinstance(value, str) or not value.strip(): - raise ValueError(f"{param} must be a non-empty string.") - if len(value) > _MAX_ID_LEN: - raise ValueError(f"{param} exceeds maximum length ({_MAX_ID_LEN}).") - - # ----- HTTP plumbing ----- - - def _url(self, path: str) -> str: - return f"{self._base_url}{path}" - - def _request_json(self, method: str, path: str, body: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - timeout = self.settings.timeout - try: - if body is not None: - resp = requests.request( - method, - self._url(path), - headers={"Content-Type": "application/json", "Accept": "application/json"}, - data=json.dumps(body), - timeout=timeout, - ) - else: - resp = requests.request( - method, - self._url(path), - headers={"Accept": "application/json"}, - timeout=timeout, - ) - except requests.RequestException as e: - raise FoundryLocalException(f"Network error calling {method} {path}: {e}") from e - - return self._handle_json_response(resp, method, path) - - def _post_json(self, path: str, body: Dict[str, Any]) -> Dict[str, Any]: - return self._request_json("POST", path, body) - - @staticmethod - def _handle_json_response(resp: requests.Response, method: str, path: str) -> Dict[str, Any]: - text = resp.text - if not resp.ok: - raise FoundryLocalException( - f"Responses API error ({resp.status_code}) for {method} {path}: {text[:500]}" - ) - try: - return json.loads(text) if text else {} - except json.JSONDecodeError as e: - raise 
FoundryLocalException( - f"Failed to parse response JSON from {method} {path}: {text[:200]}" - ) from e - - def _post_stream( - self, path: str, body: Dict[str, Any] - ) -> Generator[StreamingEvent, None, None]: - # Use (connect_timeout, None) so the connection attempt can time out but - # the read side is unbounded — streaming responses can be arbitrarily long. - connect_timeout = self.settings.timeout - try: - resp = requests.post( - self._url(path), - headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, - data=json.dumps(body), - stream=True, - timeout=(connect_timeout, None), - ) - except requests.RequestException as e: - raise FoundryLocalException(f"Network error calling POST {path}: {e}") from e - - if not resp.ok: - body_text = resp.text - resp.close() - raise FoundryLocalException( - f"Responses API error ({resp.status_code}) for POST {path}: {body_text[:500]}" - ) - - return _iter_sse_events(resp) - - -def _iter_sse_events(resp: requests.Response) -> Generator[StreamingEvent, None, None]: - """Parse an SSE response into a stream of :class:`StreamingEvent` objects. - - Closes the underlying HTTP connection when the generator ends for any - reason (completion, [DONE], exception, or GC). - - Uses a single string buffer and splits on double-newline boundaries to - avoid the O(n) cost of joining a growing list on every chunk. - """ - try: - buffer = "" - for chunk in resp.iter_content(chunk_size=None, decode_unicode=False): - if not chunk: - continue - text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk - buffer += text.replace("\r\n", "\n") - - while "\n\n" in buffer: - block, buffer = buffer.split("\n\n", 1) - event = _parse_sse_block(block) - if event is _SSE_DONE: - return - if event is not None: - yield event - - # Flush any residual block not terminated by a blank line. - tail = buffer.strip() - if tail: - event = _parse_sse_block(tail) - if event is not None and event is not _SSE_DONE: - yield event - finally: - resp.close() - - -_SSE_DONE = object() # sentinel returned for the `data: [DONE]` terminator - - -def _parse_sse_block(block: str) -> Any: - """Parse a single SSE block (already stripped of its trailing blank line).""" - trimmed = block.strip() - if not trimmed: - return None - if trimmed == "data: [DONE]": - return _SSE_DONE - - data_lines: List[str] = [] - for line in trimmed.split("\n"): - if line.startswith("data: "): - data_lines.append(line[6:]) - elif line == "data:": - data_lines.append("") - # `event:`, `id:`, `retry:` fields are ignored — the type lives in the JSON payload. - - if not data_lines: - return None - - data = "\n".join(data_lines) - if data == "[DONE]": - return _SSE_DONE - try: - parsed = json.loads(data) - except json.JSONDecodeError as e: - raise FoundryLocalException(f"Failed to parse streaming event JSON: {e}") from e - if not isinstance(parsed, dict): - return None - return parse_streaming_event(parsed) - - -__all__ = [ - "ResponsesClient", - "ResponsesClientSettings", -] diff --git a/sdk/python/src/openai/responses_types.py b/sdk/python/src/openai/responses_types.py deleted file mode 100644 index ad1266a44..000000000 --- a/sdk/python/src/openai/responses_types.py +++ /dev/null @@ -1,957 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -"""Types for the OpenAI Responses API served by Foundry Local. - -These mirror the DTOs defined by neutron-server in -``src/FoundryLocalCore/Core/Responses/Contracts/``. Dataclasses are used so -callers can construct items with keyword arguments and we can serialize -discriminated unions by the ``type`` field. -""" - -from __future__ import annotations - -import base64 -import io -import mimetypes -from dataclasses import dataclass, field, fields, is_dataclass -from typing import Any, Dict, List, Literal, Optional, Tuple, Union - - -# --------------------------------------------------------------------------- -# Image resize helper (optional — requires Pillow) -# --------------------------------------------------------------------------- - -def _resize_image(data: bytes, media_type: str, max_size: Tuple[int, int]) -> Tuple[bytes, str]: - """Resize *data* so it fits within *max_size* (width, height) while preserving - aspect ratio. Returns the re-encoded bytes and MIME type. - - Requires ``Pillow`` (``pip install pillow``). Raises ``ImportError`` if it is - not installed. - """ - try: - from PIL import Image # type: ignore[import-untyped] - except ImportError as exc: - raise ImportError( - "Image resizing requires Pillow. Install it with: pip install pillow" - ) from exc - - img = Image.open(io.BytesIO(data)) - img.thumbnail(max_size, Image.LANCZOS) - buf = io.BytesIO() - fmt = media_type.split("/")[-1].upper().replace("JPG", "JPEG") - if fmt not in ("JPEG", "PNG", "WEBP", "GIF"): - fmt = "PNG" - media_type = "image/png" - img.save(buf, format=fmt) - return buf.getvalue(), media_type - - -# --------------------------------------------------------------------------- -# Serialization helpers -# --------------------------------------------------------------------------- - -def _to_dict(obj: Any) -> Any: - """Recursively convert a dataclass (or list/dict of them) to a plain dict, - omitting ``None`` values so the wire format matches the OpenAI spec. - """ - if is_dataclass(obj) and not isinstance(obj, type): - result: Dict[str, Any] = {} - for f in fields(obj): - value = getattr(obj, f.name) - if value is None: - continue - result[f.name] = _to_dict(value) - return result - if isinstance(obj, list): - return [_to_dict(v) for v in obj] - if isinstance(obj, dict): - return {k: _to_dict(v) for k, v in obj.items() if v is not None} - return obj - - -# --------------------------------------------------------------------------- -# Content Parts -# --------------------------------------------------------------------------- - -@dataclass -class InputTextContent: - text: str = "" - type: Literal["input_text"] = "input_text" - - -@dataclass -class InputImageContent: - """Vision input. Provide exactly one of ``image_url`` or ``image_data`` (base64).""" - media_type: str = "" - image_url: Optional[str] = None - image_data: Optional[str] = None - detail: Optional[str] = None # "low" | "high" | "auto" - type: Literal["input_image"] = "input_image" - - def __post_init__(self) -> None: - has_url = self.image_url is not None - has_data = self.image_data is not None - if has_url == has_data: - raise ValueError( - "Provide exactly one of image_url or image_data, not both (or neither)." - ) - - @staticmethod - def from_file( - path: str, - detail: Optional[str] = None, - max_size: Optional[Tuple[int, int]] = None, - ) -> "InputImageContent": - """Load an image from *path*, base64-encode it, and return an :class:`InputImageContent`. 
- - Args: - path: Filesystem path to the image file. - detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. - max_size: Optional ``(width, height)`` cap. If the image exceeds either - dimension it is resized proportionally (requires ``Pillow``). - """ - media_type, _ = mimetypes.guess_type(path) - if not media_type or not media_type.startswith("image/"): - raise ValueError(f"Unsupported image format: {path}") - with open(path, "rb") as fh: - raw = fh.read() - if max_size is not None: - raw, media_type = _resize_image(raw, media_type, max_size) - return InputImageContent( - image_data=base64.b64encode(raw).decode("ascii"), - media_type=media_type, - detail=detail, - ) - - @staticmethod - def from_url(url: str, detail: Optional[str] = None) -> "InputImageContent": - return InputImageContent(image_url=url, media_type="image/unknown", detail=detail) - - @staticmethod - def from_bytes( - data: bytes, - media_type: str, - detail: Optional[str] = None, - max_size: Optional[Tuple[int, int]] = None, - ) -> "InputImageContent": - """Create an :class:`InputImageContent` from raw *data* bytes. - - Args: - data: Raw image bytes. - media_type: MIME type, e.g. ``"image/png"``. - detail: OpenAI detail hint – ``"low"``, ``"high"``, or ``"auto"``. - max_size: Optional ``(width, height)`` cap. Requires ``Pillow``. - """ - if max_size is not None: - data, media_type = _resize_image(data, media_type, max_size) - return InputImageContent( - image_data=base64.b64encode(data).decode("ascii"), - media_type=media_type, - detail=detail, - ) - - -@dataclass -class InputFileContent: - filename: str = "" - file_url: str = "" - type: Literal["input_file"] = "input_file" - - -@dataclass -class OutputTextContent: - text: str = "" - annotations: Optional[List[Any]] = None - logprobs: Optional[List[Any]] = None - type: Literal["output_text"] = "output_text" - - -@dataclass -class RefusalContent: - refusal: str = "" - type: Literal["refusal"] = "refusal" - - -ContentPart = Union[ - InputTextContent, InputImageContent, InputFileContent, OutputTextContent, RefusalContent -] - - -def _parse_content_part(data: Dict[str, Any]) -> Optional[ContentPart]: - t = data.get("type") - if t == "input_text": - return InputTextContent(text=data.get("text", "")) - if t == "input_image": - return InputImageContent( - media_type=data.get("media_type", ""), - image_url=data.get("image_url"), - image_data=data.get("image_data"), - detail=data.get("detail"), - ) - if t == "input_file": - return InputFileContent(filename=data.get("filename", ""), file_url=data.get("file_url", "")) - if t == "output_text": - return OutputTextContent( - text=data.get("text", ""), - annotations=data.get("annotations"), - logprobs=data.get("logprobs"), - ) - if t == "refusal": - return RefusalContent(refusal=data.get("refusal", "")) - # Unknown content-part type — return None so callers can filter forward-compat parts. 
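-    # (e.g. a hypothetical future {"type": "input_audio", ...} part is dropped
-    # here instead of raising, so older SDKs tolerate newer servers)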
- return None - - -def _parse_content(value: Any) -> Union[str, List[ContentPart]]: - if isinstance(value, str): - return value - if isinstance(value, list): - parts = [_parse_content_part(p) if isinstance(p, dict) else p for p in value] - return [p for p in parts if p is not None] - return value - - -# --------------------------------------------------------------------------- -# Response Items (input + output) -# --------------------------------------------------------------------------- - -@dataclass -class MessageItem: - role: str = "" - content: Union[str, List[ContentPart]] = "" - id: Optional[str] = None - status: Optional[str] = None - type: Literal["message"] = "message" - - -@dataclass -class FunctionCallItem: - call_id: str = "" - name: str = "" - arguments: str = "" - id: Optional[str] = None - status: Optional[str] = None - type: Literal["function_call"] = "function_call" - - -@dataclass -class FunctionCallOutputItem: - call_id: str = "" - output: Union[str, List[ContentPart]] = "" - id: Optional[str] = None - type: Literal["function_call_output"] = "function_call_output" - - -@dataclass -class ItemReference: - id: str = "" - type: Literal["item_reference"] = "item_reference" - - -@dataclass -class ReasoningItem: - id: Optional[str] = None - content: Optional[List[ContentPart]] = None - encrypted_content: Optional[str] = None - summary: Optional[str] = None - status: Optional[str] = None - type: Literal["reasoning"] = "reasoning" - - -ResponseInputItem = Union[ - MessageItem, FunctionCallItem, FunctionCallOutputItem, ItemReference, ReasoningItem -] -ResponseOutputItem = Union[MessageItem, FunctionCallItem, ReasoningItem] - - -def _parse_response_item(data: Dict[str, Any]) -> Any: - t = data.get("type") - if t == "message": - return MessageItem( - role=data.get("role", ""), - content=_parse_content(data.get("content", "")), - id=data.get("id"), - status=data.get("status"), - ) - if t == "function_call": - return FunctionCallItem( - call_id=data.get("call_id", ""), - name=data.get("name", ""), - arguments=data.get("arguments", ""), - id=data.get("id"), - status=data.get("status"), - ) - if t == "function_call_output": - return FunctionCallOutputItem( - call_id=data.get("call_id", ""), - output=_parse_content(data.get("output", "")), - id=data.get("id"), - ) - if t == "item_reference": - return ItemReference(id=data.get("id", "")) - if t == "reasoning": - content_raw = data.get("content") - return ReasoningItem( - id=data.get("id"), - content=[_parse_content_part(p) for p in content_raw] if isinstance(content_raw, list) else None, - encrypted_content=data.get("encrypted_content"), - summary=data.get("summary"), - status=data.get("status"), - ) - # Unknown item type — return the raw dict so callers can inspect - return data - - -# --------------------------------------------------------------------------- -# Tool Definitions & Config -# --------------------------------------------------------------------------- - -@dataclass -class FunctionToolDefinition: - name: str = "" - description: Optional[str] = None - parameters: Optional[Dict[str, Any]] = None - strict: Optional[bool] = None - type: Literal["function"] = "function" - - -@dataclass -class FunctionToolChoice: - name: str = "" - type: Literal["function"] = "function" - - -ToolChoice = Union[str, FunctionToolChoice] # "none" | "auto" | "required" | {type,name} - - -@dataclass -class TextFormat: - type: str = "text" # "text" | "json_object" | "json_schema" | "lark_grammar" | "regex" - name: Optional[str] = None - description: 
Optional[str] = None - schema: Optional[Dict[str, Any]] = None - strict: Optional[bool] = None - - -@dataclass -class TextConfig: - format: Optional[TextFormat] = None - - -@dataclass -class ReasoningConfig: - effort: Optional[str] = None - summary: Optional[str] = None - - -# --------------------------------------------------------------------------- -# Response Object -# --------------------------------------------------------------------------- - -@dataclass -class ResponseUsage: - input_tokens: int = 0 - output_tokens: int = 0 - total_tokens: int = 0 - input_tokens_details: Optional[Dict[str, Any]] = None - output_tokens_details: Optional[Dict[str, Any]] = None - - -@dataclass -class ResponseError: - code: str = "" - message: str = "" - - -@dataclass -class IncompleteDetails: - reason: str = "" - - -@dataclass -class ResponseObject: - id: str = "" - object: str = "response" - created_at: int = 0 - status: str = "" - model: str = "" - output: List[Any] = field(default_factory=list) - completed_at: Optional[int] = None - failed_at: Optional[int] = None - cancelled_at: Optional[int] = None - error: Optional[ResponseError] = None - usage: Optional[ResponseUsage] = None - instructions: Optional[str] = None - previous_response_id: Optional[str] = None - tools: Optional[List[FunctionToolDefinition]] = None - tool_choice: Optional[Any] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - max_output_tokens: Optional[int] = None - frequency_penalty: Optional[float] = None - presence_penalty: Optional[float] = None - seed: Optional[int] = None - truncation: Optional[str] = None - parallel_tool_calls: Optional[bool] = None - store: Optional[bool] = None - metadata: Optional[Dict[str, str]] = None - reasoning: Optional[ReasoningConfig] = None - text: Optional[TextConfig] = None - user: Optional[str] = None - incomplete_details: Optional[IncompleteDetails] = None - # Retain anything the server returned that we don't model explicitly. 
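-    # Example: resp._raw.get("brand_new_field") still works if the server
-    # starts returning fields this dataclass does not model.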
- _raw: Optional[Dict[str, Any]] = None - - @property - def output_text(self) -> str: - """Concatenated text from the first assistant ``message`` item in ``output``.""" - for item in self.output: - if isinstance(item, MessageItem) and item.role == "assistant": - content = item.content - if isinstance(content, str): - return content - if isinstance(content, list): - parts: List[str] = [] - for p in content: - text = getattr(p, "text", None) - if isinstance(text, str): - parts.append(text) - return "".join(parts) - return "" - - -def _parse_response_object(data: Dict[str, Any]) -> ResponseObject: - output = data.get("output") or [] - parsed_output = [_parse_response_item(i) if isinstance(i, dict) else i for i in output] - - tools_raw = data.get("tools") - tools = None - if isinstance(tools_raw, list): - tools = [ - FunctionToolDefinition( - name=t.get("name", ""), - description=t.get("description"), - parameters=t.get("parameters"), - strict=t.get("strict"), - ) if isinstance(t, dict) else t - for t in tools_raw - ] - - usage = None - if isinstance(data.get("usage"), dict): - u = data["usage"] - usage = ResponseUsage( - input_tokens=u.get("input_tokens", 0), - output_tokens=u.get("output_tokens", 0), - total_tokens=u.get("total_tokens", 0), - input_tokens_details=u.get("input_tokens_details"), - output_tokens_details=u.get("output_tokens_details"), - ) - - error = None - if isinstance(data.get("error"), dict): - error = ResponseError(code=data["error"].get("code", ""), message=data["error"].get("message", "")) - - incomplete = None - if isinstance(data.get("incomplete_details"), dict): - incomplete = IncompleteDetails(reason=data["incomplete_details"].get("reason", "")) - - reasoning = None - if isinstance(data.get("reasoning"), dict): - reasoning = ReasoningConfig( - effort=data["reasoning"].get("effort"), - summary=data["reasoning"].get("summary"), - ) - - text = None - if isinstance(data.get("text"), dict): - fmt_raw = data["text"].get("format") - fmt = None - if isinstance(fmt_raw, dict): - fmt = TextFormat( - type=fmt_raw.get("type", "text"), - name=fmt_raw.get("name"), - description=fmt_raw.get("description"), - schema=fmt_raw.get("schema"), - strict=fmt_raw.get("strict"), - ) - text = TextConfig(format=fmt) - - return ResponseObject( - id=data.get("id", ""), - object=data.get("object", "response"), - created_at=data.get("created_at", 0), - status=data.get("status", ""), - model=data.get("model", ""), - output=parsed_output, - completed_at=data.get("completed_at"), - failed_at=data.get("failed_at"), - cancelled_at=data.get("cancelled_at"), - error=error, - usage=usage, - instructions=data.get("instructions"), - previous_response_id=data.get("previous_response_id"), - tools=tools, - tool_choice=data.get("tool_choice"), - temperature=data.get("temperature"), - top_p=data.get("top_p"), - max_output_tokens=data.get("max_output_tokens"), - frequency_penalty=data.get("frequency_penalty"), - presence_penalty=data.get("presence_penalty"), - seed=data.get("seed"), - truncation=data.get("truncation"), - parallel_tool_calls=data.get("parallel_tool_calls"), - store=data.get("store"), - metadata=data.get("metadata"), - reasoning=reasoning, - text=text, - user=data.get("user"), - incomplete_details=incomplete, - _raw=data, - ) - - -# --------------------------------------------------------------------------- -# Delete / List helpers -# --------------------------------------------------------------------------- - -@dataclass -class DeleteResponseResult: - id: str = "" - object: str = "" - deleted: 
bool = False - - -@dataclass -class InputItemsListResponse: - object: str = "list" - data: List[Any] = field(default_factory=list) - - -@dataclass -class ListResponsesResult: - object: str = "list" - data: List[ResponseObject] = field(default_factory=list) - - -# --------------------------------------------------------------------------- -# Streaming Events -# --------------------------------------------------------------------------- - -@dataclass -class ResponseLifecycleEvent: - """`response.created` / `queued` / `in_progress` / `completed` / `failed` / `incomplete`.""" - type: str = "" - response: Optional[ResponseObject] = None - sequence_number: int = 0 - - -@dataclass -class OutputItemAddedEvent: - item_id: str = "" - output_index: int = 0 - item: Any = None - sequence_number: int = 0 - type: Literal["response.output_item.added"] = "response.output_item.added" - - -@dataclass -class OutputItemDoneEvent: - item_id: str = "" - output_index: int = 0 - item: Any = None - sequence_number: int = 0 - type: Literal["response.output_item.done"] = "response.output_item.done" - - -@dataclass -class ContentPartAddedEvent: - item_id: str = "" - content_index: int = 0 - part: Any = None - sequence_number: int = 0 - type: Literal["response.content_part.added"] = "response.content_part.added" - - -@dataclass -class ContentPartDoneEvent: - item_id: str = "" - content_index: int = 0 - part: Any = None - sequence_number: int = 0 - type: Literal["response.content_part.done"] = "response.content_part.done" - - -@dataclass -class OutputTextDeltaEvent: - item_id: str = "" - output_index: int = 0 - content_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.output_text.delta"] = "response.output_text.delta" - - -@dataclass -class OutputTextDoneEvent: - item_id: str = "" - output_index: int = 0 - content_index: int = 0 - text: str = "" - sequence_number: int = 0 - type: Literal["response.output_text.done"] = "response.output_text.done" - - -@dataclass -class OutputTextAnnotationAddedEvent: - item_id: str = "" - annotation: Any = None - sequence_number: int = 0 - type: Literal["response.output_text.annotation.added"] = "response.output_text.annotation.added" - - -@dataclass -class RefusalDeltaEvent: - item_id: str = "" - content_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.refusal.delta"] = "response.refusal.delta" - - -@dataclass -class RefusalDoneEvent: - item_id: str = "" - content_index: int = 0 - refusal: str = "" - sequence_number: int = 0 - type: Literal["response.refusal.done"] = "response.refusal.done" - - -@dataclass -class FunctionCallArgsDeltaEvent: - item_id: str = "" - output_index: int = 0 - delta: str = "" - sequence_number: int = 0 - type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" - - -@dataclass -class FunctionCallArgsDoneEvent: - item_id: str = "" - output_index: int = 0 - arguments: str = "" - name: str = "" - sequence_number: int = 0 - type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" - - -@dataclass -class ReasoningSummaryPartAddedEvent: - item_id: str = "" - part: Any = None - sequence_number: int = 0 - type: Literal["response.reasoning_summary_part.added"] = "response.reasoning_summary_part.added" - - -@dataclass -class ReasoningSummaryPartDoneEvent: - item_id: str = "" - part: Any = None - sequence_number: int = 0 - type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" - - 
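-# A typical consumer switches on ``event.type``. Illustrative sketch (client
-# construction elided; event types as defined in this module):
-#
-#     for event in client.create_streaming("Why is the sky blue?"):
-#         if event.type == "response.reasoning.delta":
-#             print(event.delta, end="")              # streamed reasoning text
-#         elif event.type == "response.output_text.delta":
-#             print(event.delta, end="", flush=True)  # streamed answer text
-#         elif event.type == "error":
-#             raise RuntimeError(event.message)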
-@dataclass -class ReasoningDeltaEvent: - item_id: str = "" - delta: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning.delta"] = "response.reasoning.delta" - - -@dataclass -class ReasoningDoneEvent: - item_id: str = "" - text: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning.done"] = "response.reasoning.done" - - -@dataclass -class ReasoningSummaryTextDeltaEvent: - item_id: str = "" - delta: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" - - -@dataclass -class ReasoningSummaryTextDoneEvent: - item_id: str = "" - text: str = "" - sequence_number: int = 0 - type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" - - -@dataclass -class StreamingErrorEvent: - code: Optional[str] = None - message: Optional[str] = None - param: Optional[str] = None - sequence_number: int = 0 - type: Literal["error"] = "error" - - -@dataclass -class UnknownStreamingEvent: - """Fallback for event types that aren't yet modeled.""" - type: str = "" - sequence_number: int = 0 - data: Optional[Dict[str, Any]] = None - - -StreamingEvent = Union[ - ResponseLifecycleEvent, - OutputItemAddedEvent, - OutputItemDoneEvent, - ContentPartAddedEvent, - ContentPartDoneEvent, - OutputTextDeltaEvent, - OutputTextDoneEvent, - OutputTextAnnotationAddedEvent, - RefusalDeltaEvent, - RefusalDoneEvent, - FunctionCallArgsDeltaEvent, - FunctionCallArgsDoneEvent, - ReasoningSummaryPartAddedEvent, - ReasoningSummaryPartDoneEvent, - ReasoningDeltaEvent, - ReasoningDoneEvent, - ReasoningSummaryTextDeltaEvent, - ReasoningSummaryTextDoneEvent, - StreamingErrorEvent, - UnknownStreamingEvent, -] - - -_LIFECYCLE_TYPES = { - "response.created", - "response.queued", - "response.in_progress", - "response.completed", - "response.failed", - "response.incomplete", -} - - -def parse_streaming_event(data: Dict[str, Any]) -> StreamingEvent: - """Build a typed streaming-event dataclass from a server-sent JSON payload.""" - t = data.get("type", "") - seq = data.get("sequence_number", 0) - - if t in _LIFECYCLE_TYPES: - resp_raw = data.get("response") - resp = _parse_response_object(resp_raw) if isinstance(resp_raw, dict) else None - return ResponseLifecycleEvent(type=t, response=resp, sequence_number=seq) - - if t == "response.output_item.added": - item = data.get("item") - return OutputItemAddedEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - item=_parse_response_item(item) if isinstance(item, dict) else item, - sequence_number=seq, - ) - if t == "response.output_item.done": - item = data.get("item") - return OutputItemDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - item=_parse_response_item(item) if isinstance(item, dict) else item, - sequence_number=seq, - ) - if t == "response.content_part.added": - part = data.get("part") - return ContentPartAddedEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - part=_parse_content_part(part) if isinstance(part, dict) else part, - sequence_number=seq, - ) - if t == "response.content_part.done": - part = data.get("part") - return ContentPartDoneEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - part=_parse_content_part(part) if isinstance(part, dict) else part, - sequence_number=seq, - ) - if t == "response.output_text.delta": - return OutputTextDeltaEvent( - item_id=data.get("item_id", ""), - 
output_index=data.get("output_index", 0), - content_index=data.get("content_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.output_text.done": - return OutputTextDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - content_index=data.get("content_index", 0), - text=data.get("text", ""), - sequence_number=seq, - ) - if t == "response.output_text.annotation.added": - return OutputTextAnnotationAddedEvent( - item_id=data.get("item_id", ""), - annotation=data.get("annotation"), - sequence_number=seq, - ) - if t == "response.refusal.delta": - return RefusalDeltaEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.refusal.done": - return RefusalDoneEvent( - item_id=data.get("item_id", ""), - content_index=data.get("content_index", 0), - refusal=data.get("refusal", ""), - sequence_number=seq, - ) - if t == "response.function_call_arguments.delta": - return FunctionCallArgsDeltaEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - delta=data.get("delta", ""), - sequence_number=seq, - ) - if t == "response.function_call_arguments.done": - return FunctionCallArgsDoneEvent( - item_id=data.get("item_id", ""), - output_index=data.get("output_index", 0), - arguments=data.get("arguments", ""), - name=data.get("name", ""), - sequence_number=seq, - ) - if t == "response.reasoning_summary_part.added": - return ReasoningSummaryPartAddedEvent( - item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq - ) - if t == "response.reasoning_summary_part.done": - return ReasoningSummaryPartDoneEvent( - item_id=data.get("item_id", ""), part=data.get("part"), sequence_number=seq - ) - if t == "response.reasoning.delta": - return ReasoningDeltaEvent( - item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq - ) - if t == "response.reasoning.done": - return ReasoningDoneEvent( - item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq - ) - if t == "response.reasoning_summary_text.delta": - return ReasoningSummaryTextDeltaEvent( - item_id=data.get("item_id", ""), delta=data.get("delta", ""), sequence_number=seq - ) - if t == "response.reasoning_summary_text.done": - return ReasoningSummaryTextDoneEvent( - item_id=data.get("item_id", ""), text=data.get("text", ""), sequence_number=seq - ) - if t == "error": - return StreamingErrorEvent( - code=data.get("code"), - message=data.get("message"), - param=data.get("param"), - sequence_number=seq, - ) - - return UnknownStreamingEvent(type=t, sequence_number=seq, data=data) - - -def _parse_delete_result(data: Dict[str, Any]) -> DeleteResponseResult: - return DeleteResponseResult( - id=data.get("id", ""), - object=data.get("object", ""), - deleted=bool(data.get("deleted", False)), - ) - - -def _parse_input_items_list(data: Dict[str, Any]) -> InputItemsListResponse: - raw = data.get("data") or [] - return InputItemsListResponse( - object=data.get("object", "list"), - data=[_parse_response_item(i) if isinstance(i, dict) else i for i in raw], - ) - - -def _parse_list_responses(data: Dict[str, Any]) -> ListResponsesResult: - raw = data.get("data") or [] - return ListResponsesResult( - object=data.get("object", "list"), - data=[_parse_response_object(r) if isinstance(r, dict) else r for r in raw], - ) - - -__all__ = [ - # Content parts - "InputTextContent", - "InputImageContent", - 
"InputFileContent", - "OutputTextContent", - "RefusalContent", - "ContentPart", - # Items - "MessageItem", - "FunctionCallItem", - "FunctionCallOutputItem", - "ItemReference", - "ReasoningItem", - "ResponseInputItem", - "ResponseOutputItem", - # Tools & config - "FunctionToolDefinition", - "FunctionToolChoice", - "ToolChoice", - "TextFormat", - "TextConfig", - "ReasoningConfig", - # Response - "ResponseObject", - "ResponseUsage", - "ResponseError", - "IncompleteDetails", - "DeleteResponseResult", - "InputItemsListResponse", - "ListResponsesResult", - # Streaming events - "StreamingEvent", - "ResponseLifecycleEvent", - "OutputItemAddedEvent", - "OutputItemDoneEvent", - "ContentPartAddedEvent", - "ContentPartDoneEvent", - "OutputTextDeltaEvent", - "OutputTextDoneEvent", - "OutputTextAnnotationAddedEvent", - "RefusalDeltaEvent", - "RefusalDoneEvent", - "FunctionCallArgsDeltaEvent", - "FunctionCallArgsDoneEvent", - "ReasoningSummaryPartAddedEvent", - "ReasoningSummaryPartDoneEvent", - "ReasoningDeltaEvent", - "ReasoningDoneEvent", - "ReasoningSummaryTextDeltaEvent", - "ReasoningSummaryTextDoneEvent", - "StreamingErrorEvent", - "UnknownStreamingEvent", - "parse_streaming_event", -] diff --git a/sdk/python/test/openai/test_responses_client.py b/sdk/python/test/openai/test_responses_client.py deleted file mode 100644 index 871fe301c..000000000 --- a/sdk/python/test/openai/test_responses_client.py +++ /dev/null @@ -1,612 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""Unit tests for the Responses API client (no live server required). - -Mirrors the scenarios covered by the JS SDK's ``responsesClient.test.ts`` and -the Python spec's §5. HTTP calls are intercepted via :mod:`unittest.mock`. -""" - -from __future__ import annotations - -import base64 -import io -import json -from typing import Any, Dict, List -from unittest.mock import MagicMock, patch - -import pytest - -from foundry_local_sdk.exception import FoundryLocalException -from foundry_local_sdk.openai.responses_client import ( - ResponsesClient, - ResponsesClientSettings, - _parse_sse_block, - _iter_sse_events, - _SSE_DONE, -) -from foundry_local_sdk.openai.responses_types import ( - FunctionCallItem, - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, - OutputTextContent, - ReasoningConfig, - ResponseObject, - TextConfig, - TextFormat, - _to_dict, - parse_streaming_event, - OutputTextDeltaEvent, - ResponseLifecycleEvent, - StreamingErrorEvent, - UnknownStreamingEvent, -) - -BASE_URL = "http://127.0.0.1:5273" -MODEL_ID = "test-model" - - -def _fake_json_response(payload: Dict[str, Any], status: int = 200): - resp = MagicMock() - resp.ok = 200 <= status < 300 - resp.status_code = status - resp.text = json.dumps(payload) - return resp - - -def _fake_stream_response(sse_payload: str, status: int = 200): - resp = MagicMock() - resp.ok = 200 <= status < 300 - resp.status_code = status - resp.text = sse_payload - # iter_content returns the full payload in one bytes chunk. 
- resp.iter_content = MagicMock(return_value=iter([sse_payload.encode("utf-8")])) - resp.close = MagicMock() - return resp - - -# --------------------------------------------------------------------------- -# Settings -# --------------------------------------------------------------------------- - -class TestResponsesClientSettings: - def test_serialize_defaults_empty(self): - # No fields set by default — server applies its own defaults - s = ResponsesClientSettings() - assert s._serialize() == {} - - def test_store_defaults_to_none(self): - assert ResponsesClientSettings().store is None - - def test_serialize_all_fields(self): - s = ResponsesClientSettings() - s.instructions = "Be concise." - s.temperature = 0.2 - s.top_p = 0.9 - s.max_output_tokens = 256 - s.frequency_penalty = 0.1 - s.presence_penalty = 0.2 - s.tool_choice = "auto" - s.truncation = "auto" - s.parallel_tool_calls = False - s.store = False - s.metadata = {"run": "1"} - s.reasoning = ReasoningConfig(effort="medium") - s.text = TextConfig(format=TextFormat(type="json_object")) - s.seed = 42 - - out = s._serialize() - assert out["instructions"] == "Be concise." - assert out["temperature"] == 0.2 - assert out["top_p"] == 0.9 - assert out["max_output_tokens"] == 256 - assert out["frequency_penalty"] == 0.1 - assert out["presence_penalty"] == 0.2 - assert out["tool_choice"] == "auto" - assert out["truncation"] == "auto" - assert out["parallel_tool_calls"] is False - assert out["store"] is False - assert out["metadata"] == {"run": "1"} - assert out["reasoning"] == {"effort": "medium"} - assert out["text"] == {"format": {"type": "json_object"}} - assert out["seed"] == 42 - - def test_timeout_not_serialized(self): - # timeout is a transport setting and must NOT appear in the API payload - s = ResponsesClientSettings() - s.timeout = 30.0 - assert "timeout" not in s._serialize() - - def test_timeout_default(self): - assert ResponsesClientSettings().timeout == 60.0 - - -# --------------------------------------------------------------------------- -# Input / tool / id validation -# --------------------------------------------------------------------------- - -class TestInputValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_none(self): - with pytest.raises(ValueError, match="None"): - self.client._build_request(None, {}, stream=False) - - def test_rejects_empty_string(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request("", {}, stream=False) - - def test_rejects_whitespace_string(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request(" ", {}, stream=False) - - def test_rejects_empty_array(self): - with pytest.raises(ValueError, match="empty"): - self.client._build_request([], {}, stream=False) - - def test_rejects_item_without_type(self): - with pytest.raises(ValueError, match="type"): - self.client._build_request([{"role": "user"}], {}, stream=False) - - def test_accepts_string_input(self): - body = self.client._build_request("Hi", {}, stream=False) - assert body["input"] == "Hi" - assert body["model"] == MODEL_ID - - def test_accepts_dict_input_items(self): - body = self.client._build_request( - [{"type": "message", "role": "user", "content": "hi"}], {}, stream=False - ) - assert isinstance(body["input"], list) - assert body["input"][0]["type"] == "message" - - def test_accepts_dataclass_input_items(self): - item = MessageItem(role="user", content="hello") - body = self.client._build_request([item], {}, 
stream=False) - assert body["input"][0]["type"] == "message" - assert body["input"][0]["role"] == "user" - assert body["input"][0]["content"] == "hello" - - def test_stream_flag_set(self): - body = self.client._build_request("hi", {}, stream=True) - assert body["stream"] is True - - def test_requires_model(self): - c = ResponsesClient(BASE_URL) # no default model - with pytest.raises(ValueError, match="[Mm]odel"): - c._build_request("hi", {}, stream=False) - - def test_options_model_overrides_default(self): - body = self.client._build_request("hi", {"model": "override"}, stream=False) - assert body["model"] == "override" - - -class TestToolValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_non_function_type(self): - with pytest.raises(ValueError, match="function"): - self.client._build_request("hi", {"tools": [{"type": "retrieval", "name": "x"}]}, stream=False) - - def test_rejects_empty_name(self): - with pytest.raises(ValueError, match="name"): - self.client._build_request("hi", {"tools": [{"type": "function", "name": ""}]}, stream=False) - - def test_rejects_non_list(self): - with pytest.raises(ValueError, match="list"): - self.client._build_request("hi", {"tools": "nope"}, stream=False) - - def test_accepts_valid_dict_tool(self): - body = self.client._build_request( - "hi", - {"tools": [{"type": "function", "name": "multiply", "parameters": {}}]}, - stream=False, - ) - assert body["tools"][0]["name"] == "multiply" - - def test_accepts_dataclass_tool(self): - tool = FunctionToolDefinition(name="multiply", description="x*y") - body = self.client._build_request("hi", {"tools": [tool]}, stream=False) - assert body["tools"][0]["type"] == "function" - assert body["tools"][0]["name"] == "multiply" - assert body["tools"][0]["description"] == "x*y" - - -class TestIdValidation: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_rejects_empty_id(self): - with pytest.raises(ValueError, match="non-empty"): - self.client.get("") - - def test_rejects_whitespace_id(self): - with pytest.raises(ValueError, match="non-empty"): - self.client.get(" ") - - def test_rejects_too_long_id(self): - with pytest.raises(ValueError, match="length"): - self.client.get("x" * 1025) - - -# --------------------------------------------------------------------------- -# output_text convenience -# --------------------------------------------------------------------------- - -class TestOutputText: - def test_extracts_from_string_content(self): - resp = ResponseObject(output=[MessageItem(role="assistant", content="hello world")]) - assert resp.output_text == "hello world" - - def test_extracts_from_content_parts(self): - resp = ResponseObject(output=[ - MessageItem( - role="assistant", - content=[OutputTextContent(text="foo "), OutputTextContent(text="bar")], - ) - ]) - assert resp.output_text == "foo bar" - - def test_returns_empty_when_no_assistant(self): - resp = ResponseObject(output=[MessageItem(role="user", content="hi")]) - assert resp.output_text == "" - - def test_returns_empty_for_empty_output(self): - assert ResponseObject().output_text == "" - - def test_skips_function_call_items(self): - resp = ResponseObject(output=[ - FunctionCallItem(call_id="c1", name="f", arguments="{}"), - MessageItem(role="assistant", content="done"), - ]) - assert resp.output_text == "done" - - -# --------------------------------------------------------------------------- -# SSE parsing -# 
--------------------------------------------------------------------------- - -class TestSSEParsing: - def test_parses_complete_event(self): - block = 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"hi","sequence_number":3}' - evt = _parse_sse_block(block) - assert isinstance(evt, OutputTextDeltaEvent) - assert evt.delta == "hi" - assert evt.sequence_number == 3 - - def test_done_signal(self): - assert _parse_sse_block("data: [DONE]") is _SSE_DONE - - def test_multi_line_data(self): - # Per SSE spec, multiple data: lines join with \n into one JSON doc. - block = 'data: {"type":"error",\ndata: "message":"oops","sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, StreamingErrorEvent) - assert evt.message == "oops" - - def test_invalid_json_raises(self): - block = 'data: {not valid json' - with pytest.raises(FoundryLocalException): - _parse_sse_block(block) - - def test_empty_block_returns_none(self): - assert _parse_sse_block("") is None - assert _parse_sse_block("\n\n") is None - - def test_ignores_non_data_lines(self): - block = 'id: 1\nretry: 1000\nevent: response.created\ndata: {"type":"response.created","response":{"id":"r1"},"sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, ResponseLifecycleEvent) - assert evt.type == "response.created" - - def test_error_event(self): - block = 'data: {"type":"error","code":"bad","message":"oops","sequence_number":0}' - evt = _parse_sse_block(block) - assert isinstance(evt, StreamingErrorEvent) - assert evt.code == "bad" - assert evt.message == "oops" - - def test_iter_sse_events_handles_partial_chunks(self): - payload_events = [ - 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"Hel","sequence_number":1}\n\n', - 'event: response.output_text.delta\ndata: {"type":"response.output_text.delta","delta":"lo","sequence_number":2}\n\n', - 'data: [DONE]\n\n', - ] - full = "".join(payload_events).encode("utf-8") - - # Split the bytes into irregular chunks to exercise buffering. 
- chunks = [full[i:i + 7] for i in range(0, len(full), 7)] - - resp = MagicMock() - resp.iter_content = MagicMock(return_value=iter(chunks)) - resp.close = MagicMock() - - events = list(_iter_sse_events(resp)) - assert len(events) == 2 - assert all(isinstance(e, OutputTextDeltaEvent) for e in events) - assert "".join(e.delta for e in events) == "Hello" - resp.close.assert_called() - - def test_iter_sse_handles_crlf(self): - payload = ( - 'event: response.output_text.delta\r\n' - 'data: {"type":"response.output_text.delta","delta":"x","sequence_number":0}\r\n' - '\r\n' - 'data: [DONE]\r\n\r\n' - ) - resp = MagicMock() - resp.iter_content = MagicMock(return_value=iter([payload.encode("utf-8")])) - resp.close = MagicMock() - - events = list(_iter_sse_events(resp)) - assert len(events) == 1 - assert events[0].delta == "x" - - def test_unknown_event_type(self): - block = 'data: {"type":"response.brand_new_event","sequence_number":7}' - evt = _parse_sse_block(block) - assert isinstance(evt, UnknownStreamingEvent) - assert evt.type == "response.brand_new_event" - - -# --------------------------------------------------------------------------- -# Vision types -# --------------------------------------------------------------------------- - -class TestVisionTypes: - def test_input_image_from_bytes(self): - data = b"\x89PNG\r\n\x1a\nfakedata" - img = InputImageContent.from_bytes(data, "image/png", detail="high") - assert img.media_type == "image/png" - assert img.detail == "high" - assert base64.b64decode(img.image_data) == data - - def test_input_image_from_url(self): - img = InputImageContent.from_url("https://example.com/x.png") - assert img.image_url == "https://example.com/x.png" - assert img.image_data is None - - def test_input_image_from_file(self, tmp_path): - data = b"\x89PNG\r\n\x1a\nfakedata" - p = tmp_path / "test.png" - p.write_bytes(data) - img = InputImageContent.from_file(str(p)) - assert img.media_type == "image/png" - assert base64.b64decode(img.image_data) == data - - def test_input_image_from_file_rejects_non_image(self, tmp_path): - p = tmp_path / "text.txt" - p.write_text("not an image") - with pytest.raises(ValueError, match="Unsupported"): - InputImageContent.from_file(str(p)) - - def test_input_image_serialization(self): - img = InputImageContent(media_type="image/png", image_data="abc", detail="low") - d = _to_dict(img) - assert d == {"media_type": "image/png", "image_data": "abc", "detail": "low", "type": "input_image"} - # image_url left unset should be omitted - assert "image_url" not in d - - def test_input_image_mutual_exclusivity(self): - with pytest.raises(ValueError, match="exactly one"): - InputImageContent(media_type="image/png") # neither set - with pytest.raises(ValueError, match="exactly one"): - InputImageContent(media_type="image/png", image_url="http://x.com/a.png", image_data="abc") # both set - - -# --------------------------------------------------------------------------- -# Type serialization & parsing -# --------------------------------------------------------------------------- - -class TestTypeSerialization: - def test_message_item_to_dict(self): - msg = MessageItem( - role="user", - content=[InputTextContent(text="Hi"), InputImageContent(media_type="image/png", image_data="abc")], - ) - d = _to_dict(msg) - assert d["type"] == "message" - assert d["role"] == "user" - assert d["content"][0] == {"text": "Hi", "type": "input_text"} - assert d["content"][1]["type"] == "input_image" - assert "id" not in d # None omitted - - def 
test_function_tool_to_dict(self): - tool = FunctionToolDefinition( - name="multiply", - description="x*y", - parameters={"type": "object", "properties": {"a": {"type": "number"}}}, - strict=True, - ) - d = _to_dict(tool) - assert d == { - "name": "multiply", - "description": "x*y", - "parameters": {"type": "object", "properties": {"a": {"type": "number"}}}, - "strict": True, - "type": "function", - } - - def test_response_object_from_dict(self): - from foundry_local_sdk.openai.responses_types import _parse_response_object - - payload = { - "id": "resp_abc", - "object": "response", - "created_at": 1700000000, - "status": "completed", - "model": "phi-4-mini", - "output": [ - { - "type": "message", - "role": "assistant", - "content": [{"type": "output_text", "text": "Hello!"}], - } - ], - "usage": {"input_tokens": 3, "output_tokens": 2, "total_tokens": 5}, - "store": True, - } - r = _parse_response_object(payload) - assert r.id == "resp_abc" - assert r.status == "completed" - assert r.usage.total_tokens == 5 - assert r.output_text == "Hello!" - - def test_streaming_event_parsing_lifecycle(self): - evt = parse_streaming_event( - { - "type": "response.completed", - "response": {"id": "resp_1", "status": "completed"}, - "sequence_number": 10, - } - ) - assert isinstance(evt, ResponseLifecycleEvent) - assert evt.type == "response.completed" - assert evt.response.id == "resp_1" - assert evt.sequence_number == 10 - - -# --------------------------------------------------------------------------- -# End-to-end (mocked HTTP) -# --------------------------------------------------------------------------- - -class TestClientHTTPFlow: - def setup_method(self): - self.client = ResponsesClient(BASE_URL, MODEL_ID) - - def test_create_posts_correct_body(self): - payload = { - "id": "resp_1", - "object": "response", - "status": "completed", - "model": MODEL_ID, - "output": [ - {"type": "message", "role": "assistant", "content": "ok"}, - ], - } - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response(payload) - result = self.client.create("hello", temperature=0.3) - - assert result.id == "resp_1" - assert result.output_text == "ok" - - _, kwargs = mock_req.call_args - assert mock_req.call_args.args[0] == "POST" - assert mock_req.call_args.args[1] == f"{BASE_URL}/v1/responses" - body = json.loads(kwargs["data"]) - assert body["model"] == MODEL_ID - assert body["input"] == "hello" - assert body["temperature"] == 0.3 - assert "store" not in body # store=None is omitted from request - assert "stream" not in body - - def test_get_uses_url_encoded_path(self): - weird_id = "resp_with/slashes and spaces" - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": weird_id, "object": "response", "status": "completed", "model": MODEL_ID, "output": []} - ) - self.client.get(weird_id) - - path = mock_req.call_args.args[1] - assert "resp_with%2Fslashes%20and%20spaces" in path - assert mock_req.call_args.args[0] == "GET" - - def test_delete_parses_result(self): - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": "resp_1", "object": "response.deleted", "deleted": True} - ) - result = self.client.delete("resp_1") - assert result.deleted is True - assert result.id == "resp_1" - - def test_http_error_raises_foundry_local_exception(self): - resp = MagicMock() - resp.ok = False - 
resp.status_code = 400 - resp.text = '{"error":{"message":"bad"}}' - with patch("foundry_local_sdk.openai.responses_client.requests.request", return_value=resp): - with pytest.raises(FoundryLocalException) as excinfo: - self.client.create("hi") - assert "400" in str(excinfo.value) - assert "bad" in str(excinfo.value) - - def test_create_streaming_yields_events(self): - sse = ( - 'event: response.output_text.delta\n' - 'data: {"type":"response.output_text.delta","delta":"a","sequence_number":1}\n' - '\n' - 'event: response.output_text.delta\n' - 'data: {"type":"response.output_text.delta","delta":"b","sequence_number":2}\n' - '\n' - 'data: [DONE]\n\n' - ) - with patch("foundry_local_sdk.openai.responses_client.requests.post") as mock_post: - mock_post.return_value = _fake_stream_response(sse) - events = list(self.client.create_streaming("hi")) - - assert len(events) == 2 - assert "".join(e.delta for e in events) == "ab" - _, kwargs = mock_post.call_args - body = json.loads(kwargs["data"]) - assert body["stream"] is True - assert kwargs["headers"]["Accept"] == "text/event-stream" - - def test_streaming_http_error(self): - resp = MagicMock() - resp.ok = False - resp.status_code = 500 - resp.text = "boom" - resp.close = MagicMock() - with patch("foundry_local_sdk.openai.responses_client.requests.post", return_value=resp): - with pytest.raises(FoundryLocalException) as excinfo: - list(self.client.create_streaming("hi")) - assert "500" in str(excinfo.value) - - def test_settings_merge_precedence(self): - self.client.settings.temperature = 0.1 - self.client.settings.max_output_tokens = 100 - with patch("foundry_local_sdk.openai.responses_client.requests.request") as mock_req: - mock_req.return_value = _fake_json_response( - {"id": "r", "object": "response", "status": "completed", "model": MODEL_ID, "output": []} - ) - # Per-call overrides client settings - self.client.create("hi", temperature=0.9) - - body = json.loads(mock_req.call_args.kwargs["data"]) - assert body["temperature"] == 0.9 # per-call wins - assert body["max_output_tokens"] == 100 # settings default preserved - - -class TestManagerFactory: - """Ensure the factory method wiring doesn't require a running server.""" - - def test_manager_raises_if_web_service_not_started(self): - from foundry_local_sdk.exception import FoundryLocalException - - # Build a stand-in manager without going through the constructor's - # heavy initialization path. - mgr = MagicMock() - mgr.urls = None - # Bind the real method to our MagicMock so we exercise actual logic. - from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M - - with pytest.raises(FoundryLocalException, match="[Ww]eb service"): - M.create_responses_client(mgr, "some-model") - - def test_manager_returns_client_when_urls_set(self): - mgr = MagicMock() - mgr.urls = [BASE_URL] - from foundry_local_sdk.foundry_local_manager import FoundryLocalManager as M - - client = M.create_responses_client(mgr, "phi") - assert isinstance(client, ResponsesClient) - assert client._model_id == "phi" - assert client._base_url == BASE_URL diff --git a/sdk/python/test/openai/test_responses_integration.py b/sdk/python/test/openai/test_responses_integration.py deleted file mode 100644 index cb4eee456..000000000 --- a/sdk/python/test/openai/test_responses_integration.py +++ /dev/null @@ -1,288 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-# -------------------------------------------------------------------------- -"""Integration tests for the Responses API client. - -These require a real Foundry Local runtime + a cached model. They are only -run when ``FOUNDRY_INTEGRATION_TESTS=1`` is set in the environment. -""" - -from __future__ import annotations - -import json -import os - -import pytest - -from foundry_local_sdk import ( - FunctionToolDefinition, - InputImageContent, - InputTextContent, - MessageItem, -) - -from ..conftest import TEST_MODEL_ALIAS - -pytestmark = pytest.mark.skipif( - not os.environ.get("FOUNDRY_INTEGRATION_TESTS"), - reason="Set FOUNDRY_INTEGRATION_TESTS=1 to run Responses API integration tests.", -) - - -def _get_loaded_model(catalog): - cached = catalog.get_cached_models() - assert cached, "No cached models found" - variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) - assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" - - model = catalog.get_model(TEST_MODEL_ALIAS) - assert model is not None - model.select_variant(variant) - model.load() - return model - - -@pytest.fixture(scope="module") -def responses_client(manager, catalog): - """Start the web service, return a ResponsesClient tied to the test model.""" - model = _get_loaded_model(catalog) - manager.start_web_service() - client = manager.create_responses_client(model.id) - try: - yield client - finally: - try: - manager.stop_web_service() - finally: - model.unload() - - -# --------------------------------------------------------------------------- -# Non-streaming -# --------------------------------------------------------------------------- - -class TestNonStreaming: - def test_simple_string_input(self, responses_client): - resp = responses_client.create("What is 2 + 2? Reply with just the number.") - assert resp.id - assert resp.status in {"completed", "incomplete"} - assert resp.output_text # Non-empty - - def test_with_options(self, responses_client): - resp = responses_client.create( - "Say hello.", - temperature=0.0, - max_output_tokens=32, - ) - assert resp.output_text - - def test_structured_input(self, responses_client): - # Validates that structured MessageItem input is accepted and produces - # a well-formed response. Not asserting content (too model-dependent). - resp = responses_client.create( - [ - MessageItem(role="user", content="Reply with the single word: ping"), - ], - temperature=0.0, - ) - assert resp.status in {"completed", "incomplete"} - assert resp.output_text.strip() - - def test_with_instructions(self, responses_client): - resp = responses_client.create( - "Who are you?", - instructions="You are a terse assistant. Answer in exactly three words.", - temperature=0.0, - ) - assert resp.output_text - - def test_multi_turn(self, responses_client): - # Validates previous_response_id wiring: the second response should - # link back to the first via previous_response_id. We don't assert on - # recall quality (too model-dependent for tiny test models). - first = responses_client.create( - "My favourite colour is green. 
Just acknowledge with 'ok'.", - temperature=0.0, - store=True, - ) - assert first.id - second = responses_client.create( - "What colour did I mention?", - previous_response_id=first.id, - temperature=0.0, - ) - assert second.previous_response_id == first.id - assert second.output_text.strip() - - -# --------------------------------------------------------------------------- -# Streaming -# --------------------------------------------------------------------------- - -class TestStreaming: - def test_basic_streaming(self, responses_client): - chunks = [] - completed = False - for event in responses_client.create_streaming( - "Count 1, 2, 3. Reply with just the digits separated by spaces.", - temperature=0.0, - ): - if event.type == "response.output_text.delta": - chunks.append(event.delta) - elif event.type == "response.completed": - completed = True - assert completed - assert "".join(chunks).strip() - - def test_streaming_with_options(self, responses_client): - saw_completed = False - for event in responses_client.create_streaming( - "Hello", - temperature=0.0, - max_output_tokens=16, - ): - if event.type == "response.completed": - saw_completed = True - assert saw_completed - - def test_streaming_events_sequence(self, responses_client): - # Expect created → in_progress → ... → completed - types_seen = [] - for event in responses_client.create_streaming("Say hi.", temperature=0.0): - types_seen.append(event.type) - assert "response.created" in types_seen - assert "response.completed" in types_seen - assert types_seen.index("response.created") < types_seen.index("response.completed") - - -# --------------------------------------------------------------------------- -# Storage: get / delete / list -# --------------------------------------------------------------------------- - -class TestStorage: - def test_get_stored_response(self, responses_client): - first = responses_client.create("Store this.", store=True, temperature=0.0) - fetched = responses_client.get(first.id) - assert fetched.id == first.id - assert fetched.output_text == first.output_text - - def test_delete_response(self, responses_client): - created = responses_client.create("Delete me.", store=True, temperature=0.0) - result = responses_client.delete(created.id) - assert result.id == created.id - assert result.deleted is True - - def test_list_responses(self, responses_client): - # Create one so the list is guaranteed non-empty. - responses_client.create("A listable response.", store=True, temperature=0.0) - result = responses_client.list() - assert result.object == "list" - assert len(result.data) >= 1 - - -# --------------------------------------------------------------------------- -# Tool calling -# --------------------------------------------------------------------------- - -class TestToolCalling: - def test_function_call_round_trip(self, responses_client): - tool = FunctionToolDefinition( - name="multiply_numbers", - description="Multiply two integers.", - parameters={ - "type": "object", - "properties": { - "a": {"type": "integer"}, - "b": {"type": "integer"}, - }, - "required": ["a", "b"], - }, - ) - first = responses_client.create( - "What is 7 times 6? Use the multiply_numbers tool.", - tools=[tool], - temperature=0.0, - ) - - # Find the function_call item. 
- call = next( - (item for item in first.output if getattr(item, "type", None) == "function_call"), - None, - ) - if call is None: - pytest.skip("Model did not emit a tool call for this prompt") - - args = json.loads(call.arguments) - # Model may use the declared parameter names or invent its own. - # Extract the two integer values robustly. - int_values = [int(v) for v in args.values() if isinstance(v, (int, str)) and str(v).lstrip("-").isdigit()] - if len(int_values) < 2: - pytest.skip(f"Model produced unusable tool args: {args!r}") - product = int_values[0] * int_values[1] - - follow = responses_client.create( - [ - MessageItem(role="user", content="What is 7 times 6? Use the multiply_numbers tool."), - call, - { - "type": "function_call_output", - "call_id": call.call_id, - "output": str(product), - }, - ], - tools=[tool], - temperature=0.0, - ) - # Validates the round-trip: the follow-up should produce a completed - # response that references the tool output. We don't assert content. - assert follow.status in {"completed", "incomplete"} - assert follow.output_text.strip() - - -# --------------------------------------------------------------------------- -# Vision -# --------------------------------------------------------------------------- - -class TestVision: - """These tests require a vision-capable model and will be skipped otherwise.""" - - def _run_or_skip(self, responses_client, content): - try: - return responses_client.create( - [MessageItem(role="user", content=content)], - temperature=0.0, - ) - except Exception as e: - pytest.skip(f"Model does not appear to support vision: {e}") - - def test_image_base64_input(self, responses_client): - # Minimal 1x1 PNG. - png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - resp = self._run_or_skip( - responses_client, - [ - InputTextContent(text="Describe this image briefly."), - InputImageContent.from_bytes(png, "image/png"), - ], - ) - assert resp.status in {"completed", "incomplete"} - - def test_image_with_text(self, responses_client): - png = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "890000000d49444154789c6300010000000500010d0a2db40000000049454e44" - "ae426082" - ) - resp = self._run_or_skip( - responses_client, - [ - InputTextContent(text="What colour is this?"), - InputImageContent.from_bytes(png, "image/png"), - ], - ) - assert resp.status in {"completed", "incomplete"} diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py new file mode 100644 index 000000000..aee569034 --- /dev/null +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -0,0 +1,194 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Integration tests for /v1/responses through the local web service. + +These tests intentionally use FoundryLocalManager only for SDK setup, model +lifecycle, and web-service lifecycle. Actual Responses API calls go through the +official OpenAI Python client against the local OpenAI-compatible endpoint. 
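+
+The module is gated by the shared ``skip_in_ci`` marker from ``conftest``, so it
+only runs where a real Foundry Local runtime and a cached test model exist.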
+""" + +from __future__ import annotations + +import json +from typing import Any + +import pytest +from openai import OpenAI + +from ..conftest import TEST_MODEL_ALIAS, skip_in_ci + + +pytestmark = skip_in_ci + + +def _field(value: Any, name: str, default: Any = None) -> Any: + if isinstance(value, dict): + return value.get(name, default) + return getattr(value, name, default) + + +def _response_text(response: Any) -> str: + text = _field(response, "output_text") + if isinstance(text, str) and text: + return text + + output_text = "" + for item in _field(response, "output", []) or []: + if _field(item, "type") != "message": + continue + for part in _field(item, "content", []) or []: + if _field(part, "type") == "output_text": + part_text = _field(part, "text", "") + if isinstance(part_text, str): + output_text += part_text + return output_text + + +def _get_function_call(response: Any) -> Any: + for item in _field(response, "output", []) or []: + if _field(item, "type") == "function_call": + return item + return None + + +def _get_weather_tool() -> dict[str, Any]: + return { + "type": "function", + "name": "get_weather", + "description": "Get the current weather for a city.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and region, for example Seattle, WA.", + } + }, + "required": ["location"], + }, + } + + +@pytest.fixture(scope="module") +def responses_web_service(manager, catalog): + cached = catalog.get_cached_models() + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + if cached_variant is None: + pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") + + model = catalog.get_model(TEST_MODEL_ALIAS) + if model is None: + pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") + + model.select_variant(cached_variant) + client = None + service_started = False + model_loaded = False + + try: + try: + model.load() + model_loaded = True + manager.start_web_service() + service_started = True + except Exception as exc: + message = str(exc) + if "execute_command_with_binary" in message: + pytest.skip( + "Local Foundry Local Core/native runtime is stale: " + "failed to resolve execute_command_with_binary" + ) + pytest.skip(f"Failed to start Responses web-service test prerequisites: {exc}") + + if not manager.urls: + pytest.skip("Web service started but did not return any URLs") + + base_url = manager.urls[0].rstrip("/") + "/v1" + client = OpenAI(base_url=base_url, api_key="notneeded") + if not hasattr(client, "responses"): + pytest.skip("Installed openai package does not expose the Responses API") + yield client, model.id + finally: + if client is not None: + client.close() + if service_started: + try: + manager.stop_web_service() + except Exception: + pass + if model_loaded: + try: + model.unload() + except Exception: + pass + + +class TestResponsesWebService: + def test_should_create_non_streaming_response(self, responses_web_service): + client, model_id = responses_web_service + + response = client.responses.create( + model=model_id, + input="What is 2 + 2? 
Reply briefly.", + ) + + assert _response_text(response).strip() + + def test_should_stream_response_events(self, responses_web_service): + client, model_id = responses_web_service + saw_text_delta = False + saw_completion = False + + stream = client.responses.create( + model=model_id, + input="Count from 1 to 3, separated by spaces.", + stream=True, + ) + for event in stream: + event_type = _field(event, "type") + if event_type == "response.output_text.delta" and _field(event, "delta"): + saw_text_delta = True + if event_type == "response.completed": + saw_completion = True + + assert saw_text_delta + assert saw_completion + + def test_should_round_trip_function_call_output(self, responses_web_service): + client, model_id = responses_web_service + weather_tool = _get_weather_tool() + + tool_response = client.responses.create( + model=model_id, + input="Use get_weather to check the weather in Seattle, then answer.", + tools=[weather_tool], + tool_choice="required", + store=True, + ) + function_call = _get_function_call(tool_response) + + assert function_call is not None + assert _field(function_call, "name") == "get_weather" + assert _field(function_call, "call_id") + + final_response = client.responses.create( + model=model_id, + previous_response_id=_field(tool_response, "id"), + input=[ + { + "type": "function_call_output", + "call_id": _field(function_call, "call_id"), + "output": json.dumps( + { + "location": "Seattle, WA", + "temperature": "68 F", + "conditions": "sunny", + } + ), + } + ], + ) + + assert _response_text(final_response).strip() From 718e78cca24ccd97ce85e04c3c343a95da48805e Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 16:17:02 -0400 Subject: [PATCH 06/12] refactor(sdk/python): align responses web-service sample tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/README.md | 2 +- samples/python/README.md | 1 + .../web-server-responses/requirements.txt | 3 + .../python/web-server-responses/src/app.py | 152 ++++++++++++ sdk/python/README.md | 3 +- sdk/python/examples/responses_web_service.py | 176 ------------- .../test/openai/test_responses_web_service.py | 232 +++++++++++------- 7 files changed, 299 insertions(+), 270 deletions(-) create mode 100644 samples/python/web-server-responses/requirements.txt create mode 100644 samples/python/web-server-responses/src/app.py delete mode 100644 sdk/python/examples/responses_web_service.py diff --git a/samples/README.md b/samples/README.md index bcac6bf3a..bed7e41c1 100644 --- a/samples/README.md +++ b/samples/README.md @@ -10,5 +10,5 @@ Explore complete working examples that demonstrate how to use Foundry Local — |----------|---------|-------------| | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | | [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. | -| [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. | +| [**Python**](python/) | 11 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, and tutorials. 
| | [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | diff --git a/samples/python/README.md b/samples/python/README.md index 7262f012a..49e99c8a6 100644 --- a/samples/python/README.md +++ b/samples/python/README.md @@ -14,6 +14,7 @@ These samples demonstrate how to use Foundry Local with Python. | [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. | | [audio-transcription](audio-transcription/) | Transcribe audio files using the Whisper model. | | [web-server](web-server/) | Start a local OpenAI-compatible web server and call it with the OpenAI Python SDK. | +| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. | | [tool-calling](tool-calling/) | Tool calling with custom function definitions (get_weather, calculate). | | [langchain-integration](langchain-integration/) | LangChain integration for building translation and text generation chains. | | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). | diff --git a/samples/python/web-server-responses/requirements.txt b/samples/python/web-server-responses/requirements.txt new file mode 100644 index 000000000..db870f608 --- /dev/null +++ b/samples/python/web-server-responses/requirements.txt @@ -0,0 +1,3 @@ +foundry-local-sdk; sys_platform != "win32" +foundry-local-sdk-winml; sys_platform == "win32" +openai diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py new file mode 100644 index 000000000..6f186a2a6 --- /dev/null +++ b/samples/python/web-server-responses/src/app.py @@ -0,0 +1,152 @@ +# +# +import json +from typing import Any + +from openai import OpenAI + +from foundry_local_sdk import Configuration, FoundryLocalManager +# + + +def get_response_text(response: Any) -> str: + if isinstance(getattr(response, "output_text", None), str): + return response.output_text + return "".join( + getattr(part, "text", "") + for item in getattr(response, "output", []) or [] + for part in getattr(item, "content", []) or [] + if getattr(part, "type", None) == "output_text" + ) + + +# +# Initialize the Foundry Local SDK +config = Configuration(app_name="foundry_local_samples") +FoundryLocalManager.initialize(config) +manager = FoundryLocalManager.instance + +# Download and register all execution providers. 
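+# The progress callback receives the execution provider's name and a completion
+# percentage; _ep_progress below tracks the current EP so that each provider
+# renders its own updating progress line.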
+current_ep = "" + + +def _ep_progress(ep_name: str, percent: float): + global current_ep + if ep_name != current_ep: + if current_ep: + print() + current_ep = ep_name + print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True) + + +manager.download_and_register_eps(progress_callback=_ep_progress) +if current_ep: + print() +# + +# +model_alias = "qwen2.5-0.5b" +model = manager.catalog.get_model(model_alias) + +print(f"\nDownloading model {model_alias}...") +model.download( + lambda progress: print( + f"\rDownloading model: {progress:.2f}%", + end="", + flush=True, + ) +) +print("\nModel downloaded") + +print("\nLoading model...") +model.load() +print("Model loaded") +# + +# +print("\nStarting web service...") +manager.start_web_service() +base_url = manager.urls[0].rstrip("/") + "/v1" +print("Web service started") + +# <<<<<< OPENAI SDK USAGE >>>>>> +# Use the OpenAI SDK to call the local Foundry web service Responses API +openai = OpenAI( + base_url=base_url, + api_key="notneeded", +) +# + +try: + print("\nTesting a non-streaming Responses call...") + response = openai.responses.create( + model=model.id, + input="Reply with one short sentence about local AI.", + ) + print(f"[ASSISTANT]: {get_response_text(response)}") + + print("\nTesting a streaming Responses call...") + stream = openai.responses.create( + model=model.id, + input="Count from one to three.", + stream=True, + ) + + print("[ASSISTANT STREAM]: ", end="", flush=True) + for event in stream: + if getattr(event, "type", None) == "response.output_text.delta": + print(getattr(event, "delta", ""), end="", flush=True) + print() + + print("\nTesting Responses tool calling...") + tools = [ + { + "type": "function", + "name": "get_weather", + "description": "Get the current weather. This sample always returns Seattle weather.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": False, + }, + }, + ] + + tool_response = openai.responses.create( + model=model.id, + input="Use the get_weather tool and then answer with the weather.", + tools=tools, + tool_choice="required", + store=True, + ) + + function_call = next( + (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), + None, + ) + if function_call is None: + raise RuntimeError("Expected the model to call get_weather.") + + print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") + + final_response = openai.responses.create( + model=model.id, + previous_response_id=tool_response.id, + input=[ + { + "type": "function_call_output", + "call_id": function_call.call_id, + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + tools=tools, + ) + + print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") + # <<<<<< END OPENAI SDK USAGE >>>>>> +finally: + # Tidy up + openai.close() + manager.stop_web_service() + model.unload() +# diff --git a/sdk/python/README.md b/sdk/python/README.md index 0c065bc85..2a121411e 100644 --- a/sdk/python/README.md +++ b/sdk/python/README.md @@ -328,5 +328,4 @@ See [test/README.md](test/README.md) for detailed test setup and structure. 
```bash python examples/chat_completion.py -python examples/responses_web_service.py -``` +``` \ No newline at end of file diff --git a/sdk/python/examples/responses_web_service.py b/sdk/python/examples/responses_web_service.py deleted file mode 100644 index fe9517949..000000000 --- a/sdk/python/examples/responses_web_service.py +++ /dev/null @@ -1,176 +0,0 @@ -#!/usr/bin/env python3 -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- - -"""Example: Responses API through the Foundry Local web service. - -Foundry Local manages setup, model lifecycle, and the local OpenAI-compatible -web service. The official OpenAI Python client sends the actual /v1/responses -requests to that local service. -""" - -from __future__ import annotations - -import json -from typing import Any - -from openai import OpenAI - -from foundry_local_sdk import Configuration, FoundryLocalManager - - -MODEL_ALIAS = "qwen2.5-0.5b" - - -def _field(value: Any, name: str, default: Any = None) -> Any: - if isinstance(value, dict): - return value.get(name, default) - return getattr(value, name, default) - - -def _response_text(response: Any) -> str: - text = _field(response, "output_text") - if isinstance(text, str) and text: - return text - - for item in _field(response, "output", []) or []: - if _field(item, "type") != "message": - continue - for part in _field(item, "content", []) or []: - if _field(part, "type") == "output_text": - part_text = _field(part, "text", "") - if isinstance(part_text, str): - text = (text or "") + part_text - return text or "" - - -def _get_function_call(response: Any) -> Any: - for item in _field(response, "output", []) or []: - if _field(item, "type") == "function_call": - return item - return None - - -def _get_weather_tool() -> dict[str, Any]: - return { - "type": "function", - "name": "get_weather", - "description": "Get the current weather for a city.", - "parameters": { - "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and region, for example Seattle, WA.", - } - }, - "required": ["location"], - }, - } - - -def main() -> None: - config = Configuration(app_name="ResponsesWebServiceExample") - print("Initializing Foundry Local Manager") - FoundryLocalManager.initialize(config) - manager = FoundryLocalManager.instance - if manager is None: - raise RuntimeError("FoundryLocalManager.initialize did not set instance") - - print("Registering execution providers...") - ep_result = manager.download_and_register_eps() - print(f"EP registration success: {ep_result.success} ({ep_result.status})") - - model = manager.catalog.get_model(MODEL_ALIAS) - if model is None: - raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog") - - if not model.is_cached: - print(f"Downloading {model.alias}...") - model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r")) - print() - - print(f"Loading {model.alias}...", end="") - model.load() - print("loaded!") - - openai_client: OpenAI | None = None - try: - print("Starting OpenAI-compatible web service...", end="") - manager.start_web_service() - if not manager.urls: - raise RuntimeError("Web service started but did not return any URLs") - print("started!") - - base_url = manager.urls[0].rstrip("/") + "/v1" - openai_client = OpenAI(base_url=base_url, api_key="notneeded") - - print("\n--- 
Non-streaming Responses call ---") - response = openai_client.responses.create( - model=model.id, - input="What is 2 + 2? Reply briefly.", - ) - print(_response_text(response)) - - print("\n--- Streaming Responses call ---") - stream = openai_client.responses.create( - model=model.id, - input="Count from 1 to 3, separated by spaces.", - stream=True, - ) - for event in stream: - if _field(event, "type") == "response.output_text.delta": - print(_field(event, "delta", ""), end="", flush=True) - print() - - print("\n--- Function/tool calling Responses flow ---") - weather_tool = _get_weather_tool() - tool_response = openai_client.responses.create( - model=model.id, - input="Use get_weather to check the weather in Seattle, then answer.", - tools=[weather_tool], - tool_choice="required", - store=True, - ) - function_call = _get_function_call(tool_response) - if function_call is None: - raise RuntimeError("Model did not return a function_call item") - - print(f"Tool call: {_field(function_call, 'name')}") - print(f"Arguments: {_field(function_call, 'arguments')}") - - final_response = openai_client.responses.create( - model=model.id, - previous_response_id=_field(tool_response, "id"), - input=[ - { - "type": "function_call_output", - "call_id": _field(function_call, "call_id"), - "output": json.dumps( - { - "location": "Seattle, WA", - "temperature": "68 F", - "conditions": "sunny", - } - ), - } - ], - ) - print(_response_text(final_response)) - - finally: - if openai_client is not None: - openai_client.close() - try: - manager.stop_web_service() - print("Web service stopped.") - except Exception: - pass - model.unload() - print("Model unloaded.") - - -if __name__ == "__main__": - main() diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index aee569034..e323a892e 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -6,16 +6,15 @@ These tests intentionally use FoundryLocalManager only for SDK setup, model lifecycle, and web-service lifecycle. Actual Responses API calls go through the -official OpenAI Python client against the local OpenAI-compatible endpoint. +OpenAI-compatible HTTP endpoint directly. 
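+This keeps the suite exercising the raw HTTP surface rather than whatever the
+installed openai package version happens to support.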
""" from __future__ import annotations import json -from typing import Any import pytest -from openai import OpenAI +import requests from ..conftest import TEST_MODEL_ALIAS, skip_in_ci @@ -23,50 +22,95 @@ pytestmark = skip_in_ci -def _field(value: Any, name: str, default: Any = None) -> Any: - if isinstance(value, dict): - return value.get(name, default) - return getattr(value, name, default) - - -def _response_text(response: Any) -> str: - text = _field(response, "output_text") +def _response_text(response: dict) -> str: + text = response.get("output_text") if isinstance(text, str) and text: return text - output_text = "" - for item in _field(response, "output", []) or []: - if _field(item, "type") != "message": - continue - for part in _field(item, "content", []) or []: - if _field(part, "type") == "output_text": - part_text = _field(part, "text", "") - if isinstance(part_text, str): - output_text += part_text - return output_text - - -def _get_function_call(response: Any) -> Any: - for item in _field(response, "output", []) or []: - if _field(item, "type") == "function_call": + return "".join( + part.get("text", "") + for item in response.get("output", []) or [] + if item.get("type") == "message" + for part in item.get("content", []) or [] + if part.get("type") == "output_text" and isinstance(part.get("text"), str) + ) + + +def _post_response(base_url: str, body: dict) -> dict: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json"}, + json=body, + timeout=60, + ) + assert response.ok, response.text + return response.json() + + +def _post_streaming_response(base_url: str, body: dict) -> list[dict]: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + json={**body, "stream": True}, + stream=True, + timeout=(60, None), + ) + assert response.ok, response.text + + events: list[dict] = [] + buffer = "" + try: + for chunk in response.iter_content(chunk_size=None, decode_unicode=False): + if not chunk: + continue + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") + + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) + data = _sse_data(block) + if not data: + continue + if data == "[DONE]": + return events + events.append(json.loads(data)) + finally: + response.close() + + tail = buffer.strip() + if tail: + data = _sse_data(tail) + if data and data != "[DONE]": + events.append(json.loads(data)) + return events + + +def _sse_data(block: str) -> str: + lines: list[str] = [] + for line in block.strip().split("\n"): + if line.startswith("data: "): + lines.append(line[6:]) + elif line == "data:": + lines.append("") + return "\n".join(lines).strip() + + +def _get_function_call(response: dict) -> dict | None: + for item in response.get("output", []) or []: + if item.get("type") == "function_call": return item return None -def _get_weather_tool() -> dict[str, Any]: +def _get_weather_tool() -> dict: return { "type": "function", "name": "get_weather", - "description": "Get the current weather for a city.", + "description": "Get the current weather. 
This test always returns Seattle weather.", "parameters": { "type": "object", - "properties": { - "location": { - "type": "string", - "description": "The city and region, for example Seattle, WA.", - } - }, - "required": ["location"], + "properties": {}, + "additionalProperties": False, }, } @@ -83,7 +127,6 @@ def responses_web_service(manager, catalog): pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") model.select_variant(cached_variant) - client = None service_started = False model_loaded = False @@ -105,14 +148,8 @@ def responses_web_service(manager, catalog): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - base_url = manager.urls[0].rstrip("/") + "/v1" - client = OpenAI(base_url=base_url, api_key="notneeded") - if not hasattr(client, "responses"): - pytest.skip("Installed openai package does not expose the Responses API") - yield client, model.id + yield manager.urls[0].rstrip("/"), model.id finally: - if client is not None: - client.close() if service_started: try: manager.stop_web_service() @@ -127,68 +164,81 @@ def responses_web_service(manager, catalog): class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - client, model_id = responses_web_service - - response = client.responses.create( - model=model_id, - input="What is 2 + 2? Reply briefly.", + base_url, model_id = responses_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": "What is 2 + 2? Answer with just the number.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) + assert response["object"] == "response" + assert response["status"] == "completed" assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): - client, model_id = responses_web_service - saw_text_delta = False - saw_completion = False - - stream = client.responses.create( - model=model_id, - input="Count from 1 to 3, separated by spaces.", - stream=True, + base_url, model_id = responses_web_service + + events = _post_streaming_response( + base_url, + { + "model": model_id, + "input": "Count from 1 to 3.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - for event in stream: - event_type = _field(event, "type") - if event_type == "response.output_text.delta" and _field(event, "delta"): - saw_text_delta = True - if event_type == "response.completed": - saw_completion = True - assert saw_text_delta - assert saw_completion + assert any(event.get("type") == "response.created" for event in events) + assert any(event.get("type") == "response.output_text.delta" for event in events) + assert any(event.get("type") == "response.completed" for event in events) def test_should_round_trip_function_call_output(self, responses_web_service): - client, model_id = responses_web_service + base_url, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = client.responses.create( - model=model_id, - input="Use get_weather to check the weather in Seattle, then answer.", - tools=[weather_tool], - tool_choice="required", - store=True, + tool_response = _post_response( + base_url, + { + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": [weather_tool], + "tool_choice": "required", + "temperature": 0, + "max_output_tokens": 64, + "store": True, + }, ) function_call = _get_function_call(tool_response) - assert function_call is not None - assert _field(function_call, 
"name") == "get_weather" - assert _field(function_call, "call_id") - - final_response = client.responses.create( - model=model_id, - previous_response_id=_field(tool_response, "id"), - input=[ - { - "type": "function_call_output", - "call_id": _field(function_call, "call_id"), - "output": json.dumps( - { - "location": "Seattle, WA", - "temperature": "68 F", - "conditions": "sunny", - } - ), - } - ], + assert function_call is not None, json.dumps(tool_response.get("output", [])) + assert function_call["name"] == "get_weather" + assert isinstance(function_call["call_id"], str) + + final_response = _post_response( + base_url, + { + "model": model_id, + "previous_response_id": tool_response["id"], + "input": [ + { + "type": "function_call_output", + "call_id": function_call["call_id"], + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + "tools": [weather_tool], + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) + assert final_response["status"] == "completed" assert _response_text(final_response).strip() From 0808187d82a8eb731583cb6896ea3ad5958bdd42 Mon Sep 17 00:00:00 2001 From: maanavd Date: Fri, 1 May 2026 17:54:21 -0400 Subject: [PATCH 07/12] docs(samples): add Python responses web-service README Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- samples/python/web-server-responses/README.md | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 samples/python/web-server-responses/README.md diff --git a/samples/python/web-server-responses/README.md b/samples/python/web-server-responses/README.md new file mode 100644 index 000000000..95666d910 --- /dev/null +++ b/samples/python/web-server-responses/README.md @@ -0,0 +1,44 @@ +# Foundry Local Python Responses Web-Service Sample + +This sample starts the Foundry Local OpenAI-compatible web service, then calls the Responses API with the official OpenAI Python client. + +It demonstrates: + +- A non-streaming `/v1/responses` call +- A streaming `/v1/responses` call +- A function/tool-calling round trip using `previous_response_id` + +## What gets installed + +Install the sample dependencies from `requirements.txt`: + +```bash +pip install -r requirements.txt +``` + +That installs: + +- `foundry-local-sdk` on non-Windows platforms +- `foundry-local-sdk-winml` on Windows +- `openai` + +The sample downloads/registers Foundry Local execution providers and downloads the `qwen2.5-0.5b` model the first time it runs. + +## Run the sample + +From this directory: + +```bash +python -m venv .venv +.\.venv\Scripts\activate +pip install -r requirements.txt +python src\app.py +``` + +On macOS or Linux, activate the virtual environment with: + +```bash +source .venv/bin/activate +``` + +The sample starts the local web service, sends Responses API requests to `http://localhost:/v1`, prints the model output, and then unloads the model and stops the web service. 
From a3e8a0f8001822dea29a997b04d7322d7bb4985a Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 21:49:04 +0200 Subject: [PATCH 08/12] test(sdk/python): add Responses image URL coverage Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../test/openai/test_responses_web_service.py | 75 +++++++++++++++++-- 1 file changed, 68 insertions(+), 7 deletions(-) diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index e323a892e..0b1a4cb45 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -21,6 +21,11 @@ pytestmark = skip_in_ci +VISION_MODEL_ALIAS = "qwen3-vl-2b-instruct" +VISION_IMAGE_URL = ( + "https://raw.githubusercontent.com/microsoft/fluentui-emoji/main/assets/Camera/3D/camera_3d.png" +) + def _response_text(response: dict) -> str: text = response.get("output_text") @@ -115,18 +120,21 @@ def _get_weather_tool() -> dict: } -@pytest.fixture(scope="module") -def responses_web_service(manager, catalog): +def _get_cached_model(catalog, model_alias: str): cached = catalog.get_cached_models() - cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) + cached_variant = next((m for m in cached if m.alias == model_alias), None) if cached_variant is None: - pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") + pytest.skip(f"{model_alias} must be cached to run Responses web-service tests") - model = catalog.get_model(TEST_MODEL_ALIAS) + model = catalog.get_model(model_alias) if model is None: - pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") + pytest.skip(f"{model_alias} was not found in the catalog") model.select_variant(cached_variant) + return model + + +def _run_responses_web_service(manager, model): service_started = False model_loaded = False @@ -162,6 +170,22 @@ def responses_web_service(manager, catalog): pass +@pytest.fixture(scope="class") +def responses_web_service(manager, catalog): + model = _get_cached_model(catalog, TEST_MODEL_ALIAS) + yield from _run_responses_web_service(manager, model) + + +@pytest.fixture(scope="class") +def responses_vision_web_service(manager, catalog): + model = _get_cached_model(catalog, VISION_MODEL_ALIAS) + input_modalities = model.input_modalities or "" + if "image" not in input_modalities.split(","): + pytest.skip(f"{VISION_MODEL_ALIAS} does not advertise image input support") + + yield from _run_responses_web_service(manager, model) + + class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): base_url, model_id = responses_web_service @@ -178,7 +202,7 @@ def test_should_create_non_streaming_response(self, responses_web_service): ) assert response["object"] == "response" - assert response["status"] == "completed" + assert response["status"] == "completed", response.get("error") assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): @@ -242,3 +266,40 @@ def test_should_round_trip_function_call_output(self, responses_web_service): assert final_response["status"] == "completed" assert _response_text(final_response).strip() + + +class TestResponsesVisionWebService: + def test_should_create_response_with_image_url(self, responses_vision_web_service): + base_url, model_id = responses_vision_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": [ + { + "type": "message", + "role": 
"user", + "content": [ + { + "type": "input_text", + "text": "Describe this image in one short sentence.", + }, + { + "type": "input_image", + "image_url": VISION_IMAGE_URL, + "media_type": "image/png", + "detail": "low", + }, + ], + } + ], + "temperature": 0, + "max_output_tokens": 128, + "store": False, + }, + ) + + assert response["object"] == "response" + assert response["status"] == "completed", response.get("error") + assert _response_text(response).strip() From 3f0579ed3817486e924dcb593e9dda7d1fa38078 Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 22:00:11 +0200 Subject: [PATCH 09/12] feat(python): add ResponsesClient backed by native openai SDK Add ResponsesClient to the Python SDK, backed by the native openai SDK pointed at Foundry Local's OpenAI-compatible web service. Adds FoundryLocalManager.create_responses_client(model_id) factory. Usage: manager.start_web_service() client = manager.create_responses_client(model.id) response = client.create('What is 2 + 2?') print(response.output_text) - ResponsesClient: thin wrapper over openai.OpenAI targeting /v1 - ResponsesClientSettings: default settings (temperature, store, etc.) - create() / create_streaming() / stream() / get() / delete() / cancel() - context-manager support (close() releases underlying HTTP client) - Exported from foundry_local_sdk and foundry_local_sdk.openai - Integration tests updated to use ResponsesClient (3 pass, 1 skipped) - Sample updated to use manager.create_responses_client() Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/web-server-responses/src/app.py | 70 ++--- sdk/python/src/__init__.py | 3 +- sdk/python/src/foundry_local_manager.py | 32 ++ sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 267 +++++++++++++++++ .../test/openai/test_responses_web_service.py | 274 ++++++------------ 6 files changed, 420 insertions(+), 231 deletions(-) create mode 100644 sdk/python/src/openai/responses_client.py diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py index 6f186a2a6..e58df3e4e 100644 --- a/samples/python/web-server-responses/src/app.py +++ b/samples/python/web-server-responses/src/app.py @@ -1,25 +1,11 @@ # # import json -from typing import Any - -from openai import OpenAI from foundry_local_sdk import Configuration, FoundryLocalManager # -def get_response_text(response: Any) -> str: - if isinstance(getattr(response, "output_text", None), str): - return response.output_text - return "".join( - getattr(part, "text", "") - for item in getattr(response, "output", []) or [] - for part in getattr(item, "content", []) or [] - if getattr(part, "type", None) == "output_text" - ) - - # # Initialize the Foundry Local SDK config = Configuration(app_name="foundry_local_samples") @@ -66,36 +52,29 @@ def _ep_progress(ep_name: str, percent: float): # print("\nStarting web service...") manager.start_web_service() -base_url = manager.urls[0].rstrip("/") + "/v1" print("Web service started") - -# <<<<<< OPENAI SDK USAGE >>>>>> -# Use the OpenAI SDK to call the local Foundry web service Responses API -openai = OpenAI( - base_url=base_url, - api_key="notneeded", -) # +# +# Create a Responses API client via the SDK manager — no manual URL or API key needed. 
+client = manager.create_responses_client(model.id) +# + try: print("\nTesting a non-streaming Responses call...") - response = openai.responses.create( - model=model.id, - input="Reply with one short sentence about local AI.", - ) - print(f"[ASSISTANT]: {get_response_text(response)}") + response = client.create("Reply with one short sentence about local AI.") + print(f"[ASSISTANT]: {response.output_text}") print("\nTesting a streaming Responses call...") - stream = openai.responses.create( - model=model.id, - input="Count from one to three.", - stream=True, - ) - print("[ASSISTANT STREAM]: ", end="", flush=True) - for event in stream: - if getattr(event, "type", None) == "response.output_text.delta": - print(getattr(event, "delta", ""), end="", flush=True) + client.create_streaming( + "Count from one to three.", + callback=lambda event: print( + getattr(event, "delta", ""), + end="", + flush=True, + ) if getattr(event, "type", None) == "response.output_text.delta" else None, + ) print() print("\nTesting Responses tool calling...") @@ -112,16 +91,15 @@ def _ep_progress(ep_name: str, percent: float): }, ] - tool_response = openai.responses.create( - model=model.id, - input="Use the get_weather tool and then answer with the weather.", + tool_response = client.create( + "Use the get_weather tool and then answer with the weather.", tools=tools, tool_choice="required", store=True, ) function_call = next( - (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), + (item for item in tool_response.output if item.type == "function_call"), None, ) if function_call is None: @@ -129,24 +107,22 @@ def _ep_progress(ep_name: str, percent: float): print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") - final_response = openai.responses.create( - model=model.id, - previous_response_id=tool_response.id, - input=[ + final_response = client.create( + [ { "type": "function_call_output", "call_id": function_call.call_id, "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), } ], + previous_response_id=tool_response.id, tools=tools, ) - print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") - # <<<<<< END OPENAI SDK USAGE >>>>>> + print(f"[ASSISTANT FINAL]: {final_response.output_text}") finally: # Tidy up - openai.close() + client.close() manager.stop_web_service() model.unload() # diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 14534d196..9fecca997 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,6 +7,7 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager +from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .version import __version__ _logger = logging.getLogger(__name__) @@ -20,4 +21,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "__version__"] +__all__ = ["Configuration", "FoundryLocalManager", "ResponsesClient", "ResponsesClientSettings", "__version__"] diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index a649f8e56..902db6e18 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,6 +20,7 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException +from .openai.responses_client import ResponsesClient 
logger = logging.getLogger(__name__) @@ -194,3 +195,34 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None + + def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: + """Create a :class:`ResponsesClient` for the Responses API. + + The web service must be running before calling this method. Start it + with :meth:`start_web_service` first. + + Args: + model_id: Default model ID for requests. Can be overridden + per-request via the ``model`` keyword argument on + :meth:`~ResponsesClient.create`. + + Returns: + A :class:`ResponsesClient` pointed at the running web service. + + Raises: + FoundryLocalException: If the web service is not running. + + Example:: + + manager.start_web_service() + client = manager.create_responses_client(model.id) + response = client.create("What is 2 + 2?") + print(response.output_text) + """ + if not self.urls: + raise FoundryLocalException( + "Web service is not running. Call start_web_service() before " + "creating a ResponsesClient." + ) + return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index 2fa51a6f6..e7016799a 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. # -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions and audio transcription.""" +"""OpenAI-compatible clients for chat completions, audio transcription, and responses.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,6 +14,7 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) +from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -24,5 +25,7 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", + "ResponsesClient", + "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py new file mode 100644 index 000000000..55121e320 --- /dev/null +++ b/sdk/python/src/openai/responses_client.py @@ -0,0 +1,267 @@ +# ------------------------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# -------------------------------------------------------------------------- +"""Responses API client for Foundry Local's embedded web service. + +Uses the native ``openai`` SDK to call the Responses API on Foundry Local's +OpenAI-compatible web service. Create via +``FoundryLocalManager.create_responses_client()`` or +``model.create_responses_client(base_url)``. + +Example:: + + manager.start_web_service() + client = manager.create_responses_client(model.id) + + # Non-streaming + response = client.create("Hello, world!") + print(response.output_text) + + # Streaming + client.create_streaming("Tell me a story", lambda event: print(event)) +""" + +from __future__ import annotations + +import logging +from typing import Any, Callable, Iterator, Optional, Union + +from openai import OpenAI + +logger = logging.getLogger(__name__) + + +class ResponsesClientSettings: + """Default settings applied to every request made by a :class:`ResponsesClient`. 
+
+    Per-call keyword arguments passed to :meth:`ResponsesClient.create` override
+    these defaults. Attribute names match the OpenAI Responses API parameters
+    (snake_case).
+    """
+
+    def __init__(
+        self,
+        instructions: Optional[str] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        max_output_tokens: Optional[int] = None,
+        frequency_penalty: Optional[float] = None,
+        presence_penalty: Optional[float] = None,
+        tool_choice: Optional[Any] = None,
+        truncation: Optional[str] = None,
+        parallel_tool_calls: Optional[bool] = None,
+        store: Optional[bool] = None,
+        seed: Optional[int] = None,
+    ):
+        self.instructions = instructions
+        self.temperature = temperature
+        self.top_p = top_p
+        self.max_output_tokens = max_output_tokens
+        self.frequency_penalty = frequency_penalty
+        self.presence_penalty = presence_penalty
+        self.tool_choice = tool_choice
+        self.truncation = truncation
+        self.parallel_tool_calls = parallel_tool_calls
+        self.store = store
+        self.seed = seed
+
+    def _as_kwargs(self) -> dict[str, Any]:
+        """Return non-None settings as keyword arguments for the openai SDK."""
+        return {
+            k: v for k, v in {
+                "instructions": self.instructions,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "max_output_tokens": self.max_output_tokens,
+                "frequency_penalty": self.frequency_penalty,
+                "presence_penalty": self.presence_penalty,
+                "tool_choice": self.tool_choice,
+                "truncation": self.truncation,
+                "parallel_tool_calls": self.parallel_tool_calls,
+                "store": self.store,
+                "seed": self.seed,
+            }.items() if v is not None
+        }
+
+
+class ResponsesClient:
+    """Client for the OpenAI Responses API served by Foundry Local.
+
+    Backed by the native ``openai`` SDK pointed at the local web service.
+    Create via :meth:`FoundryLocalManager.create_responses_client` or
+    :meth:`model.create_responses_client`.
+
+    Args:
+        base_url: Base URL of the Foundry Local web service (e.g.
+            ``"http://127.0.0.1:5273"``). Do **not** include ``/v1`` — it is
+            appended automatically. Trailing slashes are stripped.
+        model_id: Default model ID. Can be overridden per-request via the
+            ``model`` keyword argument to :meth:`create`.
+    """
+
+    def __init__(self, base_url: str, model_id: Optional[str] = None):
+        if not base_url or not isinstance(base_url, str) or not base_url.strip():
+            raise ValueError("base_url must be a non-empty string.")
+        openai_base = base_url.rstrip("/") + "/v1"
+        self._client = OpenAI(base_url=openai_base, api_key="notneeded")
+        self._model_id = model_id
+        self.settings = ResponsesClientSettings()
+
+    # =========================================================================
+    # Public API
+    # =========================================================================
+
+    def create(self, input: Union[str, list], **options: Any) -> Any:  # noqa: A002
+        """Create a model response (non-streaming).
+
+        Args:
+            input: A string prompt or a list of Responses API input items.
+                Each dict item must have a ``"type"`` field (e.g.
+                ``{"type": "message", "role": "user", "content": [...]}``).
+            **options: Additional parameters forwarded to
+                ``openai.responses.create``. Pass ``model="..."`` to override
+                the constructor default.
+
+        Returns:
+            An ``openai.types.responses.Response`` object. Use
+            ``.output_text`` for the assistant text, ``.output`` for the full
+            item list, and ``.id`` for chaining with ``previous_response_id``.
+
+        Raises:
+            ValueError: If ``input`` is invalid or no model is specified.
+            openai.OpenAIError: On API or network errors.
+ """ + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + return self._client.responses.create(model=model, input=input, **kwargs) + + def create_streaming( + self, + input: Union[str, list], # noqa: A002 + callback: Callable[[Any], None], + **options: Any, + ) -> None: + """Create a model response with streaming. + + Each event object from the openai stream is delivered to *callback*. + + Args: + input: A string prompt or a list of Responses API input items. + callback: Called for each streaming event. Events are typed + ``openai`` SDK objects with a ``.type`` attribute. + **options: Additional parameters forwarded to + ``openai.responses.create``. + + Raises: + ValueError: If ``input`` is invalid or *callback* is not callable. + openai.OpenAIError: On API or network errors. + """ + if not callable(callback): + raise ValueError("callback must be a callable.") + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: + for event in stream: + callback(event) + + def stream(self, input: Union[str, list], **options: Any) -> Iterator[Any]: # noqa: A002 + """Create a model response and return an iterator of streaming events. + + This is a generator-style alternative to :meth:`create_streaming` that + yields each event instead of using a callback. + + Args: + input: A string prompt or a list of Responses API input items. + **options: Additional parameters forwarded to + ``openai.responses.create``. + + Yields: + Streaming event objects from the openai SDK. + + Raises: + ValueError: If no model is specified. + openai.OpenAIError: On API or network errors. + """ + model = options.pop("model", None) or self._model_id + self._require_model(model) + kwargs = {**self.settings._as_kwargs(), **options} + with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: + yield from stream + + def get(self, response_id: str) -> Any: + """Retrieve a stored response by ID. + + Args: + response_id: The ID of the response to retrieve. + + Returns: + An ``openai.types.responses.Response`` object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.retrieve(response_id) + + def delete(self, response_id: str) -> Any: + """Delete a stored response by ID. + + Args: + response_id: The ID of the response to delete. + + Returns: + The deletion result object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.delete(response_id) + + def cancel(self, response_id: str) -> Any: + """Cancel an in-progress response. + + Args: + response_id: The ID of the response to cancel. + + Returns: + The cancelled ``openai.types.responses.Response`` object. + """ + self._validate_id(response_id, "response_id") + return self._client.responses.cancel(response_id) + + def get_input_items(self, response_id: str) -> Any: + """Retrieve the input items for a stored response. + + Args: + response_id: The ID of the response. + + Returns: + A paginated list of input items. 
+ """ + self._validate_id(response_id, "response_id") + return self._client.responses.input_items.list(response_id) + + def close(self) -> None: + """Close the underlying OpenAI HTTP client and release resources.""" + self._client.close() + + def __enter__(self) -> "ResponsesClient": + return self + + def __exit__(self, *args: Any) -> None: + self.close() + + # ========================================================================= + # Internal helpers + # ========================================================================= + + def _require_model(self, model: Optional[str]) -> None: + if not model or not isinstance(model, str) or not model.strip(): + raise ValueError( + "model must be specified either in the constructor via " + "create_responses_client(model_id) or as an options keyword argument." + ) + + def _validate_id(self, value: Any, param: str) -> None: + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{param} must be a non-empty string.") + if len(value) > 1024: + raise ValueError(f"{param} exceeds the maximum length of 1024 characters.") diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index 0b1a4cb45..7683fc768 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -4,9 +4,9 @@ # -------------------------------------------------------------------------- """Integration tests for /v1/responses through the local web service. -These tests intentionally use FoundryLocalManager only for SDK setup, model -lifecycle, and web-service lifecycle. Actual Responses API calls go through the -OpenAI-compatible HTTP endpoint directly. +These tests use FoundryLocalManager for SDK setup, model lifecycle, and web-service +lifecycle. Actual Responses API calls go through ResponsesClient, which is backed +by the native openai SDK pointed at the local web service. 
""" from __future__ import annotations @@ -14,7 +14,9 @@ import json import pytest -import requests + +from foundry_local_sdk import FoundryLocalManager +from foundry_local_sdk.openai import ResponsesClient from ..conftest import TEST_MODEL_ALIAS, skip_in_ci @@ -27,86 +29,6 @@ ) -def _response_text(response: dict) -> str: - text = response.get("output_text") - if isinstance(text, str) and text: - return text - - return "".join( - part.get("text", "") - for item in response.get("output", []) or [] - if item.get("type") == "message" - for part in item.get("content", []) or [] - if part.get("type") == "output_text" and isinstance(part.get("text"), str) - ) - - -def _post_response(base_url: str, body: dict) -> dict: - response = requests.post( - f"{base_url}/v1/responses", - headers={"Content-Type": "application/json"}, - json=body, - timeout=60, - ) - assert response.ok, response.text - return response.json() - - -def _post_streaming_response(base_url: str, body: dict) -> list[dict]: - response = requests.post( - f"{base_url}/v1/responses", - headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, - json={**body, "stream": True}, - stream=True, - timeout=(60, None), - ) - assert response.ok, response.text - - events: list[dict] = [] - buffer = "" - try: - for chunk in response.iter_content(chunk_size=None, decode_unicode=False): - if not chunk: - continue - text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk - buffer += text.replace("\r\n", "\n") - - while "\n\n" in buffer: - block, buffer = buffer.split("\n\n", 1) - data = _sse_data(block) - if not data: - continue - if data == "[DONE]": - return events - events.append(json.loads(data)) - finally: - response.close() - - tail = buffer.strip() - if tail: - data = _sse_data(tail) - if data and data != "[DONE]": - events.append(json.loads(data)) - return events - - -def _sse_data(block: str) -> str: - lines: list[str] = [] - for line in block.strip().split("\n"): - if line.startswith("data: "): - lines.append(line[6:]) - elif line == "data:": - lines.append("") - return "\n".join(lines).strip() - - -def _get_function_call(response: dict) -> dict | None: - for item in response.get("output", []) or []: - if item.get("type") == "function_call": - return item - return None - - def _get_weather_tool() -> dict: return { "type": "function", @@ -134,9 +56,10 @@ def _get_cached_model(catalog, model_alias: str): return model -def _run_responses_web_service(manager, model): +def _run_responses_web_service(manager: FoundryLocalManager, model): service_started = False model_loaded = False + client: ResponsesClient | None = None try: try: @@ -156,8 +79,11 @@ def _run_responses_web_service(manager, model): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - yield manager.urls[0].rstrip("/"), model.id + client = manager.create_responses_client(model.id) + yield client, model.id finally: + if client is not None: + client.close() if service_started: try: manager.stop_web_service() @@ -188,118 +114,102 @@ def responses_vision_web_service(manager, catalog): class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - base_url, model_id = responses_web_service - - response = _post_response( - base_url, - { - "model": model_id, - "input": "What is 2 + 2? 
Answer with just the number.", - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + client, model_id = responses_web_service + + response = client.create( + "What is 2 + 2? Answer with just the number.", + temperature=0, + max_output_tokens=64, + store=False, ) - assert response["object"] == "response" - assert response["status"] == "completed", response.get("error") - assert _response_text(response).strip() + assert response.status == "completed", response.error + assert response.output_text.strip(), "Expected non-empty assistant text" def test_should_stream_response_events(self, responses_web_service): - base_url, model_id = responses_web_service - - events = _post_streaming_response( - base_url, - { - "model": model_id, - "input": "Count from 1 to 3.", - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + client, model_id = responses_web_service + + event_types: list[str] = [] + client.create_streaming( + "Count from 1 to 3.", + callback=lambda e: event_types.append(getattr(e, "type", "")), + temperature=0, + max_output_tokens=64, + store=False, ) - assert any(event.get("type") == "response.created" for event in events) - assert any(event.get("type") == "response.output_text.delta" for event in events) - assert any(event.get("type") == "response.completed" for event in events) + assert "response.created" in event_types, f"Events seen: {event_types}" + assert "response.output_text.delta" in event_types, f"Events seen: {event_types}" + assert "response.completed" in event_types, f"Events seen: {event_types}" def test_should_round_trip_function_call_output(self, responses_web_service): - base_url, model_id = responses_web_service + client, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = _post_response( - base_url, - { - "model": model_id, - "input": "Use the get_weather tool and then answer with the weather.", - "tools": [weather_tool], - "tool_choice": "required", - "temperature": 0, - "max_output_tokens": 64, - "store": True, - }, + tool_response = client.create( + "Use the get_weather tool and then answer with the weather.", + tools=[weather_tool], + tool_choice="required", + temperature=0, + max_output_tokens=64, + store=True, + ) + + function_call = next( + (item for item in tool_response.output if item.type == "function_call"), + None, + ) + assert function_call is not None, ( + f"Expected a function_call item. 
Output: {tool_response.output}" ) - function_call = _get_function_call(tool_response) - - assert function_call is not None, json.dumps(tool_response.get("output", [])) - assert function_call["name"] == "get_weather" - assert isinstance(function_call["call_id"], str) - - final_response = _post_response( - base_url, - { - "model": model_id, - "previous_response_id": tool_response["id"], - "input": [ - { - "type": "function_call_output", - "call_id": function_call["call_id"], - "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), - } - ], - "tools": [weather_tool], - "temperature": 0, - "max_output_tokens": 64, - "store": False, - }, + assert function_call.name == "get_weather" + + final_response = client.create( + [ + { + "type": "function_call_output", + "call_id": function_call.call_id, + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + previous_response_id=tool_response.id, + tools=[weather_tool], + temperature=0, + max_output_tokens=64, + store=False, ) - assert final_response["status"] == "completed" - assert _response_text(final_response).strip() + assert final_response.status == "completed" + assert final_response.output_text.strip(), "Expected non-empty final assistant text" class TestResponsesVisionWebService: def test_should_create_response_with_image_url(self, responses_vision_web_service): - base_url, model_id = responses_vision_web_service - - response = _post_response( - base_url, - { - "model": model_id, - "input": [ - { - "type": "message", - "role": "user", - "content": [ - { - "type": "input_text", - "text": "Describe this image in one short sentence.", - }, - { - "type": "input_image", - "image_url": VISION_IMAGE_URL, - "media_type": "image/png", - "detail": "low", - }, - ], - } - ], - "temperature": 0, - "max_output_tokens": 128, - "store": False, - }, + client, model_id = responses_vision_web_service + + response = client.create( + [ + { + "type": "message", + "role": "user", + "content": [ + { + "type": "input_text", + "text": "Describe this image in one short sentence.", + }, + { + "type": "input_image", + "image_url": VISION_IMAGE_URL, + "media_type": "image/png", + "detail": "low", + }, + ], + } + ], + temperature=0, + max_output_tokens=128, + store=False, ) - assert response["object"] == "response" - assert response["status"] == "completed", response.get("error") - assert _response_text(response).strip() + assert response.status == "completed", response.error + assert response.output_text.strip(), "Expected non-empty vision response text" From 3bde49aeca1b0551f4f211a22ea95f46002371f8 Mon Sep 17 00:00:00 2001 From: maanavd Date: Tue, 5 May 2026 22:11:29 +0200 Subject: [PATCH 10/12] revert(python): restore responses web-service sample baseline Revert the ResponsesClient wrapper and image URL coverage commits to return the PR to the text-only web-service sample/test baseline from 0808187. The sample still uses the native openai Python SDK against the local /v1 web-service endpoint. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../python/web-server-responses/src/app.py | 70 +++-- sdk/python/src/__init__.py | 3 +- sdk/python/src/foundry_local_manager.py | 32 -- sdk/python/src/openai/__init__.py | 5 +- sdk/python/src/openai/responses_client.py | 267 ----------------- .../test/openai/test_responses_web_service.py | 281 ++++++++++-------- 6 files changed, 204 insertions(+), 454 deletions(-) delete mode 100644 sdk/python/src/openai/responses_client.py diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py index e58df3e4e..6f186a2a6 100644 --- a/samples/python/web-server-responses/src/app.py +++ b/samples/python/web-server-responses/src/app.py @@ -1,11 +1,25 @@ # # import json +from typing import Any + +from openai import OpenAI from foundry_local_sdk import Configuration, FoundryLocalManager # +def get_response_text(response: Any) -> str: + if isinstance(getattr(response, "output_text", None), str): + return response.output_text + return "".join( + getattr(part, "text", "") + for item in getattr(response, "output", []) or [] + for part in getattr(item, "content", []) or [] + if getattr(part, "type", None) == "output_text" + ) + + # # Initialize the Foundry Local SDK config = Configuration(app_name="foundry_local_samples") @@ -52,29 +66,36 @@ def _ep_progress(ep_name: str, percent: float): # print("\nStarting web service...") manager.start_web_service() +base_url = manager.urls[0].rstrip("/") + "/v1" print("Web service started") -# -# -# Create a Responses API client via the SDK manager — no manual URL or API key needed. -client = manager.create_responses_client(model.id) -# +# <<<<<< OPENAI SDK USAGE >>>>>> +# Use the OpenAI SDK to call the local Foundry web service Responses API +openai = OpenAI( + base_url=base_url, + api_key="notneeded", +) +# try: print("\nTesting a non-streaming Responses call...") - response = client.create("Reply with one short sentence about local AI.") - print(f"[ASSISTANT]: {response.output_text}") + response = openai.responses.create( + model=model.id, + input="Reply with one short sentence about local AI.", + ) + print(f"[ASSISTANT]: {get_response_text(response)}") print("\nTesting a streaming Responses call...") - print("[ASSISTANT STREAM]: ", end="", flush=True) - client.create_streaming( - "Count from one to three.", - callback=lambda event: print( - getattr(event, "delta", ""), - end="", - flush=True, - ) if getattr(event, "type", None) == "response.output_text.delta" else None, + stream = openai.responses.create( + model=model.id, + input="Count from one to three.", + stream=True, ) + + print("[ASSISTANT STREAM]: ", end="", flush=True) + for event in stream: + if getattr(event, "type", None) == "response.output_text.delta": + print(getattr(event, "delta", ""), end="", flush=True) print() print("\nTesting Responses tool calling...") @@ -91,15 +112,16 @@ def _ep_progress(ep_name: str, percent: float): }, ] - tool_response = client.create( - "Use the get_weather tool and then answer with the weather.", + tool_response = openai.responses.create( + model=model.id, + input="Use the get_weather tool and then answer with the weather.", tools=tools, tool_choice="required", store=True, ) function_call = next( - (item for item in tool_response.output if item.type == "function_call"), + (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"), None, ) if function_call is None: @@ -107,22 +129,24 @@ def 
_ep_progress(ep_name: str, percent: float): print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})") - final_response = client.create( - [ + final_response = openai.responses.create( + model=model.id, + previous_response_id=tool_response.id, + input=[ { "type": "function_call_output", "call_id": function_call.call_id, "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), } ], - previous_response_id=tool_response.id, tools=tools, ) - print(f"[ASSISTANT FINAL]: {final_response.output_text}") + print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}") + # <<<<<< END OPENAI SDK USAGE >>>>>> finally: # Tidy up - client.close() + openai.close() manager.stop_web_service() model.unload() # diff --git a/sdk/python/src/__init__.py b/sdk/python/src/__init__.py index 9fecca997..14534d196 100644 --- a/sdk/python/src/__init__.py +++ b/sdk/python/src/__init__.py @@ -7,7 +7,6 @@ from .configuration import Configuration from .foundry_local_manager import FoundryLocalManager -from .openai.responses_client import ResponsesClient, ResponsesClientSettings from .version import __version__ _logger = logging.getLogger(__name__) @@ -21,4 +20,4 @@ _logger.addHandler(_sc) _logger.propagate = False -__all__ = ["Configuration", "FoundryLocalManager", "ResponsesClient", "ResponsesClientSettings", "__version__"] +__all__ = ["Configuration", "FoundryLocalManager", "__version__"] diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py index 902db6e18..a649f8e56 100644 --- a/sdk/python/src/foundry_local_manager.py +++ b/sdk/python/src/foundry_local_manager.py @@ -20,7 +20,6 @@ from .detail.core_interop import CoreInterop, InteropRequest from .detail.model_load_manager import ModelLoadManager from .exception import FoundryLocalException -from .openai.responses_client import ResponsesClient logger = logging.getLogger(__name__) @@ -195,34 +194,3 @@ def stop_web_service(self): raise FoundryLocalException(f"Error stopping web service: {response.error}") self.urls = None - - def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient: - """Create a :class:`ResponsesClient` for the Responses API. - - The web service must be running before calling this method. Start it - with :meth:`start_web_service` first. - - Args: - model_id: Default model ID for requests. Can be overridden - per-request via the ``model`` keyword argument on - :meth:`~ResponsesClient.create`. - - Returns: - A :class:`ResponsesClient` pointed at the running web service. - - Raises: - FoundryLocalException: If the web service is not running. - - Example:: - - manager.start_web_service() - client = manager.create_responses_client(model.id) - response = client.create("What is 2 + 2?") - print(response.output_text) - """ - if not self.urls: - raise FoundryLocalException( - "Web service is not running. Call start_web_service() before " - "creating a ResponsesClient." - ) - return ResponsesClient(self.urls[0], model_id) diff --git a/sdk/python/src/openai/__init__.py b/sdk/python/src/openai/__init__.py index e7016799a..2fa51a6f6 100644 --- a/sdk/python/src/openai/__init__.py +++ b/sdk/python/src/openai/__init__.py @@ -2,7 +2,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
# -------------------------------------------------------------------------- -"""OpenAI-compatible clients for chat completions, audio transcription, and responses.""" +"""OpenAI-compatible clients for chat completions and audio transcription.""" from .chat_client import ChatClient, ChatClientSettings from .audio_client import AudioClient @@ -14,7 +14,6 @@ LiveAudioTranscriptionResponse, TranscriptionContentPart, ) -from .responses_client import ResponsesClient, ResponsesClientSettings __all__ = [ "AudioClient", @@ -25,7 +24,5 @@ "LiveAudioTranscriptionOptions", "LiveAudioTranscriptionResponse", "LiveAudioTranscriptionSession", - "ResponsesClient", - "ResponsesClientSettings", "TranscriptionContentPart", ] diff --git a/sdk/python/src/openai/responses_client.py b/sdk/python/src/openai/responses_client.py deleted file mode 100644 index 55121e320..000000000 --- a/sdk/python/src/openai/responses_client.py +++ /dev/null @@ -1,267 +0,0 @@ -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -# -------------------------------------------------------------------------- -"""Responses API client for Foundry Local's embedded web service. - -Uses the native ``openai`` SDK to call the Responses API on Foundry Local's -OpenAI-compatible web service. Create via -``FoundryLocalManager.create_responses_client()`` or -``model.create_responses_client(base_url)``. - -Example:: - - manager.start_web_service() - client = manager.create_responses_client(model.id) - - # Non-streaming - response = client.create("Hello, world!") - print(response.output_text) - - # Streaming - client.create_streaming("Tell me a story", lambda event: print(event)) -""" - -from __future__ import annotations - -import logging -from typing import Any, Callable, Iterator, Optional, Union - -from openai import OpenAI - -logger = logging.getLogger(__name__) - - -class ResponsesClientSettings: - """Default settings applied to every request made by a :class:`ResponsesClient`. - - Per-call keyword arguments passed to :meth:`ResponsesClient.create` override - these defaults. Attribute names match the OpenAI Responses API parameters - (snake_case). 
-    """
-
-    def __init__(
-        self,
-        instructions: Optional[str] = None,
-        temperature: Optional[float] = None,
-        top_p: Optional[float] = None,
-        max_output_tokens: Optional[int] = None,
-        frequency_penalty: Optional[float] = None,
-        presence_penalty: Optional[float] = None,
-        tool_choice: Optional[Any] = None,
-        truncation: Optional[str] = None,
-        parallel_tool_calls: Optional[bool] = None,
-        store: Optional[bool] = None,
-        seed: Optional[int] = None,
-    ):
-        self.instructions = instructions
-        self.temperature = temperature
-        self.top_p = top_p
-        self.max_output_tokens = max_output_tokens
-        self.frequency_penalty = frequency_penalty
-        self.presence_penalty = presence_penalty
-        self.tool_choice = tool_choice
-        self.truncation = truncation
-        self.parallel_tool_calls = parallel_tool_calls
-        self.store = store
-        self.seed = seed
-
-    def _as_kwargs(self) -> dict[str, Any]:
-        """Return non-None settings as keyword arguments for the openai SDK."""
-        return {
-            k: v for k, v in {
-                "instructions": self.instructions,
-                "temperature": self.temperature,
-                "top_p": self.top_p,
-                "max_output_tokens": self.max_output_tokens,
-                "frequency_penalty": self.frequency_penalty,
-                "presence_penalty": self.presence_penalty,
-                "tool_choice": self.tool_choice,
-                "truncation": self.truncation,
-                "parallel_tool_calls": self.parallel_tool_calls,
-                "store": self.store,
-                "seed": self.seed,
-            }.items() if v is not None
-        }
-
-
-class ResponsesClient:
-    """Client for the OpenAI Responses API served by Foundry Local.
-
-    Backed by the native ``openai`` SDK pointed at the local web service.
-    Create via :meth:`FoundryLocalManager.create_responses_client` or
-    :meth:`model.create_responses_client`.
-
-    Args:
-        base_url: Base URL of the Foundry Local web service (e.g.
-            ``"http://127.0.0.1:5273"``). Do **not** include ``/v1`` — it is
-            appended automatically. Trailing slashes are stripped.
-        model_id: Default model ID. Can be overridden per-request via the
-            ``model`` keyword argument to :meth:`create`.
-    """
-
-    def __init__(self, base_url: str, model_id: Optional[str] = None):
-        if not base_url or not isinstance(base_url, str) or not base_url.strip():
-            raise ValueError("base_url must be a non-empty string.")
-        openai_base = base_url.rstrip("/") + "/v1"
-        self._client = OpenAI(base_url=openai_base, api_key="notneeded")
-        self._model_id = model_id
-        self.settings = ResponsesClientSettings()
-
-    # =========================================================================
-    # Public API
-    # =========================================================================
-
-    def create(self, input: Union[str, list], **options: Any) -> Any:  # noqa: A002
-        """Create a model response (non-streaming).
-
-        Args:
-            input: A string prompt or a list of Responses API input items.
-                Each dict item must have a ``"type"`` field (e.g.
-                ``{"type": "message", "role": "user", "content": [...]}``).
-            **options: Additional parameters forwarded to
-                ``openai.responses.create``. Pass ``model="..."`` to override
-                the constructor default.
-
-        Returns:
-            An ``openai.types.responses.Response`` object. Use
-            ``.output_text`` for the assistant text, ``.output`` for the full
-            item list, and ``.id`` for chaining with ``previous_response_id``.
-
-        Raises:
-            ValueError: If ``input`` is invalid or no model is specified.
-            openai.OpenAIError: On API or network errors.
- """ - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - return self._client.responses.create(model=model, input=input, **kwargs) - - def create_streaming( - self, - input: Union[str, list], # noqa: A002 - callback: Callable[[Any], None], - **options: Any, - ) -> None: - """Create a model response with streaming. - - Each event object from the openai stream is delivered to *callback*. - - Args: - input: A string prompt or a list of Responses API input items. - callback: Called for each streaming event. Events are typed - ``openai`` SDK objects with a ``.type`` attribute. - **options: Additional parameters forwarded to - ``openai.responses.create``. - - Raises: - ValueError: If ``input`` is invalid or *callback* is not callable. - openai.OpenAIError: On API or network errors. - """ - if not callable(callback): - raise ValueError("callback must be a callable.") - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: - for event in stream: - callback(event) - - def stream(self, input: Union[str, list], **options: Any) -> Iterator[Any]: # noqa: A002 - """Create a model response and return an iterator of streaming events. - - This is a generator-style alternative to :meth:`create_streaming` that - yields each event instead of using a callback. - - Args: - input: A string prompt or a list of Responses API input items. - **options: Additional parameters forwarded to - ``openai.responses.create``. - - Yields: - Streaming event objects from the openai SDK. - - Raises: - ValueError: If no model is specified. - openai.OpenAIError: On API or network errors. - """ - model = options.pop("model", None) or self._model_id - self._require_model(model) - kwargs = {**self.settings._as_kwargs(), **options} - with self._client.responses.create(model=model, input=input, stream=True, **kwargs) as stream: - yield from stream - - def get(self, response_id: str) -> Any: - """Retrieve a stored response by ID. - - Args: - response_id: The ID of the response to retrieve. - - Returns: - An ``openai.types.responses.Response`` object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.retrieve(response_id) - - def delete(self, response_id: str) -> Any: - """Delete a stored response by ID. - - Args: - response_id: The ID of the response to delete. - - Returns: - The deletion result object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.delete(response_id) - - def cancel(self, response_id: str) -> Any: - """Cancel an in-progress response. - - Args: - response_id: The ID of the response to cancel. - - Returns: - The cancelled ``openai.types.responses.Response`` object. - """ - self._validate_id(response_id, "response_id") - return self._client.responses.cancel(response_id) - - def get_input_items(self, response_id: str) -> Any: - """Retrieve the input items for a stored response. - - Args: - response_id: The ID of the response. - - Returns: - A paginated list of input items. 
- """ - self._validate_id(response_id, "response_id") - return self._client.responses.input_items.list(response_id) - - def close(self) -> None: - """Close the underlying OpenAI HTTP client and release resources.""" - self._client.close() - - def __enter__(self) -> "ResponsesClient": - return self - - def __exit__(self, *args: Any) -> None: - self.close() - - # ========================================================================= - # Internal helpers - # ========================================================================= - - def _require_model(self, model: Optional[str]) -> None: - if not model or not isinstance(model, str) or not model.strip(): - raise ValueError( - "model must be specified either in the constructor via " - "create_responses_client(model_id) or as an options keyword argument." - ) - - def _validate_id(self, value: Any, param: str) -> None: - if not isinstance(value, str) or not value.strip(): - raise ValueError(f"{param} must be a non-empty string.") - if len(value) > 1024: - raise ValueError(f"{param} exceeds the maximum length of 1024 characters.") diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py index 7683fc768..e323a892e 100644 --- a/sdk/python/test/openai/test_responses_web_service.py +++ b/sdk/python/test/openai/test_responses_web_service.py @@ -4,9 +4,9 @@ # -------------------------------------------------------------------------- """Integration tests for /v1/responses through the local web service. -These tests use FoundryLocalManager for SDK setup, model lifecycle, and web-service -lifecycle. Actual Responses API calls go through ResponsesClient, which is backed -by the native openai SDK pointed at the local web service. +These tests intentionally use FoundryLocalManager only for SDK setup, model +lifecycle, and web-service lifecycle. Actual Responses API calls go through the +OpenAI-compatible HTTP endpoint directly. 
""" from __future__ import annotations @@ -14,19 +14,92 @@ import json import pytest - -from foundry_local_sdk import FoundryLocalManager -from foundry_local_sdk.openai import ResponsesClient +import requests from ..conftest import TEST_MODEL_ALIAS, skip_in_ci pytestmark = skip_in_ci -VISION_MODEL_ALIAS = "qwen3-vl-2b-instruct" -VISION_IMAGE_URL = ( - "https://raw.githubusercontent.com/microsoft/fluentui-emoji/main/assets/Camera/3D/camera_3d.png" -) + +def _response_text(response: dict) -> str: + text = response.get("output_text") + if isinstance(text, str) and text: + return text + + return "".join( + part.get("text", "") + for item in response.get("output", []) or [] + if item.get("type") == "message" + for part in item.get("content", []) or [] + if part.get("type") == "output_text" and isinstance(part.get("text"), str) + ) + + +def _post_response(base_url: str, body: dict) -> dict: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json"}, + json=body, + timeout=60, + ) + assert response.ok, response.text + return response.json() + + +def _post_streaming_response(base_url: str, body: dict) -> list[dict]: + response = requests.post( + f"{base_url}/v1/responses", + headers={"Content-Type": "application/json", "Accept": "text/event-stream"}, + json={**body, "stream": True}, + stream=True, + timeout=(60, None), + ) + assert response.ok, response.text + + events: list[dict] = [] + buffer = "" + try: + for chunk in response.iter_content(chunk_size=None, decode_unicode=False): + if not chunk: + continue + text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk + buffer += text.replace("\r\n", "\n") + + while "\n\n" in buffer: + block, buffer = buffer.split("\n\n", 1) + data = _sse_data(block) + if not data: + continue + if data == "[DONE]": + return events + events.append(json.loads(data)) + finally: + response.close() + + tail = buffer.strip() + if tail: + data = _sse_data(tail) + if data and data != "[DONE]": + events.append(json.loads(data)) + return events + + +def _sse_data(block: str) -> str: + lines: list[str] = [] + for line in block.strip().split("\n"): + if line.startswith("data: "): + lines.append(line[6:]) + elif line == "data:": + lines.append("") + return "\n".join(lines).strip() + + +def _get_function_call(response: dict) -> dict | None: + for item in response.get("output", []) or []: + if item.get("type") == "function_call": + return item + return None def _get_weather_tool() -> dict: @@ -42,24 +115,20 @@ def _get_weather_tool() -> dict: } -def _get_cached_model(catalog, model_alias: str): +@pytest.fixture(scope="module") +def responses_web_service(manager, catalog): cached = catalog.get_cached_models() - cached_variant = next((m for m in cached if m.alias == model_alias), None) + cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) if cached_variant is None: - pytest.skip(f"{model_alias} must be cached to run Responses web-service tests") + pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests") - model = catalog.get_model(model_alias) + model = catalog.get_model(TEST_MODEL_ALIAS) if model is None: - pytest.skip(f"{model_alias} was not found in the catalog") + pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog") model.select_variant(cached_variant) - return model - - -def _run_responses_web_service(manager: FoundryLocalManager, model): service_started = False model_loaded = False - client: ResponsesClient | None = None try: try: @@ 
-79,11 +148,8 @@ def _run_responses_web_service(manager: FoundryLocalManager, model): if not manager.urls: pytest.skip("Web service started but did not return any URLs") - client = manager.create_responses_client(model.id) - yield client, model.id + yield manager.urls[0].rstrip("/"), model.id finally: - if client is not None: - client.close() if service_started: try: manager.stop_web_service() @@ -96,120 +162,83 @@ def _run_responses_web_service(manager: FoundryLocalManager, model): pass -@pytest.fixture(scope="class") -def responses_web_service(manager, catalog): - model = _get_cached_model(catalog, TEST_MODEL_ALIAS) - yield from _run_responses_web_service(manager, model) - - -@pytest.fixture(scope="class") -def responses_vision_web_service(manager, catalog): - model = _get_cached_model(catalog, VISION_MODEL_ALIAS) - input_modalities = model.input_modalities or "" - if "image" not in input_modalities.split(","): - pytest.skip(f"{VISION_MODEL_ALIAS} does not advertise image input support") - - yield from _run_responses_web_service(manager, model) - - class TestResponsesWebService: def test_should_create_non_streaming_response(self, responses_web_service): - client, model_id = responses_web_service - - response = client.create( - "What is 2 + 2? Answer with just the number.", - temperature=0, - max_output_tokens=64, - store=False, + base_url, model_id = responses_web_service + + response = _post_response( + base_url, + { + "model": model_id, + "input": "What is 2 + 2? Answer with just the number.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert response.status == "completed", response.error - assert response.output_text.strip(), "Expected non-empty assistant text" + assert response["object"] == "response" + assert response["status"] == "completed" + assert _response_text(response).strip() def test_should_stream_response_events(self, responses_web_service): - client, model_id = responses_web_service - - event_types: list[str] = [] - client.create_streaming( - "Count from 1 to 3.", - callback=lambda e: event_types.append(getattr(e, "type", "")), - temperature=0, - max_output_tokens=64, - store=False, + base_url, model_id = responses_web_service + + events = _post_streaming_response( + base_url, + { + "model": model_id, + "input": "Count from 1 to 3.", + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert "response.created" in event_types, f"Events seen: {event_types}" - assert "response.output_text.delta" in event_types, f"Events seen: {event_types}" - assert "response.completed" in event_types, f"Events seen: {event_types}" + assert any(event.get("type") == "response.created" for event in events) + assert any(event.get("type") == "response.output_text.delta" for event in events) + assert any(event.get("type") == "response.completed" for event in events) def test_should_round_trip_function_call_output(self, responses_web_service): - client, model_id = responses_web_service + base_url, model_id = responses_web_service weather_tool = _get_weather_tool() - tool_response = client.create( - "Use the get_weather tool and then answer with the weather.", - tools=[weather_tool], - tool_choice="required", - temperature=0, - max_output_tokens=64, - store=True, - ) - - function_call = next( - (item for item in tool_response.output if item.type == "function_call"), - None, + tool_response = _post_response( + base_url, + { + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": [weather_tool], + 
"tool_choice": "required", + "temperature": 0, + "max_output_tokens": 64, + "store": True, + }, ) - assert function_call is not None, ( - f"Expected a function_call item. Output: {tool_response.output}" - ) - assert function_call.name == "get_weather" - - final_response = client.create( - [ - { - "type": "function_call_output", - "call_id": function_call.call_id, - "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), - } - ], - previous_response_id=tool_response.id, - tools=[weather_tool], - temperature=0, - max_output_tokens=64, - store=False, - ) - - assert final_response.status == "completed" - assert final_response.output_text.strip(), "Expected non-empty final assistant text" - - -class TestResponsesVisionWebService: - def test_should_create_response_with_image_url(self, responses_vision_web_service): - client, model_id = responses_vision_web_service - - response = client.create( - [ - { - "type": "message", - "role": "user", - "content": [ - { - "type": "input_text", - "text": "Describe this image in one short sentence.", - }, - { - "type": "input_image", - "image_url": VISION_IMAGE_URL, - "media_type": "image/png", - "detail": "low", - }, - ], - } - ], - temperature=0, - max_output_tokens=128, - store=False, + function_call = _get_function_call(tool_response) + + assert function_call is not None, json.dumps(tool_response.get("output", [])) + assert function_call["name"] == "get_weather" + assert isinstance(function_call["call_id"], str) + + final_response = _post_response( + base_url, + { + "model": model_id, + "previous_response_id": tool_response["id"], + "input": [ + { + "type": "function_call_output", + "call_id": function_call["call_id"], + "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}), + } + ], + "tools": [weather_tool], + "temperature": 0, + "max_output_tokens": 64, + "store": False, + }, ) - assert response.status == "completed", response.error - assert response.output_text.strip(), "Expected non-empty vision response text" + assert final_response["status"] == "completed" + assert _response_text(final_response).strip() From 84861db45f98d1265d0541f4eaf5e27836dd3931 Mon Sep 17 00:00:00 2001 From: Akshay Sonawane Date: Tue, 5 May 2026 17:14:40 -0700 Subject: [PATCH 11/12] Add ResponseAPI vision sample --- .../web-server-responses-vision/README.md | 53 ++++++++ .../requirements.txt | 3 + .../web-server-responses-vision/src/app.py | 115 ++++++++++++++++++ 3 files changed, 171 insertions(+) create mode 100644 samples/python/web-server-responses-vision/README.md create mode 100644 samples/python/web-server-responses-vision/requirements.txt create mode 100644 samples/python/web-server-responses-vision/src/app.py diff --git a/samples/python/web-server-responses-vision/README.md b/samples/python/web-server-responses-vision/README.md new file mode 100644 index 000000000..fc7fff702 --- /dev/null +++ b/samples/python/web-server-responses-vision/README.md @@ -0,0 +1,53 @@ +# Foundry Local Python Vision Sample (Responses API) + +This sample demonstrates vision (image understanding) capabilities using the Foundry Local web service and the OpenAI Responses API. 
+
+It demonstrates:
+
+- Streaming a vision response with a local image via the Responses API
+- Streaming a text-only response (when no image is provided)
+
+## What gets installed
+
+Install the sample dependencies from `requirements.txt`:
+
+```bash
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk`
+- `openai`
+- `Pillow` (for image resizing)
+
+The sample downloads the specified model the first time it runs (skips if already cached).
+
+## Run the sample
+
+From this directory:
+
+```bash
+python -m venv .venv
+.\.venv\Scripts\activate
+pip install -r requirements.txt
+python src\app.py <model_alias> [image_path]
+```
+
+Examples:
+
+```bash
+# Vision with an image
+python src\app.py qwen3.5-0.8b path\to\image.jpg
+
+# Text only
+python src\app.py qwen3.5-0.8b
+```
+
+On macOS or Linux, activate the virtual environment with:
+
+```bash
+source .venv/bin/activate
+```
+
+The sample starts the local web service, sends vision requests via the Responses API to `http://localhost:<port>/v1`, prints the model output, and then stops the web service.
diff --git a/samples/python/web-server-responses-vision/requirements.txt b/samples/python/web-server-responses-vision/requirements.txt
new file mode 100644
index 000000000..d948ff7bb
--- /dev/null
+++ b/samples/python/web-server-responses-vision/requirements.txt
@@ -0,0 +1,3 @@
+foundry-local-sdk
+openai
+Pillow
diff --git a/samples/python/web-server-responses-vision/src/app.py b/samples/python/web-server-responses-vision/src/app.py
new file mode 100644
index 000000000..02a70889e
--- /dev/null
+++ b/samples/python/web-server-responses-vision/src/app.py
@@ -0,0 +1,115 @@
+#
+#
+import base64
+import io
+import sys
+
+from PIL import Image
+from openai import OpenAI
+
+from foundry_local_sdk import Configuration, FoundryLocalManager
+#
+
+if len(sys.argv) < 2:
+    print("Usage: python src/app.py <model_alias> [image_path]")
+    print(" Example: python src/app.py qwen3.5-0.8b path/to/image.jpg")
+    print(" Text only: python src/app.py qwen3.5-0.8b")
+    sys.exit(1)
+
+model_alias = sys.argv[1]
+image_path = sys.argv[2] if len(sys.argv) > 2 else None
+
+
+def resize_and_encode(path, max_dim=512):
+    """Load and resize a local image, returning (base64_str, media_type)."""
+    img = Image.open(path)
+    if max(img.size) > max_dim:
+        img.thumbnail((max_dim, max_dim))
+        print(f" (resized to {img.size[0]}x{img.size[1]})")
+    if img.mode != "RGB":
+        img = img.convert("RGB")  # JPEG cannot encode alpha or palette modes
+    buf = io.BytesIO()
+    img.save(buf, format="JPEG")
+    return base64.b64encode(buf.getvalue()).decode(), "image/jpeg"
+
+
+#
+config = Configuration(app_name="foundry_local_samples")
+FoundryLocalManager.initialize(config)
+manager = FoundryLocalManager.instance
+#
+
+#
+model = manager.catalog.get_model(model_alias)
+if model is None:
+    available = [m.alias for m in manager.catalog.list_models()]
+    print(f"\nModel '{model_alias}' not found in catalog.")
+    print(f"Available models: {available}")
+    sys.exit(1)
+
+if not model.is_cached:
+    print(f"\nDownloading model {model_alias}...")
+    model.download(
+        lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True)
+    )
+    print("\nModel downloaded")
+
+print("\nLoading model...")
+model.load()
+print("Model loaded")
+#
+
+#
+print("\nStarting web service...")
+manager.start_web_service()
+base_url = manager.urls[0].rstrip("/") + "/v1"
+print("Web service started")
+
+# <<<<<< OPENAI SDK USAGE >>>>>>
+# Use the OpenAI SDK to call the local Foundry web service Responses API
+openai = OpenAI(base_url=base_url, api_key="notneeded")
+#
+
+#
+if image_path:
+    print(f"\nPreparing image: {image_path}")
+    image_b64, media_type = resize_and_encode(image_path)
+
+    vision_input = [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [
+                {"type": "input_text", "text": "Describe this image."},
+                {
+                    "type": "input_image",
+                    "image_data": image_b64,
+                    "media_type": media_type,
+                },
+            ],
+        }
+    ]
+
+    print("\nStreaming vision response...")
+    # The SDK requires an "input" argument; the real vision input is sent via extra_body.
+    stream = openai.responses.create(
+        model=model.id,
+        input="placeholder",
+        extra_body={"input": vision_input},
+        stream=True,
+    )
+else:
+    print("\nStreaming text response...")
+    stream = openai.responses.create(
+        model=model.id,
+        input="Reply with one short sentence about local AI.",
+        stream=True,
+    )
+
+print("[ASSISTANT]: ", end="", flush=True)
+for event in stream:
+    if getattr(event, "type", None) == "response.output_text.delta":
+        print(getattr(event, "delta", ""), end="", flush=True)
+print()
+#
+
+openai.close()
+manager.stop_web_service()
+model.unload()

From 6d73032772caaf21baf53663f99ea81104825319 Mon Sep 17 00:00:00 2001
From: Akshay Sonawane
Date: Tue, 5 May 2026 17:24:48 -0700
Subject: [PATCH 12/12] Add default image

---
 .../web-server-responses-vision/README.md     | 16 ++---
 .../web-server-responses-vision/src/app.py    | 66 ++++++++----------
 .../src/test_image.jpg                        | Bin 0 -> 6828 bytes
 3 files changed, 33 insertions(+), 49 deletions(-)
 create mode 100644 samples/python/web-server-responses-vision/src/test_image.jpg

diff --git a/samples/python/web-server-responses-vision/README.md b/samples/python/web-server-responses-vision/README.md
index fc7fff702..75e16950a 100644
--- a/samples/python/web-server-responses-vision/README.md
+++ b/samples/python/web-server-responses-vision/README.md
@@ -4,8 +4,8 @@ This sample demonstrates vision (image understanding) capabilities using the Fou
 
 It demonstrates:
 
-- Streaming a vision response with a local image via the Responses API
-- Streaming a text-only response (when no image is provided)
+- Streaming a vision response via the Responses API
+- Using a default test image (`src/test_image.jpg`) when no image path is provided
 
 ## What gets installed
 
@@ -31,19 +31,11 @@ From this directory:
 python -m venv .venv
 .\.venv\Scripts\activate
 pip install -r requirements.txt
-python src\app.py <model_alias> [image_path]
-```
-
-Examples:
-
-```bash
-# Vision with an image
-python src\app.py qwen3.5-0.8b path\to\image.jpg
-
-# Text only
 python src\app.py qwen3.5-0.8b
 ```
 
+You can also pass a custom image path as the second argument.
+
 
 On macOS or Linux, activate the virtual environment with:
 
 ```bash
diff --git a/samples/python/web-server-responses-vision/src/app.py b/samples/python/web-server-responses-vision/src/app.py
index 02a70889e..d77170a89 100644
--- a/samples/python/web-server-responses-vision/src/app.py
+++ b/samples/python/web-server-responses-vision/src/app.py
@@ -9,16 +9,16 @@
 
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
+import os
 
 if len(sys.argv) < 2:
     print("Usage: python src/app.py <model_alias> [image_path]")
-    print(" Example: python src/app.py qwen3.5-0.8b path/to/image.jpg")
-    print(" Text only: python src/app.py qwen3.5-0.8b")
+    print(" Example: python src/app.py qwen3.5-0.8b")
     sys.exit(1)
 
 model_alias = sys.argv[1]
-image_path = sys.argv[2] if len(sys.argv) > 2 else None
-
+default_image = os.path.join(os.path.dirname(__file__), "test_image.jpg")
+image_path = sys.argv[2] if len(sys.argv) > 2 else default_image
 
 def resize_and_encode(path, max_dim=512):
     """Load and resize a local image, returning (base64_str, media_type)."""
@@ -69,39 +69,31 @@ def resize_and_encode(path, max_dim=512):
 #
 
 #
-if image_path:
-    print(f"\nPreparing image: {image_path}")
-    image_b64, media_type = resize_and_encode(image_path)
-
-    vision_input = [
-        {
-            "type": "message",
-            "role": "user",
-            "content": [
-                {"type": "input_text", "text": "Describe this image."},
-                {
-                    "type": "input_image",
-                    "image_data": image_b64,
-                    "media_type": media_type,
-                },
-            ],
-        }
-    ]
-
-    print("\nStreaming vision response...")
-    # The SDK requires an "input" argument; the real vision input is sent via extra_body.
-    stream = openai.responses.create(
-        model=model.id,
-        input="placeholder",
-        extra_body={"input": vision_input},
-        stream=True,
-    )
-else:
-    print("\nStreaming text response...")
-    stream = openai.responses.create(
-        model=model.id,
-        input="Reply with one short sentence about local AI.",
-        stream=True,
-    )
+print(f"\nPreparing image: {image_path}")
+image_b64, media_type = resize_and_encode(image_path)
+
+vision_input = [
+    {
+        "type": "message",
+        "role": "user",
+        "content": [
+            {"type": "input_text", "text": "Describe this image."},
+            {
+                "type": "input_image",
+                "image_data": image_b64,
+                "media_type": media_type,
+            },
+        ],
+    }
+]
+
+print("\nStreaming vision response...")
+# The SDK requires an "input" argument; the real vision input is sent via extra_body.
+stream = openai.responses.create(
+    model=model.id,
+    input="placeholder",
+    extra_body={"input": vision_input},
+    stream=True,
+)
 
 print("[ASSISTANT]: ", end="", flush=True)
 for event in stream:
diff --git a/samples/python/web-server-responses-vision/src/test_image.jpg b/samples/python/web-server-responses-vision/src/test_image.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..73a4e8004db0fd82a2913bd14ad8b97672097ac5
GIT binary patch
literal 6828
[6828 bytes of base64-encoded JPEG data omitted]