From e1ad530e12d40e0a705d28e27000efa5896ff2f4 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 23 Apr 2026 12:54:34 -0400
Subject: [PATCH 01/10] feat(sdk/js): add vision support, list(), and missing
 types to Responses API client

- Add InputImageContent and InputFileContent content part types
- Expand ContentPart union to include new types
- Add 7 new reasoning/annotation streaming event interfaces and expand StreamingEvent union
- Add ListResponsesResult type
- Add list() method to ResponsesClient (GET /v1/responses)
- Default store to true in ResponsesClientSettings._serialize()
- Add vision.ts with createImageContentFromFile and createImageContentFromUrl helpers
- Export vision helpers from index.ts
- Add unit tests: vision helpers, list(), reasoning event types, store default
- Add integration tests for list() and vision (skipped when addon unavailable)
- Add IS_NATIVE_ADDON_AVAILABLE guard to testUtils to skip integration tests gracefully
- Update responses.ts example with list() and vision examples

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/examples/responses.ts               |  34 +++-
 sdk/js/src/index.ts                        |   1 +
 sdk/js/src/openai/responsesClient.ts       |  12 +-
 sdk/js/src/openai/vision.ts                |  63 ++++++
 sdk/js/src/types.ts                        |  79 ++++++-
 sdk/js/test/openai/responsesClient.test.ts | 226 ++++++++++++++++++++-
 sdk/js/test/testUtils.ts                   |  17 ++
 7 files changed, 424 insertions(+), 8 deletions(-)
 create mode 100644 sdk/js/src/openai/vision.ts

diff --git a/sdk/js/examples/responses.ts b/sdk/js/examples/responses.ts
index fa8a6d937..26a639cd7 100644
--- a/sdk/js/examples/responses.ts
+++ b/sdk/js/examples/responses.ts
@@ -3,8 +3,9 @@
 // Licensed under the MIT License.
 // -------------------------------------------------------------------------
 
-import { FoundryLocalManager, getOutputText } from '../src/index.js';
-import type { StreamingEvent, FunctionToolDefinition, FunctionCallItem } from '../src/types.js';
+import * as fs from 'fs';
+import { FoundryLocalManager, getOutputText, createImageContentFromFile } from '../src/index.js';
+import type { StreamingEvent, FunctionToolDefinition, FunctionCallItem, MessageItem } from '../src/types.js';
 
 async function main() {
     try {
@@ -121,6 +122,35 @@ async function main() {
         const deleted = await client.delete(stored.id);
         console.log(`Deleted: ${deleted.deleted}`);
 
+        // =================================================================
+        // Example 6: List all stored responses
+        // =================================================================
+        console.log('\n--- Example 6: List stored responses ---');
+        const allResponses = await client.list();
+        console.log(`Listed ${allResponses.data.length} stored responses`);
+
+        // =================================================================
+        // Example 7: Vision — describe an image
+        // =================================================================
+        console.log('\n--- Example 7: Vision ---');
+        const testImagePath = 'path/to/test-image.png'; // Replace with a real image path
+        if (fs.existsSync(testImagePath)) {
+            const imageContent = createImageContentFromFile(testImagePath);
+            const visionResponse = await client.create([
+                {
+                    type: 'message',
+                    role: 'user',
+                    content: [
+                        { type: 'input_text', text: 'Describe this image in one sentence.' },
+                        imageContent,
+                    ],
+                } as MessageItem,
+            ]);
+            console.log(`Vision: ${getOutputText(visionResponse)}`);
+        } else {
+            console.log('(Skipped: test image not found)');
+        }
+
         // Cleanup
         manager.stopWebService();
         await model.unload();
diff --git a/sdk/js/src/index.ts b/sdk/js/src/index.ts
index bc27293bb..c2fb72169 100644
--- a/sdk/js/src/index.ts
+++ b/sdk/js/src/index.ts
@@ -12,6 +12,7 @@ export { EmbeddingClient } from './openai/embeddingClient.js';
 export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
 export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
 export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
+export { createImageContentFromFile, createImageContentFromUrl } from './openai/vision.js';
 export { ModelLoadManager } from './detail/modelLoadManager.js';
 /** @internal */
 export { CoreInterop } from './detail/coreInterop.js';
diff --git a/sdk/js/src/openai/responsesClient.ts b/sdk/js/src/openai/responsesClient.ts
index 711efb78d..64c2c8f81 100644
--- a/sdk/js/src/openai/responsesClient.ts
+++ b/sdk/js/src/openai/responsesClient.ts
@@ -9,6 +9,7 @@ import {
     StreamingEvent,
     InputItemsListResponse,
     DeleteResponseResult,
+    ListResponsesResult,
     ResponseInputItem,
     MessageItem,
     ContentPart,
@@ -76,7 +77,8 @@ export class ResponsesClientSettings {
             tool_choice: this.toolChoice,
             truncation: this.truncation,
             parallel_tool_calls: this.parallelToolCalls,
-            store: this.store,
+            // Default store to true when not explicitly set
+            store: this.store !== undefined ? this.store : true,
             metadata: this.metadata,
             reasoning: this.reasoning ? filterUndefined(this.reasoning) : undefined,
             text: this.text ? filterUndefined(this.text) : undefined,
@@ -275,6 +277,14 @@ export class ResponsesClient {
         );
     }
 
+    /**
+     * Lists all stored responses.
+     * @returns The list of Response objects.
+     */
+    public async list(): Promise<ListResponsesResult> {
+        return this.fetchJson<ListResponsesResult>('/v1/responses', { method: 'GET' });
+    }
+
     // ========================================================================
     // Internal helpers
     // ========================================================================
diff --git a/sdk/js/src/openai/vision.ts b/sdk/js/src/openai/vision.ts
new file mode 100644
index 000000000..ddf116e13
--- /dev/null
+++ b/sdk/js/src/openai/vision.ts
@@ -0,0 +1,63 @@
+// -------------------------------------------------------------------------
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// -------------------------------------------------------------------------
+
+import * as fs from 'fs';
+import * as path from 'path';
+import type { InputImageContent } from '../types.js';
+
+const MEDIA_TYPE_MAP: Record<string, string> = {
+    '.png': 'image/png',
+    '.jpg': 'image/jpeg',
+    '.jpeg': 'image/jpeg',
+    '.gif': 'image/gif',
+    '.webp': 'image/webp',
+};
+
+/**
+ * Creates an `InputImageContent` part by reading an image file from disk.
+ * The file is base64-encoded and embedded directly in the content part.
+ *
+ * @param filePath - Absolute or relative path to the image file.
+ * @param detail - Optional detail level hint for the model ('low' | 'high' | 'auto').
+ * @returns An `InputImageContent` object with base64-encoded image data.
+ * @throws If the file extension is not a supported image format.
+ */
+export function createImageContentFromFile(filePath: string, detail?: 'low' | 'high' | 'auto'): InputImageContent {
+    const ext = path.extname(filePath).toLowerCase();
+    const mediaType = MEDIA_TYPE_MAP[ext];
+    if (!mediaType) {
+        throw new Error(`Unsupported image format: ${ext}. Supported formats: ${Object.keys(MEDIA_TYPE_MAP).join(', ')}`);
+    }
+
+    const data = fs.readFileSync(filePath);
+    const content: InputImageContent = {
+        type: 'input_image',
+        image_data: data.toString('base64'),
+        media_type: mediaType,
+    };
+    if (detail !== undefined) {
+        content.detail = detail;
+    }
+    return content;
+}
+
+/**
+ * Creates an `InputImageContent` part from a URL.
+ *
+ * @param url - Public URL pointing to the image.
+ * @param detail - Optional detail level hint for the model ('low' | 'high' | 'auto').
+ * @returns An `InputImageContent` object with the image URL.
+ */
+export function createImageContentFromUrl(url: string, detail?: 'low' | 'high' | 'auto'): InputImageContent {
+    const content: InputImageContent = {
+        type: 'input_image',
+        image_url: url,
+        media_type: 'image/unknown', // server will detect from URL
+    };
+    if (detail !== undefined) {
+        content.detail = detail;
+    }
+    return content;
+}
diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts
index 521ae34b4..304cceef5 100644
--- a/sdk/js/src/types.ts
+++ b/sdk/js/src/types.ts
@@ -127,6 +127,20 @@ export interface InputTextContent {
     text: string;
 }
 
+export interface InputImageContent {
+    type: 'input_image';
+    image_url?: string;
+    image_data?: string;       // base64-encoded
+    media_type: string;        // e.g. "image/png"
+    detail?: 'low' | 'high' | 'auto';
+}
+
+export interface InputFileContent {
+    type: 'input_file';
+    filename: string;
+    file_url: string;
+}
+
 export interface OutputTextContent {
     type: 'output_text';
     text: string;
@@ -139,7 +153,7 @@ export interface RefusalContent {
     refusal: string;
 }
 
-export type ContentPart = InputTextContent | OutputTextContent | RefusalContent;
+export type ContentPart = InputTextContent | InputImageContent | InputFileContent | OutputTextContent | RefusalContent;
 
 export interface Annotation {
     type: string;
@@ -419,6 +433,55 @@ export interface FunctionCallArgsDoneEvent {
     sequence_number: number;
 }
 
+export interface ReasoningSummaryPartAddedEvent {
+    type: 'response.reasoning_summary_part.added';
+    item_id: string;
+    part: ContentPart;
+    sequence_number: number;
+}
+
+export interface ReasoningSummaryPartDoneEvent {
+    type: 'response.reasoning_summary_part.done';
+    item_id: string;
+    part: ContentPart;
+    sequence_number: number;
+}
+
+export interface ReasoningDeltaEvent {
+    type: 'response.reasoning.delta';
+    item_id: string;
+    delta: string;
+    sequence_number: number;
+}
+
+export interface ReasoningDoneEvent {
+    type: 'response.reasoning.done';
+    item_id: string;
+    text: string;
+    sequence_number: number;
+}
+
+export interface ReasoningSummaryTextDeltaEvent {
+    type: 'response.reasoning_summary_text.delta';
+    item_id: string;
+    delta: string;
+    sequence_number: number;
+}
+
+export interface ReasoningSummaryTextDoneEvent {
+    type: 'response.reasoning_summary_text.done';
+    item_id: string;
+    text: string;
+    sequence_number: number;
+}
+
+export interface OutputTextAnnotationAddedEvent {
+    type: 'response.output_text.annotation.added';
+    item_id: string;
+    annotation: Annotation;
+    sequence_number: number;
+}
+
 export interface StreamingErrorEvent {
     type: 'error';
     code?: string;
@@ -439,4 +502,18 @@ export type StreamingEvent =
     | RefusalDoneEvent
     | FunctionCallArgsDeltaEvent
     | FunctionCallArgsDoneEvent
+    | ReasoningSummaryPartAddedEvent
+    | ReasoningSummaryPartDoneEvent
+    | ReasoningDeltaEvent
+    | ReasoningDoneEvent
+    | ReasoningSummaryTextDeltaEvent
+    | ReasoningSummaryTextDoneEvent
+    | OutputTextAnnotationAddedEvent
     | StreamingErrorEvent;
+
+// --- List Responses ---
+
+export interface ListResponsesResult {
+    object: 'list';
+    data: ResponseObject[];
+}
diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts
index f0dbf4b03..936e69ab4 100644
--- a/sdk/js/test/openai/responsesClient.test.ts
+++ b/sdk/js/test/openai/responsesClient.test.ts
@@ -1,13 +1,26 @@
 import { describe, it, before, after } from 'mocha';
 import { expect } from 'chai';
-import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI, IS_NATIVE_ADDON_AVAILABLE } from '../testUtils.js';
 import { ResponsesClient, ResponsesClientSettings, getOutputText } from '../../src/openai/responsesClient.js';
+import { createImageContentFromFile, createImageContentFromUrl } from '../../src/openai/vision.js';
 import type {
     StreamingEvent,
     FunctionToolDefinition,
     ResponseInputItem,
     ResponseObject,
     MessageItem,
+    InputImageContent,
+    ReasoningDeltaEvent,
+    ReasoningDoneEvent,
+    ReasoningSummaryTextDeltaEvent,
+    ReasoningSummaryTextDoneEvent,
+    ReasoningSummaryPartAddedEvent,
+    ReasoningSummaryPartDoneEvent,
+    OutputTextAnnotationAddedEvent,
+    ListResponsesResult,
 } from '../../src/types.js';
 import { FoundryLocalManager } from '../../src/foundryLocalManager.js';
 import type { IModel } from '../../src/imodel.js';
@@ -64,10 +77,11 @@ describe('ResponsesClient Tests', () => {
             expect(result.seed).to.equal(42);
         });
 
-        it('should return empty object when no settings defined', () => {
+        it('should serialize store as true by default when no settings defined', () => {
             const settings = new ResponsesClientSettings();
             const result = settings._serialize();
-            expect(Object.keys(result).length).to.equal(0);
+            expect(Object.keys(result).length).to.equal(1);
+            expect(result.store).to.be.true;
         });
     });
 
@@ -365,6 +379,173 @@ describe('ResponsesClient Tests', () => {
         });
     });
 
+    // ========================================================================
+    // Vision helper functions
+    // ========================================================================
+
+    describe('vision helpers', () => {
+        it('should create InputImageContent from URL', () => {
+            const content = createImageContentFromUrl('https://example.com/image.png');
+            expect(content.type).to.equal('input_image');
+            expect(content.image_url).to.equal('https://example.com/image.png');
+            expect(content.media_type).to.equal('image/unknown');
+            expect(content.detail).to.be.undefined;
+            expect(content.image_data).to.be.undefined;
+        });
+
+        it('should create InputImageContent from URL with detail', () => {
+            const content = createImageContentFromUrl('https://example.com/image.jpg', 'high');
+            expect(content.type).to.equal('input_image');
+            expect(content.detail).to.equal('high');
+        });
+
+        it('should satisfy InputImageContent type for base64 variant', () => {
+            // Verify the type is correct by construction
+            const content: InputImageContent = {
+                type: 'input_image',
+                image_data: 'base64data==',
+                media_type: 'image/png',
+                detail: 'low',
+            };
+            expect(content.type).to.equal('input_image');
+            expect(content.image_data).to.equal('base64data==');
+            expect(content.media_type).to.equal('image/png');
+            expect(content.detail).to.equal('low');
+            expect(content.image_url).to.be.undefined;
+        });
+
+        it('should create InputImageContent from file for a temp PNG', () => {
+            // Write a minimal 1×1 PNG to a temp file
+            const tmpFile = path.join(os.tmpdir(), 'test-image.png');
+            // Minimal valid PNG bytes (1×1 white pixel)
+            const pngBuffer = Buffer.from(
+                '89504e470d0a1a0a0000000d49484452000000010000000108020000009001' +
+                '2e00000000c4944415478016360f8cfc000000002000176dd24100000000049454e44ae426082',
+                'hex'
+            );
+            fs.writeFileSync(tmpFile, pngBuffer);
+
+            try {
+                const content = createImageContentFromFile(tmpFile);
+                expect(content.type).to.equal('input_image');
+                expect(content.media_type).to.equal('image/png');
+                expect(content.image_data).to.be.a('string');
+                expect(content.image_data!.length).to.be.greaterThan(0);
+                expect(content.image_url).to.be.undefined;
+            } finally {
+                fs.unlinkSync(tmpFile);
+            }
+        });
+
+        it('should throw createImageContentFromFile for unsupported extension', () => {
+            expect(() => createImageContentFromFile('/tmp/image.bmp')).to.throw('Unsupported image format');
+        });
+    });
+
+    // ========================================================================
+    // list() method — network error
+    // ========================================================================
+
+    describe('list()', () => {
+        it('should throw a network error when server is unreachable', async () => {
+            const client = new ResponsesClient('http://localhost:1', 'test-model');
+            try {
+                await client.list();
+                expect.fail('Should have thrown');
+            } catch (error) {
+                expect(error).to.be.instanceOf(Error);
+            }
+        });
+    });
+
+    // ========================================================================
+    // Reasoning streaming event types
+    // ========================================================================
+
+    describe('reasoning streaming event types', () => {
+        it('should construct ReasoningDeltaEvent', () => {
+            const event: ReasoningDeltaEvent = {
+                type: 'response.reasoning.delta',
+                item_id: 'item_1',
+                delta: 'thinking...',
+                sequence_number: 1,
+            };
+            expect(event.type).to.equal('response.reasoning.delta');
+            expect(event.delta).to.equal('thinking...');
+        });
+
+        it('should construct ReasoningDoneEvent', () => {
+            const event: ReasoningDoneEvent = {
+                type: 'response.reasoning.done',
+                item_id: 'item_1',
+                text: 'final reasoning text',
+                sequence_number: 2,
+            };
+            expect(event.type).to.equal('response.reasoning.done');
+            expect(event.text).to.equal('final reasoning text');
+        });
+
+        it('should construct ReasoningSummaryTextDeltaEvent', () => {
+            const event: ReasoningSummaryTextDeltaEvent = {
+                type: 'response.reasoning_summary_text.delta',
+                item_id: 'item_2',
+                delta: 'summary delta',
+                sequence_number: 3,
+            };
+            expect(event.type).to.equal('response.reasoning_summary_text.delta');
+        });
+
+        it('should construct ReasoningSummaryTextDoneEvent', () => {
+            const event: ReasoningSummaryTextDoneEvent = {
+                type: 'response.reasoning_summary_text.done',
+                item_id: 'item_2',
+                text: 'full summary',
+                sequence_number: 4,
+            };
+            expect(event.type).to.equal('response.reasoning_summary_text.done');
+        });
+
+        it('should construct ReasoningSummaryPartAddedEvent', () => {
+            const event: ReasoningSummaryPartAddedEvent = {
+                type: 'response.reasoning_summary_part.added',
+                item_id: 'item_3',
+                part: { type: 'output_text', text: 'summary part' },
+                sequence_number: 5,
+            };
+            expect(event.type).to.equal('response.reasoning_summary_part.added');
+        });
+
+        it('should construct ReasoningSummaryPartDoneEvent', () => {
+            const event: ReasoningSummaryPartDoneEvent = {
+                type: 'response.reasoning_summary_part.done',
+                item_id: 'item_3',
+                part: { type: 'output_text', text: 'done summary part' },
+                sequence_number: 6,
+            };
+            expect(event.type).to.equal('response.reasoning_summary_part.done');
+        });
+
+        it('should construct OutputTextAnnotationAddedEvent', () => {
+            const event: OutputTextAnnotationAddedEvent = {
+                type: 'response.output_text.annotation.added',
+                item_id: 'item_4',
+                annotation: { type: 'url_citation', start_index: 0, end_index: 5 },
+                sequence_number: 7,
+            };
+            expect(event.type).to.equal('response.output_text.annotation.added');
+        });
+
+        it('should accept reasoning events in StreamingEvent union', () => {
+            const events: StreamingEvent[] = [
+                { type: 'response.reasoning.delta', item_id: 'x', delta: 'd', sequence_number: 1 },
+                { type: 'response.reasoning.done', item_id: 'x', text: 't', sequence_number: 2 },
+                { type: 'response.reasoning_summary_text.delta', item_id: 'x', delta: 'd', sequence_number: 3 },
+                { type: 'response.reasoning_summary_text.done', item_id: 'x', text: 't', sequence_number: 4 },
+            ];
+            expect(events.length).to.equal(4);
+        });
+    });
+
     // ========================================================================
     // Integration tests (require running web service + loaded model)
     // ========================================================================
@@ -377,7 +558,7 @@ describe('ResponsesClient Tests', () => {
 
         before(async function() {
             this.timeout(30000);
-            if (IS_RUNNING_IN_CI) {
+            if (IS_RUNNING_IN_CI || !IS_NATIVE_ADDON_AVAILABLE) {
                 skipped = true;
                 this.skip();
                 return;
@@ -567,5 +748,42 @@ describe('ResponsesClient Tests', () => {
                 expect((functionCall as any).name).to.equal('get_weather');
             }
         });
+
+        it('should list stored responses', async function() {
+            this.timeout(30000);
+
+            const result = await client.list();
+
+            expect(result).to.not.be.undefined;
+            expect(result.object).to.equal('list');
+            expect(result.data).to.be.an('array');
+            console.log(`Listed ${result.data.length} responses`);
+        });
+
+        it('should create a vision response with base64 image', async function() {
+            this.timeout(60000);
+
+            // Minimal 1×1 red PNG (base64)
+            const minimalPng = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADklEQVQI12P4z8BQDwADhQGAWjR9awAAAABJRU5ErkJggg==';
+
+            const response = await client.create([
+                {
+                    type: 'message',
+                    role: 'user',
+                    content: [
+                        { type: 'input_text', text: 'What color is the dominant color in this image? Answer with one word.' },
+                        { type: 'input_image', image_data: minimalPng, media_type: 'image/png' },
+                    ],
+                } as MessageItem,
+            ]);
+
+            expect(response).to.not.be.undefined;
+            const text = getOutputText(response);
+            console.log(`Vision response: ${text}`);
+            // Just verify we got a non-empty response — vision support depends on the loaded model
+            if (response.status === 'completed') {
+                expect(text.length).to.be.greaterThan(0);
+            }
+        });
     });
 });
\ No newline at end of file
diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
index 7cac6b293..165e2c22b 100644
--- a/sdk/js/test/testUtils.ts
+++ b/sdk/js/test/testUtils.ts
@@ -46,6 +46,23 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
 export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
 
+// Detect whether the native addon is available without throwing at import time.
+// Must be declared after TEST_CONFIG.
+function checkNativeAddonAvailable(): boolean {
+    try {
+        FoundryLocalManager.create(TEST_CONFIG);
+        return true;
+    } catch (e) {
+        // The addon-not-found error message contains 'foundry_local_napi.node'
+        if (e instanceof Error && e.message.includes('foundry_local_napi.node')) {
+            return false;
+        }
+        return true; // different error — addon may still be present
+    }
+}
+
+export const IS_NATIVE_ADDON_AVAILABLE = checkNativeAddonAvailable();
+
 export function getTestManager() {
     return FoundryLocalManager.create(TEST_CONFIG);
 }

From e83136926b0e1c2373ba166d23de4471ff32a382 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 23 Apr 2026 17:10:30 -0400
Subject: [PATCH 02/10] fix(sdk/js): address PR review comments

- Make media_type optional in InputImageContent (server can infer)
- Add file existence check in createImageContentFromFile
- Add bmp support to MEDIA_TYPE_MAP
- Add optional maxDimension resize via soft-peer sharp dependency;
  createImageContentFromFile is now async and returns a Promise
- Omit media_type in createImageContentFromUrl (server infers from URL)
- Add JSDoc on ResponsesClientSettings.store documenting default=true
- Replace FoundryLocalManager.create() in checkNativeAddonAvailable with
  file-existence checks (avoids side effects)
- Replace unreachable-server list() test with globalThis.fetch mock
- Use fs.mkdtempSync for unique temp dirs in file-based tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/examples/responses.ts               |  2 +-
 sdk/js/src/openai/responsesClient.ts       |  5 ++
 sdk/js/src/openai/vision.ts                | 89 +++++++++++++++++++---
 sdk/js/src/types.ts                        |  2 +-
 sdk/js/test/openai/responsesClient.test.ts | 51 +++++++++----
 sdk/js/test/testUtils.ts                   | 23 +++---
 6 files changed, 135 insertions(+), 37 deletions(-)

diff --git a/sdk/js/examples/responses.ts b/sdk/js/examples/responses.ts
index 26a639cd7..111e2222a 100644
--- a/sdk/js/examples/responses.ts
+++ b/sdk/js/examples/responses.ts
@@ -135,7 +135,7 @@ async function main() {
         console.log('\n--- Example 7: Vision ---');
         const testImagePath = 'path/to/test-image.png'; // Replace with a real image path
         if (fs.existsSync(testImagePath)) {
-            const imageContent = createImageContentFromFile(testImagePath);
+            const imageContent = await createImageContentFromFile(testImagePath);
             const visionResponse = await client.create([
                 {
                     type: 'message',
diff --git a/sdk/js/src/openai/responsesClient.ts b/sdk/js/src/openai/responsesClient.ts
index 64c2c8f81..5d9f9ac79 100644
--- a/sdk/js/src/openai/responsesClient.ts
+++ b/sdk/js/src/openai/responsesClient.ts
@@ -53,6 +53,11 @@ export class ResponsesClientSettings {
     toolChoice?: ResponseToolChoice;
     truncation?: TruncationStrategy;
     parallelToolCalls?: boolean;
+    /**
+     * Whether to store the response server-side so it can be retrieved via `get()`, `list()`,
+     * `getInputItems()`, or referenced by `previous_response_id`. Defaults to `true` when not
+     * explicitly set. Set to `false` to disable persistence for a given client.
+     */
     store?: boolean;
     metadata?: Record<string, string>;
     reasoning?: ReasoningConfig;
diff --git a/sdk/js/src/openai/vision.ts b/sdk/js/src/openai/vision.ts
index ddf116e13..7649e1674 100644
--- a/sdk/js/src/openai/vision.ts
+++ b/sdk/js/src/openai/vision.ts
@@ -13,38 +13,75 @@ const MEDIA_TYPE_MAP: Record<string, string> = {
     '.jpeg': 'image/jpeg',
     '.gif': 'image/gif',
     '.webp': 'image/webp',
+    '.bmp': 'image/bmp',
 };
 
+/**
+ * Options for `createImageContentFromFile`.
+ */
+export interface ImageContentOptions {
+    /** Detail level hint for the model. */
+    detail?: 'low' | 'high' | 'auto';
+    /**
+     * If set, the longest dimension of the image will be scaled down to this value
+     * (preserving aspect ratio) before encoding. Requires the `sharp` package to be
+     * installed as an optional peer dependency (`npm install sharp`). If `sharp` is
+     * not available and the image exceeds this size, a warning is printed and the
+     * original image is used unresized.
+     */
+    maxDimension?: number;
+}
+
 /**
  * Creates an `InputImageContent` part by reading an image file from disk.
  * The file is base64-encoded and embedded directly in the content part.
  *
  * @param filePath - Absolute or relative path to the image file.
- * @param detail - Optional detail level hint for the model ('low' | 'high' | 'auto').
+ * @param options - Optional settings (detail level, max dimension for resize).
  * @returns An `InputImageContent` object with base64-encoded image data.
- * @throws If the file extension is not a supported image format.
+ * @throws If the file does not exist or the extension is not a supported format.
  */
-export function createImageContentFromFile(filePath: string, detail?: 'low' | 'high' | 'auto'): InputImageContent {
+export async function createImageContentFromFile(
+    filePath: string,
+    options?: ImageContentOptions | 'low' | 'high' | 'auto'
+): Promise<InputImageContent> {
+    // Support the original simple signature: createImageContentFromFile(path, detail?)
+    const opts: ImageContentOptions = typeof options === 'string'
+        ? { detail: options }
+        : (options ?? {});
+
+    if (!fs.existsSync(filePath)) {
+        throw new Error(`Image file not found: ${filePath}`);
+    }
+
     const ext = path.extname(filePath).toLowerCase();
     const mediaType = MEDIA_TYPE_MAP[ext];
     if (!mediaType) {
-        throw new Error(`Unsupported image format: ${ext}. Supported formats: ${Object.keys(MEDIA_TYPE_MAP).join(', ')}`);
+        throw new Error(
+            `Unsupported image format: ${ext}. Supported formats: ${Object.keys(MEDIA_TYPE_MAP).join(', ')}`
+        );
+    }
+
+    let dataBuffer: Buffer = fs.readFileSync(filePath);
+
+    if (opts.maxDimension !== undefined) {
+        dataBuffer = await resizeImage(dataBuffer, opts.maxDimension, filePath);
     }
 
-    const data = fs.readFileSync(filePath);
     const content: InputImageContent = {
         type: 'input_image',
-        image_data: data.toString('base64'),
+        image_data: dataBuffer.toString('base64'),
         media_type: mediaType,
     };
-    if (detail !== undefined) {
-        content.detail = detail;
+    if (opts.detail !== undefined) {
+        content.detail = opts.detail;
     }
     return content;
 }
 
 /**
  * Creates an `InputImageContent` part from a URL.
+ * The server will infer the media type from the URL.
  *
  * @param url - Public URL pointing to the image.
  * @param detail - Optional detail level hint for the model ('low' | 'high' | 'auto').
@@ -54,10 +91,44 @@ export function createImageContentFromUrl(url: string, detail?: 'low' | 'high' |
     const content: InputImageContent = {
         type: 'input_image',
         image_url: url,
-        media_type: 'image/unknown', // server will detect from URL
+        // media_type intentionally omitted — server infers from URL
     };
     if (detail !== undefined) {
         content.detail = detail;
     }
     return content;
 }
+
+/**
+ * Attempts to resize image data to fit within `maxDimension` on the longest side.
+ * Requires the optional `sharp` peer dependency. Falls back to original data with a
+ * warning if `sharp` is not available.
+ */
+async function resizeImage(data: Buffer, maxDimension: number, filePath: string): Promise<Buffer> {
+    let sharp: any;
+    try {
+        // Dynamic import so sharp remains a soft/optional peer dep.
+        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+        // @ts-ignore — sharp is an optional peer dependency
+        sharp = (await import('sharp')).default;
+    } catch {
+        console.warn(
+            `[foundry-local] createImageContentFromFile: maxDimension=${maxDimension} requires the ` +
+            `"sharp" package (npm install sharp). Image will be used unresized.`
+        );
+        return data;
+    }
+
+    const metadata = await sharp(data).metadata();
+    const { width = 0, height = 0 } = metadata;
+
+    if (Math.max(width, height) <= maxDimension) {
+        return data; // already within bounds
+    }
+
+    const resized: Buffer = await sharp(data)
+        .resize({ width: maxDimension, height: maxDimension, fit: 'inside', withoutEnlargement: true })
+        .toBuffer();
+
+    return resized;
+}
diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts
index 304cceef5..dcb0e2287 100644
--- a/sdk/js/src/types.ts
+++ b/sdk/js/src/types.ts
@@ -131,7 +131,7 @@ export interface InputImageContent {
     type: 'input_image';
     image_url?: string;
     image_data?: string;       // base64-encoded
-    media_type: string;        // e.g. "image/png"
+    media_type?: string;       // e.g. "image/png"; omit to let the server infer
     detail?: 'low' | 'high' | 'auto';
 }
 
diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts
index 936e69ab4..a95e1f35c 100644
--- a/sdk/js/test/openai/responsesClient.test.ts
+++ b/sdk/js/test/openai/responsesClient.test.ts
@@ -388,7 +388,7 @@ describe('ResponsesClient Tests', () => {
             const content = createImageContentFromUrl('https://example.com/image.png');
             expect(content.type).to.equal('input_image');
             expect(content.image_url).to.equal('https://example.com/image.png');
-            expect(content.media_type).to.equal('image/unknown');
+            expect(content.media_type).to.be.undefined; // server infers from URL
             expect(content.detail).to.be.undefined;
             expect(content.image_data).to.be.undefined;
         });
@@ -414,9 +414,10 @@ describe('ResponsesClient Tests', () => {
             expect(content.image_url).to.be.undefined;
         });
 
-        it('should create InputImageContent from file for a temp PNG', () => {
-            // Write a minimal 1×1 PNG to a temp file
-            const tmpFile = path.join(os.tmpdir(), 'test-image.png');
+        it('should create InputImageContent from file for a temp PNG', async () => {
+            // Write a minimal 1×1 PNG to a unique temp directory
+            const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-test-'));
+            const tmpFile = path.join(tmpDir, 'test-image.png');
             // Minimal valid PNG bytes (1×1 white pixel)
             const pngBuffer = Buffer.from(
                 '89504e470d0a1a0a0000000d49484452000000010000000108020000009001' +
@@ -426,7 +427,7 @@ describe('ResponsesClient Tests', () => {
             fs.writeFileSync(tmpFile, pngBuffer);
 
             try {
-                const content = createImageContentFromFile(tmpFile);
+                const content = await createImageContentFromFile(tmpFile);
                 expect(content.type).to.equal('input_image');
                 expect(content.media_type).to.equal('image/png');
                 expect(content.image_data).to.be.a('string');
@@ -434,26 +435,48 @@ describe('ResponsesClient Tests', () => {
                 expect(content.image_url).to.be.undefined;
             } finally {
                 fs.unlinkSync(tmpFile);
+                fs.rmdirSync(tmpDir);
             }
         });
 
-        it('should throw createImageContentFromFile for unsupported extension', () => {
-            expect(() => createImageContentFromFile('/tmp/image.bmp')).to.throw('Unsupported image format');
+        it('should throw createImageContentFromFile for unsupported extension', async () => {
+            // Create a real file with an unsupported extension so we reach the format check
+            const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-test-'));
+            const tmpFile = path.join(tmpDir, 'image.xyz');
+            fs.writeFileSync(tmpFile, 'dummy');
+            try {
+                await createImageContentFromFile(tmpFile);
+                expect.fail('Should have thrown');
+            } catch (e) {
+                expect((e as Error).message).to.include('Unsupported image format');
+            } finally {
+                fs.unlinkSync(tmpFile);
+                fs.rmdirSync(tmpDir);
+            }
         });
     });
 
     // ========================================================================
-    // list() method — network error
+    // list() method — unit test with fetch mock
     // ========================================================================
 
     describe('list()', () => {
-        it('should throw a network error when server is unreachable', async () => {
-            const client = new ResponsesClient('http://localhost:1', 'test-model');
+        it('should call GET /v1/responses and return parsed JSON', async () => {
+            const mockResult = { object: 'list', data: [] };
+            const originalFetch = globalThis.fetch;
+            globalThis.fetch = async (_url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
+                return new Response(JSON.stringify(mockResult), {
+                    status: 200,
+                    headers: { 'Content-Type': 'application/json' },
+                });
+            };
             try {
-                await client.list();
-                expect.fail('Should have thrown');
-            } catch (error) {
-                expect(error).to.be.instanceOf(Error);
+                const client = new ResponsesClient('http://test-host', 'test-model');
+                const result = await client.list();
+                expect(result.object).to.equal('list');
+                expect(result.data).to.deep.equal([]);
+            } finally {
+                globalThis.fetch = originalFetch;
             }
         });
     });
diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
index 165e2c22b..ee795a2f4 100644
--- a/sdk/js/test/testUtils.ts
+++ b/sdk/js/test/testUtils.ts
@@ -46,19 +46,18 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
 export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
 
-// Detect whether the native addon is available without throwing at import time.
-// Must be declared after TEST_CONFIG.
+// Detect whether the native addon is available by checking for the file on disk,
+// mirroring the exact paths that CoreInterop.loadAddon() searches. This avoids
+// the side effects of calling FoundryLocalManager.create() at module load time.
 function checkNativeAddonAvailable(): boolean {
-    try {
-        FoundryLocalManager.create(TEST_CONFIG);
-        return true;
-    } catch (e) {
-        // The addon-not-found error message contains 'foundry_local_napi.node'
-        if (e instanceof Error && e.message.includes('foundry_local_napi.node')) {
-            return false;
-        }
-        return true; // different error — addon may still be present
-    }
+    const platform = process.platform;
+    const arch = process.arch;
+    const platformKey = `${platform}-${arch}`;
+    // dist/ is the compiled output root; from there the SDK root is one level up
+    const sdkRoot = path.resolve(getGitRepoRoot(), 'sdk', 'js', 'dist');
+    const prebuiltPath = path.join(sdkRoot, 'prebuilds', platformKey, 'foundry_local_napi.node');
+    const devPath = path.join(sdkRoot, 'native', 'build', 'Release', 'foundry_local_napi.node');
+    return fs.existsSync(prebuiltPath) || fs.existsSync(devPath);
 }
 
 export const IS_NATIVE_ADDON_AVAILABLE = checkNativeAddonAvailable();

From e092c2be22c36f47d9a45dc3ee4bda6fee0bd595 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 23 Apr 2026 22:03:18 -0400
Subject: [PATCH 03/10] fix(sdk/js): address second round of PR review comments

- vision.ts: use fs.promises.readFile (async) instead of sync existsSync/readFileSync
- vision.ts: validate maxDimension is a finite positive integer before use
- vision.ts: update JSDoc to reflect overloaded options param (object or detail string)
- vision.ts: resizeImage returns { buffer, mediaType } so media_type is explicit post-resize
- vision.ts: pass fallbackMediaType into resizeImage to avoid hardcoded 'image/png'
- testUtils.ts: check all 4 candidate addon paths (sdk/js/prebuilds, sdk/js/native,
  sdk/js/dist/prebuilds, sdk/js/dist/native) matching CoreInterop.loadAddon() logic

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/src/openai/vision.ts | 64 +++++++++++++++++++++++++------------
 sdk/js/test/testUtils.ts    | 19 ++++++-----
 2 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/sdk/js/src/openai/vision.ts b/sdk/js/src/openai/vision.ts
index 7649e1674..2b3c7abdd 100644
--- a/sdk/js/src/openai/vision.ts
+++ b/sdk/js/src/openai/vision.ts
@@ -3,8 +3,8 @@
 // Licensed under the MIT License.
 // -------------------------------------------------------------------------
 
-import * as fs from 'fs';
 import * as path from 'path';
+import { promises as fsPromises } from 'fs';
 import type { InputImageContent } from '../types.js';
 
 const MEDIA_TYPE_MAP: Record<string, string> = {
@@ -24,10 +24,10 @@ export interface ImageContentOptions {
     detail?: 'low' | 'high' | 'auto';
     /**
      * If set, the longest dimension of the image will be scaled down to this value
-     * (preserving aspect ratio) before encoding. Requires the `sharp` package to be
-     * installed as an optional peer dependency (`npm install sharp`). If `sharp` is
-     * not available and the image exceeds this size, a warning is printed and the
-     * original image is used unresized.
+     * (preserving aspect ratio) before encoding. Must be a finite positive integer.
+     * Requires the `sharp` package to be installed as an optional peer dependency
+     * (`npm install sharp`). If `sharp` is not available, a warning is printed and
+     * the original image is used unresized.
      */
     maxDimension?: number;
 }
@@ -36,22 +36,28 @@ export interface ImageContentOptions {
  * Creates an `InputImageContent` part by reading an image file from disk.
  * The file is base64-encoded and embedded directly in the content part.
  *
+ * The second argument accepts either an `ImageContentOptions` object or a shorthand
+ * detail string (`'low' | 'high' | 'auto'`) for convenience.
+ *
  * @param filePath - Absolute or relative path to the image file.
- * @param options - Optional settings (detail level, max dimension for resize).
- * @returns An `InputImageContent` object with base64-encoded image data.
- * @throws If the file does not exist or the extension is not a supported format.
+ * @param options - Optional `ImageContentOptions`, or a shorthand detail string.
+ * @returns A `Promise<InputImageContent>` with base64-encoded image data.
+ * @throws If the file does not exist, the extension is unsupported, or `maxDimension`
+ *         is not a finite positive integer.
  */
 export async function createImageContentFromFile(
     filePath: string,
     options?: ImageContentOptions | 'low' | 'high' | 'auto'
 ): Promise<InputImageContent> {
-    // Support the original simple signature: createImageContentFromFile(path, detail?)
+    // Support the shorthand signature: createImageContentFromFile(path, detail?)
     const opts: ImageContentOptions = typeof options === 'string'
         ? { detail: options }
         : (options ?? {});
 
-    if (!fs.existsSync(filePath)) {
-        throw new Error(`Image file not found: ${filePath}`);
+    if (opts.maxDimension !== undefined) {
+        if (!Number.isFinite(opts.maxDimension) || !Number.isInteger(opts.maxDimension) || opts.maxDimension <= 0) {
+            throw new Error(`Invalid maxDimension: ${opts.maxDimension}. Expected a finite positive integer.`);
+        }
     }
 
     const ext = path.extname(filePath).toLowerCase();
@@ -62,16 +68,27 @@ export async function createImageContentFromFile(
         );
     }
 
-    let dataBuffer: Buffer = fs.readFileSync(filePath);
+    let dataBuffer: Buffer;
+    try {
+        dataBuffer = await fsPromises.readFile(filePath) as Buffer;
+    } catch (err: any) {
+        if (err.code === 'ENOENT') {
+            throw new Error(`Image file not found: ${filePath}`);
+        }
+        throw err;
+    }
 
+    let finalMediaType = mediaType;
     if (opts.maxDimension !== undefined) {
-        dataBuffer = await resizeImage(dataBuffer, opts.maxDimension, filePath);
+        const resized = await resizeImage(dataBuffer, opts.maxDimension, mediaType);
+        dataBuffer = resized.buffer;
+        finalMediaType = resized.mediaType;
     }
 
     const content: InputImageContent = {
         type: 'input_image',
         image_data: dataBuffer.toString('base64'),
-        media_type: mediaType,
+        media_type: finalMediaType,
     };
     if (opts.detail !== undefined) {
         content.detail = opts.detail;
@@ -103,8 +120,9 @@ export function createImageContentFromUrl(url: string, detail?: 'low' | 'high' |
  * Attempts to resize image data to fit within `maxDimension` on the longest side.
  * Requires the optional `sharp` peer dependency. Falls back to original data with a
  * warning if `sharp` is not available.
+ * Returns both the (possibly resized) buffer and the media type.
  */
-async function resizeImage(data: Buffer, maxDimension: number, filePath: string): Promise<Buffer> {
+async function resizeImage(data: Buffer, maxDimension: number, fallbackMediaType: string): Promise<{ buffer: Buffer; mediaType: string }> {
     let sharp: any;
     try {
         // Dynamic import so sharp remains a soft/optional peer dep.
@@ -116,19 +134,25 @@ async function resizeImage(data: Buffer, maxDimension: number, filePath: string)
             `[foundry-local] createImageContentFromFile: maxDimension=${maxDimension} requires the ` +
             `"sharp" package (npm install sharp). Image will be used unresized.`
         );
-        return data;
+        return { buffer: data, mediaType: fallbackMediaType };
     }
 
     const metadata = await sharp(data).metadata();
-    const { width = 0, height = 0 } = metadata;
+    const { width = 0, height = 0, format } = metadata;
+    // Map sharp format names back to MIME types; fall back to the original type
+    const formatToMime: Record<string, string> = {
+        png: 'image/png', jpeg: 'image/jpeg', gif: 'image/gif',
+        webp: 'image/webp', bmp: 'image/bmp',
+    };
+    const mediaType = (format && formatToMime[format]) ?? fallbackMediaType;
 
     if (Math.max(width, height) <= maxDimension) {
-        return data; // already within bounds
+        return { buffer: data, mediaType };
     }
 
-    const resized: Buffer = await sharp(data)
+    const resizedBuffer: Buffer = await sharp(data)
         .resize({ width: maxDimension, height: maxDimension, fit: 'inside', withoutEnlargement: true })
         .toBuffer();
 
-    return resized;
+    return { buffer: resizedBuffer, mediaType };
 }
diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
index ee795a2f4..5bacda918 100644
--- a/sdk/js/test/testUtils.ts
+++ b/sdk/js/test/testUtils.ts
@@ -46,18 +46,21 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
 export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
 
-// Detect whether the native addon is available by checking for the file on disk,
-// mirroring the exact paths that CoreInterop.loadAddon() searches. This avoids
-// the side effects of calling FoundryLocalManager.create() at module load time.
+// Detect whether the native addon is available by checking for the file on disk.
+// Match CoreInterop.loadAddon() by resolving from the SDK root.
+// Also check dist/ to support runs against built output.
 function checkNativeAddonAvailable(): boolean {
     const platform = process.platform;
     const arch = process.arch;
     const platformKey = `${platform}-${arch}`;
-    // dist/ is the compiled output root; from there the SDK root is one level up
-    const sdkRoot = path.resolve(getGitRepoRoot(), 'sdk', 'js', 'dist');
-    const prebuiltPath = path.join(sdkRoot, 'prebuilds', platformKey, 'foundry_local_napi.node');
-    const devPath = path.join(sdkRoot, 'native', 'build', 'Release', 'foundry_local_napi.node');
-    return fs.existsSync(prebuiltPath) || fs.existsSync(devPath);
+    const sdkRoot = path.resolve(getGitRepoRoot(), 'sdk', 'js');
+    const candidatePaths = [
+        path.join(sdkRoot, 'prebuilds', platformKey, 'foundry_local_napi.node'),
+        path.join(sdkRoot, 'native', 'build', 'Release', 'foundry_local_napi.node'),
+        path.join(sdkRoot, 'dist', 'prebuilds', platformKey, 'foundry_local_napi.node'),
+        path.join(sdkRoot, 'dist', 'native', 'build', 'Release', 'foundry_local_napi.node'),
+    ];
+    return candidatePaths.some(p => fs.existsSync(p));
 }
 
 export const IS_NATIVE_ADDON_AVAILABLE = checkNativeAddonAvailable();

From 88f0a0d712e02b3b406edd5620a9a6bc6fdb7a37 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Mon, 27 Apr 2026 15:18:10 -0400
Subject: [PATCH 04/10] fix(sdk/js): address latest responses API review
 feedback

- Add list pagination options and response metadata fields
- Align streaming event types with server DTOs for reasoning, annotations, refusals, content parts, and function calls
- Make responses example self-contained by generating a temporary PNG
- Document Responses API store default, vision formats including BMP, and Foundry Local image_data contract
- Remove native-addon availability auto-skip from Responses integration tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/README.md                           | 40 ++++++++++++-
 sdk/js/examples/responses.ts               | 18 ++++--
 sdk/js/src/openai/responsesClient.ts       | 16 ++++-
 sdk/js/src/openai/vision.ts                |  1 +
 sdk/js/src/types.ts                        | 42 ++++++++++++-
 sdk/js/test/openai/responsesClient.test.ts | 69 +++++++++++++++++++---
 sdk/js/test/testUtils.ts                   | 19 ------
 7 files changed, 167 insertions(+), 38 deletions(-)

diff --git a/sdk/js/README.md b/sdk/js/README.md
index 26471cc8c..791f93b72 100644
--- a/sdk/js/README.md
+++ b/sdk/js/README.md
@@ -250,6 +250,43 @@ for await (const chunk of audioClient.transcribeStreaming('/path/to/audio.wav'))
 }
 ```
 
+### Responses API
+
+Use the Responses API client for OpenAI-compatible text, tool, streaming, stored-response, and vision workflows over the embedded web service:
+
+```typescript
+import { createImageContentFromFile, getOutputText } from 'foundry-local-sdk';
+
+manager.startWebService();
+
+const client = manager.createResponsesClient(model.id);
+
+// Responses are stored by default so they can be retrieved or listed later.
+// Set store=false per client (as shown here) or per request to opt out; responses created that way are not kept for later retrieval or listing.
+client.settings.store = false;
+
+const response = await client.create('Tell me a short joke.');
+console.log(getOutputText(response));
+
+const storedResponses = await client.list({ limit: 10, order: 'desc' });
+console.log(storedResponses.has_more);
+
+const image = await createImageContentFromFile('/path/to/image.png');
+const visionResponse = await client.create([
+    {
+        type: 'message',
+        role: 'user',
+        content: [
+            { type: 'input_text', text: 'Describe this image.' },
+            image,
+        ],
+    },
+]);
+console.log(getOutputText(visionResponse));
+```
+
+Vision helpers support `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, and `.bmp` files. `createImageContentFromFile()` produces the fields required by Foundry Local's server contract (base64-encoded `image_data` plus `media_type`); `createImageContentFromUrl()` sends `image_url` and lets the server infer the media type.
+
 ### Embedded Web Service
 
 Start a local HTTP server that exposes an OpenAI-compatible API:
@@ -288,6 +325,7 @@ Auto-generated class documentation lives in [`docs/classes/`](docs/classes/):
 - [IModel](docs/README.md#imodel) — Model interface: variant selection, download, load, inference
 - [ChatClient](docs/classes/ChatClient.md) — Chat completions (sync and streaming)
 - [AudioClient](docs/classes/AudioClient.md) — Audio transcription (sync and streaming)
+- [ResponsesClient](docs/classes/ResponsesClient.md) — Responses API (text, streaming, tools, stored responses, vision)
 - [ModelLoadManager](docs/classes/ModelLoadManager.md) — Low-level model loading management
 
 ## Contributing: Building from Source
@@ -336,4 +374,4 @@ See `test/README.md` for details on prerequisites and setup.
 npm run example
 ```
 
-This runs the chat completion example in `examples/chat-completion.ts`.
\ No newline at end of file
+This runs the chat completion example in `examples/chat-completion.ts`.
diff --git a/sdk/js/examples/responses.ts b/sdk/js/examples/responses.ts
index 111e2222a..ffafad2fa 100644
--- a/sdk/js/examples/responses.ts
+++ b/sdk/js/examples/responses.ts
@@ -4,6 +4,8 @@
 // -------------------------------------------------------------------------
 
 import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
 import { FoundryLocalManager, getOutputText, createImageContentFromFile } from '../src/index.js';
 import type { StreamingEvent, FunctionToolDefinition, FunctionCallItem, MessageItem } from '../src/types.js';
 
@@ -133,8 +135,16 @@ async function main() {
         // Example 7: Vision — describe an image
         // =================================================================
         console.log('\n--- Example 7: Vision ---');
-        const testImagePath = 'path/to/test-image.png'; // Replace with a real image path
-        if (fs.existsSync(testImagePath)) {
+        const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-responses-example-'));
+        const testImagePath = path.join(tempDir, 'sample.png');
+        // Minimal 1x1 PNG so the example runs without external assets.
+        const samplePng = Buffer.from(
+            '89504e470d0a1a0a0000000d49484452000000010000000108020000009001' +
+            '2e00000000c4944415478016360f8cfc000000002000176dd24100000000049454e44ae426082',
+            'hex'
+        );
+        fs.writeFileSync(testImagePath, samplePng);
+        try {
             const imageContent = await createImageContentFromFile(testImagePath);
             const visionResponse = await client.create([
                 {
@@ -147,8 +157,8 @@ async function main() {
                 } as MessageItem,
             ]);
             console.log(`Vision: ${getOutputText(visionResponse)}`);
-        } else {
-            console.log('(Skipped: test image not found)');
+        } finally {
+            fs.rmSync(tempDir, { recursive: true, force: true });
         }
 
         // Cleanup
diff --git a/sdk/js/src/openai/responsesClient.ts b/sdk/js/src/openai/responsesClient.ts
index 5d9f9ac79..f7e033b08 100644
--- a/sdk/js/src/openai/responsesClient.ts
+++ b/sdk/js/src/openai/responsesClient.ts
@@ -10,6 +10,7 @@ import {
     InputItemsListResponse,
     DeleteResponseResult,
     ListResponsesResult,
+    ListResponsesOptions,
     ResponseInputItem,
     MessageItem,
     ContentPart,
@@ -283,11 +284,20 @@ export class ResponsesClient {
     }
 
     /**
-     * Lists all stored responses.
+     * Lists stored responses.
+     * @param options - Optional pagination parameters. The Foundry Local server supports
+     *   `limit`, `order`, and `after`; it does not currently support `before`.
      * @returns The list of Response objects.
      */
-    public async list(): Promise<ListResponsesResult> {
-        return this.fetchJson<ListResponsesResult>('/v1/responses', { method: 'GET' });
+    public async list(options?: ListResponsesOptions): Promise<ListResponsesResult> {
+        const query = new URLSearchParams();
+        if (options?.limit !== undefined) query.set('limit', String(options.limit));
+        if (options?.order !== undefined) query.set('order', options.order);
+        if (options?.after !== undefined) query.set('after', options.after);
+
+        const queryString = query.toString();
+        const path = queryString ? `/v1/responses?${queryString}` : '/v1/responses';
+        return this.fetchJson<ListResponsesResult>(path, { method: 'GET' });
     }
 
     // ========================================================================
diff --git a/sdk/js/src/openai/vision.ts b/sdk/js/src/openai/vision.ts
index 2b3c7abdd..e2bd66302 100644
--- a/sdk/js/src/openai/vision.ts
+++ b/sdk/js/src/openai/vision.ts
@@ -35,6 +35,7 @@ export interface ImageContentOptions {
 /**
  * Creates an `InputImageContent` part by reading an image file from disk.
  * The file is base64-encoded and embedded directly in the content part.
+ * Supported file extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`.
  *
  * The second argument accepts either an `ImageContentOptions` object or a shorthand
  * detail string (`'low' | 'high' | 'auto'`) for convenience.
diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts
index dcb0e2287..0d5a54568 100644
--- a/sdk/js/src/types.ts
+++ b/sdk/js/src/types.ts
@@ -369,6 +369,7 @@ export interface OutputItemDoneEvent {
 export interface ContentPartAddedEvent {
     type: 'response.content_part.added';
     item_id: string;
+    output_index: number;
     content_index: number;
     part: ContentPart;
     sequence_number: number;
@@ -377,6 +378,7 @@ export interface ContentPartAddedEvent {
 export interface ContentPartDoneEvent {
     type: 'response.content_part.done';
     item_id: string;
+    output_index: number;
     content_index: number;
     part: ContentPart;
     sequence_number: number;
@@ -388,6 +390,8 @@ export interface OutputTextDeltaEvent {
     output_index: number;
     content_index: number;
     delta: string;
+    logprobs?: LogProb[];
+    obfuscation?: string | null;
     sequence_number: number;
 }
 
@@ -397,12 +401,14 @@ export interface OutputTextDoneEvent {
     output_index: number;
     content_index: number;
     text: string;
+    logprobs?: LogProb[];
     sequence_number: number;
 }
 
 export interface RefusalDeltaEvent {
     type: 'response.refusal.delta';
     item_id: string;
+    output_index: number;
     content_index: number;
     delta: string;
     sequence_number: number;
@@ -411,6 +417,7 @@ export interface RefusalDeltaEvent {
 export interface RefusalDoneEvent {
     type: 'response.refusal.done';
     item_id: string;
+    output_index: number;
     content_index: number;
     refusal: string;
     sequence_number: number;
@@ -420,6 +427,7 @@ export interface FunctionCallArgsDeltaEvent {
     type: 'response.function_call_arguments.delta';
     item_id: string;
     output_index: number;
+    call_id: string;
     delta: string;
     sequence_number: number;
 }
@@ -428,14 +436,17 @@ export interface FunctionCallArgsDoneEvent {
     type: 'response.function_call_arguments.done';
     item_id: string;
     output_index: number;
+    call_id: string;
     arguments: string;
-    name: string;
+    name?: string;
     sequence_number: number;
 }
 
 export interface ReasoningSummaryPartAddedEvent {
     type: 'response.reasoning_summary_part.added';
     item_id: string;
+    output_index: number;
+    summary_index: number;
     part: ContentPart;
     sequence_number: number;
 }
@@ -443,6 +454,8 @@ export interface ReasoningSummaryPartAddedEvent {
 export interface ReasoningSummaryPartDoneEvent {
     type: 'response.reasoning_summary_part.done';
     item_id: string;
+    output_index: number;
+    summary_index: number;
     part: ContentPart;
     sequence_number: number;
 }
@@ -450,13 +463,18 @@ export interface ReasoningSummaryPartDoneEvent {
 export interface ReasoningDeltaEvent {
     type: 'response.reasoning.delta';
     item_id: string;
+    output_index: number;
+    content_index: number;
     delta: string;
+    obfuscation?: string | null;
     sequence_number: number;
 }
 
 export interface ReasoningDoneEvent {
     type: 'response.reasoning.done';
     item_id: string;
+    output_index: number;
+    content_index: number;
     text: string;
     sequence_number: number;
 }
@@ -464,13 +482,18 @@ export interface ReasoningDoneEvent {
 export interface ReasoningSummaryTextDeltaEvent {
     type: 'response.reasoning_summary_text.delta';
     item_id: string;
+    output_index: number;
+    summary_index: number;
     delta: string;
+    obfuscation?: string | null;
     sequence_number: number;
 }
 
 export interface ReasoningSummaryTextDoneEvent {
     type: 'response.reasoning_summary_text.done';
     item_id: string;
+    output_index: number;
+    summary_index: number;
     text: string;
     sequence_number: number;
 }
@@ -478,7 +501,10 @@ export interface ReasoningSummaryTextDoneEvent {
 export interface OutputTextAnnotationAddedEvent {
     type: 'response.output_text.annotation.added';
     item_id: string;
-    annotation: Annotation;
+    output_index: number;
+    content_index: number;
+    annotation_index: number;
+    annotation?: Annotation | null;
     sequence_number: number;
 }
 
@@ -516,4 +542,16 @@ export type StreamingEvent =
 export interface ListResponsesResult {
     object: 'list';
     data: ResponseObject[];
+    first_id?: string | null;
+    last_id?: string | null;
+    has_more?: boolean;
+}
+
+export interface ListResponsesOptions {
+    /** Maximum number of responses to return. Server defaults to 20 and caps at 100. */
+    limit?: number;
+    /** Sort order for returned responses. Server defaults to descending. */
+    order?: 'asc' | 'desc';
+    /** Return responses after this response ID. */
+    after?: string;
 }
diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts
index a95e1f35c..25bb35f67 100644
--- a/sdk/js/test/openai/responsesClient.test.ts
+++ b/sdk/js/test/openai/responsesClient.test.ts
@@ -3,7 +3,7 @@ import { expect } from 'chai';
 import * as fs from 'fs';
 import * as os from 'os';
 import * as path from 'path';
-import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI, IS_NATIVE_ADDON_AVAILABLE } from '../testUtils.js';
+import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
 import { ResponsesClient, ResponsesClientSettings, getOutputText } from '../../src/openai/responsesClient.js';
 import { createImageContentFromFile, createImageContentFromUrl } from '../../src/openai/vision.js';
 import type {
@@ -462,9 +462,11 @@ describe('ResponsesClient Tests', () => {
 
     describe('list()', () => {
         it('should call GET /v1/responses and return parsed JSON', async () => {
-            const mockResult = { object: 'list', data: [] };
+            const mockResult = { object: 'list', data: [], first_id: null, last_id: null, has_more: false };
+            let capturedUrl: string | URL | Request | undefined;
             const originalFetch = globalThis.fetch;
-            globalThis.fetch = async (_url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
+            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
+                capturedUrl = url;
                 return new Response(JSON.stringify(mockResult), {
                     status: 200,
                     headers: { 'Content-Type': 'application/json' },
@@ -475,6 +477,40 @@ describe('ResponsesClient Tests', () => {
                 const result = await client.list();
                 expect(result.object).to.equal('list');
                 expect(result.data).to.deep.equal([]);
+                expect(result.has_more).to.equal(false);
+                expect(String(capturedUrl)).to.equal('http://test-host/v1/responses');
+            } finally {
+                globalThis.fetch = originalFetch;
+            }
+        });
+
+        it('should send pagination options as query parameters', async () => {
+            const originalFetch = globalThis.fetch;
+            let capturedUrl: string | URL | Request | undefined;
+            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
+                capturedUrl = url;
+                return new Response(JSON.stringify({
+                    object: 'list',
+                    data: [],
+                    first_id: 'resp_first',
+                    last_id: 'resp_last',
+                    has_more: true,
+                }), {
+                    status: 200,
+                    headers: { 'Content-Type': 'application/json' },
+                });
+            };
+            try {
+                const client = new ResponsesClient('http://test-host', 'test-model');
+                const result = await client.list({ limit: 10, order: 'asc', after: 'resp_123' });
+                const url = new URL(String(capturedUrl));
+                expect(url.pathname).to.equal('/v1/responses');
+                expect(url.searchParams.get('limit')).to.equal('10');
+                expect(url.searchParams.get('order')).to.equal('asc');
+                expect(url.searchParams.get('after')).to.equal('resp_123');
+                expect(result.first_id).to.equal('resp_first');
+                expect(result.last_id).to.equal('resp_last');
+                expect(result.has_more).to.equal(true);
             } finally {
                 globalThis.fetch = originalFetch;
             }
@@ -490,6 +526,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningDeltaEvent = {
                 type: 'response.reasoning.delta',
                 item_id: 'item_1',
+                output_index: 0,
+                content_index: 0,
                 delta: 'thinking...',
                 sequence_number: 1,
             };
@@ -501,6 +539,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningDoneEvent = {
                 type: 'response.reasoning.done',
                 item_id: 'item_1',
+                output_index: 0,
+                content_index: 0,
                 text: 'final reasoning text',
                 sequence_number: 2,
             };
@@ -512,6 +552,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningSummaryTextDeltaEvent = {
                 type: 'response.reasoning_summary_text.delta',
                 item_id: 'item_2',
+                output_index: 0,
+                summary_index: 0,
                 delta: 'summary delta',
                 sequence_number: 3,
             };
@@ -522,6 +564,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningSummaryTextDoneEvent = {
                 type: 'response.reasoning_summary_text.done',
                 item_id: 'item_2',
+                output_index: 0,
+                summary_index: 0,
                 text: 'full summary',
                 sequence_number: 4,
             };
@@ -532,6 +576,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningSummaryPartAddedEvent = {
                 type: 'response.reasoning_summary_part.added',
                 item_id: 'item_3',
+                output_index: 0,
+                summary_index: 0,
                 part: { type: 'output_text', text: 'summary part' },
                 sequence_number: 5,
             };
@@ -542,6 +588,8 @@ describe('ResponsesClient Tests', () => {
             const event: ReasoningSummaryPartDoneEvent = {
                 type: 'response.reasoning_summary_part.done',
                 item_id: 'item_3',
+                output_index: 0,
+                summary_index: 0,
                 part: { type: 'output_text', text: 'done summary part' },
                 sequence_number: 6,
             };
@@ -552,6 +600,9 @@ describe('ResponsesClient Tests', () => {
             const event: OutputTextAnnotationAddedEvent = {
                 type: 'response.output_text.annotation.added',
                 item_id: 'item_4',
+                output_index: 0,
+                content_index: 0,
+                annotation_index: 0,
                 annotation: { type: 'url_citation', start_index: 0, end_index: 5 },
                 sequence_number: 7,
             };
@@ -560,10 +611,10 @@ describe('ResponsesClient Tests', () => {
 
         it('should accept reasoning events in StreamingEvent union', () => {
             const events: StreamingEvent[] = [
-                { type: 'response.reasoning.delta', item_id: 'x', delta: 'd', sequence_number: 1 },
-                { type: 'response.reasoning.done', item_id: 'x', text: 't', sequence_number: 2 },
-                { type: 'response.reasoning_summary_text.delta', item_id: 'x', delta: 'd', sequence_number: 3 },
-                { type: 'response.reasoning_summary_text.done', item_id: 'x', text: 't', sequence_number: 4 },
+                { type: 'response.reasoning.delta', item_id: 'x', output_index: 0, content_index: 0, delta: 'd', sequence_number: 1 },
+                { type: 'response.reasoning.done', item_id: 'x', output_index: 0, content_index: 0, text: 't', sequence_number: 2 },
+                { type: 'response.reasoning_summary_text.delta', item_id: 'x', output_index: 0, summary_index: 0, delta: 'd', sequence_number: 3 },
+                { type: 'response.reasoning_summary_text.done', item_id: 'x', output_index: 0, summary_index: 0, text: 't', sequence_number: 4 },
             ];
             expect(events.length).to.equal(4);
         });
@@ -581,7 +632,7 @@ describe('ResponsesClient Tests', () => {
 
         before(async function() {
             this.timeout(30000);
-            if (IS_RUNNING_IN_CI || !IS_NATIVE_ADDON_AVAILABLE) {
+            if (IS_RUNNING_IN_CI) {
                 skipped = true;
                 this.skip();
                 return;
@@ -809,4 +860,4 @@ describe('ResponsesClient Tests', () => {
             }
         });
     });
-});
\ No newline at end of file
+});
diff --git a/sdk/js/test/testUtils.ts b/sdk/js/test/testUtils.ts
index 5bacda918..7cac6b293 100644
--- a/sdk/js/test/testUtils.ts
+++ b/sdk/js/test/testUtils.ts
@@ -46,25 +46,6 @@ export const TEST_CONFIG: FoundryLocalConfig = {
 export const TEST_MODEL_ALIAS = 'qwen2.5-0.5b';
 export const EMBEDDING_MODEL_ALIAS = 'qwen3-0.6b-embedding-generic-cpu';
 
-// Detect whether the native addon is available by checking for the file on disk.
-// Match CoreInterop.loadAddon() by resolving from the SDK root.
-// Also check dist/ to support runs against built output.
-function checkNativeAddonAvailable(): boolean {
-    const platform = process.platform;
-    const arch = process.arch;
-    const platformKey = `${platform}-${arch}`;
-    const sdkRoot = path.resolve(getGitRepoRoot(), 'sdk', 'js');
-    const candidatePaths = [
-        path.join(sdkRoot, 'prebuilds', platformKey, 'foundry_local_napi.node'),
-        path.join(sdkRoot, 'native', 'build', 'Release', 'foundry_local_napi.node'),
-        path.join(sdkRoot, 'dist', 'prebuilds', platformKey, 'foundry_local_napi.node'),
-        path.join(sdkRoot, 'dist', 'native', 'build', 'Release', 'foundry_local_napi.node'),
-    ];
-    return candidatePaths.some(p => fs.existsSync(p));
-}
-
-export const IS_NATIVE_ADDON_AVAILABLE = checkNativeAddonAvailable();
-
 export function getTestManager() {
     return FoundryLocalManager.create(TEST_CONFIG);
 }

From a707ee49ba0920ccf95d51c83924f2513bb14e35 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Tue, 28 Apr 2026 14:49:49 -0400
Subject: [PATCH 05/10] feat(sdk/js): add responses FFI fallback

Use the existing chat_completions native command for Responses create and streaming calls when a CoreInterop transport is available, with HTTP fallback for server-backed operations. Keep FFI-created stored responses available through the same client instance and cover the behavior with unit tests.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/README.md                           |   5 +-
 sdk/js/src/detail/model.ts                 |   7 +-
 sdk/js/src/detail/modelVariant.ts          |   7 +-
 sdk/js/src/foundryLocalManager.ts          |  11 +-
 sdk/js/src/imodel.ts                       |   7 +-
 sdk/js/src/openai/responsesClient.ts       | 730 ++++++++++++++++++++-
 sdk/js/test/openai/responsesClient.test.ts | 147 +++++
 7 files changed, 885 insertions(+), 29 deletions(-)

diff --git a/sdk/js/README.md b/sdk/js/README.md
index 791f93b72..9b5a62164 100644
--- a/sdk/js/README.md
+++ b/sdk/js/README.md
@@ -252,11 +252,12 @@ for await (const chunk of audioClient.transcribeStreaming('/path/to/audio.wav'))
 
 ### Responses API
 
-Use the Responses API client for OpenAI-compatible text, tool, streaming, stored-response, and vision workflows over the embedded web service:
+Use the Responses API client for OpenAI-compatible text, tool, streaming, stored-response, and vision workflows. Clients created from `FoundryLocalManager` or a model use native FFI for `create()` and `createStreaming()` when possible, with HTTP fallback when a web service URL is available.
 
 ```typescript
 import { createImageContentFromFile, getOutputText } from 'foundry-local-sdk';
 
+// Optional: start the web service for HTTP fallback and server-backed list/get/delete/cancel operations.
 manager.startWebService();
 
 const client = manager.createResponsesClient(model.id);
@@ -285,7 +286,7 @@ const visionResponse = await client.create([
 console.log(getOutputText(visionResponse));
 ```
 
-Vision helpers support `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, and `.bmp` files. `createImageContentFromFile()` sends Foundry Local's server contract (`image_data` plus `media_type`); `createImageContentFromUrl()` sends `image_url` and lets the server infer the media type.
+Vision helpers support `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, and `.bmp` files. `createImageContentFromFile()` sends Foundry Local's `image_data` plus `media_type` contract; the FFI path converts that to the chat-completions data URL shape internally. `createImageContentFromUrl()` sends `image_url` and lets the runtime infer the media type.
 
 ### Embedded Web Service
 
diff --git a/sdk/js/src/detail/model.ts b/sdk/js/src/detail/model.ts
index c1ee0d5fa..08627a7a7 100644
--- a/sdk/js/src/detail/model.ts
+++ b/sdk/js/src/detail/model.ts
@@ -196,10 +196,11 @@ export class Model implements IModel {
 
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * @param baseUrl - The base URL of the Foundry Local web service.
+     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
+     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
      * @returns A ResponsesClient instance.
      */
-    public createResponsesClient(baseUrl: string): ResponsesClient {
+    public createResponsesClient(baseUrl?: string): ResponsesClient {
         return this.selectedVariant.createResponsesClient(baseUrl);
     }
-}
\ No newline at end of file
+}
diff --git a/sdk/js/src/detail/modelVariant.ts b/sdk/js/src/detail/modelVariant.ts
index 43484bac0..019782091 100644
--- a/sdk/js/src/detail/modelVariant.ts
+++ b/sdk/js/src/detail/modelVariant.ts
@@ -189,10 +189,11 @@ export class ModelVariant implements IModel {
 
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * @param baseUrl - The base URL of the Foundry Local web service.
+     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
+     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
      * @returns A ResponsesClient instance.
      */
-    public createResponsesClient(baseUrl: string): ResponsesClient {
-        return new ResponsesClient(baseUrl, this._modelInfo.id);
+    public createResponsesClient(baseUrl?: string): ResponsesClient {
+        return ResponsesClient.createWithCoreInterop(baseUrl, this._modelInfo.id, this.coreInterop);
     }
 }
diff --git a/sdk/js/src/foundryLocalManager.ts b/sdk/js/src/foundryLocalManager.ts
index f22acdc0d..957b1a5c5 100644
--- a/sdk/js/src/foundryLocalManager.ts
+++ b/sdk/js/src/foundryLocalManager.ts
@@ -214,17 +214,12 @@ export class FoundryLocalManager {
 
     /**
      * Creates a ResponsesClient for interacting with the Responses API.
-     * The web service must be started first via `startWebService()`.
+     * Uses native FFI for create/createStreaming and falls back to HTTP if the web
+     * service has been started via `startWebService()`.
      * @param modelId - Optional default model ID for requests.
      * @returns A ResponsesClient instance.
-     * @throws Error - If the web service is not running.
      */
     public createResponsesClient(modelId?: string): ResponsesClient {
-        if (this._urls.length === 0) {
-            throw new Error(
-                'Web service is not running. Call startWebService() before creating a ResponsesClient.'
-            );
-        }
-        return new ResponsesClient(this._urls[0], modelId);
+        return ResponsesClient.createWithCoreInterop(this._urls[0], modelId, this.coreInterop);
     }
 }
diff --git a/sdk/js/src/imodel.ts b/sdk/js/src/imodel.ts
index 8f9bd0c14..259ed3052 100644
--- a/sdk/js/src/imodel.ts
+++ b/sdk/js/src/imodel.ts
@@ -36,11 +36,10 @@ export interface IModel {
     createLiveTranscriptionSession(): LiveAudioTranscriptionSession;
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * Unlike createChatClient/createAudioClient (which use FFI), the Responses API
-     * is HTTP-based, so the web service base URL must be provided.
-     * @param baseUrl - The base URL of the Foundry Local web service.
+     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
+     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
      */
-    createResponsesClient(baseUrl: string): ResponsesClient;
+    createResponsesClient(baseUrl?: string): ResponsesClient;
 
     /**
      * Variants of the model that are available. Variants of the model are optimized for different devices.
diff --git a/sdk/js/src/openai/responsesClient.ts b/sdk/js/src/openai/responsesClient.ts
index f7e033b08..efdb2fb77 100644
--- a/sdk/js/src/openai/responsesClient.ts
+++ b/sdk/js/src/openai/responsesClient.ts
@@ -12,9 +12,27 @@ import {
     ListResponsesResult,
     ListResponsesOptions,
     ResponseInputItem,
+    ResponseOutputItem,
     MessageItem,
     ContentPart,
+    FunctionCallItem,
+    FunctionCallOutputItem,
+    InputImageContent,
+    OutputTextContent,
 } from '../types.js';
+import { randomUUID } from 'crypto';
+
+interface ResponsesCoreInterop {
+    executeCommand(command: string, params?: any): string;
+    executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<string>;
+}
+
+interface StoredFfiResponse {
+    response: ResponseObject;
+    input: ResponseInputItem[];
+}
+
+class ResponsesCallbackError extends Error { }
 
 /**
  * Extracts the text content from an assistant message in a Response.
@@ -97,11 +115,12 @@ export class ResponsesClientSettings {
 }
 
 /**
- * Client for the OpenAI Responses API served by Foundry Local's embedded web service.
+ * Client for the OpenAI Responses API.
  *
- * Unlike ChatClient/AudioClient (which use FFI via CoreInterop), the Responses API
- * is HTTP-only. This client uses fetch() for all operations and parses Server-Sent Events
- * for streaming.
+ * When created by FoundryLocalManager or IModel factories, create/createStreaming use
+ * the native FFI path where possible and fall back to HTTP when the web service is
+ * available. Stored-response operations (get/delete/cancel/input_items/list) use the
+ * in-memory FFI store for FFI-created responses and HTTP for server-backed responses.
  *
  * Create via `FoundryLocalManager.createResponsesClient()` or
  * `model.createResponsesClient(baseUrl)`.
@@ -125,8 +144,10 @@ export class ResponsesClientSettings {
  * ```
  */
 export class ResponsesClient {
-    private baseUrl: string;
+    private baseUrl?: string;
     private modelId?: string;
+    private coreInterop?: ResponsesCoreInterop;
+    private readonly ffiStore = new Map<string, StoredFfiResponse>();
 
     /**
      * Configuration settings for responses.
@@ -138,7 +159,31 @@ export class ResponsesClient {
      * @param modelId - Optional default model ID. Can be overridden per-request via options.
      */
     constructor(baseUrl: string, modelId?: string) {
-        if (!baseUrl || typeof baseUrl !== 'string' || baseUrl.trim() === '') {
+        this.initialize(baseUrl, modelId);
+    }
+
+    /**
+     * @internal
+     * Creates a hybrid client that can use FFI first and HTTP as a fallback.
+     */
+    public static createWithCoreInterop(
+        baseUrl: string | undefined,
+        modelId: string | undefined,
+        coreInterop: ResponsesCoreInterop
+    ): ResponsesClient {
+        const client = new ResponsesClient(baseUrl?.trim() ? baseUrl : 'http://127.0.0.1', modelId);
+        client.coreInterop = coreInterop;
+        if (baseUrl === undefined || baseUrl.trim() === '') {
+            client.baseUrl = undefined;
+        }
+        return client;
+    }
+
+    private initialize(baseUrl: string, modelId?: string): void {
+        if (baseUrl === null || baseUrl === undefined || typeof baseUrl !== 'string') {
+            throw new Error('baseUrl must be a non-empty string.');
+        }
+        if (baseUrl.trim() === '') {
             throw new Error('baseUrl must be a non-empty string.');
         }
         // Strip trailing slashes for consistent URL construction
@@ -173,11 +218,25 @@ export class ResponsesClient {
 
         const body = this.buildRequest(input, { ...options, stream: false });
 
-        const response = await this.fetchJson<ResponseObject>(
+        if (this.coreInterop) {
+            try {
+                const response = await this.createViaFfi(body);
+                this.storeFfiResponseIfNeeded(response, body);
+                return response;
+            } catch (e) {
+                if (!this.baseUrl) {
+                    throw new Error(
+                        `Responses FFI create failed and no HTTP fallback is available: ${e instanceof Error ? e.message : String(e)}`,
+                        { cause: e }
+                    );
+                }
+            }
+        }
+
+        return this.fetchJson<ResponseObject>(
             '/v1/responses',
             { method: 'POST', body: JSON.stringify(body) }
         );
-        return response;
     }
 
     /**
@@ -201,6 +260,24 @@ export class ResponsesClient {
 
         const body = this.buildRequest(input, { ...options, stream: true });
 
+        if (this.coreInterop) {
+            try {
+                const response = await this.createStreamingViaFfi(body, callback);
+                this.storeFfiResponseIfNeeded(response, body);
+                return;
+            } catch (e) {
+                if (e instanceof ResponsesCallbackError) {
+                    throw e;
+                }
+                if (!this.baseUrl) {
+                    throw new Error(
+                        `Responses FFI streaming create failed and no HTTP fallback is available: ${e instanceof Error ? e.message : String(e)}`,
+                        { cause: e }
+                    );
+                }
+            }
+        }
+
         const res = await this.doFetch('/v1/responses', {
             method: 'POST',
             headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream' },
@@ -238,6 +315,10 @@ export class ResponsesClient {
      */
     public async get(responseId: string): Promise<ResponseObject> {
         this.validateId(responseId, 'responseId');
+        const stored = this.ffiStore.get(responseId);
+        if (stored) {
+            return stored.response;
+        }
         return this.fetchJson<ResponseObject>(
             `/v1/responses/${encodeURIComponent(responseId)}`,
             { method: 'GET' }
@@ -251,6 +332,9 @@ export class ResponsesClient {
      */
     public async delete(responseId: string): Promise<DeleteResponseResult> {
         this.validateId(responseId, 'responseId');
+        if (this.ffiStore.delete(responseId)) {
+            return { id: responseId, object: 'response.deleted', deleted: true };
+        }
         return this.fetchJson<DeleteResponseResult>(
             `/v1/responses/${encodeURIComponent(responseId)}`,
             { method: 'DELETE' }
@@ -264,6 +348,10 @@ export class ResponsesClient {
      */
     public async cancel(responseId: string): Promise<ResponseObject> {
         this.validateId(responseId, 'responseId');
+        const stored = this.ffiStore.get(responseId);
+        if (stored) {
+            throw new Error('Cancellation is only supported for HTTP-backed Responses API operations.');
+        }
         return this.fetchJson<ResponseObject>(
             `/v1/responses/${encodeURIComponent(responseId)}/cancel`,
             { method: 'POST' }
@@ -277,6 +365,10 @@ export class ResponsesClient {
      */
     public async getInputItems(responseId: string): Promise<InputItemsListResponse> {
         this.validateId(responseId, 'responseId');
+        const stored = this.ffiStore.get(responseId);
+        if (stored) {
+            return { object: 'list', data: stored.input };
+        }
         return this.fetchJson<InputItemsListResponse>(
             `/v1/responses/${encodeURIComponent(responseId)}/input_items`,
             { method: 'GET' }
@@ -295,9 +387,34 @@ export class ResponsesClient {
         if (options?.order !== undefined) query.set('order', options.order);
         if (options?.after !== undefined) query.set('after', options.after);
 
+        if (!this.baseUrl) {
+            return this.listFfiResponses(options);
+        }
+
         const queryString = query.toString();
         const path = queryString ? `/v1/responses?${queryString}` : '/v1/responses';
-        return this.fetchJson<ListResponsesResult>(path, { method: 'GET' });
+        const serverResult = await this.fetchJson<ListResponsesResult>(path, { method: 'GET' });
+        const localResult = this.listFfiResponses(options);
+        if (localResult.data.length === 0) {
+            return serverResult;
+        }
+
+        const localIds = new Set(localResult.data.map((response) => response.id));
+        const mergedData = [...localResult.data, ...serverResult.data.filter((response) => !localIds.has(response.id))];
+        const order = options?.order ?? 'desc';
+        mergedData.sort((a, b) =>
+            order === 'asc' ? a.created_at - b.created_at : b.created_at - a.created_at
+        );
+        const limit = options?.limit ?? 20;
+        const limitedData = mergedData.slice(0, limit);
+
+        return {
+            object: 'list',
+            data: limitedData,
+            first_id: limitedData[0]?.id ?? null,
+            last_id: limitedData[limitedData.length - 1]?.id ?? null,
+            has_more: Boolean(serverResult.has_more || localResult.has_more || mergedData.length > limitedData.length),
+        };
     }
 
     // ========================================================================
@@ -392,6 +509,598 @@ export class ResponsesClient {
         }
     }
 
+    /**
+     * Creates a response through the native chat_completions FFI command.
+     */
+    private async createViaFfi(body: ResponseCreateParams): Promise<ResponseObject> {
+        if (!this.coreInterop) {
+            throw new Error('Responses FFI transport is not available.');
+        }
+
+        const chatRequest = this.buildChatCompletionRequest(body, false);
+        const raw = this.coreInterop.executeCommand('chat_completions', {
+            Params: {
+                OpenAICreateRequest: JSON.stringify(chatRequest),
+            },
+        });
+        const chatResponse = this.parseFfiJson(raw, 'chat_completions');
+        return this.mapChatCompletionToResponse(chatResponse, body);
+    }
+
+    /**
+     * Creates a streaming response through the native chat_completions FFI command.
+     */
+    private async createStreamingViaFfi(
+        body: ResponseCreateParams,
+        callback: (event: StreamingEvent) => void
+    ): Promise<ResponseObject> {
+        if (!this.coreInterop) {
+            throw new Error('Responses FFI transport is not available.');
+        }
+        if (body.tools && body.tools.length > 0) {
+            throw new Error('Responses FFI streaming does not currently support tool calls.');
+        }
+
+        const responseId = this.createResponseId();
+        const outputItemId = this.createItemId('msg');
+        const chatRequest = this.buildChatCompletionRequest(body, true);
+        const outputText: OutputTextContent = { type: 'output_text', text: '' };
+        const messageItem: MessageItem = {
+            id: outputItemId,
+            type: 'message',
+            role: 'assistant',
+            content: [outputText],
+            status: 'in_progress',
+        };
+
+        let sequence = 0;
+        let contentPartStarted = false;
+        let callbackError: ResponsesCallbackError | null = null;
+        const response = this.createBaseResponse(body, responseId, [], 'in_progress');
+        const emit = (event: any): void => {
+            if (callbackError) return;
+            try {
+                callback({ ...event, sequence_number: sequence++ } as StreamingEvent);
+            } catch (e) {
+                callbackError = new ResponsesCallbackError(
+                    `User callback threw an error: ${e instanceof Error ? e.message : String(e)}`
+                );
+                (callbackError as Error).cause = e;
+            }
+        };
+
+        emit({ type: 'response.created', response });
+        emit({ type: 'response.in_progress', response });
+
+        const processChunk = (chunk: string): void => {
+            if (callbackError) return;
+            const parsed = this.parseStreamingFfiChunk(chunk);
+            if (!parsed) return;
+
+            const choices = Array.isArray(parsed.choices) ? parsed.choices : [];
+            for (const choice of choices) {
+                const delta = choice?.delta ?? choice?.Delta ?? {};
+                const content = this.extractDeltaContent(delta);
+                if (!content) continue;
+
+                if (!contentPartStarted) {
+                    emit({
+                        type: 'response.output_item.added',
+                        item_id: outputItemId,
+                        output_index: 0,
+                        item: messageItem,
+                    });
+                    emit({
+                        type: 'response.content_part.added',
+                        item_id: outputItemId,
+                        output_index: 0,
+                        content_index: 0,
+                        part: outputText,
+                    });
+                    contentPartStarted = true;
+                }
+
+                outputText.text += content;
+                emit({
+                    type: 'response.output_text.delta',
+                    item_id: outputItemId,
+                    output_index: 0,
+                    content_index: 0,
+                    delta: content,
+                });
+            }
+        };
+
+        await this.coreInterop.executeCommandStreaming(
+            'chat_completions',
+            {
+                Params: {
+                    OpenAICreateRequest: JSON.stringify(chatRequest),
+                },
+            },
+            processChunk
+        );
+
+        if (callbackError) {
+            throw callbackError;
+        }
+
+        const finalMessage: MessageItem = {
+            ...messageItem,
+            content: [outputText],
+            status: 'completed',
+        };
+        const completedResponse = this.createBaseResponse(
+            body,
+            responseId,
+            contentPartStarted ? [finalMessage] : [],
+            'completed'
+        );
+        completedResponse.completed_at = Math.floor(Date.now() / 1000);
+
+        if (contentPartStarted) {
+            emit({
+                type: 'response.output_text.done',
+                item_id: outputItemId,
+                output_index: 0,
+                content_index: 0,
+                text: outputText.text,
+            });
+            emit({
+                type: 'response.content_part.done',
+                item_id: outputItemId,
+                output_index: 0,
+                content_index: 0,
+                part: outputText,
+            });
+            emit({
+                type: 'response.output_item.done',
+                item_id: outputItemId,
+                output_index: 0,
+                item: finalMessage,
+            });
+        }
+        emit({ type: 'response.completed', response: completedResponse });
+
+        return completedResponse;
+    }
+
+    private buildChatCompletionRequest(body: ResponseCreateParams, stream: boolean): Record<string, unknown> {
+        const request: Record<string, unknown> = {
+            model: body.model,
+            messages: this.convertResponseInputToChatMessages(body),
+            stream,
+        };
+
+        if (body.temperature !== undefined) request.temperature = body.temperature;
+        if (body.top_p !== undefined) request.top_p = body.top_p;
+        if (body.max_output_tokens !== undefined) request.max_tokens = body.max_output_tokens;
+        if (body.frequency_penalty !== undefined) request.frequency_penalty = body.frequency_penalty;
+        if (body.presence_penalty !== undefined) request.presence_penalty = body.presence_penalty;
+        if (body.seed !== undefined) request.seed = body.seed;
+        if (body.metadata !== undefined) request.metadata = body.metadata;
+        if (body.parallel_tool_calls !== undefined) request.parallel_tool_calls = body.parallel_tool_calls;
+        if (body.tools !== undefined) request.tools = body.tools.map((tool) => this.convertResponseToolToChatTool(tool));
+        if (body.tool_choice !== undefined) request.tool_choice = this.convertResponseToolChoiceToChatToolChoice(body.tool_choice);
+        const responseFormat = this.convertTextConfigToChatResponseFormat(body.text);
+        if (responseFormat !== undefined) request.response_format = responseFormat;
+
+        return request;
+    }
+
+    private convertResponseInputToChatMessages(body: ResponseCreateParams): any[] {
+        const messages: any[] = [];
+        if (body.instructions) {
+            messages.push({ role: 'system', content: body.instructions });
+        }
+        if (body.previous_response_id) {
+            const previousResponse = this.ffiStore.get(body.previous_response_id);
+            if (!previousResponse) {
+                throw new Error(
+                    `Responses FFI store does not contain previous_response_id: ${body.previous_response_id}`
+                );
+            }
+            messages.push(...this.convertStoredFfiResponseToChatMessages(previousResponse));
+        }
+
+        const input = body.input;
+        if (typeof input === 'string') {
+            messages.push({ role: 'user', content: input });
+            return messages;
+        }
+
+        if (!Array.isArray(input)) {
+            throw new Error('Responses FFI create requires string input or an array of input items.');
+        }
+
+        for (const item of input) {
+            if (item.type === 'message') {
+                messages.push({
+                    role: this.convertResponseRoleToChatRole(item.role),
+                    content: this.convertContentToChatContent(item.content),
+                });
+            } else if (item.type === 'function_call') {
+                messages.push(this.convertFunctionCallItemToChatMessage(item));
+            } else if (item.type === 'function_call_output') {
+                messages.push(this.convertFunctionCallOutputItemToChatMessage(item));
+            } else if (item.type === 'item_reference') {
+                const stored = this.ffiStore.get(item.id);
+                if (!stored) {
+                    throw new Error(`Responses FFI store does not contain referenced item: ${item.id}`);
+                }
+                messages.push(...this.convertResponseInputToChatMessages({ ...body, input: stored.input }));
+            }
+        }
+
+        if (messages.length === 0) {
+            throw new Error('Responses FFI create requires at least one message input item.');
+        }
+        return messages;
+    }
+
+    private convertStoredFfiResponseToChatMessages(stored: StoredFfiResponse): any[] {
+        const messages = this.convertResponseInputToChatMessages({
+            input: stored.input,
+            model: stored.response.model,
+        });
+        for (const item of stored.response.output) {
+            if (item.type === 'message') {
+                messages.push({
+                    role: this.convertResponseRoleToChatRole(item.role),
+                    content: this.convertContentToChatContent(item.content),
+                });
+            } else if (item.type === 'function_call') {
+                messages.push(this.convertFunctionCallItemToChatMessage(item));
+            }
+        }
+        return messages;
+    }
+
+    private convertResponseRoleToChatRole(role: MessageItem['role']): string {
+        if (role === 'developer') return 'system';
+        return role;
+    }
+
+    private convertContentToChatContent(content: string | ContentPart[]): any {
+        if (typeof content === 'string') {
+            return content;
+        }
+
+        const parts: any[] = [];
+        for (const part of content) {
+            if (part.type === 'input_text') {
+                parts.push({ type: 'text', text: part.text });
+            } else if (part.type === 'output_text') {
+                parts.push({ type: 'text', text: part.text });
+            } else if (part.type === 'input_image') {
+                parts.push({ type: 'image_url', image_url: this.convertInputImageToChatImageUrl(part) });
+            } else if (part.type === 'refusal') {
+                parts.push({ type: 'text', text: part.refusal });
+            } else {
+                throw new Error(`Responses FFI create does not support content part type: ${part.type}`);
+            }
+        }
+
+        if (parts.length === 0) {
+            return '';
+        }
+        if (parts.every((part) => part.type === 'text')) {
+            return parts.map((part) => part.text).join('');
+        }
+        return parts;
+    }
+
+    private convertInputImageToChatImageUrl(part: InputImageContent): any {
+        if (part.image_url) {
+            return part.detail ? { url: part.image_url, detail: part.detail } : { url: part.image_url };
+        }
+        if (part.image_data) {
+            if (!part.media_type) {
+                throw new Error('Responses FFI create requires media_type when image_data is provided.');
+            }
+            const url = `data:${part.media_type};base64,${part.image_data}`;
+            return part.detail ? { url, detail: part.detail } : { url };
+        }
+        throw new Error('Responses FFI create requires input_image to include image_url or image_data.');
+    }
+
+    private convertFunctionCallItemToChatMessage(item: FunctionCallItem): any {
+        return {
+            role: 'assistant',
+            content: null,
+            tool_calls: [{
+                id: item.call_id,
+                type: 'function',
+                function: {
+                    name: item.name,
+                    arguments: item.arguments,
+                },
+            }],
+        };
+    }
+
+    private convertFunctionCallOutputItemToChatMessage(item: FunctionCallOutputItem): any {
+        return {
+            role: 'tool',
+            tool_call_id: item.call_id,
+            content: this.convertContentPartOutputToText(item.output),
+        };
+    }
+
+    private convertContentPartOutputToText(output: string | ContentPart[]): string {
+        if (typeof output === 'string') {
+            return output;
+        }
+        return output.map((part) => {
+            if (part.type === 'input_text' || part.type === 'output_text') return part.text;
+            if (part.type === 'refusal') return part.refusal;
+            if (part.type === 'input_image') return part.image_url ?? '[image]';
+            if (part.type === 'input_file') return part.file_url;
+            return '';
+        }).join('');
+    }
+
+    private convertResponseToolToChatTool(tool: FunctionToolDefinition): Record<string, unknown> {
+        return {
+            type: 'function',
+            function: {
+                name: tool.name,
+                description: tool.description,
+                parameters: tool.parameters,
+                strict: tool.strict,
+            },
+        };
+    }
+
+    private convertResponseToolChoiceToChatToolChoice(toolChoice: ResponseToolChoice): unknown {
+        if (typeof toolChoice === 'string') {
+            return toolChoice;
+        }
+        if (toolChoice?.type === 'function') {
+            return {
+                type: 'function',
+                function: { name: toolChoice.name },
+            };
+        }
+        return toolChoice;
+    }
+
+    private convertTextConfigToChatResponseFormat(text?: TextConfig): unknown {
+        const format = text?.format;
+        if (!format) {
+            return undefined;
+        }
+        if (format.type === 'json_schema') {
+            return {
+                type: 'json_schema',
+                json_schema: {
+                    name: format.name,
+                    description: format.description,
+                    schema: format.schema,
+                    strict: format.strict,
+                },
+            };
+        }
+        if (format.type === 'json_object') {
+            return { type: 'json_object' };
+        }
+        return { type: format.type };
+    }
+
+    private parseFfiJson(raw: string, command: string): any {
+        try {
+            const parsed = JSON.parse(raw);
+            if (parsed?.Successful === false || parsed?.successful === false) {
+                const error = parsed?.ErrorMessage ?? parsed?.errorMessage ?? parsed?.Error ?? parsed?.error ?? raw;
+                throw new Error(String(error));
+            }
+            return parsed;
+        } catch (e) {
+            if (e instanceof SyntaxError) {
+                throw new Error(`Failed to parse ${command} FFI response JSON: ${raw.substring(0, 200)}`, { cause: e });
+            }
+            throw e;
+        }
+    }
+
+    private mapChatCompletionToResponse(chatResponse: any, body: ResponseCreateParams): ResponseObject {
+        const responseId = this.createResponseId(chatResponse?.id);
+        const createdAt = this.normalizeTimestamp(chatResponse?.created ?? chatResponse?.Created);
+        const choices = Array.isArray(chatResponse?.choices) ? chatResponse.choices : [];
+        const firstChoice = choices[0] ?? {};
+        const message = firstChoice.message ?? firstChoice.Message ?? {};
+        const output = this.convertChatMessageToResponseOutput(message);
+        const response = this.createBaseResponse(body, responseId, output, 'completed');
+        response.created_at = createdAt;
+        response.completed_at = Math.floor(Date.now() / 1000);
+        response.usage = this.mapUsage(chatResponse?.usage ?? chatResponse?.Usage);
+        return response;
+    }
+
+    private convertChatMessageToResponseOutput(message: any): ResponseOutputItem[] {
+        const toolCalls = message?.tool_calls ?? message?.ToolCalls;
+        if (Array.isArray(toolCalls) && toolCalls.length > 0) {
+            return toolCalls.map((toolCall: any) => ({
+                type: 'function_call',
+                id: this.createItemId('fc'),
+                call_id: toolCall.id ?? toolCall.Id ?? this.createItemId('call'),
+                name: toolCall.function?.name ?? toolCall.Function?.Name ?? '',
+                arguments: toolCall.function?.arguments ?? toolCall.Function?.Arguments ?? '',
+                status: 'completed',
+            }));
+        }
+
+        const content = this.extractMessageContent(message);
+        if (content === undefined) {
+            return [];
+        }
+
+        const item: MessageItem = {
+            type: 'message',
+            id: this.createItemId('msg'),
+            role: 'assistant',
+            content: [{ type: 'output_text', text: content }],
+            status: 'completed',
+        };
+        return [item];
+    }
+
+    private extractMessageContent(message: any): string | undefined {
+        const content = message?.content ?? message?.Content;
+        if (typeof content === 'string') {
+            return content;
+        }
+        if (Array.isArray(content)) {
+            return content.map((part) => {
+                if (typeof part === 'string') return part;
+                if (typeof part?.text === 'string') return part.text;
+                if (typeof part?.Text === 'string') return part.Text;
+                return '';
+            }).join('');
+        }
+        return undefined;
+    }
+
+    private mapUsage(usage: any): ResponseObject['usage'] {
+        if (!usage) {
+            return null;
+        }
+        const inputTokens = usage.prompt_tokens ?? usage.PromptTokens ?? usage.input_tokens ?? 0;
+        const outputTokens = usage.completion_tokens ?? usage.CompletionTokens ?? usage.output_tokens ?? 0;
+        return {
+            input_tokens: inputTokens,
+            output_tokens: outputTokens,
+            total_tokens: usage.total_tokens ?? usage.TotalTokens ?? inputTokens + outputTokens,
+        };
+    }
+
+    private createBaseResponse(
+        body: ResponseCreateParams,
+        id: string,
+        output: ResponseOutputItem[],
+        status: ResponseObject['status']
+    ): ResponseObject {
+        return {
+            id,
+            object: 'response',
+            created_at: Math.floor(Date.now() / 1000),
+            completed_at: status === 'completed' ? Math.floor(Date.now() / 1000) : null,
+            failed_at: null,
+            cancelled_at: null,
+            status,
+            incomplete_details: null,
+            model: body.model ?? this.modelId ?? '',
+            previous_response_id: body.previous_response_id ?? null,
+            instructions: body.instructions ?? null,
+            output,
+            error: null,
+            tools: body.tools ?? [],
+            tool_choice: body.tool_choice ?? 'auto',
+            truncation: body.truncation ?? 'disabled',
+            parallel_tool_calls: body.parallel_tool_calls ?? false,
+            text: body.text ?? {},
+            top_p: body.top_p ?? 1,
+            temperature: body.temperature ?? 1,
+            presence_penalty: body.presence_penalty ?? 0,
+            frequency_penalty: body.frequency_penalty ?? 0,
+            max_output_tokens: body.max_output_tokens ?? null,
+            reasoning: body.reasoning ?? null,
+            store: body.store ?? true,
+            metadata: body.metadata ?? null,
+            usage: null,
+            user: body.user ?? null,
+        };
+    }
+
+    private storeFfiResponseIfNeeded(response: ResponseObject, body: ResponseCreateParams): void {
+        if (body.store === false) {
+            return;
+        }
+        this.ffiStore.set(response.id, {
+            response,
+            input: this.normalizeResponseInputItems(body.input),
+        });
+    }
+
+    private normalizeResponseInputItems(input: ResponseCreateParams['input']): ResponseInputItem[] {
+        if (typeof input === 'string') {
+            return [{
+                type: 'message',
+                role: 'user',
+                content: input,
+                status: 'completed',
+            }];
+        }
+        return Array.isArray(input) ? input : [];
+    }
+
+    private listFfiResponses(options?: ListResponsesOptions): ListResponsesResult {
+        const order = options?.order ?? 'desc';
+        const limit = options?.limit ?? 20;
+        let data = Array.from(this.ffiStore.values()).map((entry) => entry.response);
+        data.sort((a, b) => order === 'asc' ? a.created_at - b.created_at : b.created_at - a.created_at);
+
+        if (options?.after) {
+            const afterIndex = data.findIndex((response) => response.id === options.after);
+            data = afterIndex >= 0 ? data.slice(afterIndex + 1) : [];
+        }
+
+        const limitedData = data.slice(0, limit);
+        return {
+            object: 'list',
+            data: limitedData,
+            first_id: limitedData[0]?.id ?? null,
+            last_id: limitedData[limitedData.length - 1]?.id ?? null,
+            has_more: data.length > limitedData.length,
+        };
+    }
+
+    private parseStreamingFfiChunk(chunk: string): any | undefined {
+        const trimmed = chunk.trim();
+        if (!trimmed || trimmed === '[DONE]') {
+            return undefined;
+        }
+        const json = trimmed.startsWith('data: ') ? trimmed.slice(6).trim() : trimmed;
+        if (!json || json === '[DONE]') {
+            return undefined;
+        }
+        return JSON.parse(json);
+    }
+
+    private extractDeltaContent(delta: any): string {
+        const content = delta?.content ?? delta?.Content;
+        if (typeof content === 'string') {
+            return content;
+        }
+        if (Array.isArray(content)) {
+            return content.map((part) => {
+                if (typeof part === 'string') return part;
+                if (typeof part?.text === 'string') return part.text;
+                if (typeof part?.Text === 'string') return part.Text;
+                return '';
+            }).join('');
+        }
+        return '';
+    }
+
+    private createResponseId(sourceId?: string): string {
+        if (sourceId?.startsWith('resp_')) {
+            return sourceId;
+        }
+        return `resp_${sourceId ?? randomUUID()}`;
+    }
+
+    private createItemId(prefix: string): string {
+        return `${prefix}_${randomUUID()}`;
+    }
+
+    private normalizeTimestamp(value: unknown): number {
+        if (typeof value === 'number' && Number.isFinite(value)) {
+            return value;
+        }
+        return Math.floor(Date.now() / 1000);
+    }
+
     /**
      * Performs a fetch and parses the JSON response, handling errors.
      */
@@ -416,6 +1125,9 @@ export class ResponsesClient {
      * Low-level fetch wrapper with error handling.
      */
     private async doFetch(path: string, init: RequestInit): Promise<Response> {
+        if (!this.baseUrl) {
+            throw new Error('Responses HTTP transport is not available. Start the Foundry Local web service or create the client with a baseUrl.');
+        }
         const url = `${this.baseUrl}${path}`;
         let res: Response;
         try {
diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts
index 25bb35f67..1b329d59d 100644
--- a/sdk/js/test/openai/responsesClient.test.ts
+++ b/sdk/js/test/openai/responsesClient.test.ts
@@ -517,6 +517,153 @@ describe('ResponsesClient Tests', () => {
         });
     });
 
+    // ========================================================================
+    // FFI transport with HTTP fallback
+    // ========================================================================
+
+    describe('FFI transport', () => {
+        class FakeCoreInterop {
+            public commands: Array<{ command: string; params?: any }> = [];
+            public streamingCommands: Array<{ command: string; params?: any }> = [];
+
+            constructor(
+                private readonly response: string = JSON.stringify({
+                    id: 'chatcmpl_test',
+                    created: 123,
+                    choices: [{ message: { content: 'Hello from FFI' } }],
+                    usage: { prompt_tokens: 2, completion_tokens: 3, total_tokens: 5 },
+                }),
+                private readonly streamingChunks: string[] = []
+            ) { }
+
+            executeCommand(command: string, params?: any): string {
+                this.commands.push({ command, params });
+                return this.response;
+            }
+
+            async executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<string> {
+                this.streamingCommands.push({ command, params });
+                for (const chunk of this.streamingChunks) {
+                    callback(chunk);
+                }
+                return '{}';
+            }
+        }
+
+        it('should use chat_completions FFI for create and store the response locally', async () => {
+            const fakeCore = new FakeCoreInterop();
+            const originalFetch = globalThis.fetch;
+            let fetchCalled = false;
+            globalThis.fetch = async (): Promise<Response> => {
+                fetchCalled = true;
+                throw new Error('fetch should not be called');
+            };
+
+            try {
+                const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
+                const response = await client.create('Hello', { store: true });
+                const request = JSON.parse(fakeCore.commands[0].params.Params.OpenAICreateRequest);
+
+                expect(fetchCalled).to.be.false;
+                expect(fakeCore.commands[0].command).to.equal('chat_completions');
+                expect(request.model).to.equal('test-model');
+                expect(request.messages).to.deep.equal([{ role: 'user', content: 'Hello' }]);
+                expect(request.stream).to.equal(false);
+                expect(getOutputText(response)).to.equal('Hello from FFI');
+                expect(response.id).to.equal('resp_chatcmpl_test');
+                expect(response.usage?.total_tokens).to.equal(5);
+
+                const stored = await client.get(response.id);
+                expect(stored.id).to.equal(response.id);
+                const listed = await client.list();
+                expect(listed.data.map((item) => item.id)).to.deep.equal([response.id]);
+            } finally {
+                globalThis.fetch = originalFetch;
+            }
+        });
+
+        it('should convert vision input to chat image_url content for FFI', async () => {
+            const fakeCore = new FakeCoreInterop();
+            const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
+
+            await client.create([
+                {
+                    type: 'message',
+                    role: 'user',
+                    content: [
+                        { type: 'input_text', text: 'Describe this image.' },
+                        { type: 'input_image', image_data: 'abc123', media_type: 'image/png', detail: 'low' },
+                    ],
+                },
+            ]);
+
+            const request = JSON.parse(fakeCore.commands[0].params.Params.OpenAICreateRequest);
+            expect(request.messages[0].content[0]).to.deep.equal({ type: 'text', text: 'Describe this image.' });
+            expect(request.messages[0].content[1]).to.deep.equal({
+                type: 'image_url',
+                image_url: { url: 'data:image/png;base64,abc123', detail: 'low' },
+            });
+        });
+
+        it('should fall back to HTTP create when FFI create fails and baseUrl is available', async () => {
+            const fakeCore = {
+                executeCommand: () => {
+                    throw new Error('ffi unavailable');
+                },
+                executeCommandStreaming: async () => '{}',
+            };
+            const mockResponse: ResponseObject = {
+                id: 'resp_http', object: 'response', created_at: 1, status: 'completed',
+                model: 'test-model', output: [],
+                tools: [], tool_choice: 'auto', truncation: 'disabled',
+                parallel_tool_calls: false, text: {}, top_p: 1, temperature: 1,
+                presence_penalty: 0, frequency_penalty: 0, store: true,
+            };
+            const originalFetch = globalThis.fetch;
+            let capturedUrl: string | URL | Request | undefined;
+            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
+                capturedUrl = url;
+                return new Response(JSON.stringify(mockResponse), {
+                    status: 200,
+                    headers: { 'Content-Type': 'application/json' },
+                });
+            };
+
+            try {
+                const client = ResponsesClient.createWithCoreInterop('http://test-host', 'test-model', fakeCore as any);
+                const response = await client.create('Hello');
+                expect(response.id).to.equal('resp_http');
+                expect(String(capturedUrl)).to.equal('http://test-host/v1/responses');
+            } finally {
+                globalThis.fetch = originalFetch;
+            }
+        });
+
+        it('should stream response events through FFI', async () => {
+            const fakeCore = new FakeCoreInterop('{}', [
+                JSON.stringify({ choices: [{ delta: { content: 'Hel' } }] }),
+                JSON.stringify({ choices: [{ delta: { content: 'lo' } }] }),
+            ]);
+            const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
+            const events: StreamingEvent[] = [];
+
+            await client.createStreaming('Hello', (event) => events.push(event));
+
+            expect(fakeCore.streamingCommands[0].command).to.equal('chat_completions');
+            expect(events.map((event) => event.type)).to.include.members([
+                'response.created',
+                'response.in_progress',
+                'response.output_text.delta',
+                'response.output_text.done',
+                'response.completed',
+            ]);
+            const deltas = events
+                .filter((event) => event.type === 'response.output_text.delta')
+                .map((event: any) => event.delta);
+            expect(deltas.join('')).to.equal('Hello');
+        });
+    });
+
     // ========================================================================
     // Reasoning streaming event types
     // ========================================================================

From edf1db7019a490ebd06191d4de282f7b686f4caa Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 30 Apr 2026 16:14:42 -0400
Subject: [PATCH 06/10] refactor(sdk/js): focus responses PR on web service
 usage

Revert the SDK API surface changes from this branch and add a JavaScript web-service Responses sample plus integration coverage for direct /v1/responses calls, streaming, and tool calling.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 samples/README.md                             |   2 +-
 samples/js/README.md                          |   1 +
 samples/js/web-server-responses/app.js        | 149 ++++
 samples/js/web-server-responses/package.json  |  16 +
 sdk/js/README.md                              |  41 +-
 sdk/js/examples/responses.ts                  |  44 +-
 sdk/js/src/detail/model.ts                    |   7 +-
 sdk/js/src/detail/modelVariant.ts             |   7 +-
 sdk/js/src/foundryLocalManager.ts             |  11 +-
 sdk/js/src/imodel.ts                          |   7 +-
 sdk/js/src/index.ts                           |   1 -
 sdk/js/src/openai/responsesClient.ts          | 755 +-----------------
 sdk/js/src/openai/vision.ts                   | 159 ----
 sdk/js/src/types.ts                           | 119 +--
 sdk/js/test/openai/responsesClient.test.ts    | 445 +----------
 .../test/openai/responsesWebService.test.ts   | 202 +++++
 16 files changed, 404 insertions(+), 1562 deletions(-)
 create mode 100644 samples/js/web-server-responses/app.js
 create mode 100644 samples/js/web-server-responses/package.json
 delete mode 100644 sdk/js/src/openai/vision.ts
 create mode 100644 sdk/js/test/openai/responsesWebService.test.ts

diff --git a/samples/README.md b/samples/README.md
index bcac6bf3a..57de2e8bd 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -9,6 +9,6 @@ Explore complete working examples that demonstrate how to use Foundry Local —
 | Language | Samples | Description |
 |----------|---------|-------------|
 | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. |
-| [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. |
+| [**JavaScript**](js/) | 14 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, Responses API, and tutorials. |
 | [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. |
 | [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. |
diff --git a/samples/js/README.md b/samples/js/README.md
index d334555c3..bd9804015 100644
--- a/samples/js/README.md
+++ b/samples/js/README.md
@@ -19,6 +19,7 @@ These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-
 | [langchain-integration-example](langchain-integration-example/) | LangChain.js integration for building text generation chains. |
 | [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with custom function definitions and streaming responses. |
 | [web-server-example](web-server-example/) | Start a local OpenAI-compatible web server and call it with the OpenAI SDK. |
+| [web-server-responses](web-server-responses/) | Start the local OpenAI-compatible web server and call the Responses API, including streaming and tool calling. |
 | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
 | [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
 | [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
diff --git a/samples/js/web-server-responses/app.js b/samples/js/web-server-responses/app.js
new file mode 100644
index 000000000..764a9d326
--- /dev/null
+++ b/samples/js/web-server-responses/app.js
@@ -0,0 +1,149 @@
+// <complete_code>
+// <imports>
+import { FoundryLocalManager } from 'foundry-local-sdk';
+import { OpenAI } from 'openai';
+// </imports>
+
+function getResponseText(response) {
+    if (typeof response.output_text === 'string') {
+        return response.output_text;
+    }
+
+    return (response.output ?? [])
+        .flatMap((item) => Array.isArray(item.content) ? item.content : [])
+        .filter((part) => part.type === 'output_text' && typeof part.text === 'string')
+        .map((part) => part.text)
+        .join('');
+}
+
+// <init>
+const endpointUrl = process.env.FOUNDRY_LOCAL_ENDPOINT ?? 'http://localhost:5764';
+const modelAlias = process.env.FOUNDRY_LOCAL_MODEL ?? 'qwen2.5-0.5b';
+
+console.log('Initializing Foundry Local SDK...');
+const manager = FoundryLocalManager.create({
+    appName: 'foundry_local_samples',
+    logLevel: 'info',
+    webServiceUrls: endpointUrl
+});
+console.log('SDK initialized successfully');
+
+let currentEp = '';
+await manager.downloadAndRegisterEps((epName, percent) => {
+    if (epName !== currentEp) {
+        if (currentEp !== '') process.stdout.write('\n');
+        currentEp = epName;
+    }
+    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
+});
+if (currentEp !== '') process.stdout.write('\n');
+// </init>
+
+let model;
+let webServiceStarted = false;
+
+try {
+    // <model_setup>
+    model = await manager.catalog.getModel(modelAlias);
+
+    console.log(`\nDownloading model ${modelAlias}...`);
+    await model.download((progress) => {
+        process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
+    });
+    console.log('\nModel downloaded');
+
+    console.log(`\nLoading model ${modelAlias}...`);
+    await model.load();
+    console.log(`Model loaded: ${model.id}`);
+    // </model_setup>
+
+    // <server_setup>
+    console.log(`\nStarting web service on ${endpointUrl}...`);
+    manager.startWebService();
+    webServiceStarted = true;
+    console.log('Web service started');
+
+    const openai = new OpenAI({
+        baseURL: endpointUrl + '/v1',
+        apiKey: 'notneeded',
+    });
+    // </server_setup>
+
+    // <<<<<< OPENAI RESPONSES SDK USAGE >>>>>>
+    console.log('\nTesting a non-streaming Responses call...');
+    const response = await openai.responses.create({
+        model: model.id,
+        input: 'Reply with one short sentence about local AI.',
+    });
+    console.log(`[ASSISTANT]: ${getResponseText(response)}`);
+
+    console.log('\nTesting a streaming Responses call...');
+    const stream = await openai.responses.create({
+        model: model.id,
+        input: 'Count from one to three.',
+        stream: true,
+    });
+
+    process.stdout.write('[ASSISTANT STREAM]: ');
+    for await (const event of stream) {
+        if (event.type === 'response.output_text.delta') {
+            process.stdout.write(event.delta);
+        }
+    }
+    process.stdout.write('\n');
+
+    console.log('\nTesting Responses tool calling...');
+    const tools = [
+        {
+            type: 'function',
+            name: 'get_weather',
+            description: 'Get the current weather. This sample always returns Seattle weather.',
+            parameters: {
+                type: 'object',
+                properties: {},
+                additionalProperties: false,
+            },
+        },
+    ];
+
+    const toolResponse = await openai.responses.create({
+        model: model.id,
+        input: 'Use the get_weather tool and then answer with the weather.',
+        tools,
+        tool_choice: 'required',
+        store: true,
+    });
+
+    const functionCall = toolResponse.output?.find((item) => item.type === 'function_call');
+    if (!functionCall) {
+        throw new Error('Expected the model to call get_weather.');
+    }
+
+    console.log(`[TOOL CALL]: ${functionCall.name}(${functionCall.arguments})`);
+
+    const finalResponse = await openai.responses.create({
+        model: model.id,
+        previous_response_id: toolResponse.id,
+        input: [
+            {
+                type: 'function_call_output',
+                call_id: functionCall.call_id,
+                output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
+            },
+        ],
+        tools,
+    });
+
+    console.log(`[ASSISTANT FINAL]: ${getResponseText(finalResponse)}`);
+    // <<<<<< END OPENAI RESPONSES SDK USAGE >>>>>>
+} finally {
+    console.log('\nCleaning up...');
+    if (webServiceStarted) {
+        manager.stopWebService();
+    }
+    if (model) {
+        await model.unload();
+    }
+    console.log('Done');
+}
+// </complete_code>
diff --git a/samples/js/web-server-responses/package.json b/samples/js/web-server-responses/package.json
new file mode 100644
index 000000000..6c8f2ff51
--- /dev/null
+++ b/samples/js/web-server-responses/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "web-server-responses",
+  "version": "1.0.0",
+  "type": "module",
+  "main": "app.js",
+  "scripts": {
+    "start": "node app.js"
+  },
+  "dependencies": {
+    "foundry-local-sdk": "latest",
+    "openai": "latest"
+  },
+  "optionalDependencies": {
+    "foundry-local-sdk-winml": "latest"
+  }
+}
diff --git a/sdk/js/README.md b/sdk/js/README.md
index 9b5a62164..26471cc8c 100644
--- a/sdk/js/README.md
+++ b/sdk/js/README.md
@@ -250,44 +250,6 @@ for await (const chunk of audioClient.transcribeStreaming('/path/to/audio.wav'))
 }
 ```
 
-### Responses API
-
-Use the Responses API client for OpenAI-compatible text, tool, streaming, stored-response, and vision workflows. Clients created from `FoundryLocalManager` or a model use native FFI for `create()` and `createStreaming()` when possible, with HTTP fallback when a web service URL is available.
-
-```typescript
-import { createImageContentFromFile, getOutputText } from 'foundry-local-sdk';
-
-// Optional: start the web service for HTTP fallback and server-backed list/get/delete/cancel operations.
-manager.startWebService();
-
-const client = manager.createResponsesClient(model.id);
-
-// Responses are stored by default so they can be retrieved or listed later.
-// Set store=false per client or request to opt out.
-client.settings.store = false;
-
-const response = await client.create('Tell me a short joke.');
-console.log(getOutputText(response));
-
-const storedResponses = await client.list({ limit: 10, order: 'desc' });
-console.log(storedResponses.has_more);
-
-const image = await createImageContentFromFile('/path/to/image.png');
-const visionResponse = await client.create([
-    {
-        type: 'message',
-        role: 'user',
-        content: [
-            { type: 'input_text', text: 'Describe this image.' },
-            image,
-        ],
-    },
-]);
-console.log(getOutputText(visionResponse));
-```
-
-Vision helpers support `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, and `.bmp` files. `createImageContentFromFile()` sends Foundry Local's `image_data` plus `media_type` contract; the FFI path converts that to the chat-completions data URL shape internally. `createImageContentFromUrl()` sends `image_url` and lets the runtime infer the media type.
-
 ### Embedded Web Service
 
 Start a local HTTP server that exposes an OpenAI-compatible API:
@@ -326,7 +288,6 @@ Auto-generated class documentation lives in [`docs/classes/`](docs/classes/):
 - [IModel](docs/README.md#imodel) — Model interface: variant selection, download, load, inference
 - [ChatClient](docs/classes/ChatClient.md) — Chat completions (sync and streaming)
 - [AudioClient](docs/classes/AudioClient.md) — Audio transcription (sync and streaming)
-- [ResponsesClient](docs/classes/ResponsesClient.md) — Responses API (text, streaming, tools, stored responses, vision)
 - [ModelLoadManager](docs/classes/ModelLoadManager.md) — Low-level model loading management
 
 ## Contributing: Building from Source
@@ -375,4 +336,4 @@ See `test/README.md` for details on prerequisites and setup.
 npm run example
 ```
 
-This runs the chat completion example in `examples/chat-completion.ts`.
+This runs the chat completion example in `examples/chat-completion.ts`.
diff --git a/sdk/js/examples/responses.ts b/sdk/js/examples/responses.ts
index ffafad2fa..fa8a6d937 100644
--- a/sdk/js/examples/responses.ts
+++ b/sdk/js/examples/responses.ts
@@ -3,11 +3,8 @@
 // Licensed under the MIT License.
 // -------------------------------------------------------------------------
 
-import * as fs from 'fs';
-import * as os from 'os';
-import * as path from 'path';
-import { FoundryLocalManager, getOutputText, createImageContentFromFile } from '../src/index.js';
-import type { StreamingEvent, FunctionToolDefinition, FunctionCallItem, MessageItem } from '../src/types.js';
+import { FoundryLocalManager, getOutputText } from '../src/index.js';
+import type { StreamingEvent, FunctionToolDefinition, FunctionCallItem } from '../src/types.js';
 
 async function main() {
     try {
@@ -124,43 +121,6 @@ async function main() {
         const deleted = await client.delete(stored.id);
         console.log(`Deleted: ${deleted.deleted}`);
 
-        // =================================================================
-        // Example 6: List all stored responses
-        // =================================================================
-        console.log('\n--- Example 6: List stored responses ---');
-        const allResponses = await client.list();
-        console.log(`Listed ${allResponses.data.length} stored responses`);
-
-        // =================================================================
-        // Example 7: Vision — describe an image
-        // =================================================================
-        console.log('\n--- Example 7: Vision ---');
-        const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-responses-example-'));
-        const testImagePath = path.join(tempDir, 'sample.png');
-        // Minimal 1x1 PNG so the example runs without external assets.
-        const samplePng = Buffer.from(
-            '89504e470d0a1a0a0000000d49484452000000010000000108020000009001' +
-            '2e00000000c4944415478016360f8cfc000000002000176dd24100000000049454e44ae426082',
-            'hex'
-        );
-        fs.writeFileSync(testImagePath, samplePng);
-        try {
-            const imageContent = await createImageContentFromFile(testImagePath);
-            const visionResponse = await client.create([
-                {
-                    type: 'message',
-                    role: 'user',
-                    content: [
-                        { type: 'input_text', text: 'Describe this image in one sentence.' },
-                        imageContent,
-                    ],
-                } as MessageItem,
-            ]);
-            console.log(`Vision: ${getOutputText(visionResponse)}`);
-        } finally {
-            fs.rmSync(tempDir, { recursive: true, force: true });
-        }
-
         // Cleanup
         manager.stopWebService();
         await model.unload();
diff --git a/sdk/js/src/detail/model.ts b/sdk/js/src/detail/model.ts
index 08627a7a7..c1ee0d5fa 100644
--- a/sdk/js/src/detail/model.ts
+++ b/sdk/js/src/detail/model.ts
@@ -196,11 +196,10 @@ export class Model implements IModel {
 
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
-     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
+     * @param baseUrl - The base URL of the Foundry Local web service.
      * @returns A ResponsesClient instance.
      */
-    public createResponsesClient(baseUrl?: string): ResponsesClient {
+    public createResponsesClient(baseUrl: string): ResponsesClient {
         return this.selectedVariant.createResponsesClient(baseUrl);
     }
-}
+}
\ No newline at end of file
diff --git a/sdk/js/src/detail/modelVariant.ts b/sdk/js/src/detail/modelVariant.ts
index 019782091..43484bac0 100644
--- a/sdk/js/src/detail/modelVariant.ts
+++ b/sdk/js/src/detail/modelVariant.ts
@@ -189,11 +189,10 @@ export class ModelVariant implements IModel {
 
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
-     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
+     * @param baseUrl - The base URL of the Foundry Local web service.
      * @returns A ResponsesClient instance.
      */
-    public createResponsesClient(baseUrl?: string): ResponsesClient {
-        return ResponsesClient.createWithCoreInterop(baseUrl, this._modelInfo.id, this.coreInterop);
+    public createResponsesClient(baseUrl: string): ResponsesClient {
+        return new ResponsesClient(baseUrl, this._modelInfo.id);
     }
 }
diff --git a/sdk/js/src/foundryLocalManager.ts b/sdk/js/src/foundryLocalManager.ts
index 957b1a5c5..f22acdc0d 100644
--- a/sdk/js/src/foundryLocalManager.ts
+++ b/sdk/js/src/foundryLocalManager.ts
@@ -214,12 +214,17 @@ export class FoundryLocalManager {
 
     /**
      * Creates a ResponsesClient for interacting with the Responses API.
-     * Uses native FFI for create/createStreaming and falls back to HTTP if the web
-     * service has been started via `startWebService()`.
+     * The web service must be started first via `startWebService()`.
      * @param modelId - Optional default model ID for requests.
      * @returns A ResponsesClient instance.
+     * @throws Error - If the web service is not running.
      */
     public createResponsesClient(modelId?: string): ResponsesClient {
-        return ResponsesClient.createWithCoreInterop(this._urls[0], modelId, this.coreInterop);
+        if (this._urls.length === 0) {
+            throw new Error(
+                'Web service is not running. Call startWebService() before creating a ResponsesClient.'
+            );
+        }
+        return new ResponsesClient(this._urls[0], modelId);
     }
 }
diff --git a/sdk/js/src/imodel.ts b/sdk/js/src/imodel.ts
index 259ed3052..8f9bd0c14 100644
--- a/sdk/js/src/imodel.ts
+++ b/sdk/js/src/imodel.ts
@@ -36,10 +36,11 @@ export interface IModel {
     createLiveTranscriptionSession(): LiveAudioTranscriptionSession;
     /**
      * Creates a ResponsesClient for interacting with the model via the Responses API.
-     * Uses native FFI for create/createStreaming and falls back to HTTP when baseUrl is provided.
-     * @param baseUrl - Optional base URL of the Foundry Local web service for HTTP fallback and server-backed operations.
+     * Unlike createChatClient/createAudioClient (which use FFI), the Responses API
+     * is HTTP-based, so the web service base URL must be provided.
+     * @param baseUrl - The base URL of the Foundry Local web service.
      */
-    createResponsesClient(baseUrl?: string): ResponsesClient;
+    createResponsesClient(baseUrl: string): ResponsesClient;
 
     /**
      * Variants of the model that are available. Variants of the model are optimized for different devices.
diff --git a/sdk/js/src/index.ts b/sdk/js/src/index.ts
index c2fb72169..bc27293bb 100644
--- a/sdk/js/src/index.ts
+++ b/sdk/js/src/index.ts
@@ -12,7 +12,6 @@ export { EmbeddingClient } from './openai/embeddingClient.js';
 export { LiveAudioTranscriptionSession, LiveAudioTranscriptionOptions } from './openai/liveAudioTranscriptionClient.js';
 export type { LiveAudioTranscriptionResponse, TranscriptionContentPart } from './openai/liveAudioTranscriptionTypes.js';
 export { ResponsesClient, ResponsesClientSettings, getOutputText } from './openai/responsesClient.js';
-export { createImageContentFromFile, createImageContentFromUrl } from './openai/vision.js';
 export { ModelLoadManager } from './detail/modelLoadManager.js';
 /** @internal */
 export { CoreInterop } from './detail/coreInterop.js';
diff --git a/sdk/js/src/openai/responsesClient.ts b/sdk/js/src/openai/responsesClient.ts
index efdb2fb77..711efb78d 100644
--- a/sdk/js/src/openai/responsesClient.ts
+++ b/sdk/js/src/openai/responsesClient.ts
@@ -9,30 +9,10 @@ import {
     StreamingEvent,
     InputItemsListResponse,
     DeleteResponseResult,
-    ListResponsesResult,
-    ListResponsesOptions,
     ResponseInputItem,
-    ResponseOutputItem,
     MessageItem,
     ContentPart,
-    FunctionCallItem,
-    FunctionCallOutputItem,
-    InputImageContent,
-    OutputTextContent,
 } from '../types.js';
-import { randomUUID } from 'crypto';
-
-interface ResponsesCoreInterop {
-    executeCommand(command: string, params?: any): string;
-    executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<string>;
-}
-
-interface StoredFfiResponse {
-    response: ResponseObject;
-    input: ResponseInputItem[];
-}
-
-class ResponsesCallbackError extends Error { }
 
 /**
  * Extracts the text content from an assistant message in a Response.
@@ -72,11 +52,6 @@ export class ResponsesClientSettings {
     toolChoice?: ResponseToolChoice;
     truncation?: TruncationStrategy;
     parallelToolCalls?: boolean;
-    /**
-     * Whether to store the response server-side so it can be retrieved via `get()`, `list()`,
-     * `getInputItems()`, or referenced by `previous_response_id`. Defaults to `true` when not
-     * explicitly set. Set to `false` to disable persistence for a given client.
-     */
     store?: boolean;
     metadata?: Record<string, string>;
     reasoning?: ReasoningConfig;
@@ -101,8 +76,7 @@ export class ResponsesClientSettings {
             tool_choice: this.toolChoice,
             truncation: this.truncation,
             parallel_tool_calls: this.parallelToolCalls,
-            // Default store to true when not explicitly set
-            store: this.store !== undefined ? this.store : true,
+            store: this.store,
             metadata: this.metadata,
             reasoning: this.reasoning ? filterUndefined(this.reasoning) : undefined,
             text: this.text ? filterUndefined(this.text) : undefined,
@@ -115,12 +89,11 @@ export class ResponsesClientSettings {
 }
 
 /**
- * Client for the OpenAI Responses API.
+ * Client for the OpenAI Responses API served by Foundry Local's embedded web service.
  *
- * When created by FoundryLocalManager or IModel factories, create/createStreaming use
- * the native FFI path where possible and fall back to HTTP when the web service is
- * available. Stored-response operations (get/delete/cancel/input_items/list) use the
- * in-memory FFI store for FFI-created responses and HTTP for server-backed responses.
+ * Unlike ChatClient/AudioClient (which use FFI via CoreInterop), the Responses API
+ * is HTTP-only. This client uses fetch() for all operations and parses Server-Sent Events
+ * for streaming.
  *
  * Create via `FoundryLocalManager.createResponsesClient()` or
  * `model.createResponsesClient(baseUrl)`.
@@ -144,10 +117,8 @@ export class ResponsesClientSettings {
  * ```
  */
 export class ResponsesClient {
-    private baseUrl?: string;
+    private baseUrl: string;
     private modelId?: string;
-    private coreInterop?: ResponsesCoreInterop;
-    private readonly ffiStore = new Map<string, StoredFfiResponse>();
 
     /**
      * Configuration settings for responses.
@@ -159,31 +130,7 @@ export class ResponsesClient {
      * @param modelId - Optional default model ID. Can be overridden per-request via options.
      */
     constructor(baseUrl: string, modelId?: string) {
-        this.initialize(baseUrl, modelId);
-    }
-
-    /**
-     * @internal
-     * Creates a hybrid client that can use FFI first and HTTP as a fallback.
-     */
-    public static createWithCoreInterop(
-        baseUrl: string | undefined,
-        modelId: string | undefined,
-        coreInterop: ResponsesCoreInterop
-    ): ResponsesClient {
-        const client = new ResponsesClient(baseUrl?.trim() ? baseUrl : 'http://127.0.0.1', modelId);
-        client.coreInterop = coreInterop;
-        if (baseUrl === undefined || baseUrl.trim() === '') {
-            client.baseUrl = undefined;
-        }
-        return client;
-    }
-
-    private initialize(baseUrl: string, modelId?: string): void {
-        if (baseUrl === null || baseUrl === undefined || typeof baseUrl !== 'string') {
-            throw new Error('baseUrl must be a non-empty string.');
-        }
-        if (baseUrl.trim() === '') {
+        if (!baseUrl || typeof baseUrl !== 'string' || baseUrl.trim() === '') {
             throw new Error('baseUrl must be a non-empty string.');
         }
         // Strip trailing slashes for consistent URL construction
@@ -218,25 +165,11 @@ export class ResponsesClient {
 
         const body = this.buildRequest(input, { ...options, stream: false });
 
-        if (this.coreInterop) {
-            try {
-                const response = await this.createViaFfi(body);
-                this.storeFfiResponseIfNeeded(response, body);
-                return response;
-            } catch (e) {
-                if (!this.baseUrl) {
-                    throw new Error(
-                        `Responses FFI create failed and no HTTP fallback is available: ${e instanceof Error ? e.message : String(e)}`,
-                        { cause: e }
-                    );
-                }
-            }
-        }
-
-        return this.fetchJson<ResponseObject>(
+        const response = await this.fetchJson<ResponseObject>(
             '/v1/responses',
             { method: 'POST', body: JSON.stringify(body) }
         );
+        return response;
     }
 
     /**
@@ -260,24 +193,6 @@ export class ResponsesClient {
 
         const body = this.buildRequest(input, { ...options, stream: true });
 
-        if (this.coreInterop) {
-            try {
-                const response = await this.createStreamingViaFfi(body, callback);
-                this.storeFfiResponseIfNeeded(response, body);
-                return;
-            } catch (e) {
-                if (e instanceof ResponsesCallbackError) {
-                    throw e;
-                }
-                if (!this.baseUrl) {
-                    throw new Error(
-                        `Responses FFI streaming create failed and no HTTP fallback is available: ${e instanceof Error ? e.message : String(e)}`,
-                        { cause: e }
-                    );
-                }
-            }
-        }
-
         const res = await this.doFetch('/v1/responses', {
             method: 'POST',
             headers: { 'Content-Type': 'application/json', 'Accept': 'text/event-stream' },
@@ -315,10 +230,6 @@ export class ResponsesClient {
      */
     public async get(responseId: string): Promise<ResponseObject> {
         this.validateId(responseId, 'responseId');
-        const stored = this.ffiStore.get(responseId);
-        if (stored) {
-            return stored.response;
-        }
         return this.fetchJson<ResponseObject>(
             `/v1/responses/${encodeURIComponent(responseId)}`,
             { method: 'GET' }
@@ -332,9 +243,6 @@ export class ResponsesClient {
      */
     public async delete(responseId: string): Promise<DeleteResponseResult> {
         this.validateId(responseId, 'responseId');
-        if (this.ffiStore.delete(responseId)) {
-            return { id: responseId, object: 'response.deleted', deleted: true };
-        }
         return this.fetchJson<DeleteResponseResult>(
             `/v1/responses/${encodeURIComponent(responseId)}`,
             { method: 'DELETE' }
@@ -348,10 +256,6 @@ export class ResponsesClient {
      */
     public async cancel(responseId: string): Promise<ResponseObject> {
         this.validateId(responseId, 'responseId');
-        const stored = this.ffiStore.get(responseId);
-        if (stored) {
-            throw new Error('Cancellation is only supported for HTTP-backed Responses API operations.');
-        }
         return this.fetchJson<ResponseObject>(
             `/v1/responses/${encodeURIComponent(responseId)}/cancel`,
             { method: 'POST' }
@@ -365,58 +269,12 @@ export class ResponsesClient {
      */
     public async getInputItems(responseId: string): Promise<InputItemsListResponse> {
         this.validateId(responseId, 'responseId');
-        const stored = this.ffiStore.get(responseId);
-        if (stored) {
-            return { object: 'list', data: stored.input };
-        }
         return this.fetchJson<InputItemsListResponse>(
             `/v1/responses/${encodeURIComponent(responseId)}/input_items`,
             { method: 'GET' }
         );
     }
 
-    /**
-     * Lists stored responses.
-     * @param options - Optional pagination parameters. The Foundry Local server supports
-     *   `limit`, `order`, and `after`; it does not currently support `before`.
-     * @returns The list of Response objects.
-     */
-    public async list(options?: ListResponsesOptions): Promise<ListResponsesResult> {
-        const query = new URLSearchParams();
-        if (options?.limit !== undefined) query.set('limit', String(options.limit));
-        if (options?.order !== undefined) query.set('order', options.order);
-        if (options?.after !== undefined) query.set('after', options.after);
-
-        if (!this.baseUrl) {
-            return this.listFfiResponses(options);
-        }
-
-        const queryString = query.toString();
-        const path = queryString ? `/v1/responses?${queryString}` : '/v1/responses';
-        const serverResult = await this.fetchJson<ListResponsesResult>(path, { method: 'GET' });
-        const localResult = this.listFfiResponses(options);
-        if (localResult.data.length === 0) {
-            return serverResult;
-        }
-
-        const localIds = new Set(localResult.data.map((response) => response.id));
-        const mergedData = [...localResult.data, ...serverResult.data.filter((response) => !localIds.has(response.id))];
-        const order = options?.order ?? 'desc';
-        mergedData.sort((a, b) =>
-            order === 'asc' ? a.created_at - b.created_at : b.created_at - a.created_at
-        );
-        const limit = options?.limit ?? 20;
-        const limitedData = mergedData.slice(0, limit);
-
-        return {
-            object: 'list',
-            data: limitedData,
-            first_id: limitedData[0]?.id ?? null,
-            last_id: limitedData[limitedData.length - 1]?.id ?? null,
-            has_more: Boolean(serverResult.has_more || localResult.has_more || mergedData.length > limitedData.length),
-        };
-    }
-
     // ========================================================================
     // Internal helpers
     // ========================================================================
@@ -509,598 +367,6 @@ export class ResponsesClient {
         }
     }
 
-    /**
-     * Creates a response through the native chat_completions FFI command.
-     */
-    private async createViaFfi(body: ResponseCreateParams): Promise<ResponseObject> {
-        if (!this.coreInterop) {
-            throw new Error('Responses FFI transport is not available.');
-        }
-
-        const chatRequest = this.buildChatCompletionRequest(body, false);
-        const raw = this.coreInterop.executeCommand('chat_completions', {
-            Params: {
-                OpenAICreateRequest: JSON.stringify(chatRequest),
-            },
-        });
-        const chatResponse = this.parseFfiJson(raw, 'chat_completions');
-        return this.mapChatCompletionToResponse(chatResponse, body);
-    }
-
-    /**
-     * Creates a streaming response through the native chat_completions FFI command.
-     */
-    private async createStreamingViaFfi(
-        body: ResponseCreateParams,
-        callback: (event: StreamingEvent) => void
-    ): Promise<ResponseObject> {
-        if (!this.coreInterop) {
-            throw new Error('Responses FFI transport is not available.');
-        }
-        if (body.tools && body.tools.length > 0) {
-            throw new Error('Responses FFI streaming does not currently support tool calls.');
-        }
-
-        const responseId = this.createResponseId();
-        const outputItemId = this.createItemId('msg');
-        const chatRequest = this.buildChatCompletionRequest(body, true);
-        const outputText: OutputTextContent = { type: 'output_text', text: '' };
-        const messageItem: MessageItem = {
-            id: outputItemId,
-            type: 'message',
-            role: 'assistant',
-            content: [outputText],
-            status: 'in_progress',
-        };
-
-        let sequence = 0;
-        let contentPartStarted = false;
-        let callbackError: ResponsesCallbackError | null = null;
-        const response = this.createBaseResponse(body, responseId, [], 'in_progress');
-        const emit = (event: any): void => {
-            if (callbackError) return;
-            try {
-                callback({ ...event, sequence_number: sequence++ } as StreamingEvent);
-            } catch (e) {
-                callbackError = new ResponsesCallbackError(
-                    `User callback threw an error: ${e instanceof Error ? e.message : String(e)}`
-                );
-                (callbackError as Error).cause = e;
-            }
-        };
-
-        emit({ type: 'response.created', response });
-        emit({ type: 'response.in_progress', response });
-
-        const processChunk = (chunk: string): void => {
-            if (callbackError) return;
-            const parsed = this.parseStreamingFfiChunk(chunk);
-            if (!parsed) return;
-
-            const choices = Array.isArray(parsed.choices) ? parsed.choices : [];
-            for (const choice of choices) {
-                const delta = choice?.delta ?? choice?.Delta ?? {};
-                const content = this.extractDeltaContent(delta);
-                if (!content) continue;
-
-                if (!contentPartStarted) {
-                    emit({
-                        type: 'response.output_item.added',
-                        item_id: outputItemId,
-                        output_index: 0,
-                        item: messageItem,
-                    });
-                    emit({
-                        type: 'response.content_part.added',
-                        item_id: outputItemId,
-                        output_index: 0,
-                        content_index: 0,
-                        part: outputText,
-                    });
-                    contentPartStarted = true;
-                }
-
-                outputText.text += content;
-                emit({
-                    type: 'response.output_text.delta',
-                    item_id: outputItemId,
-                    output_index: 0,
-                    content_index: 0,
-                    delta: content,
-                });
-            }
-        };
-
-        await this.coreInterop.executeCommandStreaming(
-            'chat_completions',
-            {
-                Params: {
-                    OpenAICreateRequest: JSON.stringify(chatRequest),
-                },
-            },
-            processChunk
-        );
-
-        if (callbackError) {
-            throw callbackError;
-        }
-
-        const finalMessage: MessageItem = {
-            ...messageItem,
-            content: [outputText],
-            status: 'completed',
-        };
-        const completedResponse = this.createBaseResponse(
-            body,
-            responseId,
-            contentPartStarted ? [finalMessage] : [],
-            'completed'
-        );
-        completedResponse.completed_at = Math.floor(Date.now() / 1000);
-
-        if (contentPartStarted) {
-            emit({
-                type: 'response.output_text.done',
-                item_id: outputItemId,
-                output_index: 0,
-                content_index: 0,
-                text: outputText.text,
-            });
-            emit({
-                type: 'response.content_part.done',
-                item_id: outputItemId,
-                output_index: 0,
-                content_index: 0,
-                part: outputText,
-            });
-            emit({
-                type: 'response.output_item.done',
-                item_id: outputItemId,
-                output_index: 0,
-                item: finalMessage,
-            });
-        }
-        emit({ type: 'response.completed', response: completedResponse });
-
-        return completedResponse;
-    }
-
-    private buildChatCompletionRequest(body: ResponseCreateParams, stream: boolean): Record<string, unknown> {
-        const request: Record<string, unknown> = {
-            model: body.model,
-            messages: this.convertResponseInputToChatMessages(body),
-            stream,
-        };
-
-        if (body.temperature !== undefined) request.temperature = body.temperature;
-        if (body.top_p !== undefined) request.top_p = body.top_p;
-        if (body.max_output_tokens !== undefined) request.max_tokens = body.max_output_tokens;
-        if (body.frequency_penalty !== undefined) request.frequency_penalty = body.frequency_penalty;
-        if (body.presence_penalty !== undefined) request.presence_penalty = body.presence_penalty;
-        if (body.seed !== undefined) request.seed = body.seed;
-        if (body.metadata !== undefined) request.metadata = body.metadata;
-        if (body.parallel_tool_calls !== undefined) request.parallel_tool_calls = body.parallel_tool_calls;
-        if (body.tools !== undefined) request.tools = body.tools.map((tool) => this.convertResponseToolToChatTool(tool));
-        if (body.tool_choice !== undefined) request.tool_choice = this.convertResponseToolChoiceToChatToolChoice(body.tool_choice);
-        const responseFormat = this.convertTextConfigToChatResponseFormat(body.text);
-        if (responseFormat !== undefined) request.response_format = responseFormat;
-
-        return request;
-    }
-
-    private convertResponseInputToChatMessages(body: ResponseCreateParams): any[] {
-        const messages: any[] = [];
-        if (body.instructions) {
-            messages.push({ role: 'system', content: body.instructions });
-        }
-        if (body.previous_response_id) {
-            const previousResponse = this.ffiStore.get(body.previous_response_id);
-            if (!previousResponse) {
-                throw new Error(
-                    `Responses FFI store does not contain previous_response_id: ${body.previous_response_id}`
-                );
-            }
-            messages.push(...this.convertStoredFfiResponseToChatMessages(previousResponse));
-        }
-
-        const input = body.input;
-        if (typeof input === 'string') {
-            messages.push({ role: 'user', content: input });
-            return messages;
-        }
-
-        if (!Array.isArray(input)) {
-            throw new Error('Responses FFI create requires string input or an array of input items.');
-        }
-
-        for (const item of input) {
-            if (item.type === 'message') {
-                messages.push({
-                    role: this.convertResponseRoleToChatRole(item.role),
-                    content: this.convertContentToChatContent(item.content),
-                });
-            } else if (item.type === 'function_call') {
-                messages.push(this.convertFunctionCallItemToChatMessage(item));
-            } else if (item.type === 'function_call_output') {
-                messages.push(this.convertFunctionCallOutputItemToChatMessage(item));
-            } else if (item.type === 'item_reference') {
-                const stored = this.ffiStore.get(item.id);
-                if (!stored) {
-                    throw new Error(`Responses FFI store does not contain referenced item: ${item.id}`);
-                }
-                messages.push(...this.convertResponseInputToChatMessages({ ...body, input: stored.input }));
-            }
-        }
-
-        if (messages.length === 0) {
-            throw new Error('Responses FFI create requires at least one message input item.');
-        }
-        return messages;
-    }
-
-    private convertStoredFfiResponseToChatMessages(stored: StoredFfiResponse): any[] {
-        const messages = this.convertResponseInputToChatMessages({
-            input: stored.input,
-            model: stored.response.model,
-        });
-        for (const item of stored.response.output) {
-            if (item.type === 'message') {
-                messages.push({
-                    role: this.convertResponseRoleToChatRole(item.role),
-                    content: this.convertContentToChatContent(item.content),
-                });
-            } else if (item.type === 'function_call') {
-                messages.push(this.convertFunctionCallItemToChatMessage(item));
-            }
-        }
-        return messages;
-    }
-
-    private convertResponseRoleToChatRole(role: MessageItem['role']): string {
-        if (role === 'developer') return 'system';
-        return role;
-    }
-
-    private convertContentToChatContent(content: string | ContentPart[]): any {
-        if (typeof content === 'string') {
-            return content;
-        }
-
-        const parts: any[] = [];
-        for (const part of content) {
-            if (part.type === 'input_text') {
-                parts.push({ type: 'text', text: part.text });
-            } else if (part.type === 'output_text') {
-                parts.push({ type: 'text', text: part.text });
-            } else if (part.type === 'input_image') {
-                parts.push({ type: 'image_url', image_url: this.convertInputImageToChatImageUrl(part) });
-            } else if (part.type === 'refusal') {
-                parts.push({ type: 'text', text: part.refusal });
-            } else {
-                throw new Error(`Responses FFI create does not support content part type: ${part.type}`);
-            }
-        }
-
-        if (parts.length === 0) {
-            return '';
-        }
-        if (parts.every((part) => part.type === 'text')) {
-            return parts.map((part) => part.text).join('');
-        }
-        return parts;
-    }
-
-    private convertInputImageToChatImageUrl(part: InputImageContent): any {
-        if (part.image_url) {
-            return part.detail ? { url: part.image_url, detail: part.detail } : { url: part.image_url };
-        }
-        if (part.image_data) {
-            if (!part.media_type) {
-                throw new Error('Responses FFI create requires media_type when image_data is provided.');
-            }
-            const url = `data:${part.media_type};base64,${part.image_data}`;
-            return part.detail ? { url, detail: part.detail } : { url };
-        }
-        throw new Error('Responses FFI create requires input_image to include image_url or image_data.');
-    }
-
-    private convertFunctionCallItemToChatMessage(item: FunctionCallItem): any {
-        return {
-            role: 'assistant',
-            content: null,
-            tool_calls: [{
-                id: item.call_id,
-                type: 'function',
-                function: {
-                    name: item.name,
-                    arguments: item.arguments,
-                },
-            }],
-        };
-    }
-
-    private convertFunctionCallOutputItemToChatMessage(item: FunctionCallOutputItem): any {
-        return {
-            role: 'tool',
-            tool_call_id: item.call_id,
-            content: this.convertContentPartOutputToText(item.output),
-        };
-    }
-
-    private convertContentPartOutputToText(output: string | ContentPart[]): string {
-        if (typeof output === 'string') {
-            return output;
-        }
-        return output.map((part) => {
-            if (part.type === 'input_text' || part.type === 'output_text') return part.text;
-            if (part.type === 'refusal') return part.refusal;
-            if (part.type === 'input_image') return part.image_url ?? '[image]';
-            if (part.type === 'input_file') return part.file_url;
-            return '';
-        }).join('');
-    }
-
-    private convertResponseToolToChatTool(tool: FunctionToolDefinition): Record<string, unknown> {
-        return {
-            type: 'function',
-            function: {
-                name: tool.name,
-                description: tool.description,
-                parameters: tool.parameters,
-                strict: tool.strict,
-            },
-        };
-    }
-
-    private convertResponseToolChoiceToChatToolChoice(toolChoice: ResponseToolChoice): unknown {
-        if (typeof toolChoice === 'string') {
-            return toolChoice;
-        }
-        if (toolChoice?.type === 'function') {
-            return {
-                type: 'function',
-                function: { name: toolChoice.name },
-            };
-        }
-        return toolChoice;
-    }
-
-    private convertTextConfigToChatResponseFormat(text?: TextConfig): unknown {
-        const format = text?.format;
-        if (!format) {
-            return undefined;
-        }
-        if (format.type === 'json_schema') {
-            return {
-                type: 'json_schema',
-                json_schema: {
-                    name: format.name,
-                    description: format.description,
-                    schema: format.schema,
-                    strict: format.strict,
-                },
-            };
-        }
-        if (format.type === 'json_object') {
-            return { type: 'json_object' };
-        }
-        return { type: format.type };
-    }
-
-    private parseFfiJson(raw: string, command: string): any {
-        try {
-            const parsed = JSON.parse(raw);
-            if (parsed?.Successful === false || parsed?.successful === false) {
-                const error = parsed?.ErrorMessage ?? parsed?.errorMessage ?? parsed?.Error ?? parsed?.error ?? raw;
-                throw new Error(String(error));
-            }
-            return parsed;
-        } catch (e) {
-            if (e instanceof SyntaxError) {
-                throw new Error(`Failed to parse ${command} FFI response JSON: ${raw.substring(0, 200)}`, { cause: e });
-            }
-            throw e;
-        }
-    }
-
-    private mapChatCompletionToResponse(chatResponse: any, body: ResponseCreateParams): ResponseObject {
-        const responseId = this.createResponseId(chatResponse?.id);
-        const createdAt = this.normalizeTimestamp(chatResponse?.created ?? chatResponse?.Created);
-        const choices = Array.isArray(chatResponse?.choices) ? chatResponse.choices : [];
-        const firstChoice = choices[0] ?? {};
-        const message = firstChoice.message ?? firstChoice.Message ?? {};
-        const output = this.convertChatMessageToResponseOutput(message);
-        const response = this.createBaseResponse(body, responseId, output, 'completed');
-        response.created_at = createdAt;
-        response.completed_at = Math.floor(Date.now() / 1000);
-        response.usage = this.mapUsage(chatResponse?.usage ?? chatResponse?.Usage);
-        return response;
-    }
-
-    private convertChatMessageToResponseOutput(message: any): ResponseOutputItem[] {
-        const toolCalls = message?.tool_calls ?? message?.ToolCalls;
-        if (Array.isArray(toolCalls) && toolCalls.length > 0) {
-            return toolCalls.map((toolCall: any) => ({
-                type: 'function_call',
-                id: this.createItemId('fc'),
-                call_id: toolCall.id ?? toolCall.Id ?? this.createItemId('call'),
-                name: toolCall.function?.name ?? toolCall.Function?.Name ?? '',
-                arguments: toolCall.function?.arguments ?? toolCall.Function?.Arguments ?? '',
-                status: 'completed',
-            }));
-        }
-
-        const content = this.extractMessageContent(message);
-        if (content === undefined) {
-            return [];
-        }
-
-        const item: MessageItem = {
-            type: 'message',
-            id: this.createItemId('msg'),
-            role: 'assistant',
-            content: [{ type: 'output_text', text: content }],
-            status: 'completed',
-        };
-        return [item];
-    }
-
-    private extractMessageContent(message: any): string | undefined {
-        const content = message?.content ?? message?.Content;
-        if (typeof content === 'string') {
-            return content;
-        }
-        if (Array.isArray(content)) {
-            return content.map((part) => {
-                if (typeof part === 'string') return part;
-                if (typeof part?.text === 'string') return part.text;
-                if (typeof part?.Text === 'string') return part.Text;
-                return '';
-            }).join('');
-        }
-        return undefined;
-    }
-
-    private mapUsage(usage: any): ResponseObject['usage'] {
-        if (!usage) {
-            return null;
-        }
-        const inputTokens = usage.prompt_tokens ?? usage.PromptTokens ?? usage.input_tokens ?? 0;
-        const outputTokens = usage.completion_tokens ?? usage.CompletionTokens ?? usage.output_tokens ?? 0;
-        return {
-            input_tokens: inputTokens,
-            output_tokens: outputTokens,
-            total_tokens: usage.total_tokens ?? usage.TotalTokens ?? inputTokens + outputTokens,
-        };
-    }
-
-    private createBaseResponse(
-        body: ResponseCreateParams,
-        id: string,
-        output: ResponseOutputItem[],
-        status: ResponseObject['status']
-    ): ResponseObject {
-        return {
-            id,
-            object: 'response',
-            created_at: Math.floor(Date.now() / 1000),
-            completed_at: status === 'completed' ? Math.floor(Date.now() / 1000) : null,
-            failed_at: null,
-            cancelled_at: null,
-            status,
-            incomplete_details: null,
-            model: body.model ?? this.modelId ?? '',
-            previous_response_id: body.previous_response_id ?? null,
-            instructions: body.instructions ?? null,
-            output,
-            error: null,
-            tools: body.tools ?? [],
-            tool_choice: body.tool_choice ?? 'auto',
-            truncation: body.truncation ?? 'disabled',
-            parallel_tool_calls: body.parallel_tool_calls ?? false,
-            text: body.text ?? {},
-            top_p: body.top_p ?? 1,
-            temperature: body.temperature ?? 1,
-            presence_penalty: body.presence_penalty ?? 0,
-            frequency_penalty: body.frequency_penalty ?? 0,
-            max_output_tokens: body.max_output_tokens ?? null,
-            reasoning: body.reasoning ?? null,
-            store: body.store ?? true,
-            metadata: body.metadata ?? null,
-            usage: null,
-            user: body.user ?? null,
-        };
-    }
-
-    private storeFfiResponseIfNeeded(response: ResponseObject, body: ResponseCreateParams): void {
-        if (body.store === false) {
-            return;
-        }
-        this.ffiStore.set(response.id, {
-            response,
-            input: this.normalizeResponseInputItems(body.input),
-        });
-    }
-
-    private normalizeResponseInputItems(input: ResponseCreateParams['input']): ResponseInputItem[] {
-        if (typeof input === 'string') {
-            return [{
-                type: 'message',
-                role: 'user',
-                content: input,
-                status: 'completed',
-            }];
-        }
-        return Array.isArray(input) ? input : [];
-    }
-
-    private listFfiResponses(options?: ListResponsesOptions): ListResponsesResult {
-        const order = options?.order ?? 'desc';
-        const limit = options?.limit ?? 20;
-        let data = Array.from(this.ffiStore.values()).map((entry) => entry.response);
-        data.sort((a, b) => order === 'asc' ? a.created_at - b.created_at : b.created_at - a.created_at);
-
-        if (options?.after) {
-            const afterIndex = data.findIndex((response) => response.id === options.after);
-            data = afterIndex >= 0 ? data.slice(afterIndex + 1) : [];
-        }
-
-        const limitedData = data.slice(0, limit);
-        return {
-            object: 'list',
-            data: limitedData,
-            first_id: limitedData[0]?.id ?? null,
-            last_id: limitedData[limitedData.length - 1]?.id ?? null,
-            has_more: data.length > limitedData.length,
-        };
-    }
-
-    private parseStreamingFfiChunk(chunk: string): any | undefined {
-        const trimmed = chunk.trim();
-        if (!trimmed || trimmed === '[DONE]') {
-            return undefined;
-        }
-        const json = trimmed.startsWith('data: ') ? trimmed.slice(6).trim() : trimmed;
-        if (!json || json === '[DONE]') {
-            return undefined;
-        }
-        return JSON.parse(json);
-    }
-
-    private extractDeltaContent(delta: any): string {
-        const content = delta?.content ?? delta?.Content;
-        if (typeof content === 'string') {
-            return content;
-        }
-        if (Array.isArray(content)) {
-            return content.map((part) => {
-                if (typeof part === 'string') return part;
-                if (typeof part?.text === 'string') return part.text;
-                if (typeof part?.Text === 'string') return part.Text;
-                return '';
-            }).join('');
-        }
-        return '';
-    }
-
-    private createResponseId(sourceId?: string): string {
-        if (sourceId?.startsWith('resp_')) {
-            return sourceId;
-        }
-        return `resp_${sourceId ?? randomUUID()}`;
-    }
-
-    private createItemId(prefix: string): string {
-        return `${prefix}_${randomUUID()}`;
-    }
-
-    private normalizeTimestamp(value: unknown): number {
-        if (typeof value === 'number' && Number.isFinite(value)) {
-            return value;
-        }
-        return Math.floor(Date.now() / 1000);
-    }
-
     /**
      * Performs a fetch and parses the JSON response, handling errors.
      */
@@ -1125,9 +391,6 @@ export class ResponsesClient {
      * Low-level fetch wrapper with error handling.
      */
     private async doFetch(path: string, init: RequestInit): Promise<Response> {
-        if (!this.baseUrl) {
-            throw new Error('Responses HTTP transport is not available. Start the Foundry Local web service or create the client with a baseUrl.');
-        }
         const url = `${this.baseUrl}${path}`;
         let res: Response;
         try {
diff --git a/sdk/js/src/openai/vision.ts b/sdk/js/src/openai/vision.ts
deleted file mode 100644
index e2bd66302..000000000
--- a/sdk/js/src/openai/vision.ts
+++ /dev/null
@@ -1,159 +0,0 @@
-// -------------------------------------------------------------------------
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-// -------------------------------------------------------------------------
-
-import * as path from 'path';
-import { promises as fsPromises } from 'fs';
-import type { InputImageContent } from '../types.js';
-
-const MEDIA_TYPE_MAP: Record<string, string> = {
-    '.png': 'image/png',
-    '.jpg': 'image/jpeg',
-    '.jpeg': 'image/jpeg',
-    '.gif': 'image/gif',
-    '.webp': 'image/webp',
-    '.bmp': 'image/bmp',
-};
-
-/**
- * Options for `createImageContentFromFile`.
- */
-export interface ImageContentOptions {
-    /** Detail level hint for the model. */
-    detail?: 'low' | 'high' | 'auto';
-    /**
-     * If set, the longest dimension of the image will be scaled down to this value
-     * (preserving aspect ratio) before encoding. Must be a finite positive integer.
-     * Requires the `sharp` package to be installed as an optional peer dependency
-     * (`npm install sharp`). If `sharp` is not available, a warning is printed and
-     * the original image is used unresized.
-     */
-    maxDimension?: number;
-}
-
-/**
- * Creates an `InputImageContent` part by reading an image file from disk.
- * The file is base64-encoded and embedded directly in the content part.
- * Supported file extensions: `.png`, `.jpg`, `.jpeg`, `.gif`, `.webp`, `.bmp`.
- *
- * The second argument accepts either an `ImageContentOptions` object or a shorthand
- * detail string (`'low' | 'high' | 'auto'`) for convenience.
- *
- * @param filePath - Absolute or relative path to the image file.
- * @param options - Optional `ImageContentOptions`, or a shorthand detail string.
- * @returns A `Promise<InputImageContent>` with base64-encoded image data.
- * @throws If the file does not exist, the extension is unsupported, or `maxDimension`
- *         is not a finite positive integer.
- */
-export async function createImageContentFromFile(
-    filePath: string,
-    options?: ImageContentOptions | 'low' | 'high' | 'auto'
-): Promise<InputImageContent> {
-    // Support the shorthand signature: createImageContentFromFile(path, detail?)
-    const opts: ImageContentOptions = typeof options === 'string'
-        ? { detail: options }
-        : (options ?? {});
-
-    if (opts.maxDimension !== undefined) {
-        if (!Number.isFinite(opts.maxDimension) || !Number.isInteger(opts.maxDimension) || opts.maxDimension <= 0) {
-            throw new Error(`Invalid maxDimension: ${opts.maxDimension}. Expected a finite positive integer.`);
-        }
-    }
-
-    const ext = path.extname(filePath).toLowerCase();
-    const mediaType = MEDIA_TYPE_MAP[ext];
-    if (!mediaType) {
-        throw new Error(
-            `Unsupported image format: ${ext}. Supported formats: ${Object.keys(MEDIA_TYPE_MAP).join(', ')}`
-        );
-    }
-
-    let dataBuffer: Buffer;
-    try {
-        dataBuffer = await fsPromises.readFile(filePath) as Buffer;
-    } catch (err: any) {
-        if (err.code === 'ENOENT') {
-            throw new Error(`Image file not found: ${filePath}`);
-        }
-        throw err;
-    }
-
-    let finalMediaType = mediaType;
-    if (opts.maxDimension !== undefined) {
-        const resized = await resizeImage(dataBuffer, opts.maxDimension, mediaType);
-        dataBuffer = resized.buffer;
-        finalMediaType = resized.mediaType;
-    }
-
-    const content: InputImageContent = {
-        type: 'input_image',
-        image_data: dataBuffer.toString('base64'),
-        media_type: finalMediaType,
-    };
-    if (opts.detail !== undefined) {
-        content.detail = opts.detail;
-    }
-    return content;
-}
-
-/**
- * Creates an `InputImageContent` part from a URL.
- * The server will infer the media type from the URL.
- *
- * @param url - Public URL pointing to the image.
- * @param detail - Optional detail level hint for the model ('low' | 'high' | 'auto').
- * @returns An `InputImageContent` object with the image URL.
- */
-export function createImageContentFromUrl(url: string, detail?: 'low' | 'high' | 'auto'): InputImageContent {
-    const content: InputImageContent = {
-        type: 'input_image',
-        image_url: url,
-        // media_type intentionally omitted — server infers from URL
-    };
-    if (detail !== undefined) {
-        content.detail = detail;
-    }
-    return content;
-}
-
-/**
- * Attempts to resize image data to fit within `maxDimension` on the longest side.
- * Requires the optional `sharp` peer dependency. Falls back to original data with a
- * warning if `sharp` is not available.
- * Returns both the (possibly resized) buffer and the media type.
- */
-async function resizeImage(data: Buffer, maxDimension: number, fallbackMediaType: string): Promise<{ buffer: Buffer; mediaType: string }> {
-    let sharp: any;
-    try {
-        // Dynamic import so sharp remains a soft/optional peer dep.
-        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
-        // @ts-ignore — sharp is an optional peer dependency
-        sharp = (await import('sharp')).default;
-    } catch {
-        console.warn(
-            `[foundry-local] createImageContentFromFile: maxDimension=${maxDimension} requires the ` +
-            `"sharp" package (npm install sharp). Image will be used unresized.`
-        );
-        return { buffer: data, mediaType: fallbackMediaType };
-    }
-
-    const metadata = await sharp(data).metadata();
-    const { width = 0, height = 0, format } = metadata;
-    // Map sharp format names back to MIME types; fall back to the original type
-    const formatToMime: Record<string, string> = {
-        png: 'image/png', jpeg: 'image/jpeg', gif: 'image/gif',
-        webp: 'image/webp', bmp: 'image/bmp',
-    };
-    const mediaType = (format && formatToMime[format]) ?? fallbackMediaType;
-
-    if (Math.max(width, height) <= maxDimension) {
-        return { buffer: data, mediaType };
-    }
-
-    const resizedBuffer: Buffer = await sharp(data)
-        .resize({ width: maxDimension, height: maxDimension, fit: 'inside', withoutEnlargement: true })
-        .toBuffer();
-
-    return { buffer: resizedBuffer, mediaType };
-}
diff --git a/sdk/js/src/types.ts b/sdk/js/src/types.ts
index 0d5a54568..521ae34b4 100644
--- a/sdk/js/src/types.ts
+++ b/sdk/js/src/types.ts
@@ -127,20 +127,6 @@ export interface InputTextContent {
     text: string;
 }
 
-export interface InputImageContent {
-    type: 'input_image';
-    image_url?: string;
-    image_data?: string;       // base64-encoded
-    media_type?: string;       // e.g. "image/png"; omit to let the server infer
-    detail?: 'low' | 'high' | 'auto';
-}
-
-export interface InputFileContent {
-    type: 'input_file';
-    filename: string;
-    file_url: string;
-}
-
 export interface OutputTextContent {
     type: 'output_text';
     text: string;
@@ -153,7 +139,7 @@ export interface RefusalContent {
     refusal: string;
 }
 
-export type ContentPart = InputTextContent | InputImageContent | InputFileContent | OutputTextContent | RefusalContent;
+export type ContentPart = InputTextContent | OutputTextContent | RefusalContent;
 
 export interface Annotation {
     type: string;
@@ -369,7 +355,6 @@ export interface OutputItemDoneEvent {
 export interface ContentPartAddedEvent {
     type: 'response.content_part.added';
     item_id: string;
-    output_index: number;
     content_index: number;
     part: ContentPart;
     sequence_number: number;
@@ -378,7 +363,6 @@ export interface ContentPartAddedEvent {
 export interface ContentPartDoneEvent {
     type: 'response.content_part.done';
     item_id: string;
-    output_index: number;
     content_index: number;
     part: ContentPart;
     sequence_number: number;
@@ -390,8 +374,6 @@ export interface OutputTextDeltaEvent {
     output_index: number;
     content_index: number;
     delta: string;
-    logprobs?: LogProb[];
-    obfuscation?: string | null;
     sequence_number: number;
 }
 
@@ -401,14 +383,12 @@ export interface OutputTextDoneEvent {
     output_index: number;
     content_index: number;
     text: string;
-    logprobs?: LogProb[];
     sequence_number: number;
 }
 
 export interface RefusalDeltaEvent {
     type: 'response.refusal.delta';
     item_id: string;
-    output_index: number;
     content_index: number;
     delta: string;
     sequence_number: number;
@@ -417,7 +397,6 @@ export interface RefusalDeltaEvent {
 export interface RefusalDoneEvent {
     type: 'response.refusal.done';
     item_id: string;
-    output_index: number;
     content_index: number;
     refusal: string;
     sequence_number: number;
@@ -427,7 +406,6 @@ export interface FunctionCallArgsDeltaEvent {
     type: 'response.function_call_arguments.delta';
     item_id: string;
     output_index: number;
-    call_id: string;
     delta: string;
     sequence_number: number;
 }
@@ -436,75 +414,8 @@ export interface FunctionCallArgsDoneEvent {
     type: 'response.function_call_arguments.done';
     item_id: string;
     output_index: number;
-    call_id: string;
     arguments: string;
-    name?: string;
-    sequence_number: number;
-}
-
-export interface ReasoningSummaryPartAddedEvent {
-    type: 'response.reasoning_summary_part.added';
-    item_id: string;
-    output_index: number;
-    summary_index: number;
-    part: ContentPart;
-    sequence_number: number;
-}
-
-export interface ReasoningSummaryPartDoneEvent {
-    type: 'response.reasoning_summary_part.done';
-    item_id: string;
-    output_index: number;
-    summary_index: number;
-    part: ContentPart;
-    sequence_number: number;
-}
-
-export interface ReasoningDeltaEvent {
-    type: 'response.reasoning.delta';
-    item_id: string;
-    output_index: number;
-    content_index: number;
-    delta: string;
-    obfuscation?: string | null;
-    sequence_number: number;
-}
-
-export interface ReasoningDoneEvent {
-    type: 'response.reasoning.done';
-    item_id: string;
-    output_index: number;
-    content_index: number;
-    text: string;
-    sequence_number: number;
-}
-
-export interface ReasoningSummaryTextDeltaEvent {
-    type: 'response.reasoning_summary_text.delta';
-    item_id: string;
-    output_index: number;
-    summary_index: number;
-    delta: string;
-    obfuscation?: string | null;
-    sequence_number: number;
-}
-
-export interface ReasoningSummaryTextDoneEvent {
-    type: 'response.reasoning_summary_text.done';
-    item_id: string;
-    output_index: number;
-    summary_index: number;
-    text: string;
-    sequence_number: number;
-}
-
-export interface OutputTextAnnotationAddedEvent {
-    type: 'response.output_text.annotation.added';
-    item_id: string;
-    output_index: number;
-    content_index: number;
-    annotation_index: number;
-    annotation?: Annotation | null;
+    name: string;
     sequence_number: number;
 }
 
@@ -528,30 +439,4 @@ export type StreamingEvent =
     | RefusalDoneEvent
     | FunctionCallArgsDeltaEvent
     | FunctionCallArgsDoneEvent
-    | ReasoningSummaryPartAddedEvent
-    | ReasoningSummaryPartDoneEvent
-    | ReasoningDeltaEvent
-    | ReasoningDoneEvent
-    | ReasoningSummaryTextDeltaEvent
-    | ReasoningSummaryTextDoneEvent
-    | OutputTextAnnotationAddedEvent
     | StreamingErrorEvent;
-
-// --- List Responses ---
-
-export interface ListResponsesResult {
-    object: 'list';
-    data: ResponseObject[];
-    first_id?: string | null;
-    last_id?: string | null;
-    has_more?: boolean;
-}
-
-export interface ListResponsesOptions {
-    /** Maximum number of responses to return. Server defaults to 20 and caps at 100. */
-    limit?: number;
-    /** Sort order for returned responses. Server defaults to descending. */
-    order?: 'asc' | 'desc';
-    /** Return responses after this response ID. */
-    after?: string;
-}
diff --git a/sdk/js/test/openai/responsesClient.test.ts b/sdk/js/test/openai/responsesClient.test.ts
index 1b329d59d..f0dbf4b03 100644
--- a/sdk/js/test/openai/responsesClient.test.ts
+++ b/sdk/js/test/openai/responsesClient.test.ts
@@ -1,26 +1,13 @@
 import { describe, it, before, after } from 'mocha';
 import { expect } from 'chai';
-import * as fs from 'fs';
-import * as os from 'os';
-import * as path from 'path';
 import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
 import { ResponsesClient, ResponsesClientSettings, getOutputText } from '../../src/openai/responsesClient.js';
-import { createImageContentFromFile, createImageContentFromUrl } from '../../src/openai/vision.js';
 import type {
     StreamingEvent,
     FunctionToolDefinition,
     ResponseInputItem,
     ResponseObject,
     MessageItem,
-    InputImageContent,
-    ReasoningDeltaEvent,
-    ReasoningDoneEvent,
-    ReasoningSummaryTextDeltaEvent,
-    ReasoningSummaryTextDoneEvent,
-    ReasoningSummaryPartAddedEvent,
-    ReasoningSummaryPartDoneEvent,
-    OutputTextAnnotationAddedEvent,
-    ListResponsesResult,
 } from '../../src/types.js';
 import { FoundryLocalManager } from '../../src/foundryLocalManager.js';
 import type { IModel } from '../../src/imodel.js';
@@ -77,11 +64,10 @@ describe('ResponsesClient Tests', () => {
             expect(result.seed).to.equal(42);
         });
 
-        it('should serialize store as true by default when no settings defined', () => {
+        it('should return empty object when no settings defined', () => {
             const settings = new ResponsesClientSettings();
             const result = settings._serialize();
-            expect(Object.keys(result).length).to.equal(1);
-            expect(result.store).to.be.true;
+            expect(Object.keys(result).length).to.equal(0);
         });
     });
 
@@ -379,394 +365,6 @@ describe('ResponsesClient Tests', () => {
         });
     });
 
-    // ========================================================================
-    // Vision helper functions
-    // ========================================================================
-
-    describe('vision helpers', () => {
-        it('should create InputImageContent from URL', () => {
-            const content = createImageContentFromUrl('https://example.com/image.png');
-            expect(content.type).to.equal('input_image');
-            expect(content.image_url).to.equal('https://example.com/image.png');
-            expect(content.media_type).to.be.undefined; // server infers from URL
-            expect(content.detail).to.be.undefined;
-            expect(content.image_data).to.be.undefined;
-        });
-
-        it('should create InputImageContent from URL with detail', () => {
-            const content = createImageContentFromUrl('https://example.com/image.jpg', 'high');
-            expect(content.type).to.equal('input_image');
-            expect(content.detail).to.equal('high');
-        });
-
-        it('should satisfy InputImageContent type for base64 variant', () => {
-            // Verify the type is correct by construction
-            const content: InputImageContent = {
-                type: 'input_image',
-                image_data: 'base64data==',
-                media_type: 'image/png',
-                detail: 'low',
-            };
-            expect(content.type).to.equal('input_image');
-            expect(content.image_data).to.equal('base64data==');
-            expect(content.media_type).to.equal('image/png');
-            expect(content.detail).to.equal('low');
-            expect(content.image_url).to.be.undefined;
-        });
-
-        it('should create InputImageContent from file for a temp PNG', async () => {
-            // Write a minimal 1×1 PNG to a unique temp directory
-            const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-test-'));
-            const tmpFile = path.join(tmpDir, 'test-image.png');
-            // Minimal valid PNG bytes (1×1 white pixel)
-            const pngBuffer = Buffer.from(
-                '89504e470d0a1a0a0000000d49484452000000010000000108020000009001' +
-                '2e00000000c4944415478016360f8cfc000000002000176dd24100000000049454e44ae426082',
-                'hex'
-            );
-            fs.writeFileSync(tmpFile, pngBuffer);
-
-            try {
-                const content = await createImageContentFromFile(tmpFile);
-                expect(content.type).to.equal('input_image');
-                expect(content.media_type).to.equal('image/png');
-                expect(content.image_data).to.be.a('string');
-                expect(content.image_data!.length).to.be.greaterThan(0);
-                expect(content.image_url).to.be.undefined;
-            } finally {
-                fs.unlinkSync(tmpFile);
-                fs.rmdirSync(tmpDir);
-            }
-        });
-
-        it('should throw createImageContentFromFile for unsupported extension', async () => {
-            // Create a real file with an unsupported extension so we reach the format check
-            const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'foundry-test-'));
-            const tmpFile = path.join(tmpDir, 'image.xyz');
-            fs.writeFileSync(tmpFile, 'dummy');
-            try {
-                await createImageContentFromFile(tmpFile);
-                expect.fail('Should have thrown');
-            } catch (e) {
-                expect((e as Error).message).to.include('Unsupported image format');
-            } finally {
-                fs.unlinkSync(tmpFile);
-                fs.rmdirSync(tmpDir);
-            }
-        });
-    });
-
-    // ========================================================================
-    // list() method — unit test with fetch mock
-    // ========================================================================
-
-    describe('list()', () => {
-        it('should call GET /v1/responses and return parsed JSON', async () => {
-            const mockResult = { object: 'list', data: [], first_id: null, last_id: null, has_more: false };
-            let capturedUrl: string | URL | Request | undefined;
-            const originalFetch = globalThis.fetch;
-            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
-                capturedUrl = url;
-                return new Response(JSON.stringify(mockResult), {
-                    status: 200,
-                    headers: { 'Content-Type': 'application/json' },
-                });
-            };
-            try {
-                const client = new ResponsesClient('http://test-host', 'test-model');
-                const result = await client.list();
-                expect(result.object).to.equal('list');
-                expect(result.data).to.deep.equal([]);
-                expect(result.has_more).to.equal(false);
-                expect(String(capturedUrl)).to.equal('http://test-host/v1/responses');
-            } finally {
-                globalThis.fetch = originalFetch;
-            }
-        });
-
-        it('should send pagination options as query parameters', async () => {
-            const originalFetch = globalThis.fetch;
-            let capturedUrl: string | URL | Request | undefined;
-            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
-                capturedUrl = url;
-                return new Response(JSON.stringify({
-                    object: 'list',
-                    data: [],
-                    first_id: 'resp_first',
-                    last_id: 'resp_last',
-                    has_more: true,
-                }), {
-                    status: 200,
-                    headers: { 'Content-Type': 'application/json' },
-                });
-            };
-            try {
-                const client = new ResponsesClient('http://test-host', 'test-model');
-                const result = await client.list({ limit: 10, order: 'asc', after: 'resp_123' });
-                const url = new URL(String(capturedUrl));
-                expect(url.pathname).to.equal('/v1/responses');
-                expect(url.searchParams.get('limit')).to.equal('10');
-                expect(url.searchParams.get('order')).to.equal('asc');
-                expect(url.searchParams.get('after')).to.equal('resp_123');
-                expect(result.first_id).to.equal('resp_first');
-                expect(result.last_id).to.equal('resp_last');
-                expect(result.has_more).to.equal(true);
-            } finally {
-                globalThis.fetch = originalFetch;
-            }
-        });
-    });
-
-    // ========================================================================
-    // FFI transport with HTTP fallback
-    // ========================================================================
-
-    describe('FFI transport', () => {
-        class FakeCoreInterop {
-            public commands: Array<{ command: string; params?: any }> = [];
-            public streamingCommands: Array<{ command: string; params?: any }> = [];
-
-            constructor(
-                private readonly response: string = JSON.stringify({
-                    id: 'chatcmpl_test',
-                    created: 123,
-                    choices: [{ message: { content: 'Hello from FFI' } }],
-                    usage: { prompt_tokens: 2, completion_tokens: 3, total_tokens: 5 },
-                }),
-                private readonly streamingChunks: string[] = []
-            ) { }
-
-            executeCommand(command: string, params?: any): string {
-                this.commands.push({ command, params });
-                return this.response;
-            }
-
-            async executeCommandStreaming(command: string, params: any, callback: (chunk: string) => void): Promise<string> {
-                this.streamingCommands.push({ command, params });
-                for (const chunk of this.streamingChunks) {
-                    callback(chunk);
-                }
-                return '{}';
-            }
-        }
-
-        it('should use chat_completions FFI for create and store the response locally', async () => {
-            const fakeCore = new FakeCoreInterop();
-            const originalFetch = globalThis.fetch;
-            let fetchCalled = false;
-            globalThis.fetch = async (): Promise<Response> => {
-                fetchCalled = true;
-                throw new Error('fetch should not be called');
-            };
-
-            try {
-                const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
-                const response = await client.create('Hello', { store: true });
-                const request = JSON.parse(fakeCore.commands[0].params.Params.OpenAICreateRequest);
-
-                expect(fetchCalled).to.be.false;
-                expect(fakeCore.commands[0].command).to.equal('chat_completions');
-                expect(request.model).to.equal('test-model');
-                expect(request.messages).to.deep.equal([{ role: 'user', content: 'Hello' }]);
-                expect(request.stream).to.equal(false);
-                expect(getOutputText(response)).to.equal('Hello from FFI');
-                expect(response.id).to.equal('resp_chatcmpl_test');
-                expect(response.usage?.total_tokens).to.equal(5);
-
-                const stored = await client.get(response.id);
-                expect(stored.id).to.equal(response.id);
-                const listed = await client.list();
-                expect(listed.data.map((item) => item.id)).to.deep.equal([response.id]);
-            } finally {
-                globalThis.fetch = originalFetch;
-            }
-        });
-
-        it('should convert vision input to chat image_url content for FFI', async () => {
-            const fakeCore = new FakeCoreInterop();
-            const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
-
-            await client.create([
-                {
-                    type: 'message',
-                    role: 'user',
-                    content: [
-                        { type: 'input_text', text: 'Describe this image.' },
-                        { type: 'input_image', image_data: 'abc123', media_type: 'image/png', detail: 'low' },
-                    ],
-                },
-            ]);
-
-            const request = JSON.parse(fakeCore.commands[0].params.Params.OpenAICreateRequest);
-            expect(request.messages[0].content[0]).to.deep.equal({ type: 'text', text: 'Describe this image.' });
-            expect(request.messages[0].content[1]).to.deep.equal({
-                type: 'image_url',
-                image_url: { url: 'data:image/png;base64,abc123', detail: 'low' },
-            });
-        });
-
-        it('should fall back to HTTP create when FFI create fails and baseUrl is available', async () => {
-            const fakeCore = {
-                executeCommand: () => {
-                    throw new Error('ffi unavailable');
-                },
-                executeCommandStreaming: async () => '{}',
-            };
-            const mockResponse: ResponseObject = {
-                id: 'resp_http', object: 'response', created_at: 1, status: 'completed',
-                model: 'test-model', output: [],
-                tools: [], tool_choice: 'auto', truncation: 'disabled',
-                parallel_tool_calls: false, text: {}, top_p: 1, temperature: 1,
-                presence_penalty: 0, frequency_penalty: 0, store: true,
-            };
-            const originalFetch = globalThis.fetch;
-            let capturedUrl: string | URL | Request | undefined;
-            globalThis.fetch = async (url: string | URL | Request, _init?: RequestInit): Promise<Response> => {
-                capturedUrl = url;
-                return new Response(JSON.stringify(mockResponse), {
-                    status: 200,
-                    headers: { 'Content-Type': 'application/json' },
-                });
-            };
-
-            try {
-                const client = ResponsesClient.createWithCoreInterop('http://test-host', 'test-model', fakeCore as any);
-                const response = await client.create('Hello');
-                expect(response.id).to.equal('resp_http');
-                expect(String(capturedUrl)).to.equal('http://test-host/v1/responses');
-            } finally {
-                globalThis.fetch = originalFetch;
-            }
-        });
-
-        it('should stream response events through FFI', async () => {
-            const fakeCore = new FakeCoreInterop('{}', [
-                JSON.stringify({ choices: [{ delta: { content: 'Hel' } }] }),
-                JSON.stringify({ choices: [{ delta: { content: 'lo' } }] }),
-            ]);
-            const client = ResponsesClient.createWithCoreInterop(undefined, 'test-model', fakeCore as any);
-            const events: StreamingEvent[] = [];
-
-            await client.createStreaming('Hello', (event) => events.push(event));
-
-            expect(fakeCore.streamingCommands[0].command).to.equal('chat_completions');
-            expect(events.map((event) => event.type)).to.include.members([
-                'response.created',
-                'response.in_progress',
-                'response.output_text.delta',
-                'response.output_text.done',
-                'response.completed',
-            ]);
-            const deltas = events
-                .filter((event) => event.type === 'response.output_text.delta')
-                .map((event: any) => event.delta);
-            expect(deltas.join('')).to.equal('Hello');
-        });
-    });
-
-    // ========================================================================
-    // Reasoning streaming event types
-    // ========================================================================
-
-    describe('reasoning streaming event types', () => {
-        it('should construct ReasoningDeltaEvent', () => {
-            const event: ReasoningDeltaEvent = {
-                type: 'response.reasoning.delta',
-                item_id: 'item_1',
-                output_index: 0,
-                content_index: 0,
-                delta: 'thinking...',
-                sequence_number: 1,
-            };
-            expect(event.type).to.equal('response.reasoning.delta');
-            expect(event.delta).to.equal('thinking...');
-        });
-
-        it('should construct ReasoningDoneEvent', () => {
-            const event: ReasoningDoneEvent = {
-                type: 'response.reasoning.done',
-                item_id: 'item_1',
-                output_index: 0,
-                content_index: 0,
-                text: 'final reasoning text',
-                sequence_number: 2,
-            };
-            expect(event.type).to.equal('response.reasoning.done');
-            expect(event.text).to.equal('final reasoning text');
-        });
-
-        it('should construct ReasoningSummaryTextDeltaEvent', () => {
-            const event: ReasoningSummaryTextDeltaEvent = {
-                type: 'response.reasoning_summary_text.delta',
-                item_id: 'item_2',
-                output_index: 0,
-                summary_index: 0,
-                delta: 'summary delta',
-                sequence_number: 3,
-            };
-            expect(event.type).to.equal('response.reasoning_summary_text.delta');
-        });
-
-        it('should construct ReasoningSummaryTextDoneEvent', () => {
-            const event: ReasoningSummaryTextDoneEvent = {
-                type: 'response.reasoning_summary_text.done',
-                item_id: 'item_2',
-                output_index: 0,
-                summary_index: 0,
-                text: 'full summary',
-                sequence_number: 4,
-            };
-            expect(event.type).to.equal('response.reasoning_summary_text.done');
-        });
-
-        it('should construct ReasoningSummaryPartAddedEvent', () => {
-            const event: ReasoningSummaryPartAddedEvent = {
-                type: 'response.reasoning_summary_part.added',
-                item_id: 'item_3',
-                output_index: 0,
-                summary_index: 0,
-                part: { type: 'output_text', text: 'summary part' },
-                sequence_number: 5,
-            };
-            expect(event.type).to.equal('response.reasoning_summary_part.added');
-        });
-
-        it('should construct ReasoningSummaryPartDoneEvent', () => {
-            const event: ReasoningSummaryPartDoneEvent = {
-                type: 'response.reasoning_summary_part.done',
-                item_id: 'item_3',
-                output_index: 0,
-                summary_index: 0,
-                part: { type: 'output_text', text: 'done summary part' },
-                sequence_number: 6,
-            };
-            expect(event.type).to.equal('response.reasoning_summary_part.done');
-        });
-
-        it('should construct OutputTextAnnotationAddedEvent', () => {
-            const event: OutputTextAnnotationAddedEvent = {
-                type: 'response.output_text.annotation.added',
-                item_id: 'item_4',
-                output_index: 0,
-                content_index: 0,
-                annotation_index: 0,
-                annotation: { type: 'url_citation', start_index: 0, end_index: 5 },
-                sequence_number: 7,
-            };
-            expect(event.type).to.equal('response.output_text.annotation.added');
-        });
-
-        it('should accept reasoning events in StreamingEvent union', () => {
-            const events: StreamingEvent[] = [
-                { type: 'response.reasoning.delta', item_id: 'x', output_index: 0, content_index: 0, delta: 'd', sequence_number: 1 },
-                { type: 'response.reasoning.done', item_id: 'x', output_index: 0, content_index: 0, text: 't', sequence_number: 2 },
-                { type: 'response.reasoning_summary_text.delta', item_id: 'x', output_index: 0, summary_index: 0, delta: 'd', sequence_number: 3 },
-                { type: 'response.reasoning_summary_text.done', item_id: 'x', output_index: 0, summary_index: 0, text: 't', sequence_number: 4 },
-            ];
-            expect(events.length).to.equal(4);
-        });
-    });
-
     // ========================================================================
     // Integration tests (require running web service + loaded model)
     // ========================================================================
@@ -969,42 +567,5 @@ describe('ResponsesClient Tests', () => {
                 expect((functionCall as any).name).to.equal('get_weather');
             }
         });
-
-        it('should list stored responses', async function() {
-            this.timeout(30000);
-
-            const result = await client.list();
-
-            expect(result).to.not.be.undefined;
-            expect(result.object).to.equal('list');
-            expect(result.data).to.be.an('array');
-            console.log(`Listed ${result.data.length} responses`);
-        });
-
-        it('should create a vision response with base64 image', async function() {
-            this.timeout(60000);
-
-            // Minimal 1×1 red PNG (base64)
-            const minimalPng = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADklEQVQI12P4z8BQDwADhQGAWjR9awAAAABJRU5ErkJggg==';
-
-            const response = await client.create([
-                {
-                    type: 'message',
-                    role: 'user',
-                    content: [
-                        { type: 'input_text', text: 'What color is the dominant color in this image? Answer with one word.' },
-                        { type: 'input_image', image_data: minimalPng, media_type: 'image/png' },
-                    ],
-                } as MessageItem,
-            ]);
-
-            expect(response).to.not.be.undefined;
-            const text = getOutputText(response);
-            console.log(`Vision response: ${text}`);
-            // Just verify we got a non-empty response — vision support depends on the loaded model
-            if (response.status === 'completed') {
-                expect(text.length).to.be.greaterThan(0);
-            }
-        });
     });
-});
+});
\ No newline at end of file
diff --git a/sdk/js/test/openai/responsesWebService.test.ts b/sdk/js/test/openai/responsesWebService.test.ts
new file mode 100644
index 000000000..a414ed9d8
--- /dev/null
+++ b/sdk/js/test/openai/responsesWebService.test.ts
@@ -0,0 +1,202 @@
+import { describe, it, before, after } from 'mocha';
+import { expect } from 'chai';
+import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
+import { FoundryLocalManager } from '../../src/foundryLocalManager.js';
+import type { IModel } from '../../src/imodel.js';
+
+function getOutputText(response: any): string {
+    if (typeof response.output_text === 'string') {
+        return response.output_text;
+    }
+
+    return (response.output ?? [])
+        .flatMap((item: any) => Array.isArray(item.content) ? item.content : [])
+        .filter((part: any) => part.type === 'output_text' && typeof part.text === 'string')
+        .map((part: any) => part.text)
+        .join('');
+}
+
+async function postResponse(baseUrl: string, body: Record<string, unknown>): Promise<any> {
+    const res = await fetch(`${baseUrl}/v1/responses`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify(body),
+    });
+
+    const text = await res.text();
+    expect(res.ok, text).to.equal(true);
+    return JSON.parse(text);
+}
+
+async function postStreamingResponse(baseUrl: string, body: Record<string, unknown>): Promise<any[]> {
+    const res = await fetch(`${baseUrl}/v1/responses`, {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            'Accept': 'text/event-stream',
+        },
+        body: JSON.stringify({ ...body, stream: true }),
+    });
+
+    if (!res.ok) {
+        const errorText = await res.text().catch(() => res.statusText);
+        expect.fail(errorText);
+    }
+    expect(res.body).to.not.equal(null);
+
+    const reader = res.body!.getReader();
+    const decoder = new TextDecoder();
+    const events: any[] = [];
+    let buffer = '';
+
+    try {
+        while (true) {
+            const { value, done } = await reader.read();
+            if (done) break;
+
+            buffer += decoder.decode(value, { stream: true });
+            const blocks = buffer.split('\n\n');
+            buffer = blocks.pop() ?? '';
+
+            for (const block of blocks) {
+                const data = block
+                    .split('\n')
+                    .filter((line) => line.startsWith('data: '))
+                    .map((line) => line.slice(6))
+                    .join('\n')
+                    .trim();
+
+                if (!data) continue;
+                if (data === '[DONE]') return events;
+                events.push(JSON.parse(data));
+            }
+        }
+    } finally {
+        reader.releaseLock();
+    }
+
+    return events;
+}
+
+describe('Responses web service Integration', function() {
+    let manager: FoundryLocalManager;
+    let model: IModel;
+    let modelId: string;
+    let baseUrl: string;
+    let skipped = false;
+
+    before(async function() {
+        this.timeout(30000);
+        if (IS_RUNNING_IN_CI) {
+            skipped = true;
+            this.skip();
+            return;
+        }
+
+        manager = getTestManager();
+        const cachedModels = await manager.catalog.getCachedModels();
+        const cachedVariant = cachedModels.find((m) => m.alias === TEST_MODEL_ALIAS);
+        if (!cachedVariant) {
+            skipped = true;
+            this.skip();
+            return;
+        }
+
+        model = await manager.catalog.getModel(TEST_MODEL_ALIAS);
+        model.selectVariant(cachedVariant);
+        modelId = cachedVariant.id;
+
+        await model.load();
+        manager.startWebService();
+        baseUrl = manager.urls[0];
+    });
+
+    after(async function() {
+        if (skipped) return;
+        try { manager.stopWebService(); } catch { /* ignore cleanup errors */ }
+        try { await model.unload(); } catch { /* ignore cleanup errors */ }
+    });
+
+    it('should create a response through the OpenAI-compatible web service', async function() {
+        this.timeout(30000);
+
+        const response = await postResponse(baseUrl, {
+            model: modelId,
+            input: 'What is 2 + 2? Answer with just the number.',
+            temperature: 0,
+            max_output_tokens: 64,
+            store: false,
+        });
+
+        expect(response.object).to.equal('response');
+        expect(response.status).to.equal('completed');
+        expect(getOutputText(response).length).to.be.greaterThan(0);
+    });
+
+    it('should stream response events through the OpenAI-compatible web service', async function() {
+        this.timeout(30000);
+
+        const events = await postStreamingResponse(baseUrl, {
+            model: modelId,
+            input: 'Count from 1 to 3.',
+            temperature: 0,
+            max_output_tokens: 64,
+            store: false,
+        });
+
+        expect(events.some((event) => event.type === 'response.created')).to.equal(true);
+        expect(events.some((event) => event.type === 'response.output_text.delta')).to.equal(true);
+        expect(events.some((event) => event.type === 'response.completed')).to.equal(true);
+    });
+
+    it('should support Responses function calling through the web service', async function() {
+        this.timeout(30000);
+        if (model.supportsToolCalling === false) {
+            this.skip();
+            return;
+        }
+
+        const tools = [{
+            type: 'function',
+            name: 'get_weather',
+            description: 'Get the current weather. This test always returns Seattle weather.',
+            parameters: {
+                type: 'object',
+                properties: {},
+                additionalProperties: false,
+            },
+        }];
+
+        const toolResponse = await postResponse(baseUrl, {
+            model: modelId,
+            input: 'Use the get_weather tool and then answer with the weather.',
+            tools,
+            tool_choice: 'required',
+            temperature: 0,
+            max_output_tokens: 64,
+            store: true,
+        });
+
+        const functionCall = toolResponse.output?.find((item: any) => item.type === 'function_call');
+        expect(functionCall, JSON.stringify(toolResponse.output)).to.not.equal(undefined);
+        expect(functionCall.name).to.equal('get_weather');
+        expect(functionCall.call_id).to.be.a('string');
+
+        const finalResponse = await postResponse(baseUrl, {
+            model: modelId,
+            previous_response_id: toolResponse.id,
+            input: [{
+                type: 'function_call_output',
+                call_id: functionCall.call_id,
+                output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
+            }],
+            tools,
+            temperature: 0,
+            max_output_tokens: 64,
+            store: false,
+        });
+
+        expect(finalResponse.status).to.equal('completed');
+        expect(getOutputText(finalResponse).length).to.be.greaterThan(0);
+    });
+});

From aba3b039f2f763e673db4e767810500bfeecb45a Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 30 Apr 2026 20:21:47 -0400
Subject: [PATCH 07/10] test(sdk/js): use external responses web service

Make the JavaScript Responses web-service sample and integration tests call an already-running OpenAI-compatible Foundry Local web server without using the JS native addon.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 samples/README.md                             |   2 +-
 samples/js/README.md                          |   2 +-
 samples/js/web-server-responses/app.js        | 189 +++++++-----------
 samples/js/web-server-responses/package.json  |   4 -
 .../test/openai/responsesWebService.test.ts   |  38 +---
 5 files changed, 81 insertions(+), 154 deletions(-)

diff --git a/samples/README.md b/samples/README.md
index 57de2e8bd..f61f31c45 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -9,6 +9,6 @@ Explore complete working examples that demonstrate how to use Foundry Local —
 | Language | Samples | Description |
 |----------|---------|-------------|
 | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. |
-| [**JavaScript**](js/) | 14 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, Responses API, and tutorials. |
+| [**JavaScript**](js/) | 14 | Node.js samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, Responses API, and tutorials. |
 | [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. |
 | [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. |
diff --git a/samples/js/README.md b/samples/js/README.md
index bd9804015..8f450c9f3 100644
--- a/samples/js/README.md
+++ b/samples/js/README.md
@@ -19,7 +19,7 @@ These samples demonstrate how to use the Foundry Local JavaScript SDK (`foundry-
 | [langchain-integration-example](langchain-integration-example/) | LangChain.js integration for building text generation chains. |
 | [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with custom function definitions and streaming responses. |
 | [web-server-example](web-server-example/) | Start a local OpenAI-compatible web server and call it with the OpenAI SDK. |
-| [web-server-responses](web-server-responses/) | Start the local OpenAI-compatible web server and call the Responses API, including streaming and tool calling. |
+| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. |
 | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
 | [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). |
 | [tutorial-tool-calling](tutorial-tool-calling/) | Create a tool-calling assistant (tutorial). |
diff --git a/samples/js/web-server-responses/app.js b/samples/js/web-server-responses/app.js
index 764a9d326..1df4ac829 100644
--- a/samples/js/web-server-responses/app.js
+++ b/samples/js/web-server-responses/app.js
@@ -1,6 +1,5 @@
 // <complete_code>
 // <imports>
-import { FoundryLocalManager } from 'foundry-local-sdk';
 import { OpenAI } from 'openai';
 // </imports>
 
@@ -17,133 +16,87 @@ function getResponseText(response) {
 }
 
 // <init>
-const endpointUrl = process.env.FOUNDRY_LOCAL_ENDPOINT ?? 'http://localhost:5764';
-const modelAlias = process.env.FOUNDRY_LOCAL_MODEL ?? 'qwen2.5-0.5b';
-
-console.log('Initializing Foundry Local SDK...');
-const manager = FoundryLocalManager.create({
-    appName: 'foundry_local_samples',
-    logLevel: 'info',
-    webServiceUrls: endpointUrl
-});
-console.log('SDK initialized successfully');
-
-let currentEp = '';
-await manager.downloadAndRegisterEps((epName, percent) => {
-    if (epName !== currentEp) {
-        if (currentEp !== '') process.stdout.write('\n');
-        currentEp = epName;
-    }
-    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
-});
-if (currentEp !== '') process.stdout.write('\n');
+const endpointUrl = process.env.FOUNDRY_LOCAL_ENDPOINT ?? 'http://127.0.0.1:52495';
+const modelId = process.env.FOUNDRY_LOCAL_MODEL ?? 'qwen2.5-0.5b';
 // </init>
 
-let model;
-let webServiceStarted = false;
-
-try {
-    // <model_setup>
-    model = await manager.catalog.getModel(modelAlias);
-
-    console.log(`\nDownloading model ${modelAlias}...`);
-    await model.download((progress) => {
-        process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
-    });
-    console.log('\nModel downloaded');
-
-    console.log(`\nLoading model ${modelAlias}...`);
-    await model.load();
-    console.log(`Model loaded: ${model.id}`);
-    // </model_setup>
-
-    // <server_setup>
-    console.log(`\nStarting web service on ${endpointUrl}...`);
-    manager.startWebService();
-    webServiceStarted = true;
-    console.log('Web service started');
+// Start the Foundry Local web service separately, for example with the
+// C# foundry-local-web-server sample, then point this sample at that URL.
+// <server_setup>
+const openai = new OpenAI({
+    baseURL: endpointUrl + '/v1',
+    apiKey: 'notneeded',
+});
+// </server_setup>
 
-    const openai = new OpenAI({
-        baseURL: endpointUrl + '/v1',
-        apiKey: 'notneeded',
-    });
-    // </server_setup>
+// <<<<<< OPENAI RESPONSES SDK USAGE >>>>>>
+console.log(`Using Foundry Local web service at ${endpointUrl}`);
+console.log(`Using model ${modelId}`);
 
-    // <<<<<< OPENAI RESPONSES SDK USAGE >>>>>>
-    console.log('\nTesting a non-streaming Responses call...');
-    const response = await openai.responses.create({
-        model: model.id,
-        input: 'Reply with one short sentence about local AI.',
-    });
-    console.log(`[ASSISTANT]: ${getResponseText(response)}`);
+console.log('\nTesting a non-streaming Responses call...');
+const response = await openai.responses.create({
+    model: modelId,
+    input: 'Reply with one short sentence about local AI.',
+});
+console.log(`[ASSISTANT]: ${getResponseText(response)}`);
 
-    console.log('\nTesting a streaming Responses call...');
-    const stream = await openai.responses.create({
-        model: model.id,
-        input: 'Count from one to three.',
-        stream: true,
-    });
+console.log('\nTesting a streaming Responses call...');
+const stream = await openai.responses.create({
+    model: modelId,
+    input: 'Count from one to three.',
+    stream: true,
+});
 
-    process.stdout.write('[ASSISTANT STREAM]: ');
-    for await (const event of stream) {
-        if (event.type === 'response.output_text.delta') {
-            process.stdout.write(event.delta);
-        }
+process.stdout.write('[ASSISTANT STREAM]: ');
+for await (const event of stream) {
+    if (event.type === 'response.output_text.delta') {
+        process.stdout.write(event.delta);
     }
-    process.stdout.write('\n');
-
-    console.log('\nTesting Responses tool calling...');
-    const tools = [
-        {
-            type: 'function',
-            name: 'get_weather',
-            description: 'Get the current weather. This sample always returns Seattle weather.',
-            parameters: {
-                type: 'object',
-                properties: {},
-                additionalProperties: false,
-            },
+}
+process.stdout.write('\n');
+
+console.log('\nTesting Responses tool calling...');
+const tools = [
+    {
+        type: 'function',
+        name: 'get_weather',
+        description: 'Get the current weather. This sample always returns Seattle weather.',
+        parameters: {
+            type: 'object',
+            properties: {},
+            additionalProperties: false,
         },
-    ];
-
-    const toolResponse = await openai.responses.create({
-        model: model.id,
-        input: 'Use the get_weather tool and then answer with the weather.',
-        tools,
-        tool_choice: 'required',
-        store: true,
-    });
+    },
+];
+
+const toolResponse = await openai.responses.create({
+    model: modelId,
+    input: 'Use the get_weather tool and then answer with the weather.',
+    tools,
+    tool_choice: 'required',
+    store: true,
+});
 
-    const functionCall = toolResponse.output?.find((item) => item.type === 'function_call');
-    if (!functionCall) {
-        throw new Error('Expected the model to call get_weather.');
-    }
+const functionCall = toolResponse.output?.find((item) => item.type === 'function_call');
+if (!functionCall) {
+    throw new Error('Expected the model to call get_weather.');
+}
 
-    console.log(`[TOOL CALL]: ${functionCall.name}(${functionCall.arguments})`);
+console.log(`[TOOL CALL]: ${functionCall.name}(${functionCall.arguments})`);
 
-    const finalResponse = await openai.responses.create({
-        model: model.id,
-        previous_response_id: toolResponse.id,
-        input: [
-            {
-                type: 'function_call_output',
-                call_id: functionCall.call_id,
-                output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
-            },
-        ],
-        tools,
-    });
+const finalResponse = await openai.responses.create({
+    model: modelId,
+    previous_response_id: toolResponse.id,
+    input: [
+        {
+            type: 'function_call_output',
+            call_id: functionCall.call_id,
+            output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
+        },
+    ],
+    tools,
+});
 
-    console.log(`[ASSISTANT FINAL]: ${getResponseText(finalResponse)}`);
-    // <<<<<< END OPENAI RESPONSES SDK USAGE >>>>>>
-} finally {
-    console.log('\nCleaning up...');
-    if (webServiceStarted) {
-        manager.stopWebService();
-    }
-    if (model) {
-        await model.unload();
-    }
-    console.log('Done');
-}
+console.log(`[ASSISTANT FINAL]: ${getResponseText(finalResponse)}`);
+// <<<<<< END OPENAI RESPONSES SDK USAGE >>>>>>
 // </complete_code>
diff --git a/samples/js/web-server-responses/package.json b/samples/js/web-server-responses/package.json
index 6c8f2ff51..83b90d1cf 100644
--- a/samples/js/web-server-responses/package.json
+++ b/samples/js/web-server-responses/package.json
@@ -7,10 +7,6 @@
     "start": "node app.js"
   },
   "dependencies": {
-    "foundry-local-sdk": "latest",
     "openai": "latest"
-  },
-  "optionalDependencies": {
-    "foundry-local-sdk-winml": "latest"
   }
 }
diff --git a/sdk/js/test/openai/responsesWebService.test.ts b/sdk/js/test/openai/responsesWebService.test.ts
index a414ed9d8..b3add15fb 100644
--- a/sdk/js/test/openai/responsesWebService.test.ts
+++ b/sdk/js/test/openai/responsesWebService.test.ts
@@ -1,8 +1,9 @@
-import { describe, it, before, after } from 'mocha';
+import { describe, it, before } from 'mocha';
 import { expect } from 'chai';
-import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
-import { FoundryLocalManager } from '../../src/foundryLocalManager.js';
-import type { IModel } from '../../src/imodel.js';
+import { IS_RUNNING_IN_CI } from '../testUtils.js';
+
+const baseUrlFromEnv = process.env.FOUNDRY_LOCAL_RESPONSES_ENDPOINT ?? process.env.FOUNDRY_LOCAL_ENDPOINT;
+const modelIdFromEnv = process.env.FOUNDRY_LOCAL_RESPONSES_MODEL ?? process.env.FOUNDRY_LOCAL_MODEL;
 
 function getOutputText(response: any): string {
     if (typeof response.output_text === 'string') {
@@ -79,42 +80,23 @@ async function postStreamingResponse(baseUrl: string, body: Record<string, unkno
 }
 
 describe('Responses web service Integration', function() {
-    let manager: FoundryLocalManager;
-    let model: IModel;
     let modelId: string;
     let baseUrl: string;
-    let skipped = false;
 
     before(async function() {
         this.timeout(30000);
         if (IS_RUNNING_IN_CI) {
-            skipped = true;
             this.skip();
             return;
         }
 
-        manager = getTestManager();
-        const cachedModels = await manager.catalog.getCachedModels();
-        const cachedVariant = cachedModels.find((m) => m.alias === TEST_MODEL_ALIAS);
-        if (!cachedVariant) {
-            skipped = true;
+        if (!baseUrlFromEnv || !modelIdFromEnv) {
             this.skip();
             return;
         }
 
-        model = await manager.catalog.getModel(TEST_MODEL_ALIAS);
-        model.selectVariant(cachedVariant);
-        modelId = cachedVariant.id;
-
-        await model.load();
-        manager.startWebService();
-        baseUrl = manager.urls[0];
-    });
-
-    after(async function() {
-        if (skipped) return;
-        try { manager.stopWebService(); } catch { /* ignore cleanup errors */ }
-        try { await model.unload(); } catch { /* ignore cleanup errors */ }
+        baseUrl = baseUrlFromEnv.replace(/\/$/, '');
+        modelId = modelIdFromEnv;
     });
 
     it('should create a response through the OpenAI-compatible web service', async function() {
@@ -151,10 +133,6 @@ describe('Responses web service Integration', function() {
 
     it('should support Responses function calling through the web service', async function() {
         this.timeout(30000);
-        if (model.supportsToolCalling === false) {
-            this.skip();
-            return;
-        }
 
         const tools = [{
             type: 'function',

From 092d4c41512aebeaefb98386de14390fe039158e Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 30 Apr 2026 20:30:59 -0400
Subject: [PATCH 08/10] fix(samples/js): use FoundryLocalManager for setup;
 OpenAI SDK for responses calls

Match the C# web-server sample pattern: FoundryLocalManager handles SDK init, EP download, model download/load, and startWebService. The OpenAI JS SDK (openai npm package) makes all Responses API calls against the running endpoint.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 samples/js/web-server-responses/app.js        | 172 +++++++++++-------
 samples/js/web-server-responses/package.json  |   4 +
 .../test/openai/responsesWebService.test.ts   |  50 ++---
 3 files changed, 137 insertions(+), 89 deletions(-)

diff --git a/samples/js/web-server-responses/app.js b/samples/js/web-server-responses/app.js
index 1df4ac829..f51e656d9 100644
--- a/samples/js/web-server-responses/app.js
+++ b/samples/js/web-server-responses/app.js
@@ -1,5 +1,6 @@
 // <complete_code>
 // <imports>
+import { FoundryLocalManager } from 'foundry-local-sdk';
 import { OpenAI } from 'openai';
 // </imports>
 
@@ -7,7 +8,6 @@ function getResponseText(response) {
     if (typeof response.output_text === 'string') {
         return response.output_text;
     }
-
     return (response.output ?? [])
         .flatMap((item) => Array.isArray(item.content) ? item.content : [])
         .filter((part) => part.type === 'output_text' && typeof part.text === 'string')
@@ -16,87 +16,125 @@ function getResponseText(response) {
 }
 
 // <init>
-const endpointUrl = process.env.FOUNDRY_LOCAL_ENDPOINT ?? 'http://127.0.0.1:52495';
-const modelId = process.env.FOUNDRY_LOCAL_MODEL ?? 'qwen2.5-0.5b';
+const endpointUrl = 'http://localhost:5764';
+
+console.log('Initializing Foundry Local SDK...');
+const manager = FoundryLocalManager.create({
+    appName: 'foundry_local_samples',
+    logLevel: 'info',
+    webServiceUrls: endpointUrl,
+});
+console.log('SDK initialized successfully');
+
+let currentEp = '';
+await manager.downloadAndRegisterEps((epName, percent) => {
+    if (epName !== currentEp) {
+        if (currentEp !== '') process.stdout.write('\n');
+        currentEp = epName;
+    }
+    process.stdout.write(`\r  ${epName.padEnd(30)}  ${percent.toFixed(1).padStart(5)}%`);
+});
+if (currentEp !== '') process.stdout.write('\n');
 // </init>
 
-// Start the Foundry Local web service separately, for example with the
-// C# foundry-local-web-server sample, then point this sample at that URL.
+// <model_setup>
+const modelAlias = 'qwen2.5-0.5b';
+const model = await manager.catalog.getModel(modelAlias);
+
+console.log(`\nDownloading model ${modelAlias}...`);
+await model.download((progress) => {
+    process.stdout.write(`\rDownloading... ${progress.toFixed(2)}%`);
+});
+console.log('\nModel downloaded');
+
+console.log('\nLoading model...');
+await model.load();
+console.log('Model loaded');
+// </model_setup>
+
 // <server_setup>
+console.log('\nStarting web service...');
+manager.startWebService();
+console.log('Web service started');
+
+// <<<<<< OPENAI SDK USAGE >>>>>>
+// Use the OpenAI SDK to call the local Foundry web service Responses API
 const openai = new OpenAI({
     baseURL: endpointUrl + '/v1',
     apiKey: 'notneeded',
 });
 // </server_setup>
 
-// <<<<<< OPENAI RESPONSES SDK USAGE >>>>>>
-console.log(`Using Foundry Local web service at ${endpointUrl}`);
-console.log(`Using model ${modelId}`);
+try {
+    console.log('\nTesting a non-streaming Responses call...');
+    const response = await openai.responses.create({
+        model: model.id,
+        input: 'Reply with one short sentence about local AI.',
+    });
+    console.log(`[ASSISTANT]: ${getResponseText(response)}`);
 
-console.log('\nTesting a non-streaming Responses call...');
-const response = await openai.responses.create({
-    model: modelId,
-    input: 'Reply with one short sentence about local AI.',
-});
-console.log(`[ASSISTANT]: ${getResponseText(response)}`);
+    console.log('\nTesting a streaming Responses call...');
+    const stream = await openai.responses.create({
+        model: model.id,
+        input: 'Count from one to three.',
+        stream: true,
+    });
 
-console.log('\nTesting a streaming Responses call...');
-const stream = await openai.responses.create({
-    model: modelId,
-    input: 'Count from one to three.',
-    stream: true,
-});
-
-process.stdout.write('[ASSISTANT STREAM]: ');
-for await (const event of stream) {
-    if (event.type === 'response.output_text.delta') {
-        process.stdout.write(event.delta);
+    process.stdout.write('[ASSISTANT STREAM]: ');
+    for await (const event of stream) {
+        if (event.type === 'response.output_text.delta') {
+            process.stdout.write(event.delta);
+        }
     }
-}
-process.stdout.write('\n');
-
-console.log('\nTesting Responses tool calling...');
-const tools = [
-    {
-        type: 'function',
-        name: 'get_weather',
-        description: 'Get the current weather. This sample always returns Seattle weather.',
-        parameters: {
-            type: 'object',
-            properties: {},
-            additionalProperties: false,
+    process.stdout.write('\n');
+
+    console.log('\nTesting Responses tool calling...');
+    const tools = [
+        {
+            type: 'function',
+            name: 'get_weather',
+            description: 'Get the current weather. This sample always returns Seattle weather.',
+            parameters: {
+                type: 'object',
+                properties: {},
+                additionalProperties: false,
+            },
         },
-    },
-];
-
-const toolResponse = await openai.responses.create({
-    model: modelId,
-    input: 'Use the get_weather tool and then answer with the weather.',
-    tools,
-    tool_choice: 'required',
-    store: true,
-});
+    ];
 
-const functionCall = toolResponse.output?.find((item) => item.type === 'function_call');
-if (!functionCall) {
-    throw new Error('Expected the model to call get_weather.');
-}
+    const toolResponse = await openai.responses.create({
+        model: model.id,
+        input: 'Use the get_weather tool and then answer with the weather.',
+        tools,
+        tool_choice: 'required',
+        store: true,
+    });
 
-console.log(`[TOOL CALL]: ${functionCall.name}(${functionCall.arguments})`);
+    const functionCall = toolResponse.output?.find((item) => item.type === 'function_call');
+    if (!functionCall) {
+        throw new Error('Expected the model to call get_weather.');
+    }
 
-const finalResponse = await openai.responses.create({
-    model: modelId,
-    previous_response_id: toolResponse.id,
-    input: [
-        {
-            type: 'function_call_output',
-            call_id: functionCall.call_id,
-            output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
-        },
-    ],
-    tools,
-});
+    console.log(`[TOOL CALL]: ${functionCall.name}(${functionCall.arguments})`);
+
+    const finalResponse = await openai.responses.create({
+        model: model.id,
+        previous_response_id: toolResponse.id,
+        input: [
+            {
+                type: 'function_call_output',
+                call_id: functionCall.call_id,
+                output: JSON.stringify({ location: 'Seattle', weather: '72 degrees F and sunny' }),
+            },
+        ],
+        tools,
+    });
 
-console.log(`[ASSISTANT FINAL]: ${getResponseText(finalResponse)}`);
-// <<<<<< END OPENAI RESPONSES SDK USAGE >>>>>>
+    console.log(`[ASSISTANT FINAL]: ${getResponseText(finalResponse)}`);
+    // <<<<<< END OPENAI SDK USAGE >>>>>>
+} finally {
+    // Tidy up
+    manager.stopWebService();
+    await model.unload();
+}
 // </complete_code>
diff --git a/samples/js/web-server-responses/package.json b/samples/js/web-server-responses/package.json
index 83b90d1cf..6c8f2ff51 100644
--- a/samples/js/web-server-responses/package.json
+++ b/samples/js/web-server-responses/package.json
@@ -7,6 +7,10 @@
     "start": "node app.js"
   },
   "dependencies": {
+    "foundry-local-sdk": "latest",
     "openai": "latest"
+  },
+  "optionalDependencies": {
+    "foundry-local-sdk-winml": "latest"
   }
 }
diff --git a/sdk/js/test/openai/responsesWebService.test.ts b/sdk/js/test/openai/responsesWebService.test.ts
index b3add15fb..6488c2c3a 100644
--- a/sdk/js/test/openai/responsesWebService.test.ts
+++ b/sdk/js/test/openai/responsesWebService.test.ts
@@ -1,15 +1,13 @@
-import { describe, it, before } from 'mocha';
+import { describe, it, before, after } from 'mocha';
 import { expect } from 'chai';
-import { IS_RUNNING_IN_CI } from '../testUtils.js';
-
-const baseUrlFromEnv = process.env.FOUNDRY_LOCAL_RESPONSES_ENDPOINT ?? process.env.FOUNDRY_LOCAL_ENDPOINT;
-const modelIdFromEnv = process.env.FOUNDRY_LOCAL_RESPONSES_MODEL ?? process.env.FOUNDRY_LOCAL_MODEL;
+import { getTestManager, TEST_MODEL_ALIAS, IS_RUNNING_IN_CI } from '../testUtils.js';
+import { FoundryLocalManager } from '../../src/foundryLocalManager.js';
+import type { IModel } from '../../src/imodel.js';
 
 function getOutputText(response: any): string {
     if (typeof response.output_text === 'string') {
         return response.output_text;
     }
-
     return (response.output ?? [])
         .flatMap((item: any) => Array.isArray(item.content) ? item.content : [])
         .filter((part: any) => part.type === 'output_text' && typeof part.text === 'string')
@@ -18,19 +16,18 @@ function getOutputText(response: any): string {
 }
 
 async function postResponse(baseUrl: string, body: Record<string, unknown>): Promise<any> {
-    const res = await fetch(`${baseUrl}/v1/responses`, {
+    const res = await fetch(`\/v1/responses`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify(body),
     });
-
     const text = await res.text();
     expect(res.ok, text).to.equal(true);
     return JSON.parse(text);
 }
 
 async function postStreamingResponse(baseUrl: string, body: Record<string, unknown>): Promise<any[]> {
-    const res = await fetch(`${baseUrl}/v1/responses`, {
+    const res = await fetch(`\/v1/responses`, {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json',
@@ -38,7 +35,6 @@ async function postStreamingResponse(baseUrl: string, body: Record<string, unkno
         },
         body: JSON.stringify({ ...body, stream: true }),
     });
-
     if (!res.ok) {
         const errorText = await res.text().catch(() => res.statusText);
         expect.fail(errorText);
@@ -54,11 +50,9 @@ async function postStreamingResponse(baseUrl: string, body: Record<string, unkno
         while (true) {
             const { value, done } = await reader.read();
             if (done) break;
-
             buffer += decoder.decode(value, { stream: true });
             const blocks = buffer.split('\n\n');
             buffer = blocks.pop() ?? '';
-
             for (const block of blocks) {
                 const data = block
                     .split('\n')
@@ -66,7 +60,6 @@ async function postStreamingResponse(baseUrl: string, body: Record<string, unkno
                     .map((line) => line.slice(6))
                     .join('\n')
                     .trim();
-
                 if (!data) continue;
                 if (data === '[DONE]') return events;
                 events.push(JSON.parse(data));
@@ -75,33 +68,50 @@ async function postStreamingResponse(baseUrl: string, body: Record<string, unkno
     } finally {
         reader.releaseLock();
     }
-
     return events;
 }
 
 describe('Responses web service Integration', function() {
+    let manager: FoundryLocalManager;
+    let model: IModel;
     let modelId: string;
     let baseUrl: string;
+    let skipped = false;
 
     before(async function() {
         this.timeout(30000);
         if (IS_RUNNING_IN_CI) {
+            skipped = true;
             this.skip();
             return;
         }
 
-        if (!baseUrlFromEnv || !modelIdFromEnv) {
+        manager = getTestManager();
+        const cachedModels = await manager.catalog.getCachedModels();
+        const cachedVariant = cachedModels.find((m) => m.alias === TEST_MODEL_ALIAS);
+        if (!cachedVariant) {
+            skipped = true;
             this.skip();
             return;
         }
 
-        baseUrl = baseUrlFromEnv.replace(/\/$/, '');
-        modelId = modelIdFromEnv;
+        model = await manager.catalog.getModel(TEST_MODEL_ALIAS);
+        model.selectVariant(cachedVariant);
+        modelId = cachedVariant.id;
+
+        await model.load();
+        manager.startWebService();
+        baseUrl = manager.urls[0];
+    });
+
+    after(async function() {
+        if (skipped) return;
+        try { manager.stopWebService(); } catch { /* ignore */ }
+        try { await model.unload(); } catch { /* ignore */ }
     });
 
     it('should create a response through the OpenAI-compatible web service', async function() {
         this.timeout(30000);
-
         const response = await postResponse(baseUrl, {
             model: modelId,
             input: 'What is 2 + 2? Answer with just the number.',
@@ -109,7 +119,6 @@ describe('Responses web service Integration', function() {
             max_output_tokens: 64,
             store: false,
         });
-
         expect(response.object).to.equal('response');
         expect(response.status).to.equal('completed');
         expect(getOutputText(response).length).to.be.greaterThan(0);
@@ -117,7 +126,6 @@ describe('Responses web service Integration', function() {
 
     it('should stream response events through the OpenAI-compatible web service', async function() {
         this.timeout(30000);
-
         const events = await postStreamingResponse(baseUrl, {
             model: modelId,
             input: 'Count from 1 to 3.',
@@ -125,7 +133,6 @@ describe('Responses web service Integration', function() {
             max_output_tokens: 64,
             store: false,
         });
-
         expect(events.some((event) => event.type === 'response.created')).to.equal(true);
         expect(events.some((event) => event.type === 'response.output_text.delta')).to.equal(true);
         expect(events.some((event) => event.type === 'response.completed')).to.equal(true);
@@ -133,7 +140,6 @@ describe('Responses web service Integration', function() {
 
     it('should support Responses function calling through the web service', async function() {
         this.timeout(30000);
-
         const tools = [{
             type: 'function',
             name: 'get_weather',

From 719d25fa367a9d9700d6c0626f42523c18733e68 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Thu, 30 Apr 2026 20:32:46 -0400
Subject: [PATCH 09/10] fix: use baseUrl in responsesWebService integration
 test fetch calls

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 sdk/js/test/openai/responsesWebService.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdk/js/test/openai/responsesWebService.test.ts b/sdk/js/test/openai/responsesWebService.test.ts
index 6488c2c3a..2ea6e0197 100644
--- a/sdk/js/test/openai/responsesWebService.test.ts
+++ b/sdk/js/test/openai/responsesWebService.test.ts
@@ -16,7 +16,7 @@ function getOutputText(response: any): string {
 }
 
 async function postResponse(baseUrl: string, body: Record<string, unknown>): Promise<any> {
-    const res = await fetch(`\/v1/responses`, {
+    const res = await fetch(`${baseUrl}/v1/responses`, {
         method: 'POST',
         headers: { 'Content-Type': 'application/json' },
         body: JSON.stringify(body),
@@ -27,7 +27,7 @@ async function postResponse(baseUrl: string, body: Record<string, unknown>): Pro
 }
 
 async function postStreamingResponse(baseUrl: string, body: Record<string, unknown>): Promise<any[]> {
-    const res = await fetch(`\/v1/responses`, {
+    const res = await fetch(`${baseUrl}/v1/responses`, {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json',

From 785399f48c84f2d702ccde8b1b49ef030f3ad0c7 Mon Sep 17 00:00:00 2001
From: maanavd <maanavdalal@gmail.com>
Date: Fri, 1 May 2026 18:03:32 -0400
Subject: [PATCH 10/10] docs: add Responses web service sample README

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 samples/js/web-server-responses/README.md | 80 +++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 samples/js/web-server-responses/README.md

diff --git a/samples/js/web-server-responses/README.md b/samples/js/web-server-responses/README.md
new file mode 100644
index 000000000..c8382004e
--- /dev/null
+++ b/samples/js/web-server-responses/README.md
@@ -0,0 +1,80 @@
+# Foundry Local Responses web service sample
+
+This sample starts the Foundry Local OpenAI-compatible web service, then uses the official OpenAI JavaScript SDK to call the Responses API.
+
+The important pattern is:
+
+1. `FoundryLocalManager` handles Foundry Local setup, model download/load, web service startup, and cleanup.
+1. `openai` handles the actual `/v1/responses` calls.
+
+## Prerequisites
+
+- Node.js 18 or later
+- Internet access on first run to install npm packages, download execution providers, and download the sample model
+
+## What gets installed
+
+Running `npm install` in this folder installs:
+
+| Package | Why it is used |
+|---------|----------------|
+| `foundry-local-sdk` | Starts Foundry Local, downloads/loads the model, and runs the local OpenAI-compatible web service. |
+| `openai` | Sends Responses API requests to the local web service at `http://localhost:5764/v1`. |
+| `foundry-local-sdk-winml` | Optional Windows acceleration package. npm installs it when supported and ignores it otherwise. |
+
+The Foundry Local SDK install also provisions the native runtime files it needs, including Foundry Local Core, ONNX Runtime, and ONNX Runtime GenAI.
+
+When you run the sample, it downloads the `qwen2.5-0.5b` model if it is not already cached, and then loads it.
+
+## Run the sample
+
+From the repository root:
+
+```powershell
+cd samples\js\web-server-responses
+npm install
+npm start
+```
+
+## What the sample does
+
+The sample:
+
+1. Initializes `FoundryLocalManager`.
+1. Downloads and registers execution providers.
+1. Downloads and loads `qwen2.5-0.5b`.
+1. Starts the local web service at `http://localhost:5764`.
+1. Uses the OpenAI JavaScript SDK with `baseURL: "http://localhost:5764/v1"`.
+1. Runs a non-streaming Responses call.
+1. Runs a streaming Responses call.
+1. Runs a Responses function-calling flow with a sample `get_weather` tool.
+1. Stops the web service and unloads the model.
+
+## Expected output
+
+You should see setup logs, then output similar to:
+
+```text
+Testing a non-streaming Responses call...
+[ASSISTANT]: ...
+
+Testing a streaming Responses call...
+[ASSISTANT STREAM]: ...
+
+Testing Responses tool calling...
+[TOOL CALL]: get_weather(...)
+[ASSISTANT FINAL]: ...
+```
+
+The exact model text can vary.
+
+## Troubleshooting
+
+If the sample fails while creating `FoundryLocalManager` with a native symbol error such as `Failed to resolve 'execute_command_with_binary' symbol`, the installed Foundry Local Core runtime is older than the version the JavaScript native addon expects. Reinstall the sample dependencies so npm can fetch the latest SDK/runtime packages:
+
+```powershell
+Remove-Item -Recurse -Force node_modules, package-lock.json
+npm install
+```
+
+If port `5764` is already in use, stop the other process or update `endpointUrl` in `app.js` to an available local URL.