diff --git a/proxy-server/docs/TESTING.md b/proxy-server/docs/TESTING.md
new file mode 100644
index 0000000..eea56dc
--- /dev/null
+++ b/proxy-server/docs/TESTING.md
@@ -0,0 +1,101 @@
+# Integration test architecture
+
+The integration tests verify the full request pipeline. An HTTP client sends a request to the proxy, the proxy creates a Copilot SDK session that talks to a mock LLM server, and the proxy streams the response back in the correct SSE format.
+
+```mermaid
+flowchart LR
+ Test["Test client<br/>(fetch)"]
+ Proxy["xcode-copilot-server<br/>(Fastify)"]
+ SDK["Copilot SDK<br/>(BYOK mode)"]
+ Mock["llm-mock-server<br/>(deterministic rules)"]
+
+ Test -->|"POST /v1/chat/completions<br/>POST /v1/messages<br/>POST /v1/responses"| Proxy
+ Proxy -->|"SDK session events"| SDK
+ SDK -->|"OpenAI / Anthropic / Responses<br/>wire format"| Mock
+ Mock -->|"SSE / JSON"| SDK
+ SDK -->|"assistant.message_delta<br/>session.idle"| Proxy
+ Proxy -->|"SSE in matching format"| Test
+```
+
+## How it works
+
+The Copilot SDK supports [BYOK (Bring Your Own Key)](https://github.com/github/copilot-sdk) providers. Instead of talking to GitHub's backend, the SDK sends requests to a custom endpoint. We point it at [`llm-mock-server`](https://github.com/theblixguy/llm-mock-server), which returns deterministic responses based on pattern-matching rules.
+
+This means the tests exercise the real SDK session lifecycle (event subscriptions, streaming, session reuse) without needing GitHub auth or making real API calls. A dummy token is enough to start the SDK CLI process.
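+
+The service setup in [`setup.ts`](../test/integration/setup.ts) is just this (abridged):
+
+```ts
+service = new CopilotService({
+  logger,
+  githubToken: process.env.GITHUB_TOKEN ?? "dummy-token-for-byok",
+});
+await service.start();
+```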
+
+## Setup
+
+[`setup.ts`](../test/integration/setup.ts) runs once per test file via `beforeAll`/`afterAll`.
+
+1. Starts `llm-mock-server` on a random port and registers the shared rules
+2. Starts `CopilotService` with a dummy GitHub token (a real `GITHUB_TOKEN` env var is used if present)
+3. Exposes a `startServer()` helper that creates a proxy instance pointed at the mock via BYOK
+
+The mock rules are simple input-output pairs.
+
+```text
+"hello" -> "Hello from mock!"
+"capital of France" -> "The capital of France is Paris."
+/what word/i -> "The word was banana."
+"think about life" -> { text: "The answer is 42.", reasoning: "..." }
+"say nothing" -> ""
+(no match) -> "I'm a mock server."
+```
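+
+They are registered in [`setup.ts`](../test/integration/setup.ts) with the mock's rule API (abridged):
+
+```ts
+mock.when("hello").reply("Hello from mock!");
+mock.when(/what word/i).reply("The word was banana.");
+mock.when("think about life").reply({
+  text: "The answer is 42.",
+  reasoning: "Let me think step by step...",
+});
+mock.when("say nothing").reply("");
+mock.fallback("I'm a mock server.");
+```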
+
+## Per-provider BYOK config
+
+Each provider's BYOK config tells the SDK which wire format to use when talking to the mock.
+
+| Provider | BYOK type | BYOK baseUrl | Notes |
+| -------- | --------- | ------------ | ----- |
+| OpenAI | `openai` | `mock.url/v1` | SDK appends `/chat/completions` |
+| Claude | `anthropic` | `mock.url` | SDK appends `/v1/messages`. Needs dummy `apiKey` |
+| Codex | `openai` + `wireApi: "responses"` | `mock.url/v1` | SDK appends `/responses` |
+
+The `allowedCliTools: ["test"]` config prevents the SDK from attaching its built-in tools to BYOK requests. Without this, the SDK sends ~30 tool definitions that fail the mock's strict schema validation.
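+
+Condensed from the per-file `byok()` helpers, the three configs look like this:
+
+```ts
+// One byok() helper per test file; shown here side by side for comparison.
+const byokConfigs = {
+  openai: { type: "openai" as const, baseUrl: `${mock.url}/v1` },
+  claude: { type: "anthropic" as const, baseUrl: mock.url, apiKey: "dummy" },
+  codex: { type: "openai" as const, wireApi: "responses" as const, baseUrl: `${mock.url}/v1` },
+};
+```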
+
+## Test structure
+
+```text
+test/integration/
+ setup.ts shared mock rules, service lifecycle, helpers
+ openai.test.ts OpenAI Chat Completions endpoint
+ claude.test.ts Anthropic Messages endpoint
+ codex.test.ts Responses API endpoint
+
+test/streaming-integration.test.ts
+ SDK-level tests that mock the CopilotSession directly.
+ Covers error handling, compaction, reasoning block structure,
+ tool bridge, and MCP routes.
+```
+
+Each integration test file defines a `PATH` (the endpoint path), `msg()` (builds a minimal valid request), `byok()` (returns the BYOK provider config), and `textFrom()` (extracts text content from the provider's SSE format).
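+
+For example, `openai.test.ts` defines them as follows (abridged; `byok()` is shown above):
+
+```ts
+const PATH = "/v1/chat/completions";
+const msg = (content: string) => ({ model: OPENAI_MODEL, messages: [{ role: "user", content }] });
+
+function textFrom(res: { body: string }): string {
+  return (parseSSELines(res.body) as { choices?: { delta?: { content?: string } }[] }[])
+    .flatMap((e) => e.choices ?? [])
+    .map((c) => c.delta?.content ?? "")
+    .filter(Boolean)
+    .join("");
+}
+```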
+
+## What's tested
+
+### Integration tests (via llm-mock-server)
+
+Per-provider coverage:
+
+- Basic streaming response with correct SSE format and content-type
+- System message / instructions passthrough
+- Multi-turn conversation (incremental prompts via session reuse)
+- Reasoning reply text extraction
+- Fallback response for unmatched messages
+- Empty response handling
+- Schema validation (missing required fields, invalid types, non-streaming rejection)
+- Usage stats recording across single and multiple requests
+- User-agent guard rejection (wrong and missing user-agent)
+- File pattern exclusion (excluded code blocks stripped from prompt)
+- Health endpoint
+
+### SDK-level tests (via mocked CopilotSession)
+
+These test things that llm-mock-server can't simulate:
+
+- Session error mid-stream (no deltas, partial deltas)
+- Context compaction events
+- Reasoning block structure (Claude thinking blocks, Codex reasoning summary events)
+- Tool execution event logging
+- Tool bridge (Claude tool_use blocks, Codex function_call items)
+- MCP JSON-RPC routes (initialize, tools/list, tools/call, notifications)
diff --git a/proxy-server/package-lock.json b/proxy-server/package-lock.json
index 1123b29..370b724 100644
--- a/proxy-server/package-lock.json
+++ b/proxy-server/package-lock.json
@@ -11,7 +11,7 @@
"license": "MIT",
"dependencies": {
"commander": "14.0.3",
- "copilot-sdk-proxy": "3.0.1",
+ "copilot-sdk-proxy": "4.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
"koffi": "2.15.2",
@@ -19,12 +19,13 @@
"zod": "4.3.6"
},
"bin": {
- "xcode-copilot-server": "bin/xcode-copilot-server.mjs"
+ "xcode-copilot-server": "dist/index.js"
},
"devDependencies": {
"@types/node": "25.5.0",
"@types/plist": "3.0.5",
- "oxlint": "^1.55.0",
+ "llm-mock-server": "1.0.3",
+ "oxlint": "1.55.0",
"patch-package": "8.0.1",
"tsx": "4.21.0",
"typescript": "5.9.3",
@@ -664,26 +665,26 @@
}
},
"node_modules/@github/copilot": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.2.tgz",
- "integrity": "sha512-716SIZMYftldVcJay2uZOzsa9ROGGb2Mh2HnxbDxoisFsWNNgZlQXlV7A+PYoGsnAo2Zk/8e1i5SPTscGf2oww==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.5.tgz",
+ "integrity": "sha512-lQGN1/qw7gJRT+lSW1U79Ltrf9rkF6UP8FcEb0hGEf9hq0K8/MaulzK+iDtH/gwXYweFXID29E3QlwSqbdsHqQ==",
"license": "SEE LICENSE IN LICENSE.md",
"bin": {
"copilot": "npm-loader.js"
},
"optionalDependencies": {
- "@github/copilot-darwin-arm64": "1.0.2",
- "@github/copilot-darwin-x64": "1.0.2",
- "@github/copilot-linux-arm64": "1.0.2",
- "@github/copilot-linux-x64": "1.0.2",
- "@github/copilot-win32-arm64": "1.0.2",
- "@github/copilot-win32-x64": "1.0.2"
+ "@github/copilot-darwin-arm64": "1.0.5",
+ "@github/copilot-darwin-x64": "1.0.5",
+ "@github/copilot-linux-arm64": "1.0.5",
+ "@github/copilot-linux-x64": "1.0.5",
+ "@github/copilot-win32-arm64": "1.0.5",
+ "@github/copilot-win32-x64": "1.0.5"
}
},
"node_modules/@github/copilot-darwin-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.2.tgz",
- "integrity": "sha512-dYoeaTidsphRXyMjvAgpjEbBV41ipICnXURrLFEiATcjC4IY6x2BqPOocrExBYW/Tz2VZvDw51iIZaf6GXrTmw==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.5.tgz",
+ "integrity": "sha512-XBwo8t5higPXzCvXVYkADImixt9k8P2XsflWup2b86x9KtcssYTcfEWWIg42AOCe8J/OJRJN2MMTQuWt5aeK9w==",
"cpu": [
"arm64"
],
@@ -697,9 +698,9 @@
}
},
"node_modules/@github/copilot-darwin-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.2.tgz",
- "integrity": "sha512-8+Z9dYigEfXf0wHl9c2tgFn8Cr6v4RAY8xTgHMI9mZInjQyxVeBXCxbE2VgzUtDUD3a705Ka2d8ZOz05aYtGsg==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.5.tgz",
+ "integrity": "sha512-zUlMEKct5oPk/ImnYKz+fUjI9xfIwRE2/WI8BrpuDDe16aFDW2Co/6WFFr5rgYcXoGX2Jm8HT563UUxaFbnnOA==",
"cpu": [
"x64"
],
@@ -713,9 +714,9 @@
}
},
"node_modules/@github/copilot-linux-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.2.tgz",
- "integrity": "sha512-ik0Y5aTXOFRPLFrNjZJdtfzkozYqYeJjVXGBAH3Pp1nFZRu/pxJnrnQ1HrqO/LEgQVbJzAjQmWEfMbXdQIxE4Q==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.5.tgz",
+ "integrity": "sha512-Rp5Key6IBcm00K3+yc8rga3IXaJKN7mwYtP/mpkCKaJJp7izpJK7Z7Dr1slb63Z3yCAyPwMeYlE+adFCwlnYUA==",
"cpu": [
"arm64"
],
@@ -729,9 +730,9 @@
}
},
"node_modules/@github/copilot-linux-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.2.tgz",
- "integrity": "sha512-mHSPZjH4nU9rwbfwLxYJ7CQ90jK/Qu1v2CmvBCUPfmuGdVwrpGPHB5FrB+f+b0NEXjmemDWstk2zG53F7ppHfw==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.5.tgz",
+ "integrity": "sha512-ZEKOi57SUo3Ds2ZeYkIkHJ9MJA0Im1i04i0vdAPKH5Xibb2AC6I2EHO2dU/MWwqIeXoK5QDRh0r0Gs+BkHA/dg==",
"cpu": [
"x64"
],
@@ -759,9 +760,9 @@
}
},
"node_modules/@github/copilot-win32-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.2.tgz",
- "integrity": "sha512-tLW2CY/vg0fYLp8EuiFhWIHBVzbFCDDpohxT/F/XyMAdTVSZLnopCcxQHv2BOu0CVGrYjlf7YOIwPfAKYml1FA==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.5.tgz",
+ "integrity": "sha512-pkhuKJZ1AcRAkVS2OO4BEBfMovGSuGWem4isBq+cgRDtuXRfRiZuc88Z9WcrtDCCwpdLx9rSYPVSWQG5fvupPQ==",
"cpu": [
"arm64"
],
@@ -775,9 +776,9 @@
}
},
"node_modules/@github/copilot-win32-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.2.tgz",
- "integrity": "sha512-cFlc3xMkKKFRIYR00EEJ2XlYAemeh5EZHsGA8Ir2G0AH+DOevJbomdP1yyCC5gaK/7IyPkHX3sGie5sER2yPvQ==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.5.tgz",
+ "integrity": "sha512-x6PWG80uCuCI+IgCLD1fnBJtfuf9nMBzJwOcMlFwjRtHduV/V9OOW3c89ooGwh/lRhCatAP5GxZGTyC7AJR3kQ==",
"cpu": [
"x64"
],
@@ -1894,9 +1895,9 @@
}
},
"node_modules/copilot-sdk-proxy": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/copilot-sdk-proxy/-/copilot-sdk-proxy-3.0.1.tgz",
- "integrity": "sha512-bl6shXGFcj+fR1VEH4jSzAvd3nneU+Jbn+fO8n+izntnlchZb98rLxmy2ZI+BNVIbUt0AJJPAAREexS9rtAJRw==",
+ "version": "4.0.3",
+ "resolved": "https://registry.npmjs.org/copilot-sdk-proxy/-/copilot-sdk-proxy-4.0.3.tgz",
+ "integrity": "sha512-htEWPCV64xYp80cdEmV6gbx5XFWyb15KOu2ulfdDy2OTCsKqKJ0WfQITb4BkSp6DacTbym7sAtzUNfKQnz2R8g==",
"license": "MIT",
"dependencies": {
"@fastify/cors": "11.2.0",
@@ -1904,12 +1905,13 @@
"commander": "14.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
+ "llm-schemas": "1.0.1",
"picocolors": "1.1.1",
"tokenx": "1.3.0",
"zod": "4.3.6"
},
"bin": {
- "copilot-proxy": "bin/copilot-proxy.mjs"
+ "copilot-proxy": "dist/cli.js"
},
"engines": {
"node": "25.6.0"
@@ -2895,6 +2897,39 @@
"url": "https://opencollective.com/parcel"
}
},
+ "node_modules/llm-mock-server": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/llm-mock-server/-/llm-mock-server-1.0.3.tgz",
+ "integrity": "sha512-lmG8w4B60O7F7HjA5hwKlPiOGVCM0nOPYo5fE9py3+lH7UUfbkoj8ewRx6ER6XffqyUq8G7PpL9XeU5N3OcS+Q==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "commander": "14.0.3",
+ "fastify": "5.8.2",
+ "json5": "2.2.3",
+ "llm-schemas": "1.0.1",
+ "picocolors": "1.1.1",
+ "zod": "4.3.6"
+ },
+ "bin": {
+ "llm-mock-server": "dist/cli.js"
+ },
+ "engines": {
+ "node": ">=22"
+ }
+ },
+ "node_modules/llm-schemas": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/llm-schemas/-/llm-schemas-1.0.1.tgz",
+ "integrity": "sha512-tyjIQZL/8S+CuSefonOzY0gyymHMqjOVMIOhqQqp3eoHLzTpy0HdvYcjDs5/+hDxvZ6yyC9qzclq7t/IVWC0CQ==",
+ "license": "MIT",
+ "dependencies": {
+ "zod": "4.3.6"
+ },
+ "engines": {
+ "node": ">=22"
+ }
+ },
"node_modules/magic-string": {
"version": "0.30.21",
"resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz",
diff --git a/proxy-server/package.json b/proxy-server/package.json
index 88fa0b6..b77d578 100644
--- a/proxy-server/package.json
+++ b/proxy-server/package.json
@@ -33,7 +33,7 @@
},
"dependencies": {
"commander": "14.0.3",
- "copilot-sdk-proxy": "3.0.1",
+ "copilot-sdk-proxy": "4.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
"koffi": "2.15.2",
@@ -43,7 +43,8 @@
"devDependencies": {
"@types/node": "25.5.0",
"@types/plist": "3.0.5",
- "oxlint": "^1.55.0",
+ "llm-mock-server": "1.0.3",
+ "oxlint": "1.55.0",
"patch-package": "8.0.1",
"tsx": "4.21.0",
"typescript": "5.9.3",
diff --git a/proxy-server/src/providers/codex/tool-results.ts b/proxy-server/src/providers/codex/tool-results.ts
index 23be180..fbc5bc9 100644
--- a/proxy-server/src/providers/codex/tool-results.ts
+++ b/proxy-server/src/providers/codex/tool-results.ts
@@ -1,8 +1,8 @@
-import type { FunctionCallOutputInput, Logger } from "copilot-sdk-proxy";
+import type { FunctionCallOutput, Logger } from "copilot-sdk-proxy";
import type { ToolBridgeState } from "../../tool-bridge/state.js";
export function resolveResponsesToolResults(
- outputs: FunctionCallOutputInput[],
+ outputs: FunctionCallOutput[],
state: ToolBridgeState,
logger: Logger,
): void {
diff --git a/proxy-server/src/providers/shared/session-config.ts b/proxy-server/src/providers/shared/session-config.ts
index f880cc9..ed7eae6 100644
--- a/proxy-server/src/providers/shared/session-config.ts
+++ b/proxy-server/src/providers/shared/session-config.ts
@@ -18,13 +18,8 @@ const SDK_BUILT_IN_TOOLS: string[] = [
"skill", "web_fetch", "fetch_copilot_cli_documentation",
];
-interface SessionConfigOptions {
- model: string;
- systemMessage?: string | undefined;
- logger: Logger;
+interface SessionConfigOptions extends BaseSessionConfigOptions {
config: ServerConfig;
- supportsReasoningEffort: boolean;
- cwd?: string | undefined;
hasToolBridge?: boolean | undefined;
port: number;
conversationId: string;
@@ -76,6 +71,7 @@ export function createSessionConfig({
config,
supportsReasoningEffort,
cwd,
+ provider,
hasToolBridge,
port,
conversationId,
@@ -87,6 +83,7 @@ export function createSessionConfig({
config,
supportsReasoningEffort,
cwd,
+ provider,
});
// Hide SDK built-ins so the model uses bridge tools (forwarded to Xcode).
diff --git a/proxy-server/test/integration/claude.test.ts b/proxy-server/test/integration/claude.test.ts
new file mode 100644
index 0000000..4abf6e8
--- /dev/null
+++ b/proxy-server/test/integration/claude.test.ts
@@ -0,0 +1,164 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { claudeProvider } from "../../src/providers/claude/provider.js";
+import { TIMEOUT, CLAUDE_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/messages";
+const UA = { "user-agent": "claude-cli/1.0" };
+const msg = (content: string, max_tokens = 100) => ({
+ model: CLAUDE_MODEL, messages: [{ role: "user", content }], max_tokens,
+});
+const byok = () => ({ type: "anthropic" as const, baseUrl: mock.url, apiKey: "dummy" });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { type?: string; delta?: { type?: string; text?: string } }[])
+ .filter((e) => e.type === "content_block_delta" && e.delta?.type === "text_delta")
+ .map((e) => e.delta?.text ?? "")
+ .join("");
+}
+
+describe("Claude provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(claudeProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response with Anthropic SSE events", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(textFrom(res)).toBe("Hello from mock!");
+
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
+ expect(types).toContain("message_start");
+ expect(types).toContain("content_block_start");
+ expect(types).toContain("content_block_delta");
+ expect(types).toContain("content_block_stop");
+ expect(types).toContain("message_delta");
+ expect(types).toContain("message_stop");
+ }, TIMEOUT);
+
+ it("streams with a system message", async () => {
+ const res = await post(baseUrl, {
+ ...msg("capital of France"),
+ system: "You are helpful.",
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn conversation", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL,
+ messages: [
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ],
+ max_tokens: 100,
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life", 16000));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
+ expect(types).toContain("message_stop");
+ }, TIMEOUT);
+
+ it("rejects missing max_tokens", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL, messages: [{ role: "user", content: "hello" }],
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, {
+ messages: [{ role: "user", content: "hello" }], max_tokens: 100,
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects empty messages array", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL, messages: [], max_tokens: 100,
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+});
+
+describe("Claude provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(claudeProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(claudeProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, {
+ model: CLAUDE_MODEL,
+ messages: [
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ],
+ max_tokens: 100,
+ });
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/codex.test.ts b/proxy-server/test/integration/codex.test.ts
new file mode 100644
index 0000000..d19bae0
--- /dev/null
+++ b/proxy-server/test/integration/codex.test.ts
@@ -0,0 +1,143 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { codexProvider } from "../../src/providers/codex/provider.js";
+import { TIMEOUT, OPENAI_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/responses";
+const UA = { "user-agent": "Xcode/16000 CFNetwork/1 Darwin/25.0.0" };
+const msg = (input: string | { role: string; content: string }[]) => ({ model: OPENAI_MODEL, input });
+const byok = () => ({ type: "openai" as const, wireApi: "responses" as const, baseUrl: `${mock.url}/v1` });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { type?: string; delta?: string }[])
+ .filter((e) => e.type === "response.output_text.delta")
+ .map((e) => e.delta ?? "")
+ .join("");
+}
+
+describe("Codex provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(codexProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response with Responses API events", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(textFrom(res)).toBe("Hello from mock!");
+
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
+ expect(types).toContain("response.created");
+ expect(types).toContain("response.output_item.added");
+ expect(types).toContain("response.content_part.added");
+ expect(types).toContain("response.output_text.delta");
+ expect(types).toContain("response.output_text.done");
+ expect(types).toContain("response.completed");
+ }, TIMEOUT);
+
+ it("streams with instructions", async () => {
+ const res = await post(baseUrl, {
+ ...msg("capital of France"),
+ instructions: "You are helpful.",
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn via input array", async () => {
+ const res = await post(baseUrl, msg([
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ]));
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
+ expect(types).toContain("response.completed");
+ }, TIMEOUT);
+
+ it("rejects missing input", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, { input: "hello" });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+});
+
+describe("Codex provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(codexProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(codexProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, msg([
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ]));
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/openai.test.ts b/proxy-server/test/integration/openai.test.ts
new file mode 100644
index 0000000..52ae944
--- /dev/null
+++ b/proxy-server/test/integration/openai.test.ts
@@ -0,0 +1,192 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { openaiProvider } from "../../src/providers/openai/provider.js";
+import { TIMEOUT, OPENAI_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/chat/completions";
+const UA = { "user-agent": "Xcode/16000 CFNetwork/1 Darwin/25.0.0" };
+const msg = (content: string) => ({ model: OPENAI_MODEL, messages: [{ role: "user", content }] });
+const byok = () => ({ type: "openai" as const, baseUrl: `${mock.url}/v1` });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { choices?: { delta?: { content?: string } }[] }[])
+ .flatMap((e) => e.choices ?? [])
+ .map((c) => c.delta?.content ?? "")
+ .filter(Boolean)
+ .join("");
+}
+
+describe("OpenAI provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(openaiProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(res.body).toContain("data: [DONE]");
+ expect(textFrom(res)).toBe("Hello from mock!");
+ }, TIMEOUT);
+
+ it("streams with a system message", async () => {
+ const res = await post(baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "system", content: "You are helpful." },
+ { role: "user", content: "capital of France" },
+ ],
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn conversation", async () => {
+ const res = await post(baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ],
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ expect(res.body).toContain("data: [DONE]");
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects invalid schema", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL, messages: "not an array" });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, { messages: [{ role: "user", content: "hello" }] });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects empty messages array", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL, messages: [] });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("strips excluded file code blocks from prompt", async () => {
+ mock.history.clear();
+ const server = await startServer(openaiProvider, byok(), {
+ excludedFilePatterns: ["secret.ts"],
+ });
+ try {
+ const content = [
+ "Here is some code:",
+ "```swift:main.swift",
+ "print(\"hello\")",
+ "```",
+ "```typescript:secret.ts",
+ "const API_KEY = \"sk-1234\";",
+ "```",
+ "Please review.",
+ ].join("\n");
+
+ await post(server.baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [{ role: "user", content }],
+ });
+
+ const lastReq = mock.history.last();
+ expect(lastReq).toBeDefined();
+ const lastMessage = lastReq!.request.lastMessage;
+ expect(lastMessage).toContain("main.swift");
+ expect(lastMessage).not.toContain("secret.ts");
+ expect(lastMessage).not.toContain("sk-1234");
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("GET /health returns 200", async () => {
+ const res = await fetch(`${baseUrl}/health`, {
+ headers: UA,
+ });
+ expect(res.status).toBe(200);
+ const json = await res.json();
+ expect(json.status).toBe("ok");
+ }, TIMEOUT);
+});
+
+describe("OpenAI provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(openaiProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(openaiProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ],
+ });
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/setup.ts b/proxy-server/test/integration/setup.ts
new file mode 100644
index 0000000..9e5c9ac
--- /dev/null
+++ b/proxy-server/test/integration/setup.ts
@@ -0,0 +1,87 @@
+import { beforeAll, afterAll } from "vitest";
+import { createMock, type MockServer } from "llm-mock-server";
+import type { SessionConfig } from "copilot-sdk-proxy";
+import { createServer, CopilotService, Logger, Stats } from "copilot-sdk-proxy";
+import type { AppContext } from "../../src/context.js";
+import type { ServerConfig } from "../../src/config-schema.js";
+import { BYTES_PER_MIB } from "../../src/config-schema.js";
+import type { Provider } from "../../src/providers/types.js";
+
+export const TIMEOUT = 60_000;
+export const OPENAI_MODEL = "gpt-5.4";
+export const CLAUDE_MODEL = "claude-sonnet-4-6";
+
+export let service: CopilotService;
+export let mock: MockServer;
+
+const logger = new Logger("none");
+
+beforeAll(async () => {
+ mock = await createMock({ port: 0 });
+
+ mock.when("hello").reply("Hello from mock!");
+ mock.when("capital of France").reply("The capital of France is Paris.");
+ mock.when(/what word/i).reply("The word was banana.");
+ mock.when("think about life").reply({
+ text: "The answer is 42.",
+ reasoning: "Let me think step by step about the meaning of life...",
+ });
+ mock.when("read the file").reply({
+ tools: [{ name: "read_file", args: { path: "/tmp/test.txt" } }],
+ });
+ mock.when("say nothing").reply("");
+ mock.fallback("I'm a mock server.");
+
+ service = new CopilotService({
+ logger,
+ githubToken: process.env.GITHUB_TOKEN ?? "dummy-token-for-byok",
+ });
+ await service.start();
+}, TIMEOUT);
+
+afterAll(async () => {
+ await service.stop();
+ await mock.stop();
+}, TIMEOUT);
+
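+// Creates a proxy server instance backed by the shared CopilotService, pointed at the mock via the given BYOK provider.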
+export async function startServer(provider: Provider, byokProvider: SessionConfig["provider"], configOverrides?: Partial<ServerConfig>) {
+ const config: ServerConfig = {
+ toolBridge: false,
+ toolBridgeTimeoutMs: 0,
+ mcpServers: {},
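+    // Dummy allowlist: keeps the SDK from attaching its ~30 built-in tools to BYOK requests (they fail the mock's schema validation).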
+ allowedCliTools: ["test"],
+ excludedFilePatterns: [],
+ bodyLimit: 10 * BYTES_PER_MIB,
+ requestTimeoutMs: 0,
+ autoApprovePermissions: true,
+ ...configOverrides,
+ };
+
+ const ctx: AppContext & { provider: SessionConfig["provider"] } = {
+ service,
+ logger,
+ config,
+ port: 0,
+ stats: new Stats(),
+ provider: byokProvider,
+ };
+ const app = await createServer(ctx, provider);
+ const address = await app.listen({ port: 0, host: "127.0.0.1" });
+ return { app, baseUrl: address, ctx };
+}
+
+export async function postJSON(baseUrl: string, path: string, body: unknown, extraHeaders?: Record<string, string>): Promise<{ status: number; body: string; contentType: string | null }> {
+ const res = await fetch(`${baseUrl}${path}`, {
+ method: "POST",
+ headers: { "content-type": "application/json", ...extraHeaders },
+ body: JSON.stringify(body),
+ });
+ return { status: res.status, body: await res.text(), contentType: res.headers.get("content-type") };
+}
+
+export function parseSSELines(body: string): unknown[] {
+ return body
+ .split("\n")
+ .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+ .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
diff --git a/proxy-server/test/streaming-integration.test.ts b/proxy-server/test/streaming-integration.test.ts
index 78dfc10..55b97bc 100644
--- a/proxy-server/test/streaming-integration.test.ts
+++ b/proxy-server/test/streaming-integration.test.ts
@@ -2,9 +2,9 @@ import { describe, it, expect, afterEach } from "vitest";
import type { FastifyInstance } from "fastify";
import type { SessionEvent, SessionEventHandler, CopilotSession } from "@github/copilot-sdk";
import { createServer, Logger, Stats } from "copilot-sdk-proxy";
-import { openaiProvider } from "../src/providers/openai/provider.js";
import { claudeProvider } from "../src/providers/claude/provider.js";
import { codexProvider } from "../src/providers/codex/provider.js";
+import { openaiProvider } from "../src/providers/openai/provider.js";
import type { AppContext } from "../src/context.js";
import { BYTES_PER_MIB, type ServerConfig } from "../src/config-schema.js";
import { BRIDGE_TOOL_PREFIX } from "../src/bridge-constants.js";
@@ -113,6 +113,13 @@ function toolRequestSequence(opts: {
};
}
+function parseSSELines(body: string): unknown[] {
+ return body
+ .split("\n")
+ .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+ .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
+
const config: ServerConfig = {
toolBridge: false,
toolBridgeTimeoutMs: 0,
@@ -146,51 +153,6 @@ function createCtx(sequence: EventSequence, overrideConfig?: ServerConfig): AppC
};
}
-function createMultiTurnCtx(sequences: EventSequence[]): AppContext {
- let callIndex = 0;
- let handler: SessionEventHandler | null = null;
-
-  function emit(type: string, data: Record<string, unknown>): void {
- handler?.({ ...BASE_EVENT, type, data } as unknown as SessionEvent);
- }
-
- const session = {
- on(h: SessionEventHandler) {
- handler = h;
- return () => { handler = null; };
- },
- abort: () => Promise.resolve(),
- setModel: () => Promise.resolve(),
- send() {
- const seq = sequences[callIndex++];
- seq?.(emit);
- return Promise.resolve();
- },
- } as unknown as CopilotSession;
-
- return {
- service: {
- cwd: process.cwd(),
- createSession: () => Promise.resolve(session),
- listModels: () => Promise.resolve([
- { id: "test-model", capabilities: { supports: { reasoningEffort: false } } },
- ]),
- ping: () => Promise.resolve({ message: "ok", timestamp: Date.now() }),
- } as unknown as AppContext["service"],
- logger: new Logger("none"),
- config,
- port: 8080,
- stats: new Stats(),
- };
-}
-
-function parseSSELines(body: string): unknown[] {
- return body
- .split("\n")
- .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
- .map((line) => JSON.parse(line.slice(6)) as unknown);
-}
-
function collectTextContent(events: unknown[], provider: "openai" | "claude" | "codex"): string {
if (provider === "openai") {
return (events as { choices?: { delta?: { content?: string } }[] }[])
@@ -213,21 +175,21 @@ function collectTextContent(events: unknown[], provider: "openai" | "claude" | "
.join("");
}
-const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
-const claudeHeaders = { "user-agent": "claude-cli/1.0" };
-const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
-
 async function createApp(ctx: AppContext, provider: Provider): Promise<FastifyInstance> {
return createServer(ctx, provider);
}
+const claudeHeaders = { "user-agent": "claude-cli/1.0" };
+const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+
describe("OpenAI streaming integration", () => {
let app: FastifyInstance;
afterEach(async () => { await app.close(); });
- it("streams text deltas as SSE chunks", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+ it("handles session error", async () => {
+ const ctx = createCtx(errorSequence({ errorMessage: "backend exploded" }));
app = await createApp(ctx, openaiProvider);
const res = await app.inject({
@@ -238,30 +200,13 @@ describe("OpenAI streaming integration", () => {
});
expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Hello world");
- expect(res.body).toContain("data: [DONE]");
- });
-
- it("streams with reasoning deltas", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Let me", " think"] }));
- app = await createApp(ctx, openaiProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Think hard" }] },
- });
-
- expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Answer");
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("");
});
- it("streams with tool execution events", async () => {
- const ctx = createCtx(standardSequence({
- deltas: ["Done"],
- toolCall: { id: "tc1", name: "read_file", args: { path: "/tmp" } },
+ it("handles session error after partial deltas", async () => {
+ const ctx = createCtx(errorSequence({
+ deltasBeforeError: ["Partial"],
+ errorMessage: "connection lost",
}));
app = await createApp(ctx, openaiProvider);
@@ -269,11 +214,11 @@ describe("OpenAI streaming integration", () => {
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Read file" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
});
+ // Stream still completes (HTTP 200 was already sent)
expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Done");
});
it("streams with compaction mid-session", async () => {
@@ -292,25 +237,10 @@ describe("OpenAI streaming integration", () => {
expect(res.body).toContain("data: [DONE]");
});
- it("handles session error", async () => {
- const ctx = createCtx(errorSequence({ errorMessage: "backend exploded" }));
- app = await createApp(ctx, openaiProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
- });
-
- expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("");
- });
-
- it("handles session error after partial deltas", async () => {
- const ctx = createCtx(errorSequence({
- deltasBeforeError: ["Partial"],
- errorMessage: "connection lost",
+ it("streams with tool execution events", async () => {
+ const ctx = createCtx(standardSequence({
+ deltas: ["Done"],
+ toolCall: { id: "tc1", name: "read_file", args: { path: "/tmp" } },
}));
app = await createApp(ctx, openaiProvider);
@@ -318,74 +248,26 @@ describe("OpenAI streaming integration", () => {
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Read file" }] },
});
- // Stream still completes (HTTP 200 was already sent)
expect(res.statusCode).toBe(200);
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Done");
});
- it("rejects non-streaming requests", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["x"] }));
+ it("streams with reasoning deltas", async () => {
+ const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Let me", " think"] }));
app = await createApp(ctx, openaiProvider);
const res = await app.inject({
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], stream: false },
- });
-
- expect(res.statusCode).toBe(400);
- });
-
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
- app = await createApp(ctx, openaiProvider);
-
- await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
- });
-
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.inputTokens).toBe(10);
- expect(snapshot.outputTokens).toBe(5);
- });
-
- it("reuses session for multi-turn conversation", async () => {
- const ctx = createMultiTurnCtx([
- standardSequence({ deltas: ["First"] }),
- standardSequence({ deltas: ["Second"] }),
- ]);
- app = await createApp(ctx, openaiProvider);
-
- const res1 = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Turn 1" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Think hard" }] },
});
- expect(collectTextContent(parseSSELines(res1.body), "openai")).toBe("First");
- const res2 = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: {
- model: "test-model",
- messages: [
- { role: "user", content: "Turn 1" },
- { role: "assistant", content: "First" },
- { role: "user", content: "Turn 2" },
- ],
- },
- });
- expect(collectTextContent(parseSSELines(res2.body), "openai")).toBe("Second");
- expect(ctx.stats.snapshot().sessions).toBe(1);
+ expect(res.statusCode).toBe(200);
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Answer");
});
});
@@ -394,53 +276,6 @@ describe("Claude streaming integration", () => {
afterEach(async () => { await app.close(); });
- it("streams text deltas as Anthropic SSE events", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
- app = await createApp(ctx, claudeProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/messages",
- headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
- });
-
- expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "claude")).toBe("Hello world");
-
- const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
- expect(types).toContain("message_start");
- expect(types).toContain("message_stop");
- });
-
- it("streams reasoning as thinking blocks", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Thinking..."] }));
- app = await createApp(ctx, claudeProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/messages",
- headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Think" }], max_tokens: 100 },
- });
-
- expect(res.statusCode).toBe(200);
-    const events = parseSSELines(res.body) as Record<string, unknown>[];
-
- const thinkingStart = events.find(
-      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "thinking",
- );
- expect(thinkingStart).toBeDefined();
-
- const thinkingDelta = events.find(
-      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "thinking_delta",
- );
- expect(thinkingDelta).toBeDefined();
-    expect((thinkingDelta!.delta as Record<string, unknown>).thinking).toBe("Thinking...");
- expect(collectTextContent(events, "claude")).toBe("Answer");
- });
-
it("streams with compaction mid-session", async () => {
const ctx = createCtx(standardSequence({ deltas: ["OK"], compaction: true }));
app = await createApp(ctx, claudeProvider);
@@ -475,20 +310,31 @@ describe("Claude streaming integration", () => {
expect(messageDelta).toBeDefined();
});
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
+ it("streams reasoning as thinking blocks", async () => {
+ const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Thinking..."] }));
app = await createApp(ctx, claudeProvider);
- await app.inject({
+ const res = await app.inject({
method: "POST",
url: "/v1/messages",
headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Think" }], max_tokens: 100 },
});
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.inputTokens).toBe(10);
+ expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+ const thinkingStart = events.find(
+      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "thinking",
+ );
+ expect(thinkingStart).toBeDefined();
+
+ const thinkingDelta = events.find(
+      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "thinking_delta",
+ );
+ expect(thinkingDelta).toBeDefined();
+    expect((thinkingDelta!.delta as Record<string, unknown>).thinking).toBe("Thinking...");
+ expect(collectTextContent(events, "claude")).toBe("Answer");
});
});
@@ -497,8 +343,8 @@ describe("Codex streaming integration", () => {
afterEach(async () => { await app.close(); });
- it("streams text deltas as Responses API events", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+ it("handles session error with failed status", async () => {
+ const ctx = createCtx(errorSequence({ errorMessage: "timeout" }));
app = await createApp(ctx, codexProvider);
const res = await app.inject({
@@ -509,12 +355,9 @@ describe("Codex streaming integration", () => {
});
expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "codex")).toBe("Hello world");
-
- const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
- expect(types).toContain("response.created");
- expect(types).toContain("response.completed");
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+ const failed = events.find((e) => e.type === "response.failed");
+ expect(failed).toBeDefined();
});
it("streams reasoning as reasoning summary events", async () => {
@@ -536,39 +379,6 @@ describe("Codex streaming integration", () => {
expect(reasoningDelta!.delta).toBe("Deep thought");
expect(collectTextContent(events, "codex")).toBe("Answer");
});
-
- it("handles session error with failed status", async () => {
- const ctx = createCtx(errorSequence({ errorMessage: "timeout" }));
- app = await createApp(ctx, codexProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/responses",
- headers: { ...codexHeaders, "content-type": "application/json" },
- payload: { model: "test-model", input: "Hi" },
- });
-
- expect(res.statusCode).toBe(200);
-    const events = parseSSELines(res.body) as Record<string, unknown>[];
- const failed = events.find((e) => e.type === "response.failed");
- expect(failed).toBeDefined();
- });
-
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
- app = await createApp(ctx, codexProvider);
-
- await app.inject({
- method: "POST",
- url: "/v1/responses",
- headers: { ...codexHeaders, "content-type": "application/json" },
- payload: { model: "test-model", input: "Hi" },
- });
-
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.outputTokens).toBe(5);
- });
});
describe("Tool bridge integration — Claude", () => {