diff --git a/proxy-server/docs/TESTING.md b/proxy-server/docs/TESTING.md
new file mode 100644
index 0000000..eea56dc
--- /dev/null
+++ b/proxy-server/docs/TESTING.md
@@ -0,0 +1,101 @@
+# Integration test architecture
+
+The integration tests verify the full request pipeline. An HTTP client sends a request to the proxy, the proxy creates a Copilot SDK session that talks to a mock LLM server, and the proxy streams the response back in the correct SSE format.
+
+```mermaid
+flowchart LR
+ Test["Test client<br/>(fetch)"]
+ Proxy["xcode-copilot-server<br/>(Fastify)"]
+ SDK["Copilot SDK<br/>(BYOK mode)"]
+ Mock["llm-mock-server<br/>(deterministic rules)"]
+
+ Test -->|"POST /v1/chat/completions<br/>POST /v1/messages<br/>POST /v1/responses"| Proxy
+ Proxy -->|"SDK session events"| SDK
+ SDK -->|"OpenAI / Anthropic / Responses<br/>wire format"| Mock
+ Mock -->|"SSE / JSON"| SDK
+ SDK -->|"assistant.message_delta<br/>session.idle"| Proxy
+ Proxy -->|"SSE in matching format"| Test
+```
+
+## How it works
+
+The Copilot SDK supports [BYOK (Bring Your Own Key)](https://github.com/github/copilot-sdk) providers. Instead of talking to GitHub's backend, the SDK sends requests to a custom endpoint. We point it at [`llm-mock-server`](https://github.com/theblixguy/llm-mock-server), which returns deterministic responses based on pattern-matching rules.
+
+This means the tests exercise the real SDK session lifecycle (event subscriptions, streaming, session reuse) without needing GitHub auth or making real API calls. A dummy token is enough to start the SDK CLI process.
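+
+The service setup in [`setup.ts`](../test/integration/setup.ts) is just this (abridged):
+
+```ts
+service = new CopilotService({
+  logger,
+  githubToken: process.env.GITHUB_TOKEN ?? "dummy-token-for-byok",
+});
+await service.start();
+```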
+
+## Setup
+
+[`setup.ts`](../test/integration/setup.ts) runs once per test file via `beforeAll`/`afterAll`.
+
+1. Starts `llm-mock-server` on a random port and registers the shared rules
+2. Starts `CopilotService` with a dummy GitHub token (a real `GITHUB_TOKEN` env var is used if present)
+3. Exposes a `startServer()` helper that creates a proxy instance pointed at the mock via BYOK
+
+The mock rules are simple input-output pairs.
+
+```text
+"hello" -> "Hello from mock!"
+"capital of France" -> "The capital of France is Paris."
+/what word/i -> "The word was banana."
+"think about life" -> { text: "The answer is 42.", reasoning: "..." }
+"say nothing" -> ""
+(no match) -> "I'm a mock server."
+```
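+
+They are registered in [`setup.ts`](../test/integration/setup.ts) with the mock's rule API (abridged):
+
+```ts
+mock.when("hello").reply("Hello from mock!");
+mock.when(/what word/i).reply("The word was banana.");
+mock.when("think about life").reply({
+  text: "The answer is 42.",
+  reasoning: "Let me think step by step...",
+});
+mock.when("say nothing").reply("");
+mock.fallback("I'm a mock server.");
+```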
+
+## Per-provider BYOK config
+
+Each provider's BYOK config tells the SDK which wire format to use when talking to the mock.
+
+| Provider | BYOK type | BYOK baseUrl | Notes |
+| -------- | --------- | ------------ | ----- |
+| OpenAI | `openai` | `mock.url/v1` | SDK appends `/chat/completions` |
+| Claude | `anthropic` | `mock.url` | SDK appends `/v1/messages`. Needs dummy `apiKey` |
+| Codex | `openai` + `wireApi: "responses"` | `mock.url/v1` | SDK appends `/responses` |
+
+The `allowedCliTools: ["test"]` config prevents the SDK from attaching its built-in tools to BYOK requests. Without this, the SDK sends ~30 tool definitions that fail the mock's strict schema validation.
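+
+Condensed from the per-file `byok()` helpers, the three configs look like this:
+
+```ts
+// One byok() helper per test file; shown here side by side for comparison.
+const byokConfigs = {
+  openai: { type: "openai" as const, baseUrl: `${mock.url}/v1` },
+  claude: { type: "anthropic" as const, baseUrl: mock.url, apiKey: "dummy" },
+  codex: { type: "openai" as const, wireApi: "responses" as const, baseUrl: `${mock.url}/v1` },
+};
+```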
+
+## Test structure
+
+```text
+test/integration/
+ setup.ts shared mock rules, service lifecycle, helpers
+ openai.test.ts OpenAI Chat Completions endpoint
+ claude.test.ts Anthropic Messages endpoint
+ codex.test.ts Responses API endpoint
+
+test/streaming-integration.test.ts
+ SDK-level tests that mock the CopilotSession directly.
+ Covers error handling, compaction, reasoning block structure,
+ tool bridge, and MCP routes.
+```
+
+Each integration test file defines a `PATH` (the endpoint path), `msg()` (builds a minimal valid request), `byok()` (returns the BYOK provider config), and `textFrom()` (extracts text content from the provider's SSE format).
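+
+For example, `openai.test.ts` defines them as follows (abridged; `byok()` is shown above):
+
+```ts
+const PATH = "/v1/chat/completions";
+const msg = (content: string) => ({ model: OPENAI_MODEL, messages: [{ role: "user", content }] });
+
+function textFrom(res: { body: string }): string {
+  return (parseSSELines(res.body) as { choices?: { delta?: { content?: string } }[] }[])
+    .flatMap((e) => e.choices ?? [])
+    .map((c) => c.delta?.content ?? "")
+    .filter(Boolean)
+    .join("");
+}
+```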
+
+## What's tested
+
+### Integration tests (via llm-mock-server)
+
+Per-provider coverage:
+
+- Basic streaming response with correct SSE format and content-type
+- System message / instructions passthrough
+- Multi-turn conversation (incremental prompts via session reuse)
+- Reasoning reply text extraction
+- Fallback response for unmatched messages
+- Empty response handling
+- Schema validation (missing required fields, invalid types, non-streaming rejection)
+- Usage stats recording across single and multiple requests
+- User-agent guard rejection (wrong and missing user-agent)
+- File pattern exclusion (excluded code blocks stripped from prompt)
+- Health endpoint
+
+### SDK-level tests (via mocked CopilotSession)
+
+These test things that llm-mock-server can't simulate:
+
+- Session error mid-stream (no deltas, partial deltas)
+- Context compaction events
+- Reasoning block structure (Claude thinking blocks, Codex reasoning summary events)
+- Tool execution event logging
+- Tool bridge (Claude tool_use blocks, Codex function_call items)
+- MCP JSON-RPC routes (initialize, tools/list, tools/call, notifications)
diff --git a/proxy-server/package-lock.json b/proxy-server/package-lock.json
index 1123b29..370b724 100644
--- a/proxy-server/package-lock.json
+++ b/proxy-server/package-lock.json
@@ -11,7 +11,7 @@
"license": "MIT",
"dependencies": {
"commander": "14.0.3",
- "copilot-sdk-proxy": "3.0.1",
+ "copilot-sdk-proxy": "4.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
"koffi": "2.15.2",
@@ -19,12 +19,13 @@
"zod": "4.3.6"
},
"bin": {
- "xcode-copilot-server": "bin/xcode-copilot-server.mjs"
+ "xcode-copilot-server": "dist/index.js"
},
"devDependencies": {
"@types/node": "25.5.0",
"@types/plist": "3.0.5",
- "oxlint": "^1.55.0",
+ "llm-mock-server": "1.0.3",
+ "oxlint": "1.55.0",
"patch-package": "8.0.1",
"tsx": "4.21.0",
"typescript": "5.9.3",
@@ -664,26 +665,26 @@
}
},
"node_modules/@github/copilot": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.2.tgz",
- "integrity": "sha512-716SIZMYftldVcJay2uZOzsa9ROGGb2Mh2HnxbDxoisFsWNNgZlQXlV7A+PYoGsnAo2Zk/8e1i5SPTscGf2oww==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot/-/copilot-1.0.5.tgz",
+ "integrity": "sha512-lQGN1/qw7gJRT+lSW1U79Ltrf9rkF6UP8FcEb0hGEf9hq0K8/MaulzK+iDtH/gwXYweFXID29E3QlwSqbdsHqQ==",
"license": "SEE LICENSE IN LICENSE.md",
"bin": {
"copilot": "npm-loader.js"
},
"optionalDependencies": {
- "@github/copilot-darwin-arm64": "1.0.2",
- "@github/copilot-darwin-x64": "1.0.2",
- "@github/copilot-linux-arm64": "1.0.2",
- "@github/copilot-linux-x64": "1.0.2",
- "@github/copilot-win32-arm64": "1.0.2",
- "@github/copilot-win32-x64": "1.0.2"
+ "@github/copilot-darwin-arm64": "1.0.5",
+ "@github/copilot-darwin-x64": "1.0.5",
+ "@github/copilot-linux-arm64": "1.0.5",
+ "@github/copilot-linux-x64": "1.0.5",
+ "@github/copilot-win32-arm64": "1.0.5",
+ "@github/copilot-win32-x64": "1.0.5"
}
},
"node_modules/@github/copilot-darwin-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.2.tgz",
- "integrity": "sha512-dYoeaTidsphRXyMjvAgpjEbBV41ipICnXURrLFEiATcjC4IY6x2BqPOocrExBYW/Tz2VZvDw51iIZaf6GXrTmw==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-arm64/-/copilot-darwin-arm64-1.0.5.tgz",
+ "integrity": "sha512-XBwo8t5higPXzCvXVYkADImixt9k8P2XsflWup2b86x9KtcssYTcfEWWIg42AOCe8J/OJRJN2MMTQuWt5aeK9w==",
"cpu": [
"arm64"
],
@@ -697,9 +698,9 @@
}
},
"node_modules/@github/copilot-darwin-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.2.tgz",
- "integrity": "sha512-8+Z9dYigEfXf0wHl9c2tgFn8Cr6v4RAY8xTgHMI9mZInjQyxVeBXCxbE2VgzUtDUD3a705Ka2d8ZOz05aYtGsg==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-darwin-x64/-/copilot-darwin-x64-1.0.5.tgz",
+ "integrity": "sha512-zUlMEKct5oPk/ImnYKz+fUjI9xfIwRE2/WI8BrpuDDe16aFDW2Co/6WFFr5rgYcXoGX2Jm8HT563UUxaFbnnOA==",
"cpu": [
"x64"
],
@@ -713,9 +714,9 @@
}
},
"node_modules/@github/copilot-linux-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.2.tgz",
- "integrity": "sha512-ik0Y5aTXOFRPLFrNjZJdtfzkozYqYeJjVXGBAH3Pp1nFZRu/pxJnrnQ1HrqO/LEgQVbJzAjQmWEfMbXdQIxE4Q==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-arm64/-/copilot-linux-arm64-1.0.5.tgz",
+ "integrity": "sha512-Rp5Key6IBcm00K3+yc8rga3IXaJKN7mwYtP/mpkCKaJJp7izpJK7Z7Dr1slb63Z3yCAyPwMeYlE+adFCwlnYUA==",
"cpu": [
"arm64"
],
@@ -729,9 +730,9 @@
}
},
"node_modules/@github/copilot-linux-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.2.tgz",
- "integrity": "sha512-mHSPZjH4nU9rwbfwLxYJ7CQ90jK/Qu1v2CmvBCUPfmuGdVwrpGPHB5FrB+f+b0NEXjmemDWstk2zG53F7ppHfw==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-linux-x64/-/copilot-linux-x64-1.0.5.tgz",
+ "integrity": "sha512-ZEKOi57SUo3Ds2ZeYkIkHJ9MJA0Im1i04i0vdAPKH5Xibb2AC6I2EHO2dU/MWwqIeXoK5QDRh0r0Gs+BkHA/dg==",
"cpu": [
"x64"
],
@@ -759,9 +760,9 @@
}
},
"node_modules/@github/copilot-win32-arm64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.2.tgz",
- "integrity": "sha512-tLW2CY/vg0fYLp8EuiFhWIHBVzbFCDDpohxT/F/XyMAdTVSZLnopCcxQHv2BOu0CVGrYjlf7YOIwPfAKYml1FA==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-arm64/-/copilot-win32-arm64-1.0.5.tgz",
+ "integrity": "sha512-pkhuKJZ1AcRAkVS2OO4BEBfMovGSuGWem4isBq+cgRDtuXRfRiZuc88Z9WcrtDCCwpdLx9rSYPVSWQG5fvupPQ==",
"cpu": [
"arm64"
],
@@ -775,9 +776,9 @@
}
},
"node_modules/@github/copilot-win32-x64": {
- "version": "1.0.2",
- "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.2.tgz",
- "integrity": "sha512-cFlc3xMkKKFRIYR00EEJ2XlYAemeh5EZHsGA8Ir2G0AH+DOevJbomdP1yyCC5gaK/7IyPkHX3sGie5sER2yPvQ==",
+ "version": "1.0.5",
+ "resolved": "https://registry.npmjs.org/@github/copilot-win32-x64/-/copilot-win32-x64-1.0.5.tgz",
+ "integrity": "sha512-x6PWG80uCuCI+IgCLD1fnBJtfuf9nMBzJwOcMlFwjRtHduV/V9OOW3c89ooGwh/lRhCatAP5GxZGTyC7AJR3kQ==",
"cpu": [
"x64"
],
@@ -1894,9 +1895,9 @@
}
},
"node_modules/copilot-sdk-proxy": {
- "version": "3.0.1",
- "resolved": "https://registry.npmjs.org/copilot-sdk-proxy/-/copilot-sdk-proxy-3.0.1.tgz",
- "integrity": "sha512-bl6shXGFcj+fR1VEH4jSzAvd3nneU+Jbn+fO8n+izntnlchZb98rLxmy2ZI+BNVIbUt0AJJPAAREexS9rtAJRw==",
+ "version": "4.0.3",
+ "resolved": "https://registry.npmjs.org/copilot-sdk-proxy/-/copilot-sdk-proxy-4.0.3.tgz",
+ "integrity": "sha512-htEWPCV64xYp80cdEmV6gbx5XFWyb15KOu2ulfdDy2OTCsKqKJ0WfQITb4BkSp6DacTbym7sAtzUNfKQnz2R8g==",
"license": "MIT",
"dependencies": {
"@fastify/cors": "11.2.0",
@@ -1904,12 +1905,13 @@
"commander": "14.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
+ "llm-schemas": "1.0.1",
"picocolors": "1.1.1",
"tokenx": "1.3.0",
"zod": "4.3.6"
},
"bin": {
- "copilot-proxy": "bin/copilot-proxy.mjs"
+ "copilot-proxy": "dist/cli.js"
},
"engines": {
"node": "25.6.0"
@@ -2895,6 +2897,39 @@
"url": "https://opencollective.com/parcel"
}
},
+ "node_modules/llm-mock-server": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/llm-mock-server/-/llm-mock-server-1.0.3.tgz",
+ "integrity": "sha512-lmG8w4B60O7F7HjA5hwKlPiOGVCM0nOPYo5fE9py3+lH7UUfbkoj8ewRx6ER6XffqyUq8G7PpL9XeU5N3OcS+Q==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "commander": "14.0.3",
+ "fastify": "5.8.2",
+ "json5": "2.2.3",
+ "llm-schemas": "1.0.1",
+ "picocolors": "1.1.1",
+ "zod": "4.3.6"
+ },
+ "bin": {
+ "llm-mock-server": "dist/cli.js"
+ },
+ "engines": {
+ "node": ">=22"
+ }
+ },
+ "node_modules/llm-schemas": {
+ "version": "1.0.1",
+ "resolved": "https://registry.npmjs.org/llm-schemas/-/llm-schemas-1.0.1.tgz",
+ "integrity": "sha512-tyjIQZL/8S+CuSefonOzY0gyymHMqjOVMIOhqQqp3eoHLzTpy0HdvYcjDs5/+hDxvZ6yyC9qzclq7t/IVWC0CQ==",
+ "license": "MIT",
+ "dependencies": {
+ "zod": "4.3.6"
+ },
+ "engines": {
+ "node": ">=22"
+ }
+ },
"node_modules/magic-string": {
"version": "0.30.21",
"resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.21.tgz",
diff --git a/proxy-server/package.json b/proxy-server/package.json
index 88fa0b6..b77d578 100644
--- a/proxy-server/package.json
+++ b/proxy-server/package.json
@@ -33,7 +33,7 @@
},
"dependencies": {
"commander": "14.0.3",
- "copilot-sdk-proxy": "3.0.1",
+ "copilot-sdk-proxy": "4.0.3",
"fastify": "5.8.2",
"json5": "2.2.3",
"koffi": "2.15.2",
@@ -43,7 +43,8 @@
"devDependencies": {
"@types/node": "25.5.0",
"@types/plist": "3.0.5",
- "oxlint": "^1.55.0",
+ "llm-mock-server": "1.0.3",
+ "oxlint": "1.55.0",
"patch-package": "8.0.1",
"tsx": "4.21.0",
"typescript": "5.9.3",
diff --git a/proxy-server/src/providers/codex/tool-results.ts b/proxy-server/src/providers/codex/tool-results.ts
index 23be180..fbc5bc9 100644
--- a/proxy-server/src/providers/codex/tool-results.ts
+++ b/proxy-server/src/providers/codex/tool-results.ts
@@ -1,8 +1,8 @@
-import type { FunctionCallOutputInput, Logger } from "copilot-sdk-proxy";
+import type { FunctionCallOutput, Logger } from "copilot-sdk-proxy";
import type { ToolBridgeState } from "../../tool-bridge/state.js";
export function resolveResponsesToolResults(
- outputs: FunctionCallOutputInput[],
+ outputs: FunctionCallOutput[],
state: ToolBridgeState,
logger: Logger,
): void {
diff --git a/proxy-server/src/providers/shared/session-config.ts b/proxy-server/src/providers/shared/session-config.ts
index f880cc9..ed7eae6 100644
--- a/proxy-server/src/providers/shared/session-config.ts
+++ b/proxy-server/src/providers/shared/session-config.ts
@@ -18,13 +18,8 @@ const SDK_BUILT_IN_TOOLS: string[] = [
"skill", "web_fetch", "fetch_copilot_cli_documentation",
];
-interface SessionConfigOptions {
- model: string;
- systemMessage?: string | undefined;
- logger: Logger;
+interface SessionConfigOptions extends BaseSessionConfigOptions {
config: ServerConfig;
- supportsReasoningEffort: boolean;
- cwd?: string | undefined;
hasToolBridge?: boolean | undefined;
port: number;
conversationId: string;
@@ -76,6 +71,7 @@ export function createSessionConfig({
config,
supportsReasoningEffort,
cwd,
+ provider,
hasToolBridge,
port,
conversationId,
@@ -87,6 +83,7 @@ export function createSessionConfig({
config,
supportsReasoningEffort,
cwd,
+ provider,
});
// Hide SDK built-ins so the model uses bridge tools (forwarded to Xcode).
diff --git a/proxy-server/test/integration/claude.test.ts b/proxy-server/test/integration/claude.test.ts
new file mode 100644
index 0000000..4abf6e8
--- /dev/null
+++ b/proxy-server/test/integration/claude.test.ts
@@ -0,0 +1,164 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { claudeProvider } from "../../src/providers/claude/provider.js";
+import { TIMEOUT, CLAUDE_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/messages";
+const UA = { "user-agent": "claude-cli/1.0" };
+const msg = (content: string, max_tokens = 100) => ({
+ model: CLAUDE_MODEL, messages: [{ role: "user", content }], max_tokens,
+});
+const byok = () => ({ type: "anthropic" as const, baseUrl: mock.url, apiKey: "dummy" });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { type?: string; delta?: { type?: string; text?: string } }[])
+ .filter((e) => e.type === "content_block_delta" && e.delta?.type === "text_delta")
+ .map((e) => e.delta?.text ?? "")
+ .join("");
+}
+
+describe("Claude provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(claudeProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response with Anthropic SSE events", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(textFrom(res)).toBe("Hello from mock!");
+
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
+ expect(types).toContain("message_start");
+ expect(types).toContain("content_block_start");
+ expect(types).toContain("content_block_delta");
+ expect(types).toContain("content_block_stop");
+ expect(types).toContain("message_delta");
+ expect(types).toContain("message_stop");
+ }, TIMEOUT);
+
+ it("streams with a system message", async () => {
+ const res = await post(baseUrl, {
+ ...msg("capital of France"),
+ system: "You are helpful.",
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn conversation", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL,
+ messages: [
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ],
+ max_tokens: 100,
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life", 16000));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
+ expect(types).toContain("message_stop");
+ }, TIMEOUT);
+
+ it("rejects missing max_tokens", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL, messages: [{ role: "user", content: "hello" }],
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, {
+ messages: [{ role: "user", content: "hello" }], max_tokens: 100,
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects empty messages array", async () => {
+ const res = await post(baseUrl, {
+ model: CLAUDE_MODEL, messages: [], max_tokens: 100,
+ });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+});
+
+describe("Claude provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(claudeProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(claudeProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, {
+ model: CLAUDE_MODEL,
+ messages: [
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ],
+ max_tokens: 100,
+ });
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/codex.test.ts b/proxy-server/test/integration/codex.test.ts
new file mode 100644
index 0000000..d19bae0
--- /dev/null
+++ b/proxy-server/test/integration/codex.test.ts
@@ -0,0 +1,143 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { codexProvider } from "../../src/providers/codex/provider.js";
+import { TIMEOUT, OPENAI_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/responses";
+const UA = { "user-agent": "Xcode/16000 CFNetwork/1 Darwin/25.0.0" };
+const msg = (input: string | { role: string; content: string }[]) => ({ model: OPENAI_MODEL, input });
+const byok = () => ({ type: "openai" as const, wireApi: "responses" as const, baseUrl: `${mock.url}/v1` });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { type?: string; delta?: string }[])
+ .filter((e) => e.type === "response.output_text.delta")
+ .map((e) => e.delta ?? "")
+ .join("");
+}
+
+describe("Codex provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(codexProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response with Responses API events", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(textFrom(res)).toBe("Hello from mock!");
+
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
+ expect(types).toContain("response.created");
+ expect(types).toContain("response.output_item.added");
+ expect(types).toContain("response.content_part.added");
+ expect(types).toContain("response.output_text.delta");
+ expect(types).toContain("response.output_text.done");
+ expect(types).toContain("response.completed");
+ }, TIMEOUT);
+
+ it("streams with instructions", async () => {
+ const res = await post(baseUrl, {
+ ...msg("capital of France"),
+ instructions: "You are helpful.",
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn via input array", async () => {
+ const res = await post(baseUrl, msg([
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ]));
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
+ expect(types).toContain("response.completed");
+ }, TIMEOUT);
+
+ it("rejects missing input", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, { input: "hello" });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+});
+
+describe("Codex provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(codexProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(codexProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, msg([
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ]));
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/openai.test.ts b/proxy-server/test/integration/openai.test.ts
new file mode 100644
index 0000000..52ae944
--- /dev/null
+++ b/proxy-server/test/integration/openai.test.ts
@@ -0,0 +1,192 @@
+import { describe, it, expect, beforeEach, afterEach } from "vitest";
+import { openaiProvider } from "../../src/providers/openai/provider.js";
+import { TIMEOUT, OPENAI_MODEL, startServer, postJSON, parseSSELines, mock } from "./setup.js";
+
+const PATH = "/v1/chat/completions";
+const UA = { "user-agent": "Xcode/16000 CFNetwork/1 Darwin/25.0.0" };
+const msg = (content: string) => ({ model: OPENAI_MODEL, messages: [{ role: "user", content }] });
+const byok = () => ({ type: "openai" as const, baseUrl: `${mock.url}/v1` });
+const post = (baseUrl: string, body: unknown) => postJSON(baseUrl, PATH, body, UA);
+
+function textFrom(res: { body: string }): string {
+ return (parseSSELines(res.body) as { choices?: { delta?: { content?: string } }[] }[])
+ .flatMap((e) => e.choices ?? [])
+ .map((c) => c.delta?.content ?? "")
+ .filter(Boolean)
+ .join("");
+}
+
+describe("OpenAI provider", () => {
+ let baseUrl: string;
+  let close: () => Promise<void>;
+
+ beforeEach(async () => {
+ const server = await startServer(openaiProvider, byok());
+ baseUrl = server.baseUrl;
+ close = () => server.app.close();
+ }, TIMEOUT);
+
+ afterEach(async () => { await close(); });
+
+ it("streams a basic response", async () => {
+ const res = await post(baseUrl, msg("hello"));
+
+ expect(res.status).toBe(200);
+ expect(res.contentType).toBe("text/event-stream");
+ expect(res.body).toContain("data: [DONE]");
+ expect(textFrom(res)).toBe("Hello from mock!");
+ }, TIMEOUT);
+
+ it("streams with a system message", async () => {
+ const res = await post(baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "system", content: "You are helpful." },
+ { role: "user", content: "capital of France" },
+ ],
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The capital of France is Paris.");
+ }, TIMEOUT);
+
+ it("handles multi-turn conversation", async () => {
+ const res = await post(baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "user", content: "remember the word banana" },
+ { role: "assistant", content: "OK" },
+ { role: "user", content: "what word did I ask you to remember?" },
+ ],
+ });
+
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The word was banana.");
+ }, TIMEOUT);
+
+ it("streams response with reasoning reply", async () => {
+ const res = await post(baseUrl, msg("think about life"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("The answer is 42.");
+ }, TIMEOUT);
+
+ it("uses fallback for unmatched messages", async () => {
+ const res = await post(baseUrl, msg("something random"));
+ expect(res.status).toBe(200);
+ expect(textFrom(res)).toBe("I'm a mock server.");
+ }, TIMEOUT);
+
+ it("streams an empty response without errors", async () => {
+ const res = await post(baseUrl, msg("say nothing"));
+ expect(res.status).toBe(200);
+ expect(res.body).toContain("data: [DONE]");
+ }, TIMEOUT);
+
+ it("rejects non-streaming requests", async () => {
+ const res = await post(baseUrl, { ...msg("hello"), stream: false });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects invalid schema", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL, messages: "not an array" });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects missing model", async () => {
+ const res = await post(baseUrl, { messages: [{ role: "user", content: "hello" }] });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects empty messages array", async () => {
+ const res = await post(baseUrl, { model: OPENAI_MODEL, messages: [] });
+ expect(res.status).toBe(400);
+ }, TIMEOUT);
+
+ it("rejects requests with wrong user-agent", async () => {
+ const res = await postJSON(baseUrl, PATH, msg("hello"), { "user-agent": "curl/1.0" });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("rejects requests with missing user-agent", async () => {
+ const res = await fetch(`${baseUrl}${PATH}`, {
+ method: "POST",
+ headers: { "content-type": "application/json" },
+ body: JSON.stringify(msg("hello")),
+ });
+ expect(res.status).toBe(403);
+ }, TIMEOUT);
+
+ it("strips excluded file code blocks from prompt", async () => {
+ mock.history.clear();
+ const server = await startServer(openaiProvider, byok(), {
+ excludedFilePatterns: ["secret.ts"],
+ });
+ try {
+ const content = [
+ "Here is some code:",
+ "```swift:main.swift",
+ "print(\"hello\")",
+ "```",
+ "```typescript:secret.ts",
+ "const API_KEY = \"sk-1234\";",
+ "```",
+ "Please review.",
+ ].join("\n");
+
+ await post(server.baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [{ role: "user", content }],
+ });
+
+ const lastReq = mock.history.last();
+ expect(lastReq).toBeDefined();
+ const lastMessage = lastReq!.request.lastMessage;
+ expect(lastMessage).toContain("main.swift");
+ expect(lastMessage).not.toContain("secret.ts");
+ expect(lastMessage).not.toContain("sk-1234");
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("GET /health returns 200", async () => {
+ const res = await fetch(`${baseUrl}/health`, {
+ headers: UA,
+ });
+ expect(res.status).toBe(200);
+ const json = await res.json();
+ expect(json.status).toBe("ok");
+ }, TIMEOUT);
+});
+
+describe("OpenAI provider - usage stats", () => {
+ it("records usage stats", async () => {
+ const server = await startServer(openaiProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ const snap = server.ctx.stats.snapshot();
+ expect(snap.requests).toBe(1);
+ expect(snap.sessions).toBe(1);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+
+ it("records multiple requests across turns", async () => {
+ const server = await startServer(openaiProvider, byok());
+ try {
+ await post(server.baseUrl, msg("hello"));
+ await post(server.baseUrl, {
+ model: OPENAI_MODEL,
+ messages: [
+ { role: "user", content: "hello" },
+ { role: "assistant", content: "Hi" },
+ { role: "user", content: "capital of France" },
+ ],
+ });
+ expect(server.ctx.stats.snapshot().requests).toBe(2);
+ } finally {
+ await server.app.close();
+ }
+ }, TIMEOUT);
+});
diff --git a/proxy-server/test/integration/setup.ts b/proxy-server/test/integration/setup.ts
new file mode 100644
index 0000000..9e5c9ac
--- /dev/null
+++ b/proxy-server/test/integration/setup.ts
@@ -0,0 +1,87 @@
+import { beforeAll, afterAll } from "vitest";
+import { createMock, type MockServer } from "llm-mock-server";
+import type { SessionConfig } from "copilot-sdk-proxy";
+import { createServer, CopilotService, Logger, Stats } from "copilot-sdk-proxy";
+import type { AppContext } from "../../src/context.js";
+import type { ServerConfig } from "../../src/config-schema.js";
+import { BYTES_PER_MIB } from "../../src/config-schema.js";
+import type { Provider } from "../../src/providers/types.js";
+
+export const TIMEOUT = 60_000;
+export const OPENAI_MODEL = "gpt-5.4";
+export const CLAUDE_MODEL = "claude-sonnet-4-6";
+
+export let service: CopilotService;
+export let mock: MockServer;
+
+const logger = new Logger("none");
+
+beforeAll(async () => {
+ mock = await createMock({ port: 0 });
+
+ mock.when("hello").reply("Hello from mock!");
+ mock.when("capital of France").reply("The capital of France is Paris.");
+ mock.when(/what word/i).reply("The word was banana.");
+ mock.when("think about life").reply({
+ text: "The answer is 42.",
+ reasoning: "Let me think step by step about the meaning of life...",
+ });
+ mock.when("read the file").reply({
+ tools: [{ name: "read_file", args: { path: "/tmp/test.txt" } }],
+ });
+ mock.when("say nothing").reply("");
+ mock.fallback("I'm a mock server.");
+
+ service = new CopilotService({
+ logger,
+ githubToken: process.env.GITHUB_TOKEN ?? "dummy-token-for-byok",
+ });
+ await service.start();
+}, TIMEOUT);
+
+afterAll(async () => {
+ await service.stop();
+ await mock.stop();
+}, TIMEOUT);
+
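+// Creates a proxy server instance backed by the shared CopilotService, pointed at the mock via the given BYOK provider.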
+export async function startServer(provider: Provider, byokProvider: SessionConfig["provider"], configOverrides?: Partial<ServerConfig>) {
+ const config: ServerConfig = {
+ toolBridge: false,
+ toolBridgeTimeoutMs: 0,
+ mcpServers: {},
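+    // Dummy allowlist: keeps the SDK from attaching its ~30 built-in tools to BYOK requests (they fail the mock's schema validation).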
+ allowedCliTools: ["test"],
+ excludedFilePatterns: [],
+ bodyLimit: 10 * BYTES_PER_MIB,
+ requestTimeoutMs: 0,
+ autoApprovePermissions: true,
+ ...configOverrides,
+ };
+
+ const ctx: AppContext & { provider: SessionConfig["provider"] } = {
+ service,
+ logger,
+ config,
+ port: 0,
+ stats: new Stats(),
+ provider: byokProvider,
+ };
+ const app = await createServer(ctx, provider);
+ const address = await app.listen({ port: 0, host: "127.0.0.1" });
+ return { app, baseUrl: address, ctx };
+}
+
+export async function postJSON(baseUrl: string, path: string, body: unknown, extraHeaders?: Record<string, string>): Promise<{ status: number; body: string; contentType: string | null }> {
+ const res = await fetch(`${baseUrl}${path}`, {
+ method: "POST",
+ headers: { "content-type": "application/json", ...extraHeaders },
+ body: JSON.stringify(body),
+ });
+ return { status: res.status, body: await res.text(), contentType: res.headers.get("content-type") };
+}
+
+export function parseSSELines(body: string): unknown[] {
+ return body
+ .split("\n")
+ .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+ .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
diff --git a/proxy-server/test/streaming-integration.test.ts b/proxy-server/test/streaming-integration.test.ts
index 78dfc10..55b97bc 100644
--- a/proxy-server/test/streaming-integration.test.ts
+++ b/proxy-server/test/streaming-integration.test.ts
@@ -2,9 +2,9 @@ import { describe, it, expect, afterEach } from "vitest";
import type { FastifyInstance } from "fastify";
import type { SessionEvent, SessionEventHandler, CopilotSession } from "@github/copilot-sdk";
import { createServer, Logger, Stats } from "copilot-sdk-proxy";
-import { openaiProvider } from "../src/providers/openai/provider.js";
import { claudeProvider } from "../src/providers/claude/provider.js";
import { codexProvider } from "../src/providers/codex/provider.js";
+import { openaiProvider } from "../src/providers/openai/provider.js";
import type { AppContext } from "../src/context.js";
import { BYTES_PER_MIB, type ServerConfig } from "../src/config-schema.js";
import { BRIDGE_TOOL_PREFIX } from "../src/bridge-constants.js";
@@ -113,6 +113,13 @@ function toolRequestSequence(opts: {
};
}
+function parseSSELines(body: string): unknown[] {
+ return body
+ .split("\n")
+ .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+ .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
+
const config: ServerConfig = {
toolBridge: false,
toolBridgeTimeoutMs: 0,
@@ -146,51 +153,6 @@ function createCtx(sequence: EventSequence, overrideConfig?: ServerConfig): AppC
};
}
-function createMultiTurnCtx(sequences: EventSequence[]): AppContext {
- let callIndex = 0;
- let handler: SessionEventHandler | null = null;
-
-  function emit(type: string, data: Record<string, unknown>): void {
- handler?.({ ...BASE_EVENT, type, data } as unknown as SessionEvent);
- }
-
- const session = {
- on(h: SessionEventHandler) {
- handler = h;
- return () => { handler = null; };
- },
- abort: () => Promise.resolve(),
- setModel: () => Promise.resolve(),
- send() {
- const seq = sequences[callIndex++];
- seq?.(emit);
- return Promise.resolve();
- },
- } as unknown as CopilotSession;
-
- return {
- service: {
- cwd: process.cwd(),
- createSession: () => Promise.resolve(session),
- listModels: () => Promise.resolve([
- { id: "test-model", capabilities: { supports: { reasoningEffort: false } } },
- ]),
- ping: () => Promise.resolve({ message: "ok", timestamp: Date.now() }),
- } as unknown as AppContext["service"],
- logger: new Logger("none"),
- config,
- port: 8080,
- stats: new Stats(),
- };
-}
-
-function parseSSELines(body: string): unknown[] {
- return body
- .split("\n")
- .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
- .map((line) => JSON.parse(line.slice(6)) as unknown);
-}
-
function collectTextContent(events: unknown[], provider: "openai" | "claude" | "codex"): string {
if (provider === "openai") {
return (events as { choices?: { delta?: { content?: string } }[] }[])
@@ -213,21 +175,21 @@ function collectTextContent(events: unknown[], provider: "openai" | "claude" | "
.join("");
}
-const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
-const claudeHeaders = { "user-agent": "claude-cli/1.0" };
-const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
-
 async function createApp(ctx: AppContext, provider: Provider): Promise<FastifyInstance> {
return createServer(ctx, provider);
}
+const claudeHeaders = { "user-agent": "claude-cli/1.0" };
+const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+
describe("OpenAI streaming integration", () => {
let app: FastifyInstance;
afterEach(async () => { await app.close(); });
- it("streams text deltas as SSE chunks", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+ it("handles session error", async () => {
+ const ctx = createCtx(errorSequence({ errorMessage: "backend exploded" }));
app = await createApp(ctx, openaiProvider);
const res = await app.inject({
@@ -238,30 +200,13 @@ describe("OpenAI streaming integration", () => {
});
expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Hello world");
- expect(res.body).toContain("data: [DONE]");
- });
-
- it("streams with reasoning deltas", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Let me", " think"] }));
- app = await createApp(ctx, openaiProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Think hard" }] },
- });
-
- expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Answer");
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("");
});
- it("streams with tool execution events", async () => {
- const ctx = createCtx(standardSequence({
- deltas: ["Done"],
- toolCall: { id: "tc1", name: "read_file", args: { path: "/tmp" } },
+ it("handles session error after partial deltas", async () => {
+ const ctx = createCtx(errorSequence({
+ deltasBeforeError: ["Partial"],
+ errorMessage: "connection lost",
}));
app = await createApp(ctx, openaiProvider);
@@ -269,11 +214,11 @@ describe("OpenAI streaming integration", () => {
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Read file" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
});
+ // Stream still completes (HTTP 200 was already sent)
expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Done");
});
it("streams with compaction mid-session", async () => {
@@ -292,25 +237,10 @@ describe("OpenAI streaming integration", () => {
expect(res.body).toContain("data: [DONE]");
});
- it("handles session error", async () => {
- const ctx = createCtx(errorSequence({ errorMessage: "backend exploded" }));
- app = await createApp(ctx, openaiProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
- });
-
- expect(res.statusCode).toBe(200);
- expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("");
- });
-
- it("handles session error after partial deltas", async () => {
- const ctx = createCtx(errorSequence({
- deltasBeforeError: ["Partial"],
- errorMessage: "connection lost",
+ it("streams with tool execution events", async () => {
+ const ctx = createCtx(standardSequence({
+ deltas: ["Done"],
+ toolCall: { id: "tc1", name: "read_file", args: { path: "/tmp" } },
}));
app = await createApp(ctx, openaiProvider);
@@ -318,74 +248,26 @@ describe("OpenAI streaming integration", () => {
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Read file" }] },
});
- // Stream still completes (HTTP 200 was already sent)
expect(res.statusCode).toBe(200);
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Done");
});
- it("rejects non-streaming requests", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["x"] }));
+ it("streams with reasoning deltas", async () => {
+ const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Let me", " think"] }));
app = await createApp(ctx, openaiProvider);
const res = await app.inject({
method: "POST",
url: "/v1/chat/completions",
headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], stream: false },
- });
-
- expect(res.statusCode).toBe(400);
- });
-
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
- app = await createApp(ctx, openaiProvider);
-
- await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
- });
-
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.inputTokens).toBe(10);
- expect(snapshot.outputTokens).toBe(5);
- });
-
- it("reuses session for multi-turn conversation", async () => {
- const ctx = createMultiTurnCtx([
- standardSequence({ deltas: ["First"] }),
- standardSequence({ deltas: ["Second"] }),
- ]);
- app = await createApp(ctx, openaiProvider);
-
- const res1 = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Turn 1" }] },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Think hard" }] },
});
- expect(collectTextContent(parseSSELines(res1.body), "openai")).toBe("First");
- const res2 = await app.inject({
- method: "POST",
- url: "/v1/chat/completions",
- headers: { ...xcodeHeaders, "content-type": "application/json" },
- payload: {
- model: "test-model",
- messages: [
- { role: "user", content: "Turn 1" },
- { role: "assistant", content: "First" },
- { role: "user", content: "Turn 2" },
- ],
- },
- });
- expect(collectTextContent(parseSSELines(res2.body), "openai")).toBe("Second");
- expect(ctx.stats.snapshot().sessions).toBe(1);
+ expect(res.statusCode).toBe(200);
+ expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Answer");
});
});
@@ -394,53 +276,6 @@ describe("Claude streaming integration", () => {
afterEach(async () => { await app.close(); });
- it("streams text deltas as Anthropic SSE events", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
- app = await createApp(ctx, claudeProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/messages",
- headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
- });
-
- expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "claude")).toBe("Hello world");
-
- const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
- expect(types).toContain("message_start");
- expect(types).toContain("message_stop");
- });
-
- it("streams reasoning as thinking blocks", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Thinking..."] }));
- app = await createApp(ctx, claudeProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/messages",
- headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Think" }], max_tokens: 100 },
- });
-
- expect(res.statusCode).toBe(200);
-    const events = parseSSELines(res.body) as Record<string, unknown>[];
-
- const thinkingStart = events.find(
-      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "thinking",
- );
- expect(thinkingStart).toBeDefined();
-
- const thinkingDelta = events.find(
-      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "thinking_delta",
- );
- expect(thinkingDelta).toBeDefined();
-    expect((thinkingDelta!.delta as Record<string, unknown>).thinking).toBe("Thinking...");
- expect(collectTextContent(events, "claude")).toBe("Answer");
- });
-
it("streams with compaction mid-session", async () => {
const ctx = createCtx(standardSequence({ deltas: ["OK"], compaction: true }));
app = await createApp(ctx, claudeProvider);
@@ -475,20 +310,31 @@ describe("Claude streaming integration", () => {
expect(messageDelta).toBeDefined();
});
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
+ it("streams reasoning as thinking blocks", async () => {
+ const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Thinking..."] }));
app = await createApp(ctx, claudeProvider);
- await app.inject({
+ const res = await app.inject({
method: "POST",
url: "/v1/messages",
headers: { ...claudeHeaders, "content-type": "application/json" },
- payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+ payload: { model: "test-model", messages: [{ role: "user", content: "Think" }], max_tokens: 100 },
});
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.inputTokens).toBe(10);
+ expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+ const thinkingStart = events.find(
+      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "thinking",
+ );
+ expect(thinkingStart).toBeDefined();
+
+ const thinkingDelta = events.find(
+      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "thinking_delta",
+ );
+ expect(thinkingDelta).toBeDefined();
+    expect((thinkingDelta!.delta as Record<string, unknown>).thinking).toBe("Thinking...");
+ expect(collectTextContent(events, "claude")).toBe("Answer");
});
});
@@ -497,8 +343,8 @@ describe("Codex streaming integration", () => {
afterEach(async () => { await app.close(); });
- it("streams text deltas as Responses API events", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+ it("handles session error with failed status", async () => {
+ const ctx = createCtx(errorSequence({ errorMessage: "timeout" }));
app = await createApp(ctx, codexProvider);
const res = await app.inject({
@@ -509,12 +355,9 @@ describe("Codex streaming integration", () => {
});
expect(res.statusCode).toBe(200);
- expect(res.headers["content-type"]).toBe("text/event-stream");
- expect(collectTextContent(parseSSELines(res.body), "codex")).toBe("Hello world");
-
- const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
- expect(types).toContain("response.created");
- expect(types).toContain("response.completed");
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+ const failed = events.find((e) => e.type === "response.failed");
+ expect(failed).toBeDefined();
});
it("streams reasoning as reasoning summary events", async () => {
@@ -536,39 +379,6 @@ describe("Codex streaming integration", () => {
expect(reasoningDelta!.delta).toBe("Deep thought");
expect(collectTextContent(events, "codex")).toBe("Answer");
});
-
- it("handles session error with failed status", async () => {
- const ctx = createCtx(errorSequence({ errorMessage: "timeout" }));
- app = await createApp(ctx, codexProvider);
-
- const res = await app.inject({
- method: "POST",
- url: "/v1/responses",
- headers: { ...codexHeaders, "content-type": "application/json" },
- payload: { model: "test-model", input: "Hi" },
- });
-
- expect(res.statusCode).toBe(200);
-    const events = parseSSELines(res.body) as Record<string, unknown>[];
- const failed = events.find((e) => e.type === "response.failed");
- expect(failed).toBeDefined();
- });
-
- it("records usage stats", async () => {
- const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
- app = await createApp(ctx, codexProvider);
-
- await app.inject({
- method: "POST",
- url: "/v1/responses",
- headers: { ...codexHeaders, "content-type": "application/json" },
- payload: { model: "test-model", input: "Hi" },
- });
-
- const snapshot = ctx.stats.snapshot();
- expect(snapshot.requests).toBe(1);
- expect(snapshot.outputTokens).toBe(5);
- });
});
describe("Tool bridge integration — Claude", () => {