diff --git a/proxy-server/eslint.config.js b/proxy-server/eslint.config.js
index 1411557..2d83b34 100644
--- a/proxy-server/eslint.config.js
+++ b/proxy-server/eslint.config.js
@@ -28,6 +28,6 @@ export default defineConfig(
     },
   },
   {
-    ignores: ["bin/", "dist/", "node_modules/", "scripts/", "config.json5", "eslint.config.js", "vitest.config.ts"],
+    ignores: ["bin/", "dist/", "node_modules/", "scripts/", "config.json5", "eslint.config.js", "vitest.config.ts", "vitest.live.config.ts"],
   },
 );
diff --git a/proxy-server/package-lock.json b/proxy-server/package-lock.json
index e2a9323..a138691 100644
--- a/proxy-server/package-lock.json
+++ b/proxy-server/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "xcode-copilot-server",
-  "version": "4.0.3",
+  "version": "4.0.4",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "xcode-copilot-server",
-      "version": "4.0.3",
+      "version": "4.0.4",
       "license": "MIT",
       "dependencies": {
         "commander": "14.0.3",
@@ -18,7 +18,7 @@
         "zod": "4.3.6"
       },
       "bin": {
-        "xcode-copilot-server": "dist/index.js"
+        "xcode-copilot-server": "bin/xcode-copilot-server.mjs"
       },
       "devDependencies": {
         "@eslint/js": "10.0.1",
diff --git a/proxy-server/package.json b/proxy-server/package.json
index c9db9cb..9d1c8b1 100644
--- a/proxy-server/package.json
+++ b/proxy-server/package.json
@@ -24,6 +24,7 @@
     "start": "node dist/index.js",
     "dev": "tsx src/index.ts",
     "test": "vitest run",
+    "test:live": "vitest run --config vitest.live.config.ts",
     "test:watch": "vitest",
     "lint": "eslint .",
     "typecheck": "tsc -p tsconfig.check.json",
diff --git a/proxy-server/test/live/streaming.live.test.ts b/proxy-server/test/live/streaming.live.test.ts
new file mode 100644
index 0000000..5a39092
--- /dev/null
+++ b/proxy-server/test/live/streaming.live.test.ts
@@ -0,0 +1,334 @@
+import { describe, it, expect, beforeAll, afterAll, beforeEach, afterEach } from "vitest";
+import { createServer, CopilotService, Logger, Stats } from "copilot-sdk-proxy";
+import { openaiProvider } from "../../src/providers/openai/provider.js";
+import { claudeProvider } from "../../src/providers/claude/provider.js";
+import { codexProvider } from "../../src/providers/codex/provider.js";
+import type { AppContext } from "../../src/context.js";
+import { BYTES_PER_MIB, type ServerConfig } from "../../src/config-schema.js";
+import type { Provider } from "../../src/providers/types.js";
+
+const OPENAI_MODELS = ["claude-sonnet-4-6", "gpt-5.3"];
+const CLAUDE_MODELS = ["claude-sonnet-4-6"];
+const CODEX_MODELS = ["gpt-5.3"];
+const TIMEOUT = 60_000;
+
+let service: CopilotService;
+const logger = new Logger("info");
+
+const config: ServerConfig = {
+  toolBridge: false,
+  toolBridgeTimeoutMs: 0,
+  mcpServers: {},
+  allowedCliTools: ["*"],
+  excludedFilePatterns: [],
+  bodyLimit: 10 * BYTES_PER_MIB,
+  requestTimeoutMs: 0,
+  autoApprovePermissions: true,
+};
+
+beforeAll(async () => {
+  service = new CopilotService({ logger });
+  await service.start();
+
+  const auth = await service.getAuthStatus();
+  if (!auth.isAuthenticated) {
+    throw new Error("Copilot not authenticated. Sign in first.");
+  }
+}, TIMEOUT);
+
+afterAll(async () => {
+  await service.stop();
+}, TIMEOUT);
+
+function createCtx(): AppContext {
+  return {
+    service,
+    logger,
+    config,
+    port: 0,
+    stats: new Stats(),
+  };
+}
+
+const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+const claudeCliHeaders = { "user-agent": "claude-cli/1.0" };
+const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+
+async function startServer(provider: Provider) {
+  const ctx = createCtx();
+  const app = await createServer(ctx, provider);
+  const address = await app.listen({ port: 0, host: "127.0.0.1" });
+  return { app, baseUrl: address, ctx };
+}
+
+async function postJSON(baseUrl: string, path: string, body: unknown, headers: Record<string, string>): Promise<{ status: number; body: string }> {
+  const res = await fetch(`${baseUrl}${path}`, {
+    method: "POST",
+    headers: { "content-type": "application/json", ...headers },
+    body: JSON.stringify(body),
+  });
+  return { status: res.status, body: await res.text() };
+}
+
+function parseSSELines(body: string): unknown[] {
+  return body
+    .split("\n")
+    .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+    .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
+
+function collectOpenAIText(events: unknown[]): string {
+  return (events as { choices?: { delta?: { content?: string } }[] }[])
+    .flatMap((e) => e.choices ?? [])
+    .map((c) => c.delta?.content ?? "")
+    .filter(Boolean)
+    .join("");
+}
+
+function collectClaudeText(events: unknown[]): string {
+  return (events as { type?: string; delta?: { type?: string; text?: string } }[])
+    .filter((e) => e.type === "content_block_delta" && e.delta?.type === "text_delta")
+    .map((e) => e.delta?.text ?? "")
+    .join("");
+}
+
+function collectCodexText(events: unknown[]): string {
+  return (events as { type?: string; delta?: string }[])
+    .filter((e) => e.type === "response.output_text.delta")
+    .map((e) => e.delta ?? "")
+    .join("");
+}
+
+describe.each(OPENAI_MODELS)("OpenAI provider with %s", (model) => {
+  let baseUrl: string;
+  let close: () => Promise<void>;
+
+  beforeEach(async () => {
+    const server = await startServer(openaiProvider);
+    baseUrl = server.baseUrl;
+    close = () => server.app.close();
+  }, TIMEOUT);
+
+  afterEach(async () => { await close(); });
+
+  it("streams a basic response", async () => {
+    const res = await postJSON(baseUrl, "/v1/chat/completions", {
+      model,
+      messages: [{ role: "user", content: "Reply with exactly: hello" }],
+    }, xcodeHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectOpenAIText(parseSSELines(res.body));
+    expect(text.toLowerCase()).toContain("hello");
+    expect(res.body).toContain("data: [DONE]");
+  }, TIMEOUT);
+
+  it("streams with a system message", async () => {
+    const res = await postJSON(baseUrl, "/v1/chat/completions", {
+      model,
+      messages: [
+        { role: "system", content: "You are a calculator. Only respond with numbers." },
+        { role: "user", content: "What is 2+2?" },
+      ],
+    }, xcodeHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectOpenAIText(parseSSELines(res.body));
+    expect(text).toContain("4");
+  }, TIMEOUT);
+
+  it("streams reasoning content", async () => {
+    const res = await postJSON(baseUrl, "/v1/chat/completions", {
+      model,
+      messages: [{ role: "user", content: "Think step by step: what is 15 * 17?" }],
+    }, xcodeHeaders);
+
+    expect(res.status).toBe(200);
+    const events = parseSSELines(res.body);
+    const text = collectOpenAIText(events);
+    expect(text).toContain("255");
+  }, TIMEOUT);
+
+  it("handles multi-turn conversation", async () => {
+    const res = await postJSON(baseUrl, "/v1/chat/completions", {
+      model,
+      messages: [
+        { role: "user", content: "Remember the word 'banana'. Just say OK." },
+        { role: "assistant", content: "OK" },
+        { role: "user", content: "What word did I ask you to remember?" },
+      ],
+    }, xcodeHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectOpenAIText(parseSSELines(res.body));
+    expect(text.toLowerCase()).toContain("banana");
+  }, TIMEOUT);
+
+  it("rejects invalid schema", async () => {
+    const res = await postJSON(baseUrl, "/v1/chat/completions", {
+      model,
+      messages: "not an array",
+    }, xcodeHeaders);
+
+    expect(res.status).toBe(400);
+  }, TIMEOUT);
+});
+
+describe.each(CLAUDE_MODELS)("Claude provider with %s", (model) => {
+  let baseUrl: string;
+  let close: () => Promise<void>;
+
+  beforeEach(async () => {
+    const server = await startServer(claudeProvider);
+    baseUrl = server.baseUrl;
+    close = () => server.app.close();
+  }, TIMEOUT);
+
+  afterEach(async () => { await close(); });
+
+  it("streams a basic response", async () => {
+    const res = await postJSON(baseUrl, "/v1/messages", {
+      model,
+      messages: [{ role: "user", content: "Reply with exactly: hello" }],
+      max_tokens: 100,
+    }, claudeCliHeaders);
+
+    expect(res.status).toBe(200);
+    const events = parseSSELines(res.body);
+    const text = collectClaudeText(events);
+    expect(text.toLowerCase()).toContain("hello");
+
+    const types = (events as { type?: string }[]).map((e) => e.type);
+    expect(types).toContain("message_start");
+    expect(types).toContain("message_stop");
+  }, TIMEOUT);
+
+  it("streams with a system message", async () => {
+    const res = await postJSON(baseUrl, "/v1/messages", {
+      model,
+      system: "You are a calculator. Only respond with numbers.",
+      messages: [{ role: "user", content: "What is 3+3?" }],
+      max_tokens: 100,
+    }, claudeCliHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectClaudeText(parseSSELines(res.body));
+    expect(text).toContain("6");
+  }, TIMEOUT);
+
+  it("streams reasoning content via thinking blocks", async () => {
+    const res = await postJSON(baseUrl, "/v1/messages", {
+      model,
+      messages: [{ role: "user", content: "Think step by step: what is 15 * 17?" }],
+      max_tokens: 16000,
+    }, claudeCliHeaders);
+
+    expect(res.status).toBe(200);
+    const events = parseSSELines(res.body);
+    const text = collectClaudeText(events);
+    expect(text).toContain("255");
+
+    const types = (events as { type?: string }[]).map((e) => e.type);
+    expect(types).toContain("content_block_start");
+    expect(types).toContain("content_block_delta");
+  }, TIMEOUT);
+
+  it("handles multi-turn conversation", async () => {
+    const res = await postJSON(baseUrl, "/v1/messages", {
+      model,
+      messages: [
+        { role: "user", content: "Remember the word 'mango'. Just say OK." },
+        { role: "assistant", content: "OK" },
+        { role: "user", content: "What word did I ask you to remember?" },
+      ],
+      max_tokens: 100,
+    }, claudeCliHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectClaudeText(parseSSELines(res.body));
+    expect(text.toLowerCase()).toContain("mango");
+  }, TIMEOUT);
+
+  it("rejects missing max_tokens", async () => {
+    const res = await postJSON(baseUrl, "/v1/messages", {
+      model,
+      messages: [{ role: "user", content: "Hi" }],
+    }, claudeCliHeaders);
+
+    expect(res.status).toBe(400);
+  }, TIMEOUT);
+});
+
+describe.each(CODEX_MODELS)("Codex provider with %s", (model) => {
+  let baseUrl: string;
+  let close: () => Promise<void>;
+
+  beforeEach(async () => {
+    const server = await startServer(codexProvider);
+    baseUrl = server.baseUrl;
+    close = () => server.app.close();
+  }, TIMEOUT);
+
+  afterEach(async () => { await close(); });
+
+  it("streams a basic response", async () => {
+    const res = await postJSON(baseUrl, "/v1/responses", { model, input: "Reply with exactly: hello" }, codexHeaders);
+
+    expect(res.status).toBe(200);
+    const events = parseSSELines(res.body);
+    const text = collectCodexText(events);
+    expect(text.toLowerCase()).toContain("hello");
+
+    const types = (events as { type?: string }[]).map((e) => e.type).filter(Boolean);
+    expect(types).toContain("response.created");
+    expect(types).toContain("response.completed");
+  }, TIMEOUT);
+
+  it("streams with instructions", async () => {
+    const res = await postJSON(baseUrl, "/v1/responses", {
+      model,
+      instructions: "You are a calculator. Only respond with numbers.",
+      input: "What is 5+5?",
+    }, codexHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectCodexText(parseSSELines(res.body));
+    expect(text).toContain("10");
+  }, TIMEOUT);
+
+  it("streams reasoning content", async () => {
+    const res = await postJSON(baseUrl, "/v1/responses", {
+      model,
+      input: "Think step by step: what is 15 * 17?",
+    }, codexHeaders);
+
+    expect(res.status).toBe(200);
+    const events = parseSSELines(res.body);
+    const text = collectCodexText(events);
+    expect(text).toContain("255");
+
+    const types = (events as { type?: string }[]).map((e) => e.type).filter(Boolean);
+    expect(types).toContain("response.completed");
+  }, TIMEOUT);
+
+  it("handles multi-turn via input array", async () => {
+    const res = await postJSON(baseUrl, "/v1/responses", {
+      model,
+      input: [
+        { role: "user", content: "Remember the word 'cherry'. Just say OK." },
+        { role: "assistant", content: "OK" },
+        { role: "user", content: "What word did I ask you to remember?" },
+      ],
+    }, codexHeaders);
+
+    expect(res.status).toBe(200);
+    const text = collectCodexText(parseSSELines(res.body));
+    expect(text.toLowerCase()).toContain("cherry");
+  }, TIMEOUT);
+
+  it("rejects missing input", async () => {
+    const res = await postJSON(baseUrl, "/v1/responses", { model }, codexHeaders);
+
+    expect(res.status).toBe(400);
+  }, TIMEOUT);
+});
diff --git a/proxy-server/test/streaming-integration.test.ts b/proxy-server/test/streaming-integration.test.ts
new file mode 100644
index 0000000..78dfc10
--- /dev/null
+++ b/proxy-server/test/streaming-integration.test.ts
@@ -0,0 +1,788 @@
+import { describe, it, expect, afterEach } from "vitest";
+import type { FastifyInstance } from "fastify";
+import type { SessionEvent, SessionEventHandler, CopilotSession } from "@github/copilot-sdk";
+import { createServer, Logger, Stats } from "copilot-sdk-proxy";
+import { openaiProvider } from "../src/providers/openai/provider.js";
+import { claudeProvider } from "../src/providers/claude/provider.js";
+import { codexProvider } from "../src/providers/codex/provider.js";
+import type { AppContext } from "../src/context.js";
+import { BYTES_PER_MIB, type ServerConfig } from "../src/config-schema.js";
+import { BRIDGE_TOOL_PREFIX } from "../src/bridge-constants.js";
+import type { Provider } from "../src/providers/types.js";
+
+const BASE_EVENT = { id: "e1", timestamp: new Date().toISOString(), parentId: null };
+
+type EventSequence = (emit: (type: string, data: Record<string, unknown>) => void) => void;
+
+function createMockSession(sequence: EventSequence): CopilotSession {
+  let handler: SessionEventHandler | null = null;
+
+  function emit(type: string, data: Record<string, unknown>): void {
+    handler?.({ ...BASE_EVENT, type, data } as unknown as SessionEvent);
+  }
+
+  return {
+    on(h: SessionEventHandler) {
+      handler = h;
+      return () => { handler = null; };
+    },
+    abort: () => Promise.resolve(),
+    setModel: () => Promise.resolve(),
+    send() {
+      sequence(emit);
+      return Promise.resolve();
+    },
+  } as unknown as CopilotSession;
+}
+
+function standardSequence(opts: {
+  deltas: string[];
+  reasoning?: string[];
+  toolCall?: { id: string; name: string; args: Record<string, unknown> };
+  compaction?: boolean;
+}): EventSequence {
+  return (emit) => {
+    if (opts.reasoning) {
+      for (const text of opts.reasoning) {
+        emit("assistant.reasoning_delta", { reasoningId: "r1", deltaContent: text });
+      }
+      emit("assistant.reasoning", { reasoningId: "r1", content: opts.reasoning.join("") });
+    }
+
+    if (opts.compaction) {
+      emit("session.compaction_start", {});
+      emit("session.compaction_complete", {
+        success: true,
+        preCompactionTokens: 1000,
+        postCompactionTokens: 400,
+      });
+    }
+
+    if (opts.toolCall) {
+      emit("tool.execution_start", {
+        toolCallId: opts.toolCall.id,
+        toolName: opts.toolCall.name,
+        arguments: opts.toolCall.args,
+      });
+      emit("tool.execution_complete", {
+        toolCallId: opts.toolCall.id,
+        success: true,
+        result: { content: "tool result" },
+      });
+    }
+
+    for (const text of opts.deltas) {
+      emit("assistant.message_delta", { messageId: "m1", deltaContent: text });
+    }
+    emit("assistant.message", {
+      messageId: "m1",
+      content: opts.deltas.join(""),
+      toolRequests: [],
+    });
+
+    emit("assistant.usage", { inputTokens: 10, outputTokens: 5, model: "test-model" });
+    emit("session.idle", {});
+  };
+}
+
+function errorSequence(opts: { deltasBeforeError?: string[]; errorMessage: string }): EventSequence {
+  return (emit) => {
+    if (opts.deltasBeforeError) {
+      for (const text of opts.deltasBeforeError) {
+        emit("assistant.message_delta", { messageId: "m1", deltaContent: text });
+      }
+    }
+    emit("session.error", { message: opts.errorMessage });
+  };
+}
+
+function toolRequestSequence(opts: {
+  deltas: string[];
+  toolRequests: { toolCallId: string; name: string; arguments?: unknown }[];
+}): EventSequence {
+  return (emit) => {
+    for (const text of opts.deltas) {
+      emit("assistant.message_delta", { messageId: "m1", deltaContent: text });
+    }
+    emit("assistant.message", {
+      messageId: "m1",
+      content: opts.deltas.join(""),
+      toolRequests: opts.toolRequests,
+    });
+    // No session.idle: the session stays active waiting for tool results
+  };
+}
+
+const config: ServerConfig = {
+  toolBridge: false,
+  toolBridgeTimeoutMs: 0,
+  mcpServers: {},
+  allowedCliTools: [],
+  excludedFilePatterns: [],
+  bodyLimit: 4 * BYTES_PER_MIB,
+  requestTimeoutMs: 0,
+  autoApprovePermissions: ["read", "mcp"],
+};
+
+const toolBridgeConfig: ServerConfig = {
+  ...config,
+  toolBridge: true,
+};
+
+function createCtx(sequence: EventSequence, overrideConfig?: ServerConfig): AppContext {
+  return {
+    service: {
+      cwd: process.cwd(),
+      createSession: () => Promise.resolve(createMockSession(sequence)),
+      listModels: () => Promise.resolve([
+        { id: "test-model", capabilities: { supports: { reasoningEffort: false } } },
+      ]),
+      ping: () => Promise.resolve({ message: "ok", timestamp: Date.now() }),
+    } as unknown as AppContext["service"],
+    logger: new Logger("none"),
+    config: overrideConfig ?? config,
+    port: 8080,
+    stats: new Stats(),
+  };
+}
+
+function createMultiTurnCtx(sequences: EventSequence[]): AppContext {
+  let callIndex = 0;
+  let handler: SessionEventHandler | null = null;
+
+  function emit(type: string, data: Record<string, unknown>): void {
+    handler?.({ ...BASE_EVENT, type, data } as unknown as SessionEvent);
+  }
+
+  const session = {
+    on(h: SessionEventHandler) {
+      handler = h;
+      return () => { handler = null; };
+    },
+    abort: () => Promise.resolve(),
+    setModel: () => Promise.resolve(),
+    send() {
+      const seq = sequences[callIndex++];
+      seq?.(emit);
+      return Promise.resolve();
+    },
+  } as unknown as CopilotSession;
+
+  return {
+    service: {
+      cwd: process.cwd(),
+      createSession: () => Promise.resolve(session),
+      listModels: () => Promise.resolve([
+        { id: "test-model", capabilities: { supports: { reasoningEffort: false } } },
+      ]),
+      ping: () => Promise.resolve({ message: "ok", timestamp: Date.now() }),
+    } as unknown as AppContext["service"],
+    logger: new Logger("none"),
+    config,
+    port: 8080,
+    stats: new Stats(),
+  };
+}
+
+function parseSSELines(body: string): unknown[] {
+  return body
+    .split("\n")
+    .filter((line) => line.startsWith("data: ") && line !== "data: [DONE]")
+    .map((line) => JSON.parse(line.slice(6)) as unknown);
+}
+
+function collectTextContent(events: unknown[], provider: "openai" | "claude" | "codex"): string {
+  if (provider === "openai") {
+    return (events as { choices?: { delta?: { content?: string } }[] }[])
+      .flatMap((e) => e.choices ?? [])
+      .map((c) => c.delta?.content ?? "")
+      .filter(Boolean)
+      .join("");
+  }
+
+  if (provider === "claude") {
+    return (events as { type?: string; delta?: { type?: string; text?: string } }[])
+      .filter((e) => e.type === "content_block_delta" && e.delta?.type === "text_delta")
+      .map((e) => e.delta?.text ?? "")
+      .join("");
+  }
+
+  return (events as { type?: string; delta?: string }[])
+    .filter((e) => e.type === "response.output_text.delta")
+    .map((e) => e.delta ?? "")
+    .join("");
+}
+
+const xcodeHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+const claudeHeaders = { "user-agent": "claude-cli/1.0" };
+const codexHeaders = { "user-agent": "Xcode/24577 CFNetwork/3860.300.31 Darwin/25.2.0" };
+
+async function createApp(ctx: AppContext, provider: Provider): Promise<FastifyInstance> {
+  return createServer(ctx, provider);
+}
+
+describe("OpenAI streaming integration", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("streams text deltas as SSE chunks", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(res.headers["content-type"]).toBe("text/event-stream");
+    expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Hello world");
+    expect(res.body).toContain("data: [DONE]");
+  });
+
+  it("streams with reasoning deltas", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Let me", " think"] }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Think hard" }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Answer");
+  });
+
+  it("streams with tool execution events", async () => {
+    const ctx = createCtx(standardSequence({
+      deltas: ["Done"],
+      toolCall: { id: "tc1", name: "read_file", args: { path: "/tmp" } },
+    }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Read file" }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Done");
+  });
+
+  it("streams with compaction mid-session", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Compacted"], compaction: true }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("Compacted");
+    expect(res.body).toContain("data: [DONE]");
+  });
+
+  it("handles session error", async () => {
+    const ctx = createCtx(errorSequence({ errorMessage: "backend exploded" }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(collectTextContent(parseSSELines(res.body), "openai")).toBe("");
+  });
+
+  it("handles session error after partial deltas", async () => {
+    const ctx = createCtx(errorSequence({
+      deltasBeforeError: ["Partial"],
+      errorMessage: "connection lost",
+    }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+    });
+
+    // Stream still completes (HTTP 200 was already sent)
+    expect(res.statusCode).toBe(200);
+  });
+
+  it("rejects non-streaming requests", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], stream: false },
+    });
+
+    expect(res.statusCode).toBe(400);
+  });
+
+  it("records usage stats", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
+    app = await createApp(ctx, openaiProvider);
+
+    await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }] },
+    });
+
+    const snapshot = ctx.stats.snapshot();
+    expect(snapshot.requests).toBe(1);
+    expect(snapshot.inputTokens).toBe(10);
+    expect(snapshot.outputTokens).toBe(5);
+  });
+
+  it("reuses session for multi-turn conversation", async () => {
+    const ctx = createMultiTurnCtx([
+      standardSequence({ deltas: ["First"] }),
+      standardSequence({ deltas: ["Second"] }),
+    ]);
+    app = await createApp(ctx, openaiProvider);
+
+    const res1 = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Turn 1" }] },
+    });
+    expect(collectTextContent(parseSSELines(res1.body), "openai")).toBe("First");
+
+    const res2 = await app.inject({
+      method: "POST",
+      url: "/v1/chat/completions",
+      headers: { ...xcodeHeaders, "content-type": "application/json" },
+      payload: {
+        model: "test-model",
+        messages: [
+          { role: "user", content: "Turn 1" },
+          { role: "assistant", content: "First" },
+          { role: "user", content: "Turn 2" },
+        ],
+      },
+    });
+    expect(collectTextContent(parseSSELines(res2.body), "openai")).toBe("Second");
+    expect(ctx.stats.snapshot().sessions).toBe(1);
+  });
+});
+
+describe("Claude streaming integration", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("streams text deltas as Anthropic SSE events", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(res.headers["content-type"]).toBe("text/event-stream");
+    expect(collectTextContent(parseSSELines(res.body), "claude")).toBe("Hello world");
+
+    const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type);
+    expect(types).toContain("message_start");
+    expect(types).toContain("message_stop");
+  });
+
+  it("streams reasoning as thinking blocks", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Thinking..."] }));
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Think" }], max_tokens: 100 },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+    const thinkingStart = events.find(
+      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "thinking",
+    );
+    expect(thinkingStart).toBeDefined();
+
+    const thinkingDelta = events.find(
+      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "thinking_delta",
+    );
+    expect(thinkingDelta).toBeDefined();
+    expect((thinkingDelta!.delta as Record<string, string>).thinking).toBe("Thinking...");
+    expect(collectTextContent(events, "claude")).toBe("Answer");
+  });
+
+  it("streams with compaction mid-session", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["OK"], compaction: true }));
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(collectTextContent(parseSSELines(res.body), "claude")).toBe("OK");
+  });
+
+  it("handles session error with end_turn stop reason", async () => {
+    const ctx = createCtx(errorSequence({ errorMessage: "rate limited" }));
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+    const messageDelta = events.find(
+      (e) => e.type === "message_delta" && (e.delta as Record<string, unknown>).stop_reason === "end_turn",
+    );
+    expect(messageDelta).toBeDefined();
+  });
+
+  it("records usage stats", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
+    app = await createApp(ctx, claudeProvider);
+
+    await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", messages: [{ role: "user", content: "Hi" }], max_tokens: 100 },
+    });
+
+    const snapshot = ctx.stats.snapshot();
+    expect(snapshot.requests).toBe(1);
+    expect(snapshot.inputTokens).toBe(10);
+  });
+});
+
+describe("Codex streaming integration", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("streams text deltas as Responses API events", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hello", " world"] }));
+    app = await createApp(ctx, codexProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/responses",
+      headers: { ...codexHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", input: "Hi" },
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(res.headers["content-type"]).toBe("text/event-stream");
+    expect(collectTextContent(parseSSELines(res.body), "codex")).toBe("Hello world");
+
+    const types = (parseSSELines(res.body) as { type?: string }[]).map((e) => e.type).filter(Boolean);
+    expect(types).toContain("response.created");
+    expect(types).toContain("response.completed");
+  });
+
+  it("streams reasoning as reasoning summary events", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Answer"], reasoning: ["Deep thought"] }));
+    app = await createApp(ctx, codexProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/responses",
+      headers: { ...codexHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", input: "Think" },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+    const reasoningDelta = events.find((e) => e.type === "response.reasoning_summary_text.delta");
+    expect(reasoningDelta).toBeDefined();
+    expect(reasoningDelta!.delta).toBe("Deep thought");
+    expect(collectTextContent(events, "codex")).toBe("Answer");
+  });
+
+  it("handles session error with failed status", async () => {
+    const ctx = createCtx(errorSequence({ errorMessage: "timeout" }));
+    app = await createApp(ctx, codexProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/responses",
+      headers: { ...codexHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", input: "Hi" },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+    const failed = events.find((e) => e.type === "response.failed");
+    expect(failed).toBeDefined();
+  });
+
+  it("records usage stats", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["Hi"] }));
+    app = await createApp(ctx, codexProvider);
+
+    await app.inject({
+      method: "POST",
+      url: "/v1/responses",
+      headers: { ...codexHeaders, "content-type": "application/json" },
+      payload: { model: "test-model", input: "Hi" },
+    });
+
+    const snapshot = ctx.stats.snapshot();
+    expect(snapshot.requests).toBe(1);
+    expect(snapshot.outputTokens).toBe(5);
+  });
+});
+
+describe("Tool bridge integration — Claude", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("emits tool_use blocks when model requests bridge tools", async () => {
+    const ctx = createCtx(
+      toolRequestSequence({
+        deltas: ["Let me check"],
+        toolRequests: [
+          { toolCallId: "tc1", name: `${BRIDGE_TOOL_PREFIX}XcodeRead`, arguments: { path: "/src" } },
+        ],
+      }),
+      toolBridgeConfig,
+    );
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/messages",
+      headers: { ...claudeHeaders, "content-type": "application/json" },
+      payload: {
+        model: "test-model",
+        messages: [{ role: "user", content: "Read my code" }],
+        max_tokens: 100,
+        tools: [{ name: "XcodeRead", description: "Read file", input_schema: { type: "object" } }],
+      },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+    const toolUseStart = events.find(
+      (e) => e.type === "content_block_start" && (e.content_block as Record<string, unknown>).type === "tool_use",
+    );
+    expect(toolUseStart).toBeDefined();
+
+    const block = toolUseStart!.content_block as Record<string, unknown>;
+    expect(block.id).toBe("tc1");
+    expect(block.name).toBe("XcodeRead");
+
+    const inputDelta = events.find(
+      (e) => e.type === "content_block_delta" && (e.delta as Record<string, unknown>).type === "input_json_delta",
+    );
+    expect(inputDelta).toBeDefined();
+  });
+});
+
+describe("Tool bridge integration — Codex", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("emits function_call items when model requests bridge tools", async () => {
+    const ctx = createCtx(
+      toolRequestSequence({
+        deltas: ["Let me check"],
+        toolRequests: [
+          { toolCallId: "tc1", name: `${BRIDGE_TOOL_PREFIX}XcodeRead`, arguments: { path: "/src" } },
+        ],
+      }),
+      toolBridgeConfig,
+    );
+    app = await createApp(ctx, codexProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/v1/responses",
+      headers: { ...codexHeaders, "content-type": "application/json" },
+      payload: {
+        model: "test-model",
+        input: "Read my code",
+        tools: [{ type: "function", name: "XcodeRead", description: "Read file", parameters: { type: "object" } }],
+      },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const events = parseSSELines(res.body) as Record<string, unknown>[];
+
+    // The first output_item.added is the "message" item, not the function_call
+    const fcAdded = events.find(
+      (e) => e.type === "response.output_item.added" && (e as { item?: { type?: string } }).item?.type === "function_call",
+    );
+    expect(fcAdded).toBeDefined();
+
+    const item = (fcAdded as { item?: Record<string, unknown> }).item;
+    expect(item?.name).toBe("XcodeRead");
+    expect(item?.call_id).toBe("tc1");
+  });
+});
+
+describe("MCP routes", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("responds to initialize with protocol version and capabilities", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }), toolBridgeConfig);
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/mcp/test-conv-id",
+      headers: { "content-type": "application/json" },
+      payload: {
+        jsonrpc: "2.0",
+        id: 1,
+        method: "initialize",
+        params: {},
+      },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const body = res.json();
+    expect(body.jsonrpc).toBe("2.0");
+    expect(body.id).toBe(1);
+    expect(body.result.protocolVersion).toBe("2024-11-05");
+    expect(body.result.capabilities).toEqual({ tools: {} });
+    expect(body.result.serverInfo.name).toBe("xcode-bridge");
+  });
+
+  it("returns method not found for unknown methods", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }), toolBridgeConfig);
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/mcp/test-conv-id",
+      headers: { "content-type": "application/json" },
+      payload: {
+        jsonrpc: "2.0",
+        id: 2,
+        method: "unknown/method",
+      },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const body = res.json();
+    expect(body.error.code).toBe(-32601);
+  });
+
+  it("returns parse error for invalid JSON-RPC", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }), toolBridgeConfig);
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/mcp/test-conv-id",
+      headers: { "content-type": "application/json" },
+      payload: { invalid: true },
+    });
+
+    expect(res.statusCode).toBe(200);
+    const body = res.json();
+    expect(body.error.code).toBe(-32700);
+  });
+
+  it("accepts notifications (no id) with 202", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }), toolBridgeConfig);
+    app = await createApp(ctx, claudeProvider);
+
+    const res = await app.inject({
+      method: "POST",
+      url: "/mcp/test-conv-id",
+      headers: { "content-type": "application/json" },
+      payload: {
+        jsonrpc: "2.0",
+        method: "notifications/initialized",
+      },
+    });
+
+    expect(res.statusCode).toBe(202);
+  });
+
+  // Can't test the SSE GET route with app.inject() — it holds the connection open forever
+});
+
+describe("GET /health", () => {
+  let app: FastifyInstance;
+
+  afterEach(async () => { await app.close(); });
+
+  it("returns 200 with status ok when ping succeeds", async () => {
+    const ctx = createCtx(standardSequence({ deltas: ["x"] }));
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "GET",
+      url: "/health",
+      headers: xcodeHeaders,
+    });
+
+    expect(res.statusCode).toBe(200);
+    expect(res.json().status).toBe("ok");
+  });
+
+  it("returns 503 when ping fails", async () => {
+    const ctx: AppContext = {
+      ...createCtx(standardSequence({ deltas: ["x"] })),
+      service: {
+        ping: () => Promise.reject(new Error("connection lost")),
+      } as unknown as AppContext["service"],
+    };
+    app = await createApp(ctx, openaiProvider);
+
+    const res = await app.inject({
+      method: "GET",
+      url: "/health",
+      headers: xcodeHeaders,
+    });
+
+    expect(res.statusCode).toBe(503);
+    expect(res.json()).toEqual({ status: "error", message: "connection lost" });
+  });
+});
diff --git a/proxy-server/vitest.config.ts b/proxy-server/vitest.config.ts
index 8bf24b5..23c876e 100644
--- a/proxy-server/vitest.config.ts
+++ b/proxy-server/vitest.config.ts
@@ -3,6 +3,7 @@ import { defineConfig } from "vitest/config";
 export default defineConfig({
   test: {
     include: ["test/**/*.test.ts"],
+    exclude: ["test/live/**"],
     environment: "node",
     setupFiles: ["src/fix-jsonrpc-import.ts"],
   },
diff --git a/proxy-server/vitest.live.config.ts b/proxy-server/vitest.live.config.ts
new file mode 100644
index 0000000..69c872f
--- /dev/null
+++ b/proxy-server/vitest.live.config.ts
@@ -0,0 +1,11 @@
+import { defineConfig } from "vitest/config";
+
+export default defineConfig({
+  test: {
+    include: ["test/live/**/*.test.ts"],
+    setupFiles: ["src/fix-jsonrpc-import.ts"],
+    environment: "node",
+    testTimeout: 60_000,
+    hookTimeout: 60_000,
+  },
+});