5 changes: 4 additions & 1 deletion .gitignore
@@ -11,4 +11,7 @@ node_modules/
.eslintcache

# build output
dist/
dist/

# local worktrees
.worktrees/
66 changes: 66 additions & 0 deletions README.md
@@ -1,5 +1,20 @@
# Copilot API Proxy

> **This is a fork of [ericc-ch/copilot-api](https://github.com/ericc-ch/copilot-api) with full reasoning / extended-thinking support added.**
>
> **What this fork adds:**
>
> - **Capability-aware reasoning routing** — reads each model's `capabilities.supports` at startup and routes `reasoning_effort` / `thinking_budget` only to models that actually support them; the fields are silently stripped for unsupported models.
> - **Anthropic ↔ OpenAI thinking translation** — `thinking: { type: "enabled", budget_tokens: N }` on the `/v1/messages` surface is automatically translated into `reasoning_effort: "high"` + `thinking_budget` for the upstream call, and vice versa.
> - **Streaming thinking traces** — Claude thinking streams emit proper `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events so Claude Code and similar clients see native thinking UIs.
> - **Forward-compatible `reasoning_effort`** — type accepts any string (not just `"low" | "medium" | "high"`), so new model-specific values like `"xhigh"` or `"minimal"` are transparently passed through without code changes.
>
> Everything else — auth, rate limiting, usage dashboard, CLI flags — is identical to the upstream project.

---

**One Copilot subscription. Every frontier reasoning model. OpenAI- and Anthropic-shaped.** Point Claude Code, Cline, or your own scripts at a single localhost URL and unlock Claude Sonnet 4.6, GPT-5, Gemini, and friends — with real reasoning traces and thinking budgets routed to whichever knob each upstream model actually supports.

> [!WARNING]
> This is a reverse-engineered proxy for the GitHub Copilot API. It is not supported by GitHub and may break unexpectedly. Use at your own risk.

@@ -32,6 +47,7 @@ A reverse-engineered proxy for the GitHub Copilot API that exposes it as an Open
## Features

- **OpenAI & Anthropic Compatibility**: Exposes GitHub Copilot as an OpenAI-compatible (`/v1/chat/completions`, `/v1/models`, `/v1/embeddings`) and Anthropic-compatible (`/v1/messages`) API.
- **Reasoning & Extended Thinking**: Capability-aware translation of `reasoning_effort` and Anthropic `thinking` blocks. Thinking traces, signatures, and `reasoning_opaque` tokens flow through both non-streaming and streaming responses without you having to know which upstream flag each model wants.
- **Claude Code Integration**: Easily configure and launch [Claude Code](https://docs.anthropic.com/en/docs/claude-code/overview) to use Copilot as its backend with a simple command-line flag (`--claude-code`).
- **Usage Dashboard**: A web-based dashboard to monitor your Copilot API usage, view quotas, and see detailed statistics.
- **Rate Limit Control**: Manage API usage with rate-limiting options (`--rate-limit`) and a waiting mechanism (`--wait`) to prevent errors from rapid requests.
@@ -278,6 +294,56 @@ The dashboard provides a user-friendly interface to view your Copilot usage data
- **URL-based Configuration**: You can also specify the API endpoint directly in the URL using a query parameter. This is useful for bookmarks or sharing links. For example:
`https://ericc-ch.github.io/copilot-api?endpoint=http://your-api-server/usage`

## Reasoning & Extended Thinking

Each Copilot model advertises its own reasoning knobs under `capabilities.supports`. The proxy reads them at startup and translates requests accordingly, so the same client call works across Claude, GPT, Gemini, and friends.

### OpenAI-shaped requests (`/v1/chat/completions`)

- `reasoning_effort` (`low` | `medium` | `high`, plus `minimal` for the GPT-5 family) is passed through to any model whose `supports.reasoning_effort` is non-empty; it is stripped for all other models.
- `thinking_budget` is passed through only when the model advertises `supports.adaptive_thinking` (currently Claude Sonnet 4.5+/4.6 and Opus 4.6); for unsupported models it is silently dropped.
- Claude reasoning responses surface as `reasoning_text` and `reasoning_opaque` on the assistant message.
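
A minimal sketch of that routing decision (field and function names here are illustrative assumptions, not the proxy's actual internals):

```typescript
// Sketch of capability-aware routing. The `supports` shape mirrors what the
// Copilot /models endpoint advertises; exact field shapes are assumptions.
interface ModelEntry {
  id: string
  capabilities?: {
    supports?: {
      reasoning_effort?: Array<string> // e.g. ["low", "medium", "high"]
      adaptive_thinking?: boolean
    }
  }
}

interface ReasoningContext {
  reasoningEffort?: string
  thinkingBudget?: number
}

// Forward each knob only when the selected model advertises support for it.
function buildReasoningContext(
  model: ModelEntry | undefined,
  reasoningEffort?: string,
  thinkingBudget?: number,
): ReasoningContext {
  const supports = model?.capabilities?.supports
  return {
    reasoningEffort: supports?.reasoning_effort?.length
      ? reasoningEffort
      : undefined,
    thinkingBudget: supports?.adaptive_thinking ? thinkingBudget : undefined,
  }
}
```

With this shape, a model entry that lists `reasoning_effort` but not `adaptive_thinking` keeps `reasoning_effort: "high"` and drops `thinking_budget: 2048`.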

```sh
# GPT-5 mini with heavy reasoning
curl http://localhost:4141/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-5-mini",
"reasoning_effort": "high",
"messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
}'

# Claude Sonnet 4.6 with an explicit thinking budget
curl http://localhost:4141/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4.6",
"reasoning_effort": "high",
"thinking_budget": 2048,
"messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
}'
```

### Anthropic-shaped requests (`/v1/messages`)

- `thinking: {"type": "enabled", "budget_tokens": N}` is translated into `reasoning_effort: "high"` for any reasoning-capable model, plus `thinking_budget` for adaptive-thinking models.
- `thinking: {"type": "disabled"}` suppresses both fields upstream.
- If the selected model supports neither knob, the thinking config is silently stripped and logged at debug level — the request still succeeds.
- Claude thinking streams emit `content_block_start` / `thinking_delta` / `signature_delta` / `content_block_stop` events before the text block, so Claude Code and similar clients see native thinking UIs.
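
That translation rule can be sketched as follows (a simplified stand-in for the proxy's context builder; names and shapes are illustrative and may differ from the real code):

```typescript
// Sketch of the Anthropic -> OpenAI thinking translation. Names are
// illustrative assumptions, not the proxy's actual helpers.
interface ThinkingConfig {
  type: "enabled" | "disabled"
  budget_tokens?: number
}

interface ModelSupports {
  reasoning?: boolean // model accepts reasoning_effort
  adaptiveThinking?: boolean // model accepts thinking_budget
}

function translateThinking(
  thinking: ThinkingConfig | undefined,
  supports: ModelSupports,
): { reasoning_effort?: string; thinking_budget?: number } {
  // "disabled" (or absent) suppresses both upstream fields.
  if (!thinking || thinking.type === "disabled") return {}
  return {
    reasoning_effort: supports.reasoning ? "high" : undefined,
    thinking_budget: supports.adaptiveThinking
      ? thinking.budget_tokens
      : undefined,
  }
}
```

Note the `disabled` branch returns an empty object, which is what suppresses both fields upstream, and a model with no reasoning support strips everything while the request still succeeds.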

```sh
# Extended thinking via the Anthropic surface
curl http://localhost:4141/v1/messages \
-H "Content-Type: application/json" \
-d '{
"model": "claude-sonnet-4.6",
"max_tokens": 1024,
"thinking": {"type": "enabled", "budget_tokens": 2048},
"messages": [{"role": "user", "content": "Think carefully: what is 17*23?"}]
}'
```
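
With `"stream": true`, a thinking-capable Claude model yields an SSE sequence along these lines before the text block (an abbreviated illustration, not a verbatim capture; payloads are trimmed):

```text
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"17*23 = 17*20 + 17*3 ..."}}

event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"..."}}

event: content_block_stop
data: {"type":"content_block_stop","index":0}
```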

## Using with Claude Code

This proxy can be used to power [Claude Code](https://docs.anthropic.com/en/claude-code), Anthropic's conversational AI assistant for developers.
16 changes: 16 additions & 0 deletions src/routes/chat-completions/handler.ts
@@ -8,6 +8,7 @@ import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
import { isNullish } from "~/lib/utils"
import { buildOpenAIReasoningContext } from "~/routes/reasoning-context"
import {
createChatCompletions,
type ChatCompletionResponse,
@@ -47,6 +48,21 @@ export async function handleCompletion(c: Context) {
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
}

const reasoningContext = buildOpenAIReasoningContext(payload, selectedModel)

if (payload.thinking_budget && !reasoningContext.thinkingBudget) {
consola.debug(
"Dropping unsupported OpenAI thinking_budget for model:",
payload.model,
)
}

payload = {
...payload,
reasoning_effort: reasoningContext.reasoningEffort,
thinking_budget: reasoningContext.thinkingBudget,
}

const response = await createChatCompletions(payload)

if (isNonStreaming(response)) {
7 changes: 5 additions & 2 deletions src/routes/messages/anthropic-types.ts
@@ -19,7 +19,7 @@ export interface AnthropicMessagesPayload {
name?: string
}
thinking?: {
type: "enabled"
type: "enabled" | "disabled"
budget_tokens?: number
}
service_tier?: "auto" | "standard_only"
@@ -56,6 +56,7 @@ export interface AnthropicToolUseBlock {
export interface AnthropicThinkingBlock {
type: "thinking"
thinking: string
signature?: string
}

export type AnthropicUserContentBlock =
@@ -92,6 +93,7 @@ export interface AnthropicResponse {
role: "assistant"
content: Array<AnthropicAssistantContentBlock>
model: string
reasoning_opaque?: string
stop_reason:
| "end_turn"
| "max_tokens"
@@ -195,7 +197,8 @@ export type AnthropicStreamEventData =
export interface AnthropicStreamState {
messageStartSent: boolean
contentBlockIndex: number
contentBlockOpen: boolean
currentBlockType?: "text" | "thinking" | "tool_use"
reasoningOpaque?: string
toolCalls: {
[openAIToolIndex: number]: {
id: string
7 changes: 5 additions & 2 deletions src/routes/messages/count-tokens-handler.ts
@@ -4,6 +4,7 @@ import consola from "consola"

import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
import { buildAnthropicReasoningContext } from "~/routes/reasoning-context"

import { type AnthropicMessagesPayload } from "./anthropic-types"
import { translateToOpenAI } from "./non-stream-translation"
@@ -17,11 +18,13 @@ export async function handleCountTokens(c: Context) {

const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()

const openAIPayload = translateToOpenAI(anthropicPayload)

const selectedModel = state.models?.data.find(
(model) => model.id === anthropicPayload.model,
)
const openAIPayload = translateToOpenAI(
anthropicPayload,
buildAnthropicReasoningContext(anthropicPayload, selectedModel),
)

if (!selectedModel) {
consola.warn("Model not found, returning default token count")
23 changes: 21 additions & 2 deletions src/routes/messages/handler.ts
@@ -6,6 +6,7 @@ import { streamSSE } from "hono/streaming"
import { awaitApproval } from "~/lib/approval"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { buildAnthropicReasoningContext } from "~/routes/reasoning-context"
import {
createChatCompletions,
type ChatCompletionChunk,
@@ -28,7 +29,26 @@ export async function handleCompletion(c: Context) {
const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))

const openAIPayload = translateToOpenAI(anthropicPayload)
const selectedModel = state.models?.data.find(
(model) => model.id === anthropicPayload.model,
)
const reasoningContext = buildAnthropicReasoningContext(
anthropicPayload,
selectedModel,
)

if (
anthropicPayload.thinking?.type === "enabled"
&& reasoningContext.reasoningEffort === undefined
&& reasoningContext.thinkingBudget === undefined
) {
consola.debug(
"Stripping unsupported Anthropic thinking config for model:",
anthropicPayload.model,
)
}

const openAIPayload = translateToOpenAI(anthropicPayload, reasoningContext)
consola.debug(
"Translated OpenAI request payload:",
JSON.stringify(openAIPayload),
@@ -58,7 +78,6 @@ export async function handleCompletion(c: Context) {
const streamState: AnthropicStreamState = {
messageStartSent: false,
contentBlockIndex: 0,
contentBlockOpen: false,
toolCalls: {},
}

64 changes: 41 additions & 23 deletions src/routes/messages/non-stream-translation.ts
@@ -1,3 +1,5 @@
import type { ReasoningContext } from "~/routes/reasoning-context"

import {
type ChatCompletionResponse,
type ChatCompletionsPayload,
@@ -28,6 +30,7 @@ import { mapOpenAIStopReasonToAnthropic } from "./utils"

export function translateToOpenAI(
payload: AnthropicMessagesPayload,
context: ReasoningContext,
): ChatCompletionsPayload {
return {
model: translateModelName(payload.model),
@@ -43,6 +46,8 @@
user: payload.metadata?.user_id,
tools: translateAnthropicToolsToOpenAI(payload.tools),
tool_choice: translateAnthropicToolChoiceToOpenAI(payload.tool_choice),
reasoning_effort: context.reasoningEffort,
thinking_budget: context.thinkingBudget,
}
}

@@ -281,35 +286,23 @@ function translateAnthropicToolChoiceToOpenAI(
export function translateToAnthropic(
response: ChatCompletionResponse,
): AnthropicResponse {
// Merge content from all choices
const allTextBlocks: Array<AnthropicTextBlock> = []
const allToolUseBlocks: Array<AnthropicToolUseBlock> = []
let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null =
null // default
stopReason = response.choices[0]?.finish_reason ?? stopReason

// Process all choices to extract text and tool use blocks
for (const choice of response.choices) {
const textBlocks = getAnthropicTextBlocks(choice.message.content)
const toolUseBlocks = getAnthropicToolUseBlocks(choice.message.tool_calls)

allTextBlocks.push(...textBlocks)
allToolUseBlocks.push(...toolUseBlocks)

// Use the finish_reason from the first choice, or prioritize tool_calls
if (choice.finish_reason === "tool_calls" || stopReason === "stop") {
stopReason = choice.finish_reason
}
}

// Note: GitHub Copilot doesn't generate thinking blocks, so we don't include them in responses
const content = response.choices.flatMap((choice) => [
...getAnthropicThinkingBlocks(choice.message.reasoning_text),
...getAnthropicTextBlocks(choice.message.content),
...getAnthropicToolUseBlocks(choice.message.tool_calls),
])
const reasoningOpaque = response.choices.find(
(choice) => choice.message.reasoning_opaque,
)?.message.reasoning_opaque
const stopReason = getAnthropicStopReason(response.choices)

return {
id: response.id,
type: "message",
role: "assistant",
model: response.model,
content: [...allTextBlocks, ...allToolUseBlocks],
reasoning_opaque: reasoningOpaque ?? undefined,
content,
stop_reason: mapOpenAIStopReasonToAnthropic(stopReason),
stop_sequence: null,
usage: {
Expand All @@ -326,6 +319,31 @@ export function translateToAnthropic(
}
}

function getAnthropicStopReason(
choices: ChatCompletionResponse["choices"],
): "stop" | "length" | "tool_calls" | "content_filter" | null {
let stopReason: "stop" | "length" | "tool_calls" | "content_filter" | null =
choices[0]?.finish_reason ?? null

for (const choice of choices) {
if (choice.finish_reason === "tool_calls" || stopReason === "stop") {
stopReason = choice.finish_reason
}
}

return stopReason
}

function getAnthropicThinkingBlocks(
reasoningText: string | null | undefined,
): Array<AnthropicThinkingBlock> {
if (!reasoningText) {
return []
}

return [{ type: "thinking", thinking: reasoningText }]
}

function getAnthropicTextBlocks(
messageContent: Message["content"],
): Array<AnthropicTextBlock> {