Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
997 changes: 499 additions & 498 deletions bun.lock

Large diffs are not rendered by default.

76 changes: 76 additions & 0 deletions src/lib/request-logger.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import type { Context, Next } from "hono"

// Context-storage keys used to hand model information from route handlers
// to the request-logger middleware (read back in formatModelSuffix).
const requestModelKey = "requestModel"
const resolvedModelKey = "resolvedModel"
const responseModelKey = "responseModel"

/** Records the model name the client originally asked for; no-op for empty values. */
export function setRequestModel(c: Context, model: string | null | undefined) {
  if (!model) return
  c.set(requestModelKey, model)
}

/** Records the model the request was resolved to after translation; no-op for empty values. */
export function setResolvedModel(c: Context, model: string | null | undefined) {
  if (!model) return
  c.set(resolvedModelKey, model)
}

/** Records the model name reported by the upstream response; no-op for empty values. */
export function setResponseModel(c: Context, model: string | null | undefined) {
  if (!model) return
  c.set(responseModelKey, model)
}

/**
 * Hono middleware that logs an incoming-request line before dispatch and a
 * completion line (status, duration, model suffix) afterwards.
 *
 * The completion line is emitted from a `finally` block so it is still
 * written when a downstream handler throws — previously a thrown error
 * silently dropped the "-->" line for exactly the requests most worth
 * tracing. The error itself propagates unchanged for Hono's error handling.
 */
export async function requestLogger(c: Context, next: Next) {
  const start = Date.now()
  const requestTarget = getRequestTarget(c)

  console.log(`<-- ${c.req.method} ${requestTarget}`)

  try {
    await next()
  } finally {
    const duration = formatDuration(Date.now() - start)
    const modelSuffix = formatModelSuffix(c)

    console.log(
      `--> ${c.req.method} ${requestTarget} ${c.res.status} ${duration}${modelSuffix}`,
    )
  }
}

/** Extracts the path plus query string ("target") from the request URL. */
function getRequestTarget(c: Context): string {
  const { pathname, search } = new URL(c.req.url)
  return pathname + search
}

/** Renders a millisecond duration as "Nms" under one second, otherwise as rounded "Ns". */
function formatDuration(durationMs: number): string {
  return durationMs < 1000 ? `${durationMs}ms` : `${Math.round(durationMs / 1000)}s`
}

function formatModelSuffix(c: Context): string {
const requestModel = c.get(requestModelKey) as string | undefined
const resolvedModel = c.get(resolvedModelKey) as string | undefined
const responseModel = c.get(responseModelKey) as string | undefined

if (responseModel) {
return ` model=${responseModel}`
}

if (resolvedModel && requestModel && resolvedModel !== requestModel) {
return ` requested_model=${requestModel} resolved_model=${resolvedModel}`
}

if (resolvedModel) {
return ` model=${resolvedModel}`
}

if (requestModel) {
return ` model=${requestModel}`
}

return ""
}
43 changes: 32 additions & 11 deletions src/routes/chat-completions/handler.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
import type { Context } from "hono"

import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import { checkRateLimit } from "~/lib/rate-limit"
import {
setRequestModel,
setResolvedModel,
setResponseModel,
} from "~/lib/request-logger"
import { state } from "~/lib/state"
import { getTokenCount } from "~/lib/tokenizer"
import { isNullish } from "~/lib/utils"
import {
createChatCompletions,
type ChatCompletionResponse,
type ChatCompletionsPayload,
usesMaxCompletionTokens,
} from "~/services/copilot/create-chat-completions"

export async function handleCompletion(c: Context) {
await checkRateLimit(state)

let payload = await c.req.json<ChatCompletionsPayload>()
setRequestModel(c, payload.model)
setResolvedModel(c, payload.model)
consola.debug("Request payload:", JSON.stringify(payload).slice(-400))

// Find the selected model
Expand All @@ -40,26 +47,40 @@ export async function handleCompletion(c: Context) {
if (state.manualApprove) await awaitApproval()

if (isNullish(payload.max_tokens)) {
payload = {
...payload,
max_tokens: selectedModel?.capabilities.limits.max_output_tokens,
}
consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
const defaultMaxTokens =
selectedModel?.capabilities.limits.max_output_tokens
payload =
usesMaxCompletionTokens(payload.model) ?
{
...payload,
max_completion_tokens: defaultMaxTokens,
}
: {
...payload,
max_tokens: defaultMaxTokens,
}

consola.debug(
"Set output token limit to:",
JSON.stringify(defaultMaxTokens),
)
}

const response = await createChatCompletions(payload)

if (isNonStreaming(response)) {
setResponseModel(c, response.model)
consola.debug("Non-streaming response:", JSON.stringify(response))
return c.json(response)
}

consola.debug("Streaming response")
return streamSSE(c, async (stream) => {
for await (const chunk of response) {
consola.debug("Streaming chunk:", JSON.stringify(chunk))
await stream.writeSSE(chunk as SSEMessage)
}
return new Response(response, {
headers: {
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
},
})
}

Expand Down
1 change: 1 addition & 0 deletions src/routes/messages/anthropic-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ export interface AnthropicStreamState {
messageStartSent: boolean
contentBlockIndex: number
contentBlockOpen: boolean
currentContentBlockType?: "text" | "thinking" | "tool_use"
toolCalls: {
[openAIToolIndex: number]: {
id: string
Expand Down
4 changes: 3 additions & 1 deletion src/routes/messages/count-tokens-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { getTokenCount } from "~/lib/tokenizer"

import { type AnthropicMessagesPayload } from "./anthropic-types"
import { translateToOpenAI } from "./non-stream-translation"
import { resolveModelId } from "./utils"

/**
* Handles token counting for Anthropic messages
Expand All @@ -18,9 +19,10 @@ export async function handleCountTokens(c: Context) {
const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()

const openAIPayload = translateToOpenAI(anthropicPayload)
const resolvedModelId = resolveModelId(anthropicPayload.model)

const selectedModel = state.models?.data.find(
(model) => model.id === anthropicPayload.model,
(model) => model.id === resolvedModelId,
)

if (!selectedModel) {
Expand Down
18 changes: 17 additions & 1 deletion src/routes/messages/handler.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import type { Context } from "hono"

import consola from "consola"
import { events } from "fetch-event-stream"
import { streamSSE } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import { checkRateLimit } from "~/lib/rate-limit"
import {
setRequestModel,
setResolvedModel,
setResponseModel,
} from "~/lib/request-logger"
import { state } from "~/lib/state"
import {
createChatCompletions,
Expand All @@ -26,9 +32,11 @@ export async function handleCompletion(c: Context) {
await checkRateLimit(state)

const anthropicPayload = await c.req.json<AnthropicMessagesPayload>()
setRequestModel(c, anthropicPayload.model)
consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))

const openAIPayload = translateToOpenAI(anthropicPayload)
setResolvedModel(c, openAIPayload.model)
consola.debug(
"Translated OpenAI request payload:",
JSON.stringify(openAIPayload),
Expand All @@ -41,6 +49,7 @@ export async function handleCompletion(c: Context) {
const response = await createChatCompletions(openAIPayload)

if (isNonStreaming(response)) {
setResponseModel(c, response.model)
consola.debug(
"Non-streaming response from Copilot:",
JSON.stringify(response).slice(-400),
Expand All @@ -59,10 +68,17 @@ export async function handleCompletion(c: Context) {
messageStartSent: false,
contentBlockIndex: 0,
contentBlockOpen: false,
currentContentBlockType: undefined,
toolCalls: {},
}

for await (const rawEvent of response) {
const eventStream = events(
new Response(response, {
headers: { "Content-Type": "text/event-stream" },
}),
)

for await (const rawEvent of eventStream) {
consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
if (rawEvent.data === "[DONE]") {
break
Expand Down
14 changes: 2 additions & 12 deletions src/routes/messages/non-stream-translation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ import {
type AnthropicUserContentBlock,
type AnthropicUserMessage,
} from "./anthropic-types"
import { mapOpenAIStopReasonToAnthropic } from "./utils"
import { mapOpenAIStopReasonToAnthropic, resolveModelId } from "./utils"

// Payload translation

export function translateToOpenAI(
payload: AnthropicMessagesPayload,
): ChatCompletionsPayload {
return {
model: translateModelName(payload.model),
model: resolveModelId(payload.model),
messages: translateAnthropicMessagesToOpenAI(
payload.messages,
payload.system,
Expand All @@ -46,16 +46,6 @@ export function translateToOpenAI(
}
}

function translateModelName(model: string): string {
// Subagent requests use a specific model number which Copilot doesn't support
if (model.startsWith("claude-sonnet-4-")) {
return model.replace(/^claude-sonnet-4-.*/, "claude-sonnet-4")
} else if (model.startsWith("claude-opus-")) {
return model.replace(/^claude-opus-4-.*/, "claude-opus-4")
}
return model
}

function translateAnthropicMessagesToOpenAI(
anthropicMessages: Array<AnthropicMessage>,
system: string | Array<AnthropicTextBlock> | undefined,
Expand Down
Loading