Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,16 @@ export const oaCompatHelper: ProviderHelper = ({ adjustCacheUsage, safetyIdentif
},
normalizeUsage: (usage: Usage) => {
let inputTokens = usage.prompt_tokens ?? 0
const outputTokens = usage.completion_tokens ?? 0
const reasoningTokens = usage.completion_tokens_details?.reasoning_tokens ?? undefined
const completionTokens = usage.completion_tokens ?? 0
const reasoningTokensRaw = usage.completion_tokens_details?.reasoning_tokens
// Per OpenAI chat-completions spec, completion_tokens already includes reasoning_tokens.
// Downstream cost calculation bills outputCost + reasoningCost separately, so we must
// subtract here to avoid double-counting reasoning. Some providers (e.g. Moonshot Kimi
// K2.6) report reasoning_tokens > completion_tokens; in that case clamp reasoning down
// to completion so the invariant `outputTokens + reasoningTokens === completion_tokens`
// holds and we charge the same total the upstream API billed (no over-charge).
const reasoningTokens =
reasoningTokensRaw !== undefined ? Math.min(reasoningTokensRaw, completionTokens) : undefined
let cacheReadTokens = usage.cached_tokens ?? usage.prompt_tokens_details?.cached_tokens ?? undefined
const cacheWriteTokens = usage.prompt_tokens_details?.cache_creation_input_tokens ?? undefined

Expand All @@ -72,7 +80,7 @@ export const oaCompatHelper: ProviderHelper = ({ adjustCacheUsage, safetyIdentif

return {
inputTokens: inputTokens - (cacheReadTokens ?? 0),
outputTokens,
outputTokens: completionTokens - (reasoningTokens ?? 0),
reasoningTokens,
cacheReadTokens,
cacheWrite5mTokens: cacheWriteTokens,
Expand Down
80 changes: 80 additions & 0 deletions packages/console/app/test/zen-usage.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { describe, expect, test } from "bun:test"
import { oaCompatHelper } from "../src/routes/zen/util/provider/openai-compatible"
import { openaiHelper } from "../src/routes/zen/util/provider/openai"

// Identity passthrough whose only job is to pin its argument to the
// oaCompatHelper return type, giving the test bodies full inference on
// normalizeUsage results.
const helper = (h: ReturnType<typeof oaCompatHelper>) => {
  return h
}

// Request/provider model pair from the issue report (Moonshot Kimi K2.6).
const ctx = { reqModel: "kimi-k2.6", providerModel: "moonshotai/kimi-k2.6-20260420" }

describe("oaCompatHelper.normalizeUsage (#24268)", () => {
  test("subtracts reasoning_tokens from completion_tokens so billing does not double-count", () => {
    const normalized = helper(oaCompatHelper(ctx)).normalizeUsage({
      prompt_tokens: 22,
      completion_tokens: 1226,
      total_tokens: 1248,
      completion_tokens_details: { reasoning_tokens: 790 },
    })

    // 1226 completion tokens minus 790 reasoning tokens leaves 436 plain output tokens.
    expect(normalized.outputTokens).toBe(436)
    expect(normalized.reasoningTokens).toBe(790)
    expect(normalized.outputTokens + (normalized.reasoningTokens ?? 0)).toBe(1226)
  })

  test("clamps reasoning to completion when reasoning_tokens > completion_tokens (reporter's 'Hi' example)", () => {
    const normalized = helper(oaCompatHelper(ctx)).normalizeUsage({
      prompt_tokens: 22,
      completion_tokens: 77,
      total_tokens: 99,
      completion_tokens_details: { reasoning_tokens: 78 },
    })

    // Reasoning is clamped to completion_tokens and output floors at 0, so
    // outputTokens + reasoningTokens === completion_tokens and the billed
    // total matches what the upstream API reported.
    expect(normalized.outputTokens).toBe(0)
    expect(normalized.reasoningTokens).toBe(77)
    expect(normalized.outputTokens + (normalized.reasoningTokens ?? 0)).toBe(77)
  })

  test("leaves outputTokens unchanged when no reasoning_tokens are reported", () => {
    const normalized = helper(oaCompatHelper(ctx)).normalizeUsage({
      prompt_tokens: 22,
      completion_tokens: 77,
      total_tokens: 99,
    })

    // Without completion_tokens_details there is nothing to subtract.
    expect(normalized.outputTokens).toBe(77)
    expect(normalized.reasoningTokens).toBeUndefined()
  })

  test("matches OpenAI Responses helper convention for the same logical usage", () => {
    const fromCompat = helper(oaCompatHelper(ctx)).normalizeUsage({
      prompt_tokens: 22,
      completion_tokens: 1226,
      completion_tokens_details: { reasoning_tokens: 790 },
    })
    const fromResponses = openaiHelper(ctx).normalizeUsage({
      input_tokens: 22,
      output_tokens: 1226,
      output_tokens_details: { reasoning_tokens: 790 },
    })

    // Both helpers must normalize the same logical usage to identical numbers.
    expect(fromCompat.outputTokens).toBe(fromResponses.outputTokens)
    expect(fromCompat.reasoningTokens).toBe(fromResponses.reasoningTokens)
    expect(fromCompat.inputTokens).toBe(fromResponses.inputTokens)
  })
})
Loading