diff --git a/.changeset/fix-debug-tps.md b/.changeset/fix-debug-tps.md new file mode 100644 index 000000000..3b0aeec01 --- /dev/null +++ b/.changeset/fix-debug-tps.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": patch +--- + +Report debug TPS over the full model response window so short tool-call streams do not show inflated rates. diff --git a/apps/kimi-code/src/utils/usage/debug-timing.ts b/apps/kimi-code/src/utils/usage/debug-timing.ts index 76d400506..c50d30ef2 100644 --- a/apps/kimi-code/src/utils/usage/debug-timing.ts +++ b/apps/kimi-code/src/utils/usage/debug-timing.ts @@ -11,9 +11,12 @@ export function formatStepDebugTiming(input: StepTimingInput): string | undefine const parts: string[] = [`TTFT: ${formatDuration(latency)}`]; const outputTokens = input.usage?.output; - if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) { - const tps = (outputTokens / (streamMs / 1000)).toFixed(1); - parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`); + const totalMs = latency + streamMs; + if (outputTokens !== undefined && outputTokens > 0 && totalMs > 0) { + const tps = (outputTokens / (totalMs / 1000)).toFixed(1); + parts.push( + `TPS: ${tps} tok/s (${outputTokens} tokens over ${formatDuration(totalMs)}, stream ${formatDuration(streamMs)})`, + ); } return `[Debug] ${parts.join(' | ')}`; } diff --git a/apps/kimi-code/test/utils/usage/debug-timing.test.ts b/apps/kimi-code/test/utils/usage/debug-timing.test.ts index b986f08e6..98207f811 100644 --- a/apps/kimi-code/test/utils/usage/debug-timing.test.ts +++ b/apps/kimi-code/test/utils/usage/debug-timing.test.ts @@ -24,7 +24,20 @@ describe('formatStepDebugTiming', () => { llmStreamDurationMs: 5000, usage: { output: 200 }, }); - expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)'); + expect(result).toBe( + '[Debug] TTFT: 800ms | TPS: 34.5 tok/s (200 tokens over 5.8s, stream 5.0s)', + ); + }); + + it('does not inflate TPS when the streamed window is tiny', () => { + const result = formatStepDebugTiming({ + llmFirstTokenLatencyMs: 1200, + llmStreamDurationMs: 1, + usage: { output: 44 }, + }); + expect(result).toBe( + '[Debug] TTFT: 1.2s | TPS: 36.6 tok/s (44 tokens over 1.2s, stream 1ms)', + ); }); it('formats durations under 1s as milliseconds', () => {