From be7f33b150654cd0f9d224f4aeb050601eeafab0 Mon Sep 17 00:00:00 2001 From: 7Sageer <7sageer@djwcb.cn> Date: Wed, 17 Jun 2026 20:24:55 +0800 Subject: [PATCH] fix: skip debug TPS when the stream is too short Decode TPS is meaningless when the streamed window is only ~1ms (short / single-chunk tool-call turns), since dividing output tokens by a timer-quantized duration reports inflated rates like tens of thousands of tok/s. Only compute TPS when the stream window reaches 50ms; otherwise show the raw token count and duration. --- .changeset/fix-debug-tps-short-streams.md | 5 +++++ .../kimi-code/src/utils/usage/debug-timing.ts | 20 ++++++++++++++++--- .../test/utils/usage/debug-timing.test.ts | 20 +++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 .changeset/fix-debug-tps-short-streams.md diff --git a/.changeset/fix-debug-tps-short-streams.md b/.changeset/fix-debug-tps-short-streams.md new file mode 100644 index 000000000..618a266bb --- /dev/null +++ b/.changeset/fix-debug-tps-short-streams.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": patch +--- + +Skip debug TPS when the output stream is too short to measure reliably. diff --git a/apps/kimi-code/src/utils/usage/debug-timing.ts b/apps/kimi-code/src/utils/usage/debug-timing.ts index 76d400506..457b686a3 100644 --- a/apps/kimi-code/src/utils/usage/debug-timing.ts +++ b/apps/kimi-code/src/utils/usage/debug-timing.ts @@ -4,6 +4,14 @@ export interface StepTimingInput { readonly usage?: { readonly output: number } | undefined; } +// Decode TPS is only meaningful when the output actually streamed over a +// measurable window. Below this threshold the duration is dominated by +// `Date.now()`'s ~1ms quantization (short / single-chunk tool-call turns can +// drain in 1ms), so dividing output tokens by it would report inflated rates +// like tens of thousands of tok/s. In that case we report the raw counts +// instead of a meaningless ratio. +const MIN_STREAM_MS_FOR_TPS = 50; + export function formatStepDebugTiming(input: StepTimingInput): string | undefined { const latency = input.llmFirstTokenLatencyMs; const streamMs = input.llmStreamDurationMs; @@ -11,9 +19,15 @@ export function formatStepDebugTiming(input: StepTimingInput): string | undefine const parts: string[] = [`TTFT: ${formatDuration(latency)}`]; const outputTokens = input.usage?.output; - if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) { - const tps = (outputTokens / (streamMs / 1000)).toFixed(1); - parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`); + if (outputTokens !== undefined && outputTokens > 0) { + if (streamMs >= MIN_STREAM_MS_FOR_TPS) { + const tps = (outputTokens / (streamMs / 1000)).toFixed(1); + parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`); + } else { + parts.push( + `${outputTokens} tokens in ${formatDuration(streamMs)} (stream too short for TPS)`, + ); + } } return `[Debug] ${parts.join(' | ')}`; } diff --git a/apps/kimi-code/test/utils/usage/debug-timing.test.ts b/apps/kimi-code/test/utils/usage/debug-timing.test.ts index b986f08e6..be871f3c1 100644 --- a/apps/kimi-code/test/utils/usage/debug-timing.test.ts +++ b/apps/kimi-code/test/utils/usage/debug-timing.test.ts @@ -27,6 +27,26 @@ describe('formatStepDebugTiming', () => { expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)'); }); + it('omits TPS when the streamed window is too short to measure', () => { + const result = formatStepDebugTiming({ + llmFirstTokenLatencyMs: 1200, + llmStreamDurationMs: 1, + usage: { output: 44 }, + }); + expect(result).toBe( + '[Debug] TTFT: 1.2s | 44 tokens in 1ms (stream too short for TPS)', + ); + }); + + it('computes TPS once the streamed window reaches the reliability threshold', () => { + const result = formatStepDebugTiming({ + llmFirstTokenLatencyMs: 200, + llmStreamDurationMs: 50, + usage: { output: 20 }, + }); + expect(result).toBe('[Debug] TTFT: 200ms | TPS: 400.0 tok/s (20 tokens in 50ms)'); + }); + it('formats durations under 1s as milliseconds', () => { const result = formatStepDebugTiming({ llmFirstTokenLatencyMs: 50,