MoonshotAI · 7Sageer · Jun 17, 2026 · Jun 17, 2026
diff --git a/.changeset/fix-debug-tps-short-streams.md b/.changeset/fix-debug-tps-short-streams.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Skip debug TPS when the output stream is too short to measure reliably.
diff --git a/apps/kimi-code/src/utils/usage/debug-timing.ts b/apps/kimi-code/src/utils/usage/debug-timing.ts
@@ -4,16 +4,30 @@ export interface StepTimingInput {
   readonly usage?: { readonly output: number } | undefined;
 }
 
+// Decode TPS is only meaningful when the output actually streamed over a
+// measurable window. For streams shorter than this many milliseconds the
+// duration is dominated by `Date.now()`'s ~1ms quantization (short /
+// single-chunk tool-call turns can drain in 1ms), so dividing output tokens by
+// it would report inflated rates like tens of thousands of tok/s. In that case
+// we report the raw token count and duration instead of a meaningless ratio.
+const MIN_STREAM_MS_FOR_TPS = 50;
+
 export function formatStepDebugTiming(input: StepTimingInput): string | undefined {
   const latency = input.llmFirstTokenLatencyMs;
   const streamMs = input.llmStreamDurationMs;
   if (latency === undefined || streamMs === undefined) return undefined;
 
   const parts: string[] = [`TTFT: ${formatDuration(latency)}`];
   const outputTokens = input.usage?.output;
-  if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) {
-    const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
-    parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
+  if (outputTokens !== undefined && outputTokens > 0) {
+    if (streamMs >= MIN_STREAM_MS_FOR_TPS) {
+      const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
+      parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
+    } else {
+      parts.push(
+        `${outputTokens} tokens in ${formatDuration(streamMs)} (stream too short for TPS)`,
+      );
+    }
   }
   return `[Debug] ${parts.join(' | ')}`;
 }

diff --git a/apps/kimi-code/test/utils/usage/debug-timing.test.ts b/apps/kimi-code/test/utils/usage/debug-timing.test.ts
@@ -27,6 +27,26 @@ describe('formatStepDebugTiming', () => {
     expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)');
   });
 
+  it('omits TPS when the streamed window is too short to measure', () => {
+    const result = formatStepDebugTiming({
+      llmFirstTokenLatencyMs: 1200,
+      llmStreamDurationMs: 1,
+      usage: { output: 44 },
+    });
+    expect(result).toBe(
+      '[Debug] TTFT: 1.2s | 44 tokens in 1ms (stream too short for TPS)',
+    );
+  });
+
+  it('computes TPS once the streamed window reaches the reliability threshold', () => {
+    const result = formatStepDebugTiming({
+      llmFirstTokenLatencyMs: 200,
+      llmStreamDurationMs: 50,
+      usage: { output: 20 },
+    });
+    expect(result).toBe('[Debug] TTFT: 200ms | TPS: 400.0 tok/s (20 tokens in 50ms)');
+  });
+
   it('formats durations under 1s as milliseconds', () => {
     const result = formatStepDebugTiming({
       llmFirstTokenLatencyMs: 50,