From be7f33b150654cd0f9d224f4aeb050601eeafab0 Mon Sep 17 00:00:00 2001
From: 7Sageer <7sageer@djwcb.cn>
Date: Wed, 17 Jun 2026 20:24:55 +0800
Subject: [PATCH] fix: skip debug TPS when the stream is too short

Decode TPS is meaningless when the streamed window is only ~1ms
(short / single-chunk tool-call turns), since dividing output tokens
by a timer-quantized duration reports inflated rates like tens of
thousands of tok/s. Only compute TPS when the stream window reaches
50ms; otherwise show the raw token count and duration.
---
 .changeset/fix-debug-tps-short-streams.md     |  5 +++++
 .../kimi-code/src/utils/usage/debug-timing.ts | 20 ++++++++++++++++---
 .../test/utils/usage/debug-timing.test.ts     | 20 +++++++++++++++++++
 3 files changed, 42 insertions(+), 3 deletions(-)
 create mode 100644 .changeset/fix-debug-tps-short-streams.md

diff --git a/.changeset/fix-debug-tps-short-streams.md b/.changeset/fix-debug-tps-short-streams.md
new file mode 100644
index 000000000..618a266bb
--- /dev/null
+++ b/.changeset/fix-debug-tps-short-streams.md
@@ -0,0 +1,5 @@
+---
+"@moonshot-ai/kimi-code": patch
+---
+
+Skip debug TPS when the output stream is too short to measure reliably.
diff --git a/apps/kimi-code/src/utils/usage/debug-timing.ts b/apps/kimi-code/src/utils/usage/debug-timing.ts
index 76d400506..457b686a3 100644
--- a/apps/kimi-code/src/utils/usage/debug-timing.ts
+++ b/apps/kimi-code/src/utils/usage/debug-timing.ts
@@ -4,6 +4,14 @@ export interface StepTimingInput {
   readonly usage?: { readonly output: number } | undefined;
 }
 
+// Minimum stream duration, in milliseconds, for decode TPS to be meaningful.
+// Shorter windows are dominated by `Date.now()`'s ~1ms quantization (short /
+// single-chunk tool-call turns can drain in 1ms), so dividing output tokens
+// by such a duration would report inflated rates like tens of thousands of
+// tok/s. Below the threshold we report the raw token count and duration
+// instead of a meaningless ratio.
+const MIN_STREAM_MS_FOR_TPS = 50;
+
 export function formatStepDebugTiming(input: StepTimingInput): string | undefined {
   const latency = input.llmFirstTokenLatencyMs;
   const streamMs = input.llmStreamDurationMs;
@@ -11,9 +19,15 @@ export function formatStepDebugTiming(input: StepTimingInput): string | undefine
 
   const parts: string[] = [`TTFT: ${formatDuration(latency)}`];
   const outputTokens = input.usage?.output;
-  if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) {
-    const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
-    parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
+  if (outputTokens !== undefined && outputTokens > 0) {
+    if (streamMs >= MIN_STREAM_MS_FOR_TPS) {
+      const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
+      parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
+    } else {
+      parts.push(
+        `${outputTokens} tokens in ${formatDuration(streamMs)} (stream too short for TPS)`,
+      );
+    }
   }
   return `[Debug] ${parts.join(' | ')}`;
 }
diff --git a/apps/kimi-code/test/utils/usage/debug-timing.test.ts b/apps/kimi-code/test/utils/usage/debug-timing.test.ts
index b986f08e6..be871f3c1 100644
--- a/apps/kimi-code/test/utils/usage/debug-timing.test.ts
+++ b/apps/kimi-code/test/utils/usage/debug-timing.test.ts
@@ -27,6 +27,26 @@ describe('formatStepDebugTiming', () => {
     expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)');
   });
 
+  it('omits TPS when the streamed window is too short to measure', () => {
+    const result = formatStepDebugTiming({
+      llmFirstTokenLatencyMs: 1200,
+      llmStreamDurationMs: 1,
+      usage: { output: 44 },
+    });
+    expect(result).toBe(
+      '[Debug] TTFT: 1.2s | 44 tokens in 1ms (stream too short for TPS)',
+    );
+  });
+
+  it('computes TPS once the streamed window reaches the reliability threshold', () => {
+    const result = formatStepDebugTiming({
+      llmFirstTokenLatencyMs: 200,
+      llmStreamDurationMs: 50,
+      usage: { output: 20 },
+    });
+    expect(result).toBe('[Debug] TTFT: 200ms | TPS: 400.0 tok/s (20 tokens in 50ms)');
+  });
+
   it('formats durations under 1s as milliseconds', () => {
     const result = formatStepDebugTiming({
       llmFirstTokenLatencyMs: 50,