Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-debug-tps-short-streams.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Skip debug TPS when the output stream is too short to measure reliably.
20 changes: 17 additions & 3 deletions apps/kimi-code/src/utils/usage/debug-timing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,30 @@ export interface StepTimingInput {
readonly usage?: { readonly output: number } | undefined;
}

// Minimum stream duration, in milliseconds, for which a TPS figure is
// reported. The comparison in `formatStepDebugTiming` is inclusive, so a
// stream of exactly this length still gets a TPS value.
//
// Decode TPS is only meaningful when the output actually streamed over a
// measurable window. Below this threshold the duration is dominated by
// `Date.now()`'s ~1ms quantization (short / single-chunk tool-call turns can
// drain in 1ms), so dividing output tokens by it would report inflated rates
// like tens of thousands of tok/s. In that case we report the raw counts
// instead of a meaningless ratio.
const MIN_STREAM_MS_FOR_TPS = 50;

/**
 * Builds the one-line `[Debug]` timing summary for a step.
 *
 * The summary always starts with a `TTFT: …` segment. When `usage.output` is
 * a positive number, one more segment is appended, joined with ` | `:
 * - stream duration at or above `MIN_STREAM_MS_FOR_TPS`: the computed TPS
 *   (one decimal place) followed by the token count and duration;
 * - shorter stream: the token count and duration with a
 *   "stream too short for TPS" note instead of a rate.
 *
 * NOTE(review): this span is a rendered diff. The three lines of the old
 * `streamMs > 0` branch appear to be the removed lines, shown directly before
 * the nine lines that replace them; only the replacement belongs to the
 * resulting function.
 *
 * @param input - Step timings (`llmFirstTokenLatencyMs`,
 *   `llmStreamDurationMs`) and optional `usage.output` token count.
 * @returns The formatted summary, or `undefined` when either timing is
 *   missing.
 */
export function formatStepDebugTiming(input: StepTimingInput): string | undefined {
const latency = input.llmFirstTokenLatencyMs;
const streamMs = input.llmStreamDurationMs;
// Both timings are required; with either one missing nothing is reported.
if (latency === undefined || streamMs === undefined) return undefined;

const parts: string[] = [`TTFT: ${formatDuration(latency)}`];
const outputTokens = input.usage?.output;
// NOTE(review): pre-change branch (removed side of the diff) — it computed
// TPS for any positive duration, however short.
if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) {
const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
// Post-change branch (added side of the diff): the duration check moves
// inside, so a positive token count is always reported in some form.
if (outputTokens !== undefined && outputTokens > 0) {
if (streamMs >= MIN_STREAM_MS_FOR_TPS) {
// streamMs is in milliseconds; divide by 1000 to get tokens per second.
const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
} else {
// Too short to yield a trustworthy rate: show the raw counts only.
parts.push(
`${outputTokens} tokens in ${formatDuration(streamMs)} (stream too short for TPS)`,
);
}
}
return `[Debug] ${parts.join(' | ')}`;
}
Expand Down
20 changes: 20 additions & 0 deletions apps/kimi-code/test/utils/usage/debug-timing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,26 @@ describe('formatStepDebugTiming', () => {
expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)');
});

// A 1ms stream is below the threshold: expect the raw token count and the
// "too short" note rather than a TPS figure.
it('omits TPS when the streamed window is too short to measure', () => {
  const expected = '[Debug] TTFT: 1.2s | 44 tokens in 1ms (stream too short for TPS)';
  const actual = formatStepDebugTiming({
    llmFirstTokenLatencyMs: 1200,
    llmStreamDurationMs: 1,
    usage: { output: 44 },
  });
  expect(actual).toBe(expected);
});

// A stream lasting exactly the 50ms threshold still gets a computed TPS.
it('computes TPS once the streamed window reaches the reliability threshold', () => {
  const timing = {
    llmFirstTokenLatencyMs: 200,
    llmStreamDurationMs: 50,
    usage: { output: 20 },
  };
  expect(formatStepDebugTiming(timing)).toBe(
    '[Debug] TTFT: 200ms | TPS: 400.0 tok/s (20 tokens in 50ms)',
  );
});

it('formats durations under 1s as milliseconds', () => {
const result = formatStepDebugTiming({
llmFirstTokenLatencyMs: 50,
Expand Down
Loading