Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-debug-tps.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Report debug TPS over the full model response window (time to first token plus stream duration) so that short tool-call streams no longer show inflated rates.
9 changes: 6 additions & 3 deletions apps/kimi-code/src/utils/usage/debug-timing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ export function formatStepDebugTiming(input: StepTimingInput): string | undefine

const parts: string[] = [`TTFT: ${formatDuration(latency)}`];
const outputTokens = input.usage?.output;
if (outputTokens !== undefined && outputTokens > 0 && streamMs > 0) {
const tps = (outputTokens / (streamMs / 1000)).toFixed(1);
parts.push(`TPS: ${tps} tok/s (${outputTokens} tokens in ${formatDuration(streamMs)})`);
const totalMs = latency + streamMs;
if (outputTokens !== undefined && outputTokens > 0 && totalMs > 0) {
const tps = (outputTokens / (totalMs / 1000)).toFixed(1);
parts.push(
`TPS: ${tps} tok/s (${outputTokens} tokens over ${formatDuration(totalMs)}, stream ${formatDuration(streamMs)})`,
);
}
return `[Debug] ${parts.join(' | ')}`;
}
Expand Down
15 changes: 14 additions & 1 deletion apps/kimi-code/test/utils/usage/debug-timing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,20 @@ describe('formatStepDebugTiming', () => {
llmStreamDurationMs: 5000,
usage: { output: 200 },
});
expect(result).toBe('[Debug] TTFT: 800ms | TPS: 40.0 tok/s (200 tokens in 5.0s)');
expect(result).toBe(
'[Debug] TTFT: 800ms | TPS: 34.5 tok/s (200 tokens over 5.8s, stream 5.0s)',
);
});

it('does not inflate TPS when the streamed window is tiny', () => {
const result = formatStepDebugTiming({
llmFirstTokenLatencyMs: 1200,
llmStreamDurationMs: 1,
usage: { output: 44 },
});
expect(result).toBe(
'[Debug] TTFT: 1.2s | TPS: 36.6 tok/s (44 tokens over 1.2s, stream 1ms)',
);
});

it('formats durations under 1s as milliseconds', () => {
Expand Down
Loading