diff --git a/.changeset/honest-timers-flow.md b/.changeset/honest-timers-flow.md new file mode 100644 index 000000000..f21c3774d --- /dev/null +++ b/.changeset/honest-timers-flow.md @@ -0,0 +1,6 @@ +--- +"@moonshot-ai/kimi-code": patch +"@moonshot-ai/agent-core": patch +--- + +Respect model output-token limits during full-history compaction. diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 55bf73f1f..3f885b359 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -264,6 +264,7 @@ export class FullCompaction { const provider = applyCompletionBudget({ provider: this.agent.config.provider, budget: resolveCompletionBudget({ + maxOutputSize: this.agent.config.maxOutputSize, reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize, }), capability: this.agent.config.modelCapabilities, diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 990140e1b..53477a1f1 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -1528,7 +1528,7 @@ describe('FullCompaction', () => { it('compacts provider overflow when model context size is unknown', async () => { let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1594,7 +1594,7 @@ describe('FullCompaction', () => { it('honors completion budget env hard caps during compaction', async () => { vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '8192'); let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1625,10 +1625,50 @@ describe('FullCompaction', () => { expect(compactionMaxCompletionTokens).toEqual([8192]); }); + it('honors model maxOutputSize during compaction', async () => { + let callCount = 0; + const compactionMaxCompletionTokens: Array = []; + const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { + callCount += 1; + if (callCount === 1) { + throw new APIContextOverflowError(400, 'Context length exceeded', 'req-model-output-cap'); + } + if (callCount === 2) { + compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider)); + return textResult('Model output cap compacted summary.'); + } + await callbacks?.onMessagePart?.({ + type: 'text', + text: 'Recovered with model output cap.', + }); + return textResult('Recovered with model output cap.'); + }; + const ctx = testAgent({ generate }); + ctx.configure({ + provider: CATALOGUED_PROVIDER, + modelCapabilities: CATALOGUED_MODEL_CAPABILITIES, + }); + const providerManager = ctx.agent.modelProvider; + if (providerManager === undefined) throw new Error('Expected provider manager'); + const resolveProviderConfig = providerManager.resolveProviderConfig.bind(providerManager); + vi.spyOn(providerManager, 'resolveProviderConfig').mockImplementation((model) => ({ + ...resolveProviderConfig(model), + maxOutputSize: 32768, + })); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.newEvents(); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with model output cap' }] }); + await ctx.untilTurnEnd(); + + expect(callCount).toBe(3); + expect(compactionMaxCompletionTokens).toEqual([32768]); + }); + it('honors completion budget env opt-out during compaction', async () => { vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0'); let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1884,12 +1924,13 @@ function oauthTestAgentOptions( }; } -function providerMaxCompletionTokens(provider: Parameters[0]): unknown { - return ( +function providerMaxCompletionTokens(provider: Parameters[0]): number | undefined { + const value = ( provider as { readonly modelParameters?: Record; } ).modelParameters?.['max_completion_tokens']; + return typeof value === 'number' ? value : undefined; } function textResult(text: string): Awaited> {