From 592b622ce729e9b342272f69596ff4d5c6f34227 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 16 Jun 2026 22:04:28 +0200 Subject: [PATCH 1/4] fix(agent-core): honor model output cap during compaction --- .../agent-core/src/agent/compaction/full.ts | 1 + .../test/agent/compaction/full.test.ts | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts index 55bf73f1f..3f885b359 100644 --- a/packages/agent-core/src/agent/compaction/full.ts +++ b/packages/agent-core/src/agent/compaction/full.ts @@ -264,6 +264,7 @@ export class FullCompaction { const provider = applyCompletionBudget({ provider: this.agent.config.provider, budget: resolveCompletionBudget({ + maxOutputSize: this.agent.config.maxOutputSize, reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize, }), capability: this.agent.config.modelCapabilities, diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 990140e1b..427bf900d 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -1625,6 +1625,46 @@ describe('FullCompaction', () => { expect(compactionMaxCompletionTokens).toEqual([8192]); }); + it('honors model maxOutputSize during compaction', async () => { + let callCount = 0; + const compactionMaxCompletionTokens: unknown[] = []; + const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { + callCount += 1; + if (callCount === 1) { + throw new APIContextOverflowError(400, 'Context length exceeded', 'req-model-output-cap'); + } + if (callCount === 2) { + compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider)); + return textResult('Model output cap compacted summary.'); + } + await callbacks?.onMessagePart?.({ + type: 'text', + text: 'Recovered with model output cap.', + }); 
+ return textResult('Recovered with model output cap.'); + }; + const ctx = testAgent({ generate }); + ctx.configure({ + provider: CATALOGUED_PROVIDER, + modelCapabilities: CATALOGUED_MODEL_CAPABILITIES, + }); + const providerManager = ctx.agent.modelProvider; + if (providerManager === undefined) throw new Error('Expected provider manager'); + const resolveProviderConfig = providerManager.resolveProviderConfig.bind(providerManager); + providerManager.resolveProviderConfig = (model) => ({ + ...resolveProviderConfig(model), + maxOutputSize: 32768, + }); + ctx.appendExchange(1, 'old user one', 'old assistant one', 20); + ctx.newEvents(); + + await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with model output cap' }] }); + await ctx.untilTurnEnd(); + + expect(callCount).toBe(3); + expect(compactionMaxCompletionTokens).toEqual([32768]); + }); + it('honors completion budget env opt-out during compaction', async () => { vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0'); let callCount = 0; From ccf71e176af1a528b940efb556552b522ad0e1c3 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 16 Jun 2026 22:06:24 +0200 Subject: [PATCH 2/4] chore: add changeset for compaction output cap --- .changeset/honest-timers-flow.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/honest-timers-flow.md diff --git a/.changeset/honest-timers-flow.md b/.changeset/honest-timers-flow.md new file mode 100644 index 000000000..e81ac7da1 --- /dev/null +++ b/.changeset/honest-timers-flow.md @@ -0,0 +1,5 @@ +--- +"@moonshot-ai/kimi-code": patch +--- + +Respect model output-token limits during full-history compaction. 
From 3b91e242cf75b785fa76e0486ad88cdefab8cbf0 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 16 Jun 2026 22:12:46 +0200 Subject: [PATCH 3/4] test(agent-core): isolate compaction output cap regression --- .../test/agent/compaction/full.test.ts | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts index 427bf900d..53477a1f1 100644 --- a/packages/agent-core/test/agent/compaction/full.test.ts +++ b/packages/agent-core/test/agent/compaction/full.test.ts @@ -1528,7 +1528,7 @@ describe('FullCompaction', () => { it('compacts provider overflow when model context size is unknown', async () => { let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array<number | undefined> = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1594,7 +1594,7 @@ describe('FullCompaction', () => { it('honors completion budget env hard caps during compaction', async () => { vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '8192'); let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array<number | undefined> = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1627,7 +1627,7 @@ describe('FullCompaction', () => { it('honors model maxOutputSize during compaction', async () => { let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array<number | undefined> = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1651,10 +1651,10 @@ describe('FullCompaction', () => { const providerManager = ctx.agent.modelProvider; if (providerManager === undefined) throw new Error('Expected provider 
manager'); const resolveProviderConfig = providerManager.resolveProviderConfig.bind(providerManager); - providerManager.resolveProviderConfig = (model) => ({ + vi.spyOn(providerManager, 'resolveProviderConfig').mockImplementation((model) => ({ ...resolveProviderConfig(model), maxOutputSize: 32768, - }); + })); ctx.appendExchange(1, 'old user one', 'old assistant one', 20); ctx.newEvents(); @@ -1668,7 +1668,7 @@ describe('FullCompaction', () => { it('honors completion budget env opt-out during compaction', async () => { vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0'); let callCount = 0; - const compactionMaxCompletionTokens: unknown[] = []; + const compactionMaxCompletionTokens: Array<number | undefined> = []; const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => { callCount += 1; if (callCount === 1) { @@ -1924,12 +1924,13 @@ function oauthTestAgentOptions( }; } -function providerMaxCompletionTokens(provider: Parameters<GenerateFn>[0]): unknown { - return ( +function providerMaxCompletionTokens(provider: Parameters<GenerateFn>[0]): number | undefined { + const value = ( provider as { readonly modelParameters?: Record<string, unknown>; } ).modelParameters?.['max_completion_tokens']; + return typeof value === 'number' ? value : undefined; } function textResult(text: string): Awaited<ReturnType<GenerateFn>> { From eb011271277f656178f105c6a36068444ccb16c3 Mon Sep 17 00:00:00 2001 From: Mikael Hugo Date: Tue, 16 Jun 2026 22:23:19 +0200 Subject: [PATCH 4/4] chore: include agent-core in compaction changeset --- .changeset/honest-timers-flow.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.changeset/honest-timers-flow.md b/.changeset/honest-timers-flow.md index e81ac7da1..f21c3774d 100644 --- a/.changeset/honest-timers-flow.md +++ b/.changeset/honest-timers-flow.md @@ -1,5 +1,6 @@ --- "@moonshot-ai/kimi-code": patch +"@moonshot-ai/agent-core": patch --- Respect model output-token limits during full-history compaction.