MoonshotAI · mikkihugo · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/.changeset/honest-timers-flow.md b/.changeset/honest-timers-flow.md
@@ -0,0 +1,6 @@
+---
+"@moonshot-ai/kimi-code": patch
+"@moonshot-ai/agent-core": patch
+---
+
+Respect model output-token limits during full-history compaction.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
@@ -264,6 +264,7 @@ export class FullCompaction {
       const provider = applyCompletionBudget({
         provider: this.agent.config.provider,
         budget: resolveCompletionBudget({
+          maxOutputSize: this.agent.config.maxOutputSize,
           reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize,
         }),
         capability: this.agent.config.modelCapabilities,

diff --git a/packages/agent-core/test/agent/compaction/full.test.ts b/packages/agent-core/test/agent/compaction/full.test.ts
@@ -1528,7 +1528,7 @@ describe('FullCompaction', () => {
 
   it('compacts provider overflow when model context size is unknown', async () => {
     let callCount = 0;
-    const compactionMaxCompletionTokens: unknown[] = [];
+    const compactionMaxCompletionTokens: Array<number | undefined> = [];
     const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
       callCount += 1;
       if (callCount === 1) {
@@ -1594,7 +1594,7 @@ describe('FullCompaction', () => {
   it('honors completion budget env hard caps during compaction', async () => {
     vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '8192');
     let callCount = 0;
-    const compactionMaxCompletionTokens: unknown[] = [];
+    const compactionMaxCompletionTokens: Array<number | undefined> = [];
     const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
       callCount += 1;
       if (callCount === 1) {
@@ -1625,10 +1625,50 @@ describe('FullCompaction', () => {
     expect(compactionMaxCompletionTokens).toEqual([8192]);
   });
 
+  it('honors model maxOutputSize during compaction', async () => {
+    let callCount = 0;
+    const compactionMaxCompletionTokens: Array<number | undefined> = [];
+    const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
+      callCount += 1;
+      if (callCount === 1) { // first call: simulate a provider context overflow
+        throw new APIContextOverflowError(400, 'Context length exceeded', 'req-model-output-cap');
+      }
+      if (callCount === 2) { // second call: capture the provider's max_completion_tokens
+        compactionMaxCompletionTokens.push(providerMaxCompletionTokens(provider));
+        return textResult('Model output cap compacted summary.');
+      }
+      await callbacks?.onMessagePart?.({ // any later call: emit a text part, then return it
+        type: 'text',
+        text: 'Recovered with model output cap.',
+      });
+      return textResult('Recovered with model output cap.');
+    };
+    const ctx = testAgent({ generate });
+    ctx.configure({
+      provider: CATALOGUED_PROVIDER,
+      modelCapabilities: CATALOGUED_MODEL_CAPABILITIES,
+    });
+    const providerManager = ctx.agent.modelProvider;
+    if (providerManager === undefined) throw new Error('Expected provider manager');
+    const resolveProviderConfig = providerManager.resolveProviderConfig.bind(providerManager);
+    vi.spyOn(providerManager, 'resolveProviderConfig').mockImplementation((model) => ({
+      ...resolveProviderConfig(model),
+      maxOutputSize: 32768, // overrides the real resolver's value; asserted below
+    }));
+    ctx.appendExchange(1, 'old user one', 'old assistant one', 20);
+    ctx.newEvents();
+
+    await ctx.rpc.prompt({ input: [{ type: 'text', text: 'Retry with model output cap' }] });
+    await ctx.untilTurnEnd();
+
+    expect(callCount).toBe(3); // three generate calls: overflow, capture, recovery text
+    expect(compactionMaxCompletionTokens).toEqual([32768]); // equals the mocked maxOutputSize
+  });
+
   it('honors completion budget env opt-out during compaction', async () => {
     vi.stubEnv('KIMI_MODEL_MAX_COMPLETION_TOKENS', '0');
     let callCount = 0;
-    const compactionMaxCompletionTokens: unknown[] = [];
+    const compactionMaxCompletionTokens: Array<number | undefined> = [];
     const generate: GenerateFn = async (provider, _system, _tools, _history, callbacks) => {
       callCount += 1;
       if (callCount === 1) {
@@ -1884,12 +1924,13 @@ function oauthTestAgentOptions(
   };
 }
 
-function providerMaxCompletionTokens(provider: Parameters<GenerateFn>[0]): unknown {
-  return (
+function providerMaxCompletionTokens(provider: Parameters<GenerateFn>[0]): number | undefined {
+  const value = (
     provider as {
       readonly modelParameters?: Record<string, unknown>;
     }
   ).modelParameters?.['max_completion_tokens'];
+  return typeof value === 'number' ? value : undefined; // missing or non-numeric → undefined
 }
 
 function textResult(text: string): Awaited<ReturnType<GenerateFn>> {