Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/compaction-output-token-cap.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/agent-core": patch
---

Cap compaction output tokens to a conservative fallback when maxOutputSize is not configured, preventing APIContextOverflowError on providers that do not auto-clamp max_tokens.
10 changes: 10 additions & 0 deletions packages/agent-core/src/agent/compaction/full.ts
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,19 @@ export class FullCompaction {
await this.triggerPreCompactHook(data, tokensBefore, signal);

const model = this.agent.config.model;
const maxOutputSize = this.agent.config.maxOutputSize;
const maxCtx = this.agent.config.modelCapabilities.max_context_tokens ?? 0;
// When maxOutputSize is not configured (the default), fall back to a
// conservative cap so compaction never requests the full context window
// as max_completion_tokens. 1/4 of the context window (capped at 8192)
// is generous for a summary while preventing overflow on providers that
// do not auto-clamp max_tokens server-side.
const compactionOutputCap =
maxOutputSize ?? (maxCtx > 0 ? Math.min(Math.floor(maxCtx / 4), 8192) : undefined);
const provider = applyCompletionBudget({
provider: this.agent.config.provider,
budget: resolveCompletionBudget({
maxOutputSize: compactionOutputCap,
reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize,
}),
capability: this.agent.config.modelCapabilities,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import { describe, expect, it } from 'vitest';

import {
applyCompletionBudget,
computeCompletionBudgetCap,
resolveCompletionBudget,
} from '../../src/utils/completion-budget';

import type { ChatProvider, ModelCapability } from '@moonshot-ai/kosong';

/**
 * Builds a capability fixture for tests.
 *
 * Every input-modality flag and `thinking` are switched off, `tool_use` is
 * switched on, and the context window is set to the supplied size.
 *
 * @param maxContextTokens - Value stored as `max_context_tokens`.
 * @returns A fresh capability object on every call.
 */
function makeCapability(maxContextTokens: number): ModelCapability {
  // Flags that are disabled for every fixture produced here.
  const disabledFeatures = {
    image_in: false,
    video_in: false,
    audio_in: false,
    thinking: false,
  };
  return { ...disabledFeatures, tool_use: true, max_context_tokens: maxContextTokens };
}

/**
 * Creates a stub chat provider that remembers the most recent value passed to
 * `withMaxCompletionTokens`.
 *
 * @returns The stub provider plus `getCap`, which yields the last recorded
 * value, or `null` when `withMaxCompletionTokens` has not been called yet.
 */
function makeMockProvider(): { provider: ChatProvider; getCap: () => number | null } {
  let recordedCap: number | null = null;

  // Records the requested cap and hands back a shallow copy of the stub that
  // carries the value as `_cap`; the stub itself is left untouched.
  const rememberCap = (n: number) => {
    recordedCap = n;
    return { ...provider, _cap: n } as unknown as ChatProvider;
  };

  const provider = {
    name: 'mock',
    modelName: 'mock-model',
    thinkingEffort: null,
    // No-op placeholders; only the completion-token cap is observed.
    generate: (() => {}) as unknown as ChatProvider['generate'],
    withThinking: (() => {}) as unknown as ChatProvider['withThinking'],
    withMaxCompletionTokens: rememberCap as unknown as (n: number) => ChatProvider,
  } as ChatProvider;

  const getCap = () => recordedCap;
  return { provider, getCap };
}

/**
 * Reproduces the budget logic of compactionRound() as it was BEFORE the fix:
 * the budget is resolved from `reservedContextSize` alone and `maxOutputSize`
 * is never forwarded.
 *
 * @param args - Model settings; `args.maxOutputSize` is accepted but ignored
 * on purpose so the function mirrors the old behavior.
 * @returns The value the mock provider received through
 * `withMaxCompletionTokens`, or 0 when no cap was applied.
 */
function originalCompactionMaxTokens(args: {
  maxOutputSize?: number;
  maxCtx: number;
  reservedContextSize?: number;
}): number {
  const mock = makeMockProvider();
  const budget = resolveCompletionBudget({
    // maxOutputSize is left out deliberately — that omission is the bug
    reservedContextSize: args.reservedContextSize,
  });
  const capability = makeCapability(args.maxCtx);
  // The returned provider is not needed; the mock records the applied cap.
  applyCompletionBudget({ provider: mock.provider, budget, capability });
  return mock.getCap() ?? 0;
}

/**
 * Reproduces the budget logic of compactionRound() AFTER the fix: the budget
 * is resolved with `maxOutputSize` when it is set, otherwise with a
 * conservative fallback of a quarter of the context window, at most 8192.
 *
 * @param args - Model settings used to derive the output cap.
 * @returns The value the mock provider received through
 * `withMaxCompletionTokens`, or 0 when no cap was applied.
 */
function patchedCompactionMaxTokens(args: {
  maxOutputSize?: number;
  maxCtx: number;
  reservedContextSize?: number;
}): number {
  // No fallback exists when the context window size is not positive.
  let fallbackCap: number | undefined;
  if (args.maxCtx > 0) {
    fallbackCap = Math.min(Math.floor(args.maxCtx / 4), 8192);
  }
  const compactionOutputCap = args.maxOutputSize ?? fallbackCap;

  const mock = makeMockProvider();
  const budget = resolveCompletionBudget({
    maxOutputSize: compactionOutputCap,
    reservedContextSize: args.reservedContextSize,
  });
  const capability = makeCapability(args.maxCtx);
  // The returned provider is not needed; the mock records the applied cap.
  applyCompletionBudget({ provider: mock.provider, budget, capability });
  return mock.getCap() ?? 0;
}

describe('compaction overflow verification (before vs after fix)', () => {
  // Assumed size of a compaction request: such a prompt carries the whole
  // conversation history that is being compacted.
  const COMPACTION_INPUT_TOKENS = 80_000;

  // Each entry is fed unchanged to both the simulated original and the
  // simulated patched budget logic.
  const testModels = [
    {
      name: 'stepfun/step-3.7-flash (maxOutputSize not configured)',
      maxOutputSize: undefined,
      maxCtx: 256_000,
      reservedContextSize: 50_000,
    },
    {
      name: 'kimi-for-coding (maxOutputSize not configured)',
      maxOutputSize: undefined,
      maxCtx: 262_144,
      reservedContextSize: 50_000,
    },
    {
      name: 'zhipu/glm-5.2 (maxOutputSize=131072)',
      maxOutputSize: 131_072,
      maxCtx: 1_000_000,
      reservedContextSize: 50_000,
    },
  ];

  testModels.forEach((model) => {
    it(`${model.name}: original overflows, patched is safe`, () => {
      const contextLimit = model.maxCtx;
      const origCap = originalCompactionMaxTokens(model);
      const patchedCap = patchedCompactionMaxTokens(model);
      const origTotal = COMPACTION_INPUT_TOKENS + origCap;
      const patchedTotal = COMPACTION_INPUT_TOKENS + patchedCap;

      // Original logic: maxOutputSize never reaches resolveCompletionBudget,
      // so the cap is expected to equal the full context window whether or
      // not maxOutputSize is configured. Input plus that cap then exceeds
      // the window — the core bug.
      expect(origCap).toBe(contextLimit);
      expect(origTotal).toBeGreaterThan(contextLimit);

      // Patched logic: the cap is either maxOutputSize or
      // min(maxCtx / 4, 8192), so input plus cap must fit in the window.
      expect(patchedTotal).toBeLessThanOrEqual(contextLimit);
      expect(patchedCap).toBeLessThan(contextLimit);

      // Side-by-side table for manual inspection of the two outcomes.
      console.log(`
${model.name}
max_context_tokens: ${model.maxCtx.toLocaleString()}
maxOutputSize: ${model.maxOutputSize?.toLocaleString() ?? 'undefined'}
compaction input est: ${COMPACTION_INPUT_TOKENS.toLocaleString()}

Original: max_tokens=${origCap.toLocaleString()} total=${origTotal.toLocaleString()} overflow=${origTotal > model.maxCtx ? 'YES ❌' : 'NO'}
Patched: max_tokens=${patchedCap.toLocaleString()} total=${patchedTotal.toLocaleString()} overflow=${patchedTotal > model.maxCtx ? 'YES ❌' : 'NO ✅'}
`);
    });
  });
});
89 changes: 89 additions & 0 deletions packages/agent-core/test/utils/completion-budget.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,92 @@ describe('resolveCompletionBudget', () => {
expect(budget?.fallback).toBe(32000);
});
});

describe('compaction budget resolution', () => {
  /**
   * Mirrors the budget resolution performed in full.ts compactionRound():
   * an explicit maxOutputSize wins; otherwise the output cap falls back to
   * min(floor(maxCtx / 4), 8192) for a positive context window, and stays
   * undefined when maxCtx is not positive. This keeps compaction from
   * requesting the full context window as max_completion_tokens when
   * maxOutputSize is not explicitly configured.
   */
  function resolveCompactionBudget(args: {
    readonly maxOutputSize?: number;
    readonly maxCtx: number;
    readonly reservedContextSize?: number;
    readonly env?: NodeJS.ProcessEnv;
  }): ReturnType<typeof resolveCompletionBudget> {
    const { maxOutputSize, maxCtx, reservedContextSize, env } = args;
    const fallbackCap = maxCtx > 0 ? Math.min(Math.floor(maxCtx / 4), 8192) : undefined;
    return resolveCompletionBudget({
      maxOutputSize: maxOutputSize ?? fallbackCap,
      reservedContextSize,
      env,
    });
  }

  it('uses a conservative fallback cap when maxOutputSize is undefined', () => {
    const resolved = resolveCompactionBudget({
      maxCtx: 262_144,
      reservedContextSize: 50_000,
      env: {},
    });
    // A quarter of 262144 is 65536, which the 8192 ceiling then clamps.
    expect(resolved?.hardCap).toBe(8192);
  });

  it('caps at 8192 even for very large context windows', () => {
    const resolved = resolveCompactionBudget({
      maxCtx: 1_000_000,
      reservedContextSize: 50_000,
      env: {},
    });
    expect(resolved?.hardCap).toBe(8192);
  });

  it('uses 1/4 of context when context is small', () => {
    const resolved = resolveCompactionBudget({
      maxCtx: 20_000,
      reservedContextSize: 50_000,
      env: {},
    });
    // A quarter of 20000 is 5000, which is already below the 8192 ceiling.
    expect(resolved?.hardCap).toBe(5000);
  });

  it('uses explicit maxOutputSize when configured', () => {
    const resolved = resolveCompactionBudget({
      maxOutputSize: 131_072,
      maxCtx: 1_000_000,
      reservedContextSize: 50_000,
      env: {},
    });
    expect(resolved?.hardCap).toBe(131_072);
  });

  it('respects KIMI_MODEL_MAX_COMPLETION_TOKENS over the fallback cap', () => {
    const resolved = resolveCompactionBudget({
      maxCtx: 262_144,
      reservedContextSize: 50_000,
      env: { KIMI_MODEL_MAX_COMPLETION_TOKENS: '4096' },
    });
    expect(resolved?.hardCap).toBe(4096);
  });

  it('produces a hardCap that computeCompletionBudgetCap will use instead of maxCtx', () => {
    const maxCtx = 262_144;
    const resolved = resolveCompactionBudget({
      maxCtx,
      reservedContextSize: 50_000,
      env: {},
    });
    // A hard cap must be present (not merely a fallback) and must differ
    // from the context window size.
    expect(resolved?.hardCap).toBeDefined();
    expect(resolved?.hardCap).not.toBe(maxCtx);
    // The effective cap is expected to come from the hard cap rather than
    // from the context window.
    const effectiveCap = computeCompletionBudgetCap({
      budget: resolved!,
      capability: makeCapability(maxCtx),
    });
    expect(effectiveCap).toBe(8192);
    expect(effectiveCap).toBeLessThan(maxCtx);
  });
});