Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/L0/_all/mod/_core/agent_prompt/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Current shared runtime contract:
- normalized prompt items should cache `valueTokenCount` alongside the normalized string value so repeated prompt builds can reuse tokenizer results for the same item body
- prompt-budget ratios are stored as percentages of the configured model `maxTokens`, with `system`, `history`, and `transient` required to total 100 while `singleMessage` is a separate percentage of the history budget
- long prompt contributors may be trimmed through the shared middle-replacement placeholder emitted by `trimPromptLongMessage(...)`; the placeholder must keep a stable `space.chat.readLongMessage({ id, from, to })` instruction so the active chat runtime can expose the removed text on demand during that turn
- the `<<NNNNNNNNNN characters removed ...>>` counter inside that placeholder must stay byte-stable across turns even when the removed-character count changes order of magnitude, so prefix prompt-cache backends (llama.cpp `--prompt-cache`, qwen serve, vLLM prefix cache, etc.) keep their warm cache for everything after the placeholder instead of forcing a full prompt re-prefill on every turn; `buildPromptLongMessagePlaceholder(...)` enforces this by padding the counter to a fixed decimal width with leading zeros
- part-level prompt-budget trimming should build a one-shot thresholded multi-contributor plan that trims only contributors whose planned cut is at least `250` tokens; system and transient consumers may then fall back to one combined section-body trim when contributor-level trims would all be smaller than that threshold
- `installPromptItemAccess(...)` must keep full prompt-item text in runtime-only memory while publishing only redacted `space.chat.promptItems` metadata plus `readLongMessage(...)` on the live chat namespace
- this module is prompt-builder-agnostic; callers must provide `buildPromptInput(context)` and may optionally provide `updatePromptHistory({ context, historyMessages, options, prompt, promptInput })`
Expand Down
19 changes: 18 additions & 1 deletion app/L0/_all/mod/_core/agent_prompt/prompt-items.js
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,18 @@ export function buildPromptOverflowTrimPlan(contributors = [], overflowTokens, o
};
}

// Pad the removed-chars counter to a fixed width so the placeholder string has
// a byte-stable length regardless of how many characters were trimmed. Each
// time the trimmer runs the counter changes (because chars-needed drifts with
// content and token budget), and without padding that changes the byte offset
// of every token after the placeholder. For backends with prefix prompt-cache
// reuse (llama.cpp `--prompt-cache`, qwen serve, vLLM prefix cache, etc.) the
// drifting placeholder forces a full prompt re-prefill on every turn instead
// of a warm-cache continuation — turning a 10s warm reply into a multi-minute
// full prompt-processing (PP) pass for long-context sessions.
//
// 10 digits cover counts up to 9,999,999,999 (just under 10 billion
// characters), which is far above any realistic message length. The pad is
// applied with `padStart`, which never truncates, so a larger count would
// simply widen the placeholder rather than lose digits.
const LONG_MESSAGE_REMOVED_CHARS_PAD_WIDTH = 10;

export function buildPromptLongMessagePlaceholder({ id, removedChars } = {}) {
const normalizedId = Number.isFinite(Number(id)) ? Math.max(1, Math.round(Number(id))) : 0;
const normalizedRemovedChars = Number.isFinite(Number(removedChars))
Expand All @@ -543,7 +555,12 @@ export function buildPromptLongMessagePlaceholder({ id, removedChars } = {}) {
return "";
}

return `<<${normalizedRemovedChars} characters removed to optimize context, read with space.chat.readLongMessage({id: ${normalizedId}, from: 0, to:${LONG_MESSAGE_DEFAULT_TO}})>>`;
const paddedRemovedChars = String(normalizedRemovedChars).padStart(
LONG_MESSAGE_REMOVED_CHARS_PAD_WIDTH,
"0"
);

return `<<${paddedRemovedChars} characters removed to optimize context, read with space.chat.readLongMessage({id: ${normalizedId}, from: 0, to:${LONG_MESSAGE_DEFAULT_TO}})>>`;
}

export function trimPromptLongMessage(text, options = {}) {
Expand Down
33 changes: 33 additions & 0 deletions tests/prompt_budget_trim_test.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,36 @@ test("system part falls back to section-body compression when no contributor can
assert.match(plan.sectionContributor.originalValueText, /\n\n/u);
assert.equal(plan.sectionContributor.tokenCount > 0, true);
});

test("buildPromptLongMessagePlaceholder pads removedChars to a fixed byte width across orders of magnitude", () => {
  // Backends that reuse a cached prompt prefix (llama.cpp `--prompt-cache`,
  // qwen serve, vLLM prefix cache, etc.) depend on the prompt staying
  // byte-stable from one turn to the next. The placeholder is embedded in
  // every trimmed message and its counter is recomputed each turn; if the
  // counter were printed unpadded, its digit count would grow (for example
  // 3 -> 4 digits once N reaches 1000) and shift every byte that follows,
  // forcing a full re-prefill instead of a warm-cache continuation.
  //
  // Sample the counter at several orders of magnitude and require every
  // resulting placeholder to have the same length as the smallest one.
  const removedCharSamples = [9, 4096, 500_000, 1_000_000_000];

  const [baselineLength, ...otherLengths] = removedCharSamples.map(
    (removedChars) => buildPromptLongMessagePlaceholder({ id: 1, removedChars }).length
  );

  for (const candidateLength of otherLengths) {
    assert.equal(baselineLength, candidateLength);
  }
});

test("buildPromptLongMessagePlaceholder keeps the counter readable as a decimal number with leading zeros", () => {
  const placeholder = buildPromptLongMessagePlaceholder({ id: 7, removedChars: 42 });

  // Anchor the pattern at the start of the string so the check matches the
  // failure message below: the counter must be the very first thing after
  // the opening `<<`, not merely appear somewhere inside the placeholder.
  const match = placeholder.match(/^<<(\d+) characters removed/u);

  assert.ok(match, "placeholder should start with `<<<digits> characters removed`");

  // The counter should still be parseable as a positive integer after the
  // leading-zero pad so anything that inspects it (logs, debug overlays,
  // future tooling) does not need to special-case the format.
  assert.equal(Number(match[1]), 42);

  // Pin the "leading zeros" half of the test name: the raw digits must be the
  // value left-padded with at least one zero. The exact width is deliberately
  // not hard-coded here; the fixed-width property is covered by comparing
  // placeholder lengths across magnitudes.
  assert.match(match[1], /^0+42$/u, "counter should be left-padded with zeros");
});