Skip to content

Commit 424249c

Browse files
committed
replace context header with token-triggered compress nudge
1 parent 47b1703 commit 424249c

File tree

7 files changed

+47
-47
lines changed

7 files changed

+47
-47
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ DCP uses its own config file:
104104
> // Nudge the LLM to use prune tools (every <nudgeFrequency> tool results)
105105
> "nudgeEnabled": true,
106106
> "nudgeFrequency": 10,
107-
> // Encourages the model to stay within this context budget (not a hard limit); set to "model" to use full model capacity
107+
> // When session tokens exceed this limit, the model is encouraged to compress context. Set to "model" to use full model context limit.
108108
> "contextLimit": 100000,
109109
> // Additional tools to protect from pruning
110110
> "protectedTools": [],

dcp.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
"description": "Tool names that should be protected from automatic pruning"
111111
},
112112
"contextLimit": {
113-
"description": "Context limit used for the prunable-tools header (\"model\" uses the active model's context limit)",
113+
"description": "When session tokens exceed this limit, a compress nudge is injected (\"model\" uses the active model's context limit)",
114114
"default": 100000,
115115
"oneOf": [
116116
{

lib/messages/inject.ts

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,22 @@ import type { SessionState, WithParts } from "../state"
22
import type { Logger } from "../logger"
33
import type { PluginConfig } from "../config"
44
import type { UserMessage } from "@opencode-ai/sdk/v2"
5-
import { renderNudge } from "../prompts"
5+
import { renderNudge, renderCompressNudge } from "../prompts"
66
import {
77
extractParameterKey,
88
buildToolIdList,
99
createSyntheticTextPart,
1010
createSyntheticToolPart,
1111
isIgnoredUserMessage,
12-
formatContextHeader,
13-
type ContextInfo,
1412
} from "./utils"
1513
import { getFilePathsFromParameters, isProtected } from "../protected-file-patterns"
1614
import { getLastUserMessage, isMessageCompacted } from "../shared-utils"
1715
import { getCurrentTokenUsage } from "../strategies/utils"
1816

19-
export const wrapPrunableTools = (content: string, contextInfo?: ContextInfo): string => {
20-
const contextHeader = formatContextHeader(contextInfo)
17+
// XML wrappers
18+
export const wrapPrunableTools = (content: string): string => {
2119
return `<prunable-tools>
22-
${contextHeader}The following tools have been invoked and are available for pruning. This list does not mandate immediate action. Consider your current goals and the resources you need before pruning valuable tool inputs or outputs. Consolidate your prunes for efficiency; it is rarely worth pruning a single tiny tool output. Keep the context free of noise.
20+
The following tools have been invoked and are available for pruning. This list does not mandate immediate action. Consider your current goals and the resources you need before pruning valuable tool inputs or outputs. Consolidate your prunes for efficiency; it is rarely worth pruning a single tiny tool output. Keep the context free of noise.
2321
${content}
2422
</prunable-tools>`
2523
}
@@ -55,6 +53,32 @@ Context management was just performed. Do NOT use the ${toolName} again. A fresh
5553
</context-info>`
5654
}
5755

56+
const resolveContextLimit = (config: PluginConfig, state: SessionState): number | undefined => {
57+
const configLimit = config.tools.settings.contextLimit
58+
if (configLimit === "model") {
59+
return state.modelContextLimit
60+
}
61+
return configLimit
62+
}
63+
64+
const shouldInjectCompressNudge = (
65+
config: PluginConfig,
66+
state: SessionState,
67+
messages: WithParts[],
68+
): boolean => {
69+
if (config.tools.compress.permission === "deny") {
70+
return false
71+
}
72+
73+
const contextLimit = resolveContextLimit(config, state)
74+
if (contextLimit === undefined) {
75+
return false
76+
}
77+
78+
const currentTokens = getCurrentTokenUsage(messages)
79+
return currentTokens > contextLimit
80+
}
81+
5882
const getNudgeString = (config: PluginConfig): string => {
5983
const flags = {
6084
prune: config.tools.prune.permission !== "deny",
@@ -135,20 +159,7 @@ const buildPrunableToolsList = (
135159
return ""
136160
}
137161

138-
const configLimit =
139-
config.tools.settings.contextLimit === "model"
140-
? state.modelContextLimit
141-
: config.tools.settings.contextLimit
142-
143-
const contextInfo: ContextInfo = {
144-
used: getCurrentTokenUsage(messages),
145-
limit:
146-
state.modelContextLimit !== undefined && configLimit !== undefined
147-
? Math.min(state.modelContextLimit, configLimit)
148-
: (configLimit ?? state.modelContextLimit),
149-
}
150-
151-
return wrapPrunableTools(lines.join("\n"), contextInfo)
162+
return wrapPrunableTools(lines.join("\n"))
152163
}
153164

154165
export const insertPruneToolContext = (
@@ -194,6 +205,12 @@ export const insertPruneToolContext = (
194205
logger.info("Inserting prune nudge message")
195206
contentParts.push(getNudgeString(config))
196207
}
208+
209+
// Add compress nudge if token usage exceeds contextLimit
210+
if (shouldInjectCompressNudge(config, state, messages)) {
211+
logger.info("Inserting compress nudge - token usage exceeds contextLimit")
212+
contentParts.push(renderCompressNudge())
213+
}
197214
}
198215

199216
if (contentParts.length === 0) {

lib/messages/utils.ts

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,9 @@ import { isMessageCompacted } from "../shared-utils"
33
import { Logger } from "../logger"
44
import type { SessionState, WithParts } from "../state"
55
import type { UserMessage } from "@opencode-ai/sdk/v2"
6-
import { formatTokenCount } from "../ui/utils"
76

87
export const COMPRESS_SUMMARY_PREFIX = "[Compressed conversation block]\n\n"
98

10-
export interface ContextInfo {
11-
used: number
12-
limit: number | undefined
13-
}
14-
15-
export function formatContextHeader(contextInfo?: ContextInfo): string {
16-
if (!contextInfo || contextInfo.used === 0) {
17-
return ""
18-
}
19-
20-
const usedStr = formatTokenCount(contextInfo.used)
21-
22-
if (contextInfo.limit) {
23-
const limitStr = formatTokenCount(contextInfo.limit)
24-
const percentage = Math.round((contextInfo.used / contextInfo.limit) * 100)
25-
return `Context: ~${usedStr} / ${limitStr} (${percentage}% used)\n`
26-
}
27-
28-
return `Context: ~${usedStr}\n`
29-
}
30-
319
const generateUniqueId = (prefix: string): string => `${prefix}_${ulid()}`
3210

3311
const isGeminiModel = (modelID: string): boolean => {

lib/prompts/compress-nudge.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<context-limit-warning>
2+
Your session context has exceeded the configured limit. Use the `compress` tool to consolidate completed work phases into summaries. Target conversation segments where exploration or implementation is finished — compress preserves understanding while freeing space for continued work.
3+
</context-limit-warning>

lib/prompts/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Generated prompts (from .md files via scripts/generate-prompts.ts)
22
import { SYSTEM as SYSTEM_PROMPT } from "./_codegen/system.generated"
33
import { NUDGE } from "./_codegen/nudge.generated"
4+
import { COMPRESS_NUDGE } from "./_codegen/compress-nudge.generated"
45
import { PRUNE as PRUNE_TOOL_SPEC } from "./_codegen/prune.generated"
56
import { DISTILL as DISTILL_TOOL_SPEC } from "./_codegen/distill.generated"
67
import { COMPRESS as COMPRESS_TOOL_SPEC } from "./_codegen/compress.generated"
@@ -33,6 +34,10 @@ export function renderNudge(flags: ToolFlags): string {
3334
return processConditionals(NUDGE, flags)
3435
}
3536

37+
export function renderCompressNudge(): string {
38+
return COMPRESS_NUDGE
39+
}
40+
3641
const PROMPTS: Record<string, string> = {
3742
"prune-tool-spec": PRUNE_TOOL_SPEC,
3843
"distill-tool-spec": DISTILL_TOOL_SPEC,

lib/prompts/system.md

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ AVAILABLE TOOLS FOR CONTEXT MANAGEMENT
1414
<compress>THE COMPRESS TOOL
1515
`compress` is a sledgehammer and should be used accordingly. Its purpose is to reduce whole parts of the conversation to their essence and technical details in order to leave room for newer context. Your summary MUST be technical and specific enough to preserve FULL understanding of WHAT TRANSPIRED, such that NO AMBIGUITY remains about what was done, found, or decided. Your compress summary must be thorough and precise. `compress` will replace everything in the range you match, user and assistant messages, tool inputs and outputs. It is preferred not to compress preemptively, but rather to wait for natural breakpoints in the conversation. Those breakpoints are to be inferred from user messages. You WILL NOT compress based on thinking that you are done with the task; wait for conversation cues that the user has moved on from the current phase.
1616

17-
When the context usage indicator is high (around 80% or above), prioritize using `compress` to reduce verbosity and keep room for future context.
18-
1917
This tool will typically be used at the end of a phase of work, when the conversation starts to accumulate noise that would be better summarized, or when you've done significant exploration and can FULLY synthesize your findings and understanding into a technical summary.
2018

2119
Make sure to match enough of the context with start and end strings so you're not faced with an error calling the tool. Be VERY CAREFUL AND CONSERVATIVE when using `compress`.
@@ -39,7 +37,6 @@ Be respectful of the user's API usage, manage context methodically as you work t
3937

4038
<instruction name=injected_context_handling policy_level=critical>
4139
This chat environment injects context information on your behalf in the form of a <prunable-tools> list to help you manage context effectively. Carefully read the list and use it to inform your management decisions. The list is automatically updated after each turn to reflect the current state of manageable tools and context usage. If no list is present, do NOT attempt to prune anything.
42-
Aim to keep the context usage indicator below roughly 80% so there is room for future turns and tool outputs.
4340
There may be tools in session context that do not appear in the <prunable-tools> list; this is expected. Remember that you can ONLY prune what you see in the list.
4441
</instruction>
4542
</system-reminder>

0 commit comments

Comments
 (0)