34 changes: 34 additions & 0 deletions packages/contexto/README.md
@@ -121,6 +121,40 @@ For the deeper technical reasoning:
| Property | Type | Required | Description |
| --- | --- | --- | --- |
| `apiKey` | string | Yes | Your Contexto API key |
| `contextEnabled` | boolean | No | Enable or disable context retrieval (default: `true`) |
| `maxContextChars` | number | No | Maximum characters to inject as retrieved context (default: `2000`) |
| `compactThreshold` | number | No | Fraction of token budget that triggers compaction (default: `0.50`) |
| `compactionStrategy` | string | No | `"default"` or `"sliding-window"` (default: `"default"`) |
| `rlmEnabled` | boolean | No | Enable RLM tools for processing large contexts (default: `false`) |

## Large Context Processing (RLM)

When a user sends a message that exceeds 50% of the available token budget — a PDF, a spreadsheet, a massive log dump — the standard approach of stuffing it into the prompt breaks down. Contexto includes optional support for **Recursive Language Model (RLM)** processing to handle these cases.

When enabled, Contexto automatically detects oversized inputs, offloads them to an in-memory buffer, and gives the agent a set of six tools to explore, search, and reason over the content iteratively — without flooding the context window.

### Enabling RLM

Set `rlmEnabled` to `true` in your plugin config:

```bash
openclaw config set plugins.entries.contexto.config.rlmEnabled true
```

When disabled (the default), the plugin behaves exactly as before — no RLM tools are registered and no additional dependencies are loaded.

### How It Works

1. During context assembly, if the user's message exceeds 50% of the token budget, the content is moved to an in-memory **ContextBuffer** and the message is replaced with a brief instruction.
2. The agent receives six RLM tools: **rlm_overview**, **rlm_peek**, **rlm_grep**, **rlm_slice**, **rlm_query**, and **rlm_repl** — covering structural exploration, pattern search, targeted extraction, sub-LLM reasoning, and sandboxed scripting.
3. The agent iteratively explores and synthesizes an answer using these tools, keeping token usage bounded regardless of input size.
4. Once complete, the synthesized result is ingested into the mindmap as an episode, making it available for future recall just like any other conversation context.
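
The size check in step 1 can be sketched as follows. The ~4-characters-per-token heuristic and the `0.5` threshold match the constants in `src/engine/base.ts`; the function name here is hypothetical:

```typescript
// Sketch of the oversize detection in step 1 (function name hypothetical).
// Contexto estimates tokens at roughly 4 characters per token and compares
// the estimate against half of the available token budget.
const RLM_CONTEXT_THRESHOLD = 0.5;

function exceedsRlmThreshold(text: string, tokenBudget: number): boolean {
  const estimatedTokens = Math.ceil(text.length / 4); // rough chars-to-tokens
  const threshold = Math.floor(tokenBudget * RLM_CONTEXT_THRESHOLD);
  return estimatedTokens > threshold;
}

// A 400k-character paste against a 100k-token budget trips the check:
console.log(exceedsRlmThreshold("x".repeat(400_000), 100_000)); // true
console.log(exceedsRlmThreshold("short question", 100_000)); // false
```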

RLM can also be invoked explicitly by the user, regardless of message size.

Sub-LLM calls are routed through [pi-ai](https://docs.openclaw.ai/pi) via OpenRouter's auto-routing, which automatically selects an appropriate model. No additional API keys or provider SDKs are needed beyond what OpenClaw already manages.

The RLM tools are provided by the [`@ekai/rlm`](../rlm/) package, which can also be used standalone outside of Contexto. See its [README](../rlm/README.md) for full tool documentation.

## Community

5 changes: 5 additions & 0 deletions packages/contexto/openclaw.plugin.json
@@ -15,6 +15,11 @@
"maxContextChars": {
"type": "number",
"description": "Maximum characters of context to inject (default: 2000)"
},
"rlmEnabled": {
"type": "boolean",
"default": false,
"description": "Enable Recursive Language Model (RLM) tools for processing large contexts that exceed the token budget"
}
}
}
11 changes: 10 additions & 1 deletion packages/contexto/package.json
@@ -31,8 +31,17 @@
"scripts": {
"build": "tsc --noEmit"
},
"dependencies": {
"@ekai/rlm": "workspace:*"
},
"peerDependencies": {
"openclaw": "*"
"openclaw": "*",
"@mariozechner/pi-ai": "*"
},
"peerDependenciesMeta": {
"@mariozechner/pi-ai": {
"optional": true
}
},
"devDependencies": {
"@types/node": "^20.10.0",
137 changes: 133 additions & 4 deletions packages/contexto/src/engine/base.ts
@@ -4,9 +4,9 @@ import type {
IngestResult, IngestBatchResult, SubagentSpawnPreparation,
} from 'openclaw/plugin-sdk';
import type { ContextoBackend, Logger, BaseConfig } from '../types.js';
import { stripMetadataEnvelope, formatSearchResults, assembleContextMessages } from '../helpers.js';
import { stripMetadataEnvelope, formatSearchResults, assembleContextMessages, buildPayload } from '../helpers.js';
import type {
CompactionState,
CompactionState, PendingContext,
BootstrapParams, IngestParams, IngestBatchParams,
AfterTurnParams, AssembleParams,
CompactParams, SubagentSpawnParams, SubagentEndedParams,
@@ -15,6 +15,7 @@ import type {
const DEFAULT_MAX_CONTEXT_CHARS = 2000;
const DEFAULT_MAX_RESULTS = 7;
const DEFAULT_MIN_SCORE = 0.45;
const RLM_CONTEXT_THRESHOLD = 0.5;

/**
* Abstract base class for context engine implementations.
@@ -74,6 +75,37 @@ export abstract class AbstractContextEngine implements ContextEngine {
return { messages, estimatedTokens: 0 };
}

// --- RLM: detect large user context ---
if (this.config.rlmEnabled && tokenBudget && lastMsg?.role === 'user') {
const userText = this.extractMessageText(lastMsg);
if (userText) {
const estimatedTokens = Math.ceil(userText.length / 4);
const threshold = Math.floor(tokenBudget * RLM_CONTEXT_THRESHOLD);

if (estimatedTokens > threshold) {
const sessionKey = (params as any).sessionKey ?? (params as any).sessionId ?? 'default';
this.state.pendingLargeContext.set(sessionKey, {
content: userText,
tokenEstimate: estimatedTokens,
});
this.logger.info(`[contexto] RLM: large context detected (${userText.length} chars, ~${estimatedTokens} tokens, threshold: ${threshold}). Stored pending context for session ${sessionKey}`);

// Replace the large user message with an instruction
const replacement = `[Large context provided — ${userText.length} chars, ~${estimatedTokens} tokens. Use the rlm_query tool to analyze it.]`;
const modifiedMessages = [
...messages.slice(0, -1),
{ ...lastMsg, content: replacement },
];

return {
messages: modifiedMessages,
estimatedTokens: Math.ceil(replacement.length / 4),
systemPromptAddition: 'The user has provided a large context that exceeds the context window. Use the RLM tools (rlm_overview, rlm_peek, rlm_grep, rlm_slice, rlm_query, rlm_repl) to analyze it. Start with rlm_overview to understand the structure, then use other tools to answer the user\'s question.',
};
}
}
}

const query = params.prompt ? stripMetadataEnvelope(params.prompt) : undefined;
if (!query) {
return { messages, estimatedTokens: 0 };
@@ -126,11 +158,108 @@
return assembleContextMessages(context, messages);
}

async prepareSubagentSpawn(_params: SubagentSpawnParams): Promise<SubagentSpawnPreparation | undefined> {
async prepareSubagentSpawn(params: SubagentSpawnParams): Promise<SubagentSpawnPreparation | undefined> {
const childSessionKey = (params as any).childSessionKey;
if (!childSessionKey) return undefined;

// Check if there's a pending large context for the current session
// The parent session key is stored when assemble() detects large content
for (const [sessionKey, pending] of this.state.pendingLargeContext.entries()) {
// Map child session to parent so onSubagentEnded can find the context
this.state.activeRlmSessions.set(childSessionKey, sessionKey);
this.logger.info(`[contexto] prepareSubagentSpawn: mapped child ${childSessionKey} → parent ${sessionKey} (${pending.tokenEstimate} est. tokens)`);

return {
rollback: () => {
this.state.activeRlmSessions.delete(childSessionKey);
this.logger.info(`[contexto] prepareSubagentSpawn rollback: removed child ${childSessionKey}`);
},
};
}

return undefined;
}

async onSubagentEnded(_params: SubagentEndedParams): Promise<void> {}
async onSubagentEnded(params: SubagentEndedParams): Promise<void> {
const childSessionKey = (params as any).childSessionKey;
const result = (params as any).result;
if (!childSessionKey) return;

const parentSessionKey = this.state.activeRlmSessions.get(childSessionKey);
if (!parentSessionKey) return;

// Clean up session mapping
this.state.activeRlmSessions.delete(childSessionKey);

const pending = this.state.pendingLargeContext.get(parentSessionKey);
if (!pending) return;

// Clean up pending context
this.state.pendingLargeContext.delete(parentSessionKey);

// Extract the subagent's answer
const answer = typeof result === 'string' ? result : this.extractSubagentAnswer(result);
if (!answer) {
this.logger.warn(`[contexto] onSubagentEnded: no answer from subagent ${childSessionKey}`);
return;
}

// Ingest the processed result into the mindmap for future recall
const payload = buildPayload('rlm-summary', 'processed', parentSessionKey, {
charCount: pending.content.length,
tokenEstimate: pending.tokenEstimate,
}, undefined, {
userMessage: { role: 'user', content: `[Large context: ${pending.content.length} chars, ~${pending.tokenEstimate} tokens]` },
assistantMessages: [{ role: 'assistant', content: answer }],
});

try {
await this.backend.ingest(payload);
this.logger.info(`[contexto] onSubagentEnded: ingested RLM summary (${answer.length} chars) for session ${parentSessionKey}`);
} catch (err) {
this.logger.warn(`[contexto] onSubagentEnded: failed to ingest RLM summary — ${err}`);
}
}

// --- Helpers ---

/** Extract text from a message with string or ContentBlock[] content. */
protected extractMessageText(msg: any): string | undefined {
if (!msg) return undefined;
if (typeof msg.content === 'string') return msg.content;
if (Array.isArray(msg.content)) {
const texts = msg.content
.filter((b: any) => b.type === 'text' && typeof b.text === 'string')
.map((b: any) => b.text);
return texts.length > 0 ? texts.join('\n') : undefined;
}
return undefined;
}

/** Get pending large context for a session key. */
getPendingContext(sessionKey: string): PendingContext | undefined {
return this.state.pendingLargeContext.get(sessionKey);
}

/** Clear pending large context for a session key. */
clearPendingContext(sessionKey: string): void {
this.state.pendingLargeContext.delete(sessionKey);
}

/** Extract the last assistant text from subagent result. */
private extractSubagentAnswer(result: any): string | undefined {
if (!result) return undefined;
// Handle array of messages
const messages = Array.isArray(result) ? result : result?.messages;
if (!Array.isArray(messages)) return typeof result === 'string' ? result : undefined;

for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
const text = this.extractMessageText(msg);
if (msg?.role === 'assistant' && text) return text;
}
return undefined;
}

// --- Template method with apiKey guard ---

12 changes: 12 additions & 0 deletions packages/contexto/src/engine/types.ts
@@ -16,13 +16,23 @@ export type CompactParams = MethodParams<'compact'>;
export type SubagentSpawnParams = MethodParams<'prepareSubagentSpawn'>;
export type SubagentEndedParams = MethodParams<'onSubagentEnded'>;

/** Large context pending RLM processing. */
export interface PendingContext {
content: string;
tokenEstimate: number;
}

// Internal state — not part of the SDK contract
export interface CompactionState {
bufferedMessages: WebhookPayload[];
lastSessionId: string;
lastSessionKey: string;
cachedTokenBudget: number | undefined;
injectedItemIds: Set<string>;
/** Large contexts awaiting RLM subagent processing, keyed by sessionKey. */
pendingLargeContext: Map<string, PendingContext>;
/** Active RLM subagent sessions, keyed by childSessionKey → parentSessionKey. */
activeRlmSessions: Map<string, string>;
}

export function createCompactionState(): CompactionState {
@@ -32,5 +42,7 @@ export function createCompactionState(): CompactionState {
lastSessionKey: '',
cachedTokenBudget: undefined,
injectedItemIds: new Set(),
pendingLargeContext: new Map(),
activeRlmSessions: new Map(),
};
}
9 changes: 5 additions & 4 deletions packages/contexto/src/engine/utils.ts
@@ -78,8 +78,9 @@ export function selectMessagesToEvict(
};
}

/** Extract the firstKeptEntryId from the first message in an array (if available). */
export function getFirstKeptEntryId(messages: any[]): string | undefined {
const first = messages.length > 0 ? messages[0] : null;
return first?.id ?? first?.entryId ?? undefined;
/** Extract the firstKeptEntryId from the user message in the first kept episode payload. */
export function getFirstKeptEntryId(kept: WebhookPayload[]): string | undefined {
if (kept.length === 0) return undefined;
const data = kept[0].data as Record<string, any> | undefined;
return data?.userMessage?.id ?? undefined;
}