Merged
13 changes: 13 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,19 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.9.1] - 2026-03-04

### Fixed

- **Token budget overhead**: Search and reconstruction assemblers now reserve space for formatting overhead (per-chunk headers, separators, response diagnostics) before chunk assembly, preventing responses from exceeding stated token budgets by 5-15%.
- **Node version mismatch detection**: Hook runner now detects `NODE_MODULE_VERSION` errors and surfaces actionable guidance ("run: npm install -g causantic") instead of the generic "internal error" message. These errors are also excluded from transient-error retries.

### Changed

- **Skill template memory queries**: Roadmap skill reduced from 10 parallel queries at 8K tokens each (80K total) to 4 sequential queries at 4K (16K cap). Cleanup skill reduced from 4 parallel queries at 8K (32K) to 3 sequential queries at 4K (12K cap) with a 2K summarization step before passing to subagents. Both changes address "prompt is too long" failures.
- **`list-sessions` truncation**: Added `limit` parameter (default 30) to the `list-sessions` MCP tool. When results exceed the limit, displays the most recent sessions with a truncation notice.
- **`batch-ingest` post-run guidance**: Output now includes a "Next steps" section recommending `npx causantic maintenance run update-clusters` to generate topic clusters from ingested sessions.

## [0.9.0] - 2026-02-27

### Added
4 changes: 2 additions & 2 deletions package-lock.json


2 changes: 1 addition & 1 deletion package.json
@@ -1,6 +1,6 @@
{
"name": "causantic",
"version": "0.9.0",
"version": "0.9.1",
"description": "Long-term memory for Claude Code — local-first, graph-augmented, self-benchmarking",
"type": "module",
"private": false,
3 changes: 3 additions & 0 deletions src/cli/commands/ingest.ts
@@ -29,5 +29,8 @@ export const batchIngestCommand: Command = {
const { batchIngestDirectory } = await import('../../ingest/batch-ingest.js');
const result = await batchIngestDirectory(args[0], {});
console.log(`Batch ingestion complete: ${result.successCount} sessions processed.`);
console.log('\nNext steps:');
console.log(' npx causantic maintenance run update-clusters');
console.log(' (generates topic clusters from ingested sessions)');
},
};
40 changes: 17 additions & 23 deletions src/cli/skill-templates.ts
@@ -412,31 +412,29 @@ The lead agent has MCP access to Causantic tools — subagents do not. Gather al

### 1.5.1 Query Memory

Run these queries directly (do NOT delegate to subagents):
Run these queries **sequentially** (do NOT delegate to subagents, do NOT run in parallel):

- \`search\` with query: "architecture decisions", \`max_tokens: 8000\`
- \`search\` with query: "tech debt", \`max_tokens: 8000\`
- \`search\` with query: "past cleanup findings", \`max_tokens: 8000\`
- \`recall\` with query: "why was this designed this way", \`max_tokens: 8000\`
1. \`search\` with query: "architecture decisions", \`max_tokens: 4000\`
2. \`search\` with query: "tech debt", \`max_tokens: 4000\`
3. \`search\` with query: "past cleanup findings", \`max_tokens: 4000\`

After each query, discard any results that duplicate earlier findings. Stop querying early if accumulated memory exceeds the total cap of 12K tokens.

### 1.5.2 Assemble Memory Context

Combine the results into a single \`memoryContext\` text block, capped at ~15K tokens total. Structure it as:
Summarize memory into a concise bullet list (max 2K tokens) before passing to subagents. Structure it as:

\`\`\`
## Memory Context (from Causantic)

### Architecture Decisions
[results from search "architecture decisions"]
[summarized results from search "architecture decisions"]

### Known Tech Debt
[results from search "tech debt"]
[summarized results from search "tech debt"]

### Past Cleanup Findings
[results from search "past cleanup findings"]

### Design Rationale
[results from recall "why was this designed this way"]
[summarized results from search "past cleanup findings"]
\`\`\`

If Causantic MCP tools are unavailable, skip this phase and note the gap.
@@ -1176,19 +1174,15 @@ If \`ROADMAP.md\` exists in the project root (updating an existing roadmap):

### 1.3 Query Causantic Memory

Run all memory queries directly in the lead agent context. Do not delegate memory queries to subagents — they cannot access MCP tools.
Run memory queries **sequentially** in the lead agent context. Do not delegate memory queries to subagents — they cannot access MCP tools. Do NOT run these queries in parallel.

After each query, discard any results that duplicate earlier findings. Stop querying early if accumulated memory exceeds the total cap of 16K tokens.

Use the causantic MCP tools to surface deferred and aspirational work:
- \`search\` query: "deferred", \`max_tokens: 8000\`
- \`search\` query: "aspirational", \`max_tokens: 8000\`
- \`search\` query: "someday", \`max_tokens: 8000\`
- \`search\` query: "future work", \`max_tokens: 8000\`
- \`search\` query: "TODO", \`max_tokens: 8000\`
- \`search\` query: "roadmap", \`max_tokens: 8000\`
- \`search\` query: "milestone", \`max_tokens: 8000\`
- \`search\` query: "release plan", \`max_tokens: 8000\`
- \`recall\` query: "features we want to build", \`max_tokens: 8000\`
- \`predict\` context: "project roadmap and future work", \`max_tokens: 8000\`
1. \`search\` query: "deferred TODO future work", \`max_tokens: 4000\`
2. \`search\` query: "roadmap milestone release plan", \`max_tokens: 4000\`
3. \`recall\` query: "features we want to build", \`max_tokens: 4000\`
4. \`predict\` context: "project roadmap and future work", \`max_tokens: 4000\`
- Tag each with source: "memory"

If causantic MCP tools are unavailable or return nothing, note the gap and proceed with other sources.
5 changes: 5 additions & 0 deletions src/hooks/hook-utils.ts
@@ -477,6 +477,11 @@ export async function ingestionHookCli(
export function isTransientError(error: Error): boolean {
const message = error.message.toLowerCase();

// Native module version mismatch — not transient, requires reinstall
if (message.includes('node_module_version') || message.includes('was compiled against')) {
return false;
}

// Network/connectivity errors
if (
message.includes('econnreset') ||
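The early-return ordering is the important detail here: the version-mismatch guard runs before the network checks, so these errors are never classified as retryable. A minimal standalone sketch of that ordering (simplified; the real `isTransientError` matches many more error classes):

```typescript
// Simplified sketch of the transient-error check. The version-mismatch
// guard runs first, so such errors are never retried even if the
// message also happens to match a broader pattern below.
function isTransientErrorSketch(error: Error): boolean {
  const message = error.message.toLowerCase();

  // Native module version mismatch: permanent until reinstall.
  if (
    message.includes('node_module_version') ||
    message.includes('was compiled against')
  ) {
    return false;
  }

  // Network/connectivity errors are worth retrying.
  return message.includes('econnreset') || message.includes('etimedout');
}
```

Because the hook runner retries only when this returns `true`, a stale native build now fails fast with guidance instead of burning retry attempts.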
3 changes: 3 additions & 0 deletions src/hooks/session-start.ts
@@ -257,6 +257,9 @@ export function classifyError(error: unknown): string {
) {
return 'embedder unavailable';
}
if (msg.includes('node_module_version') || msg.includes('was compiled against')) {
return 'native module version mismatch — run: npm install -g causantic';
}
return 'internal error';
}
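The same two substrings drive the friendlier session-start message. A reduced sketch of the classifier's fallthrough (only the new branch and the default are shown; the real function matches several other categories first):

```typescript
// Reduced sketch: specific matches first, the generic fallback last.
function classifyErrorSketch(error: unknown): string {
  const msg =
    error instanceof Error
      ? error.message.toLowerCase()
      : String(error).toLowerCase();
  if (msg.includes('node_module_version') || msg.includes('was compiled against')) {
    return 'native module version mismatch — run: npm install -g causantic';
  }
  return 'internal error';
}
```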

17 changes: 15 additions & 2 deletions src/mcp/tools.ts
@@ -318,6 +318,10 @@ export const listSessionsTool: ToolDefinition = {
type: 'number',
description: 'Look back N days from now. Alternative to from/to.',
},
limit: {
type: 'number',
description: 'Maximum number of sessions to display (default: 30).',
},
},
required: ['project'],
},
@@ -326,6 +330,7 @@ export const listSessionsTool: ToolDefinition = {
let from = args.from as string | undefined;
let to = args.to as string | undefined;
const daysBack = args.days_back as number | undefined;
const limit = (args.limit as number | undefined) ?? 30;

if (daysBack !== null && daysBack !== undefined) {
to = new Date().toISOString();
@@ -338,7 +343,11 @@
return `No sessions found for project "${project}".`;
}

const lines = sessions.map((s) => {
const totalCount = sessions.length;
const truncated = totalCount > limit;
const displaySessions = truncated ? sessions.slice(0, limit) : sessions;

const lines = displaySessions.map((s) => {
const start = new Date(s.firstChunkTime).toLocaleDateString('en-US', {
month: 'short',
day: 'numeric',
@@ -352,7 +361,11 @@
return `- ${s.sessionId.slice(0, 8)} (${start} – ${end}, ${s.chunkCount} chunks, ${s.totalTokens} tokens)`;
});

return `Sessions for "${project}" (${sessions.length} total):\n${lines.join('\n')}`;
let output = `Sessions for "${project}" (${totalCount} total):\n${lines.join('\n')}`;
if (truncated) {
output += `\n(showing ${limit} of ${totalCount} sessions — use 'from'/'to' to narrow)`;
}
return output;
},
};
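The truncation change reduces to a slice plus a notice. A self-contained sketch of just that behavior (`formatSessionList` is a hypothetical name; it assumes the input list arrives most-recent-first, so taking the first `limit` entries keeps the newest sessions):

```typescript
// Sketch of the limit/truncation behavior: show at most `limit` entries
// and append a notice when results were cut off. Assumes the input is
// already sorted most-recent-first, so slice(0, limit) keeps the newest.
function formatSessionList(sessionIds: string[], limit = 30): string {
  const totalCount = sessionIds.length;
  const truncated = totalCount > limit;
  const shown = truncated ? sessionIds.slice(0, limit) : sessionIds;

  let output = shown.map((id) => `- ${id}`).join('\n');
  if (truncated) {
    output += `\n(showing ${limit} of ${totalCount} sessions; narrow with 'from'/'to')`;
  }
  return output;
}
```

Keeping the full `totalCount` in the notice means callers still learn how many sessions matched, even when most are elided.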

26 changes: 19 additions & 7 deletions src/retrieval/search-assembler.ts
@@ -289,7 +289,16 @@ export async function searchContext(request: SearchRequest): Promise<SearchRespo
}

/**
* Assemble text within token budget.
* Formatting overhead constants.
*
* Per-chunk: header (~50 tokens from formatSearchChunk) + separator (~3 tokens) + margin.
* Fixed: response header (~20 tokens) + diagnostics (~100-500 tokens) added by tools.ts.
*/
const SEARCH_FIXED_OVERHEAD = 200;
const SEARCH_PER_CHUNK_OVERHEAD = 55;

/**
* Assemble text within token budget, reserving space for formatting overhead.
*/
function assembleWithinBudget(
ranked: RankedItem[],
@@ -306,6 +315,8 @@
source?: 'vector' | 'keyword' | 'cluster';
}>;
} {
const effectiveBudget = Math.max(0, maxTokens - SEARCH_FIXED_OVERHEAD);

const parts: string[] = [];
const includedChunks: Array<{
id: string;
@@ -314,20 +325,21 @@
preview: string;
source?: 'vector' | 'keyword' | 'cluster';
}> = [];
let totalTokens = 0;
let budgetUsed = 0;

for (const item of ranked) {
const chunk = getChunkById(item.chunkId);
if (!chunk) continue;

const chunkTokens = chunk.approxTokens || approximateTokens(chunk.content);
const chunkCost = chunkTokens + SEARCH_PER_CHUNK_OVERHEAD;

if (totalTokens + chunkTokens > maxTokens) {
const remainingTokens = maxTokens - totalTokens;
if (budgetUsed + chunkCost > effectiveBudget) {
const remainingTokens = effectiveBudget - budgetUsed - SEARCH_PER_CHUNK_OVERHEAD;
if (remainingTokens > 100) {
const truncated = truncateChunk(chunk.content, remainingTokens);
parts.push(formatSearchChunk(chunk, truncated, item.score));
totalTokens += approximateTokens(truncated);
budgetUsed += approximateTokens(truncated) + SEARCH_PER_CHUNK_OVERHEAD;
includedChunks.push({
id: chunk.id,
sessionSlug: chunk.sessionSlug,
@@ -340,7 +352,7 @@
}

parts.push(formatSearchChunk(chunk, chunk.content, item.score));
totalTokens += chunkTokens;
budgetUsed += chunkCost;
includedChunks.push({
id: chunk.id,
sessionSlug: chunk.sessionSlug,
@@ -352,7 +364,7 @@

return {
text: parts.join('\n\n---\n\n'),
tokenCount: totalTokens,
tokenCount: budgetUsed,
includedChunks,
};
}
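The core of the fix is a two-part reservation: subtract the fixed overhead once, then charge every chunk its own tokens plus the per-chunk overhead. Isolated as a sketch (`chunksWithinBudget` is a hypothetical helper, not part of the module; the constants mirror the values above):

```typescript
const FIXED_OVERHEAD = 200; // response header + diagnostics margin
const PER_CHUNK_OVERHEAD = 55; // per-chunk header + separator

// Returns how many chunks fit once formatting overhead is reserved.
function chunksWithinBudget(chunkTokens: number[], maxTokens: number): number {
  const effectiveBudget = Math.max(0, maxTokens - FIXED_OVERHEAD);
  let used = 0;
  let count = 0;
  for (const tokens of chunkTokens) {
    const cost = tokens + PER_CHUNK_OVERHEAD;
    if (used + cost > effectiveBudget) break;
    used += cost;
    count += 1;
  }
  return count;
}
```

For example, with `maxTokens = 2400` only two 1000-token chunks are admitted, leaving room for the roughly 310 tokens of headers, separators, and diagnostics the response itself adds.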
26 changes: 19 additions & 7 deletions src/retrieval/session-reconstructor.ts
@@ -109,32 +109,44 @@ export function resolveTimeWindow(req: ReconstructRequest): {
}

/**
* Apply token budget to a list of chunks.
* Formatting overhead for reconstruction output.
*
* Fixed: result header (~25 tokens) + truncation notice + margin.
* Per-chunk: separator `---` (~2 tokens) + session/agent headers amortized (~3 tokens).
*/
const RECONSTRUCT_FIXED_OVERHEAD = 50;
const RECONSTRUCT_PER_CHUNK_OVERHEAD = 5;

/**
* Apply token budget to a list of chunks, reserving space for formatting overhead.
* Returns the subset that fits within the budget.
*/
export function applyTokenBudget(
chunks: StoredChunk[],
maxTokens: number,
keepNewest: boolean,
): { kept: StoredChunk[]; truncated: boolean } {
let totalTokens = 0;
const effectiveBudget = Math.max(0, maxTokens - RECONSTRUCT_FIXED_OVERHEAD);

let totalCost = 0;
for (const c of chunks) {
totalTokens += c.approxTokens;
totalCost += c.approxTokens + RECONSTRUCT_PER_CHUNK_OVERHEAD;
}

if (totalTokens <= maxTokens) {
if (totalCost <= effectiveBudget) {
return { kept: chunks, truncated: false };
}

// Walk from the preferred end and collect until budget exhausted
const ordered = keepNewest ? [...chunks].reverse() : [...chunks];
const kept: StoredChunk[] = [];
let budget = maxTokens;
let budget = effectiveBudget;

for (const chunk of ordered) {
if (chunk.approxTokens > budget) break;
const chunkCost = chunk.approxTokens + RECONSTRUCT_PER_CHUNK_OVERHEAD;
if (chunkCost > budget) break;
kept.push(chunk);
budget -= chunk.approxTokens;
budget -= chunkCost;
}

// Restore chronological order
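The keep-newest walk plus chronological restore can be sketched on its own (overhead constants are omitted here for brevity; the ids and helper name are illustrative):

```typescript
interface ChunkLike {
  id: string;
  tokens: number;
}

// Walk from the preferred end, keep chunks while the budget allows,
// then restore chronological order before returning.
function applyBudgetSketch(
  chunks: ChunkLike[],
  budget: number,
  keepNewest: boolean,
): string[] {
  const ordered = keepNewest ? [...chunks].reverse() : [...chunks];
  const kept: ChunkLike[] = [];
  let remaining = budget;
  for (const chunk of ordered) {
    if (chunk.tokens > remaining) break;
    kept.push(chunk);
    remaining -= chunk.tokens;
  }
  if (keepNewest) kept.reverse(); // back to chronological order
  return kept.map((c) => c.id);
}
```

With three 100-token chunks and a budget of 200, `keepNewest: true` drops the oldest chunk but still returns the survivors oldest-first.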
4 changes: 2 additions & 2 deletions test/cli/skill-templates.test.ts
@@ -204,10 +204,10 @@ describe('skill-templates', () => {

it('causantic-cleanup references memory tools in Phase 1.5', () => {
const skill = CAUSANTIC_SKILLS.find((s) => s.dirName === 'causantic-cleanup')!;
expect(skill.content).toContain('`recall`');
expect(skill.content).toContain('`search`');
expect(skill.content).toContain('Phase 1.5');
expect(skill.content).toContain('max_tokens: 8000');
expect(skill.content).toContain('max_tokens: 4000');
expect(skill.content).toContain('sequentially');
});

it('causantic-forget has argument-hint', () => {
15 changes: 10 additions & 5 deletions test/retrieval/session-reconstructor.test.ts
@@ -171,7 +171,8 @@ describe('applyTokenBudget', () => {
makeChunk({ id: 'c2', approxTokens: 100, startTime: '2024-01-15T11:00:00Z' }),
makeChunk({ id: 'c3', approxTokens: 100, startTime: '2024-01-15T12:00:00Z' }),
];
const { kept, truncated } = applyTokenBudget(chunks, 200, true);
// Budget: 50 fixed overhead + 2×(100+5) per-chunk = 260
const { kept, truncated } = applyTokenBudget(chunks, 260, true);
expect(kept).toHaveLength(2);
expect(kept[0].id).toBe('c2'); // Kept newer
expect(kept[1].id).toBe('c3');
@@ -184,7 +185,8 @@
makeChunk({ id: 'c2', approxTokens: 100, startTime: '2024-01-15T11:00:00Z' }),
makeChunk({ id: 'c3', approxTokens: 100, startTime: '2024-01-15T12:00:00Z' }),
];
const { kept, truncated } = applyTokenBudget(chunks, 200, false);
// Budget: 50 fixed overhead + 2×(100+5) per-chunk = 260
const { kept, truncated } = applyTokenBudget(chunks, 260, false);
expect(kept).toHaveLength(2);
expect(kept[0].id).toBe('c1'); // Kept older
expect(kept[1].id).toBe('c2');
@@ -211,7 +213,8 @@
makeChunk({ id: 'c3', approxTokens: 50 }),
makeChunk({ id: 'c4', approxTokens: 50 }),
];
const { kept } = applyTokenBudget(chunks, 150, true);
// Budget: 50 fixed overhead + 3×(50+5) per-chunk = 215
const { kept } = applyTokenBudget(chunks, 215, true);
expect(kept).toHaveLength(3);
expect(kept[0].id).toBe('c2');
expect(kept[1].id).toBe('c3');
@@ -630,11 +633,12 @@ describe('reconstructSession (integration)', () => {
}),
);

// Budget: 50 fixed overhead + 1×(100+5) per-chunk = 155
const result = reconstructSession({
project: 'proj',
from: '2024-01-15T00:00:00Z',
to: '2024-01-16T00:00:00Z',
maxTokens: 100,
maxTokens: 155,
});

expect(result.chunks).toHaveLength(1);
Expand Down Expand Up @@ -668,11 +672,12 @@ describe('reconstructSession (integration)', () => {
}),
);

// Budget: 50 fixed overhead + 1×(100+5) per-chunk = 155
const result = reconstructSession({
project: 'proj',
from: '2024-01-15T00:00:00Z',
to: '2024-01-16T00:00:00Z',
maxTokens: 100,
maxTokens: 155,
keepNewest: false,
});
