From bef6dcf6fd1ce136a159410cef9f5e132b1d7750 Mon Sep 17 00:00:00 2001
From: "Christian M. Todie"
Date: Tue, 7 Apr 2026 01:39:15 -0400
Subject: [PATCH] perf(claude-code): shrink SessionStart hook injection
 (~10KB → ~2KB)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SessionStart hook was injecting ~10 KB of additionalContext into every
Claude Code session:

- ~1.8 KB hardcoded "ACTIVE PROTOCOL" heredoc that duplicates the rules
  already shipped in skills/memory/SKILL.md (which loads on demand)
- ~8 KB of /context payload, because the server inlines up to 300 chars of
  raw (often multi-line markdown) content per observation bullet and returns
  up to MaxContextResults (default 20)

On a busy project this meant every new session burned ~2.5k tokens on
redundant protocol reminders and verbose observation previews before the
user had typed a single word.

Changes to plugin/claude-code/scripts/session-start.sh:

1. Replace the 35-line PROTOCOL heredoc with a short pointer that lists the
   available tools and directs the agent to the engram:memory skill for the
   full protocol. The skill is already part of this plugin, so the rules are
   one ToolSearch away when they are actually needed.

2. Post-process the /context response with awk to (a) concatenate each
   observation's multi-line content onto a single line, (b) collapse
   whitespace, (c) cap per-bullet length at ENGRAM_CONTEXT_MAXLEN chars
   (default 140), and (d) keep at most ENGRAM_CONTEXT_LIMIT bullets
   (default 8). Both tunables are env-overridable so users can dial the
   verbosity back up if they want.

No server or Go changes — fully backward compatible. The raw /context
endpoint behaviour is unchanged; only the hook's rendering of it is trimmed.
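The compaction in change (2) can be sketched in isolation. The sample
/context excerpt below is invented for illustration, and the caps are shrunk
to lim=1 / max=40 so both the bullet limit and the truncation are visible in
a short run; the hook itself uses the CTX_LIMIT / CTX_MAXLEN defaults:

```shell
# Sketch of the awk compaction pass, run on an invented /context excerpt.
compact() {
  awk -v lim="$1" -v max="$2" '
    function flush() {
      if (buf == "") return
      if (kept < lim) {
        gsub(/[[:space:]]+/, " ", buf)            # collapse whitespace runs
        if (length(buf) > max) buf = substr(buf, 1, max - 1) "…"
        print buf
        kept++
      }
      buf = ""
    }
    /^### Recent Observations/ { flush(); in_obs = 1; print; next }
    /^### / { flush(); in_obs = 0; print; next }   # other sections pass through
    in_obs && /^- \[/ { flush(); buf = $0; next }  # a new observation bullet
    in_obs { if (buf != "") buf = buf " " $0; next } # continuation line: join
    { print }
    END { flush() }
  '
}

sample='### Recent Observations
- [obs-1] First line of a long markdown
  observation that continues here
- [obs-2] Second observation
### Recent Sessions
- session A'

out=$(printf '%s\n' "$sample" | compact 1 40)
printf '%s\n' "$out"
```

With lim=1 only the first bullet survives (flattened and truncated to 39
chars plus an ellipsis), while the "### Recent Sessions" section passes
through untouched.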
Measured on a ctodie project with 200+ observations:

  before:  7961 B context + ~1800 B protocol ≈ 9.8 KB per session start
  after:   1487 B context +  ~450 B protocol ≈ 1.9 KB per session start
  savings: ~8 KB (~80%) of additionalContext per session

Co-Authored-By: Claude Opus 4.6
---
 plugin/claude-code/scripts/session-start.sh | 85 ++++++++++++---------
 1 file changed, 50 insertions(+), 35 deletions(-)

diff --git a/plugin/claude-code/scripts/session-start.sh b/plugin/claude-code/scripts/session-start.sh
index 8365e10..e9e2049 100755
--- a/plugin/claude-code/scripts/session-start.sh
+++ b/plugin/claude-code/scripts/session-start.sh
@@ -4,11 +4,22 @@
 # 1. Ensures the engram server is running
 # 2. Creates a session in engram
 # 3. Auto-imports git-synced chunks if .engram/manifest.json exists
-# 4. Injects Memory Protocol instructions + memory context
+# 4. Injects a minimal tool-availability pointer + compacted memory context
+#
+# Memory protocol (when/what to save, search, close) lives in the
+# `engram:memory` skill shipped with this plugin and is loaded on demand.
+# Re-injecting the full protocol on every SessionStart wastes ~1.8 KB of
+# context window per session, so this script only emits a short pointer.
 
 ENGRAM_PORT="${ENGRAM_PORT:-7437}"
 ENGRAM_URL="http://127.0.0.1:${ENGRAM_PORT}"
 
+# Tunables (override via env)
+#   ENGRAM_CONTEXT_LIMIT  — max observations to inject (default 8)
+#   ENGRAM_CONTEXT_MAXLEN — max chars per observation line (default 140)
+CTX_LIMIT="${ENGRAM_CONTEXT_LIMIT:-8}"
+CTX_MAXLEN="${ENGRAM_CONTEXT_MAXLEN:-140}"
+
 # Load shared helpers
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 source "${SCRIPT_DIR}/_helpers.sh"
@@ -55,46 +66,50 @@ fi
 ENCODED_PROJECT=$(printf '%s' "$PROJECT" | jq -sRr @uri)
 CONTEXT=$(curl -sf "${ENGRAM_URL}/context?project=${ENCODED_PROJECT}" --max-time 3 2>/dev/null | jq -r '.context // empty')
 
-# Inject Memory Protocol + context — stdout goes to Claude as additionalContext
-cat <<'PROTOCOL'
-## Engram Persistent Memory — ACTIVE PROTOCOL
-
-You have engram memory tools. This protocol is MANDATORY and ALWAYS ACTIVE.
-
-### CORE TOOLS — always available, no ToolSearch needed
-mem_save, mem_search, mem_context, mem_session_summary, mem_get_observation, mem_save_prompt
-
-Use ToolSearch for other tools: mem_update, mem_suggest_topic_key, mem_session_start, mem_session_end, mem_stats, mem_delete, mem_timeline, mem_capture_passive
-
-### PROACTIVE SAVE — do NOT wait for user to ask
-Call `mem_save` IMMEDIATELY after ANY of these:
-- Decision made (architecture, convention, workflow, tool choice)
-- Bug fixed (include root cause)
-- Convention or workflow documented/updated
-- Notion/Jira/GitHub artifact created or updated with significant content
-- Non-obvious discovery, gotcha, or edge case found
-- Pattern established (naming, structure, approach)
-- User preference or constraint learned
-- Feature implemented with non-obvious approach
-- User confirms your recommendation ("dale", "go with that", "sounds good", "sí, esa")
-- User rejects an approach or expresses a preference ("no, better X", "I prefer X", "siempre hacé X")
-- Discussion concludes with a clear direction chosen
+# Compact the "### Recent Observations" section: keep at most $CTX_LIMIT
+# observations, each flattened onto a single line and truncated to
+# $CTX_MAXLEN chars. The server inlines up to 300 chars of raw content per
+# bullet (often multi-line, since session summaries are markdown documents),
+# so a raw /context response for a busy project is ~8 KB. This awk pass
+# concatenates each bullet's continuation lines, collapses whitespace, and
+# caps both the count and per-bullet length — typical injected context drops
+# to ~1.5 KB. Headers, recent sessions, and recent prompts pass through.
+if [ -n "$CONTEXT" ]; then
+  CONTEXT=$(printf '%s\n' "$CONTEXT" | awk -v lim="$CTX_LIMIT" -v max="$CTX_MAXLEN" '
+    function flush() {
+      if (buf == "") return
+      if (kept < lim) {
+        gsub(/[[:space:]]+/, " ", buf)
+        if (length(buf) > max) buf = substr(buf, 1, max - 1) "…"
+        print buf
+        kept++
+      }
+      buf = ""
+    }
+    /^### Recent Observations/ { flush(); in_obs = 1; print; next }
+    /^### / { flush(); in_obs = 0; print; next }
+    in_obs && /^- \[/ { flush(); buf = $0; next }
+    in_obs { if (buf != "") buf = buf " " $0; next }
+    { print }
+    END { flush() }
+  ')
+fi
 
-**Self-check after EVERY task**: "Did I or the user just make a decision, confirm a recommendation, express a preference, fix a bug, learn something, or establish a convention? If yes → mem_save NOW."
+# Inject minimal protocol pointer + compacted context as additionalContext.
+cat <<'PROTOCOL'
+## Engram Memory — active
 
-### SEARCH MEMORY when:
-- User asks to recall anything ("remember", "what did we do", "acordate", "qué hicimos")
-- Starting work on something that might have been done before
-- User mentions a topic you have no context on
-- User's FIRST message references the project, a feature, or a problem — call `mem_search` with keywords from their message to check for prior work before responding
+Core tools (always available): mem_save, mem_search, mem_context,
+mem_session_summary, mem_get_observation, mem_suggest_topic_key, mem_update,
+mem_session_start, mem_session_end, mem_save_prompt.
+Admin tools via ToolSearch: mem_stats, mem_delete, mem_timeline, mem_capture_passive.
 
-### SESSION CLOSE — before saying "done"/"listo":
-Call `mem_session_summary` with: Goal, Discoveries, Accomplished, Next Steps, Relevant Files.
+Full protocol (when/what to save, search rules, session close) lives in the
+`engram:memory` skill — load it on demand when you need the rules.
 PROTOCOL
 
-# Inject memory context if available
 if [ -n "$CONTEXT" ]; then
-  printf "\n%s\n" "$CONTEXT"
+  printf '\n%s\n' "$CONTEXT"
 fi
 
 exit 0
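As a usage note on the tunables: both knobs are plain environment variables
resolved with `${VAR:-default}` fallbacks, so a per-invocation override needs
no config file. The one-liner below is a stand-in for the real hook
invocation (the echo mimics how the script resolves CTX_LIMIT / CTX_MAXLEN
from the env, defaults 8 / 140):

```shell
# Raise the bullet cap to 20 for one run; MAXLEN left unset falls back to 140.
out=$(ENGRAM_CONTEXT_LIMIT=20 ENGRAM_CONTEXT_MAXLEN= sh -c \
  'echo "limit=${ENGRAM_CONTEXT_LIMIT:-8} maxlen=${ENGRAM_CONTEXT_MAXLEN:-140}"')
printf '%s\n' "$out"   # limit=20 maxlen=140
```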