From b663a55b0d1a588dc1be861dcfc4fb3c3d998a7d Mon Sep 17 00:00:00 2001
From: Florian DAVID <fdavid@digital-village.fr>
Date: Sun, 21 Jun 2026 23:11:38 +0200
Subject: [PATCH] fix: cap extract size so long sessions can't stall saves
 (#96)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A single long-lived session can grow an extract larger than Haiku's
context window. build-prompt embedded the full extract with no size
cap, so the Haiku call failed, the save aborted, and daily rotation
stopped. It was self-reinforcing: a failed save never advanced the
saved position, so the same session re-extracted the full transcript
and failed identically on every later save.

cmd_build_prompt now caps the extract at thresholds.extract_max_bytes,
keeping the most-recent tail with a truncation note so the summary
still reflects current work. save-session.sh reads the value from
config (default 300000 bytes, ~300 KB) and passes it through the CLI
dispatcher. 0 disables the cap; the Python parameter itself defaults
to 0, so callers that omit it stay uncapped.

Distinct from #94 (timeout + reachable error handler) — complementary,
not a replacement. Thanks to @selvi5006-commits for the diagnosis and
a tested patch.

Co-Authored-By: Max <noreply>
---
 .claude-plugin/plugin.json |  2 +-
 CHANGELOG.md               |  6 ++++
 README.md                  |  5 ++--
 config.example.json        |  3 +-
 pipeline/shell.py          | 18 ++++++++++++
 scripts/save-session.sh    |  3 +-
 tests/test_prompts.py      | 56 ++++++++++++++++++++++++++++++++++++++
 tests/test_shell.py        | 16 +++++++++++
 8 files changed, 104 insertions(+), 5 deletions(-)
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index 378ea6d..948e7ed 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "remember",
   "description": "Continuous memory for Claude Code. Extracts, summarizes, and compresses conversations into tiered daily logs. Claude remembers what you did yesterday.",
-  "version": "0.8.1",
+  "version": "0.8.2",
   "author": {
     "name": "Digital Process Tools"
   },
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9d431dc..58545a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.8.2] — Oversized-extract guard keeps long sessions saving
+
+### Fixed
+
+- **A very long session could silently halt all memory saves** ([#96](https://github.com/Digital-Process-Tools/claude-remember/issues/96)) — a single long-lived session can grow an extract larger than Haiku's context window. `build-prompt` embedded the full extract with no size cap, so the Haiku call failed, the save aborted, and daily rotation stopped. Worse, it was self-reinforcing: a failed save never advanced the saved position, so the same session re-extracted the full transcript and failed identically on every subsequent save. The extract is now capped at `thresholds.extract_max_bytes` (default 300 KB), keeping the most-recent tail with a truncation note so the summary still reflects current work. Set to `0` to disable. Thanks to [@selvi5006-commits](https://github.com/selvi5006-commits) for the precise diagnosis and a tested patch.
+
 ## [0.8.1] — Handoff survives context-preview truncation
 
 ### Fixed
diff --git a/README.md b/README.md
index 28557b5..b1cd508 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
 [![Python](https://img.shields.io/badge/python-3.9%2B-blue)](https://www.python.org/)
 [![OS](https://img.shields.io/badge/tested%20on-Linux%20%7C%20macOS%20%7C%20Windows-blue)](https://github.com/Digital-Process-Tools/claude-remember/actions/workflows/tests.yml)
 [![License](https://img.shields.io/badge/license-Community-brightgreen)](LICENSE)
-[![Version](https://img.shields.io/badge/version-0.8.1-orange)](.claude-plugin/plugin.json)
+[![Version](https://img.shields.io/badge/version-0.8.2-orange)](.claude-plugin/plugin.json)
 
 Claude Code starts every session blank. It doesn't know what you worked on yesterday, what conventions your team follows, or what mistakes it already made. You re-explain everything, every time.
 
@@ -35,7 +35,7 @@ To update later:
 
 Claude Remember is also available in the official Anthropic Marketplace. In Claude Code, type `/plugin` and search for "remember".
 
-**Known issue — stuck on v0.5.0:** The Anthropic marketplace is still serving v0.5.0, which has known bugs ([#54](https://github.com/Digital-Process-Tools/claude-remember/issues/54) hook stderr redirect fails on first session, [#14](https://github.com/Digital-Process-Tools/claude-remember/issues/14) NDC subshell killed by `set -e`). Anthropic takes a long time to roll updates to the official marketplace. All of these are fixed in v0.8.1 — install from the DPT marketplace above to get the current version.
+**Known issue — stuck on v0.5.0:** The Anthropic marketplace is still serving v0.5.0, which has known bugs ([#54](https://github.com/Digital-Process-Tools/claude-remember/issues/54) hook stderr redirect fails on first session, [#14](https://github.com/Digital-Process-Tools/claude-remember/issues/14) NDC subshell killed by `set -e`). Anthropic takes a long time to roll updates to the official marketplace. All of these are fixed in v0.8.2 — install from the DPT marketplace above to get the current version.
 
 **Known issue — `plugin update`:** The official marketplace's `plugin update` command may report "already at latest version" even when it's not — it checks a stale local cache without pulling first ([#37252](https://github.com/anthropics/claude-code/issues/37252), [#38271](https://github.com/anthropics/claude-code/issues/38271)). Another reason to use our marketplace instead.
 
@@ -214,6 +214,7 @@ Put cross-project preferences (timezone, cooldowns) in `~/.remember/config.json`
 | `cooldowns.git_backup_seconds`   | `900`            | Minimum seconds between auto-backup commits (no-op if `~/.remember/` is not a git repo)                                                                                                                                                |
 | `thresholds.min_human_messages`  | `3`              | Minimum messages before saving                                                                                                                                                                                                         |
 | `thresholds.delta_lines_trigger` | `50`             | Tool call output lines that trigger auto-save                                                                                                                                                                                          |
+| `thresholds.extract_max_bytes`   | `300000`         | Max UTF-8 byte size of the session extract sent to Haiku. Larger extracts are truncated to their most-recent tail so a very long session can't overflow the model's context window and silently stall saves. `0` disables the cap.     |
 | `features.ndc_compression`       | `true`           | Enable hourly compression of daily files                                                                                                                                                                                               |
 | `features.recovery`              | `true`           | Recover missed saves on session start                                                                                                                                                                                                  |
 | `timezone`                       | _(system local)_ | IANA name (e.g. `America/New_York`, `Europe/Paris`) for timestamps and daily file boundaries. Omit or leave empty to use the system clock's local zone. Set this explicitly on a VPS whose system clock is UTC.                        |
diff --git a/config.example.json b/config.example.json
index 37a3778..efd0bce 100644
--- a/config.example.json
+++ b/config.example.json
@@ -7,7 +7,8 @@
   },
   "thresholds": {
     "min_human_messages": 3,
-    "delta_lines_trigger": 50
+    "delta_lines_trigger": 50,
+    "extract_max_bytes": 300000
   },
   "features": {
     "ndc_compression": true,
diff --git a/pipeline/shell.py b/pipeline/shell.py
index b501c06..19ea318 100644
--- a/pipeline/shell.py
+++ b/pipeline/shell.py
@@ -100,6 +100,7 @@ def cmd_build_prompt(
     time: str,
     branch: str,
     output_file: str,
+    max_extract_bytes: int = 0,
 ) -> None:
     """Build the save-summary prompt and write it to an output file.
 
@@ -112,12 +113,28 @@ def cmd_build_prompt(
         time: Current timestamp string (e.g., "14:32").
         branch: Current git branch name.
         output_file: Path where the assembled prompt will be written.
+        max_extract_bytes: Upper bound on the extract's UTF-8 byte size. A
+            long-lived session can accumulate an extract larger than Haiku's
+            context window, making the prompt unsendable and silently halting
+            daily rotation (#96). When the extract exceeds this size, keep only
+            the most-recent tail (the work worth summarizing) and prepend a
+            truncation note. ``0`` disables the cap.
     """
     with open(extract_file, encoding="utf-8", errors="replace") as f:
         extract = f.read().strip()
     with open(last_entry_file, encoding="utf-8", errors="replace") as f:
         last_entry = f.read().strip()
 
+    if max_extract_bytes > 0:
+        raw = extract.encode("utf-8")
+        if len(raw) > max_extract_bytes:
+            kept = raw[-max_extract_bytes:].decode("utf-8", errors="replace")
+            extract = (
+                f"[NOTE: transcript truncated to the last {max_extract_bytes} "
+                f"of {len(raw)} bytes — summarize the most recent work below]"
+                f"\n\n{kept}"
+            )
+
     prompt = build_save_prompt(
         time=time,
         branch=branch,
@@ -349,6 +366,7 @@ def main() -> None:
             time=sys.argv[4],
             branch=sys.argv[5],
             output_file=sys.argv[6],
+            max_extract_bytes=int(sys.argv[7]) if len(sys.argv) > 7 else 0,
         )
     elif cmd == "build-ndc-prompt":
         cmd_build_ndc_prompt(memory_file=sys.argv[2], output_file=sys.argv[3])
diff --git a/scripts/save-session.sh b/scripts/save-session.sh
index dbf3a2f..ffc9fdd 100755
--- a/scripts/save-session.sh
+++ b/scripts/save-session.sh
@@ -162,7 +162,8 @@ fi
 TMP_PROMPT=$(mktemp "${TMPDIR:-/tmp}"/remember-prompt-XXXXXX)
 CLEANUP_FILES+=("$TMP_PROMPT")
 
-cd "$PIPELINE_DIR" && $PYTHON -m pipeline.shell build-prompt "$EXTRACT_FILE" "$TMP_LAST_ENTRY" "$CURRENT_TIME" "$BRANCH" "$TMP_PROMPT"
+EXTRACT_MAX_BYTES=$(config ".thresholds.extract_max_bytes" 300000)
+cd "$PIPELINE_DIR" && $PYTHON -m pipeline.shell build-prompt "$EXTRACT_FILE" "$TMP_LAST_ENTRY" "$CURRENT_TIME" "$BRANCH" "$TMP_PROMPT" "$EXTRACT_MAX_BYTES"
 
 [ ! -s "$TMP_PROMPT" ] && { log "prompt" "ERROR: empty"; exit 1; }
 grep -q '{{TIME}}\|{{BRANCH}}\|{{LAST_ENTRY}}\|{{EXTRACT}}' "$TMP_PROMPT" && { log "prompt" "ERROR: unsubstituted placeholders in prompt"; exit 1; }
diff --git a/tests/test_prompts.py b/tests/test_prompts.py
index 76f54a5..02756b3 100644
--- a/tests/test_prompts.py
+++ b/tests/test_prompts.py
@@ -9,6 +9,7 @@
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 
 from pipeline import prompts
+from pipeline import shell
 
 
 def _make_template(tmpdir: str, name: str, content: str) -> None:
@@ -119,3 +120,58 @@ def test_build_consolidation_prompt_empty_staging(monkeypatch):
         assert "Staging:\n" in result
         assert "# Recent" in result
         assert "# Archive" in result
+
+
+def _run_build_prompt(monkeypatch, extract, max_extract_bytes):
+    """Run shell.cmd_build_prompt on a stub template and return the prompt text."""
+    with tempfile.TemporaryDirectory() as d:
+        _make_template(d, "save-session.prompt.txt", "{{EXTRACT}}")  # extract slot only
+        monkeypatch.setattr(prompts, "PROMPTS_DIR", d)  # redirect PROMPTS_DIR to temp dir
+
+        extract_file = os.path.join(d, "extract.txt")
+        last_entry_file = os.path.join(d, "last.txt")
+        output_file = os.path.join(d, "prompt.txt")
+        with open(extract_file, "w", encoding="utf-8") as f:
+            f.write(extract)
+        with open(last_entry_file, "w", encoding="utf-8") as f:
+            f.write("(no previous entry)")
+
+        shell.cmd_build_prompt(
+            extract_file=extract_file,
+            last_entry_file=last_entry_file,
+            time="10:30",
+            branch="master",
+            output_file=output_file,
+            max_extract_bytes=max_extract_bytes,
+        )
+        with open(output_file, encoding="utf-8") as f:  # read before temp dir cleanup
+            return f.read()
+
+
+def test_build_prompt_caps_oversized_extract(monkeypatch):
+    """An extract over the cap keeps only its tail, prefixed with a NOTE."""
+    extract = "HEAD_MARKER\n" + ("x" * 5000) + "\nTAIL_MARKER"  # ~5 KB vs. 200-byte cap
+    result = _run_build_prompt(monkeypatch, extract, max_extract_bytes=200)
+
+    assert "TAIL_MARKER" in result          # most-recent work survives
+    assert "HEAD_MARKER" not in result       # oldest content dropped
+    assert "truncated to the last 200" in result  # NOTE reports the cap that was applied
+    # Prompt = NOTE + kept tail: the 200-byte tail plus up to 200 bytes allowed for the NOTE.
+    assert len(result.encode("utf-8")) < 200 + 200
+
+
+def test_build_prompt_keeps_small_extract_intact(monkeypatch):
+    """An extract under the cap is passed through unchanged (no NOTE)."""
+    extract = "[HUMAN] hi\n[AGENT] hello"  # far below the 300000-byte cap used here
+    result = _run_build_prompt(monkeypatch, extract, max_extract_bytes=300000)
+
+    assert result == extract  # stub template is only {{EXTRACT}}, so prompt == extract
+    assert "truncated" not in result
+
+
+def test_build_prompt_cap_disabled_with_zero(monkeypatch):
+    """max_extract_bytes=0 disables the cap entirely (back-compat default)."""
+    extract = "A" * 10000  # 10000 bytes of ASCII; must come back untouched
+    result = _run_build_prompt(monkeypatch, extract, max_extract_bytes=0)
+
+    assert result == extract  # no NOTE prepended, nothing dropped
diff --git a/tests/test_shell.py b/tests/test_shell.py
index 19f6695..1944092 100644
--- a/tests/test_shell.py
+++ b/tests/test_shell.py
@@ -630,6 +630,22 @@ def test_main_dispatches_build_prompt():
         time="15m",
         branch="main",
         output_file="out",
+        max_extract_bytes=0,
+    )
+
+
+def test_main_dispatches_build_prompt_with_max_extract_bytes():
+    with patch("pipeline.shell.cmd_build_prompt") as mock_fn:  # stub out the real command
+        with patch("sys.argv",  # trailing "300000" is the optional cap argument
+                   ["shell.py", "build-prompt", "ef", "lef", "15m", "main", "out", "300000"]):
+            main()
+    mock_fn.assert_called_once_with(
+        extract_file="ef",
+        last_entry_file="lef",
+        time="15m",
+        branch="main",
+        output_file="out",
+        max_extract_bytes=300000,  # argv string "300000" must arrive as an int
     )