diff --git a/README.md b/README.md index b1cd508..985cb67 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,8 @@ A few runtime overrides aren't in `config.json` because they're per-shell rather | ------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `REMEMBER_BRANCH` | Overrides the `\| ` identity field in each `## HH:MM \| ` memory header. Useful when Claude Code runs from a non-git directory (`$HOME`, a scratch dir) — without it the header falls back to the literal string `unknown`, which collapses the identity slot for every entry. Set to a meaningful tag (e.g. `laptop`, `cloud`, `staging`, an instance name) in your shell rc. | | `REMEMBER_DEBUG` | `1` (default) emits verbose hook/cooldown lines to logs; `0` silences them. | +| `REMEMBER_MODEL` | Model used for summarization/consolidation (the `claude -p` call). Default `haiku`. Point it at a more capable tier (e.g. `sonnet`) to improve salience and compression-cap compliance — the call is backgrounded, so there's no interactive-latency cost. **`config.json` → `model` is the source of truth** (per-project); this env var overrides it. Blank falls back to the default. | +| `REMEMBER_REJECT_PATTERN` | Overrides the reject-gate regex that keeps model refusals/clarifications out of the memory layer. Blank → the narrow built-in default (anchored refusal/clarification stems only); `none` → gate disabled (only the literal `SKIP` contract applies); anything else → a custom case-insensitive regex. An invalid regex falls back to the default rather than failing the run. **`config.json` → `reject_pattern` is the source of truth**; this env var overrides it. 
| | `REMEMBER_TZ` | Set automatically by `log.sh` from `config.json` → `timezone`. Don't set this manually unless you're debugging. | ## External storage mode diff --git a/config.example.json b/config.example.json index efd0bce..34c0446 100644 --- a/config.example.json +++ b/config.example.json @@ -16,8 +16,23 @@ }, "debug": false, "time_format": "24h", + "model": "haiku", + "reject_pattern": "", "_comments": { + "model": [ + "Model for the summarize/consolidate calls (claude -p). Default 'haiku'.", + "Set 'sonnet' (or another tier) to improve salience + compression-cap", + "compliance; the call is backgrounded so there is no interactive cost.", + "An explicit REMEMBER_MODEL env var overrides this." + ], + "reject_pattern": [ + "Reject-gate regex keeping model refusals/clarifications out of memory.", + "'' -> the narrow built-in default (anchored refusal stems only).", + "'none' -> gate disabled (only the literal SKIP contract applies).", + "anything else -> a custom case-insensitive regex; invalid falls back to", + "the default. An explicit REMEMBER_REJECT_PATTERN env var overrides this." + ], "data_dir": [ "Controls where memory data files are stored.", "", diff --git a/pipeline/haiku.py b/pipeline/haiku.py index 1788461..937f66b 100644 --- a/pipeline/haiku.py +++ b/pipeline/haiku.py @@ -158,22 +158,44 @@ def call_haiku( return _parse_response(result.stdout) -# Reject-gate: a real memory entry starts with "##" (a header) or is exactly -# SKIP. Conversational refusals / clarifications must NEVER reach the memory -# layer (the audit found a model refusal stored verbatim as a memory). Anchored -# at the START so dense legitimate summaries are never dropped. 
-_NON_SUMMARY = re.compile(
-    r"^\s*(i (cannot|can't|can not|won't|will not|am unable|'m unable|"
-    r"don't have|do not have|need (you|the))|could you|do you want|"
-    r"please (provide|paste|share)|there (is|are) no|i'm sorry|sorry[,!]|"
-    r"unfortunately|i notice|it (seems|looks like|appears))",
-    re.I,
+# Reject-gate: conversational refusals / clarifications must NEVER reach the
+# memory layer (the audit found a model refusal stored verbatim as a memory).
+# The DEFAULT pattern is deliberately NARROW — anchored at the start and limited
+# to unambiguous refusal/clarification stems — so dense legitimate summaries
+# (which may legitimately open "Unfortunately the build broke...", "There are no
+# blockers...", "I notice the cache was stale...") are never silently dropped.
+# Widen, override, or disable via REMEMBER_REJECT_PATTERN (see _resolve_reject_pattern).
+DEFAULT_REJECT_PATTERN = (
+    r"^\s*("
+    r"i (cannot|can't|can not|won't|will not|am unable|'m unable|am not able)|"
+    r"could you|please (provide|paste|share)|i'm sorry|i am sorry"
+    r")\b"
 )
 
 
+def _resolve_reject_pattern() -> "re.Pattern[str] | None":
+    """Return the compiled reject-gate pattern, or None when the gate is disabled.
+
+    REMEMBER_REJECT_PATTERN is read and whitespace-stripped on every call (same
+    env-knob style as REMEMBER_MODEL / REMEMBER_MAX_TURNS): blank or unset picks
+    DEFAULT_REJECT_PATTERN, "none" in any letter case disables the gate, and
+    anything else is compiled as a custom case-insensitive regex. An invalid
+    regex falls back to the default rather than crashing the consolidation run.
+    """
+    raw = os.environ.get("REMEMBER_REJECT_PATTERN", "").strip()
+    if raw.lower() == "none":
+        return None
+    pattern = raw if raw else DEFAULT_REJECT_PATTERN
+    try:
+        return re.compile(pattern, re.I)
+    except re.error:
+        return re.compile(DEFAULT_REJECT_PATTERN, re.I)
+
+
 def _is_non_summary(text: str) -> bool:
     """True if the output looks like a refusal/clarification, not a summary."""
-    return bool(_NON_SUMMARY.match(text or ""))
+    pattern = _resolve_reject_pattern()
+    return bool(pattern.match(text or "")) if pattern else False
 
 
 def _parse_response(raw: str) -> HaikuResult:
diff --git a/scripts/log.sh b/scripts/log.sh
index fdb1faa..21f12a8 100644
--- a/scripts/log.sh
+++ b/scripts/log.sh
@@ -72,6 +72,15 @@ config() {
 REMEMBER_TZ=$(config ".timezone" "")
 export REMEMBER_TZ
 
+# Model + reject-gate knobs. Precedence: a non-empty shell env var wins, then
+# config.json, then the built-in default (${VAR:=...} also replaces an empty
+# value, so a blank env var defers to config). Exported here (log.sh is sourced
+# by every script) so the summarize/consolidate calls in pipeline/haiku.py see them.
+: "${REMEMBER_MODEL:=$(config ".model" "haiku")}"
+export REMEMBER_MODEL
+: "${REMEMBER_REJECT_PATTERN:=$(config ".reject_pattern" "")}"
+export REMEMBER_REJECT_PATTERN
+
 # Resolve "today" / "now" using REMEMBER_TZ when set, else system local.
 # Crucially, an empty REMEMBER_TZ must NOT produce `TZ="" date` — that's UTC.
_remember_date() { diff --git a/tests/test_haiku.py b/tests/test_haiku.py index 3aca8d0..9cddb8e 100644 --- a/tests/test_haiku.py +++ b/tests/test_haiku.py @@ -370,3 +370,63 @@ def test_parse_response_keeps_real_summaries(good): deliberately permissive about (format validation stays the shell's job).""" result = _parse_response(_mock_claude_response(good)) assert result.is_skip is False + + +# --- reject-gate: narrow default must NOT eat legit hedged summaries ---------- +@pytest.mark.parametrize("good", [ + "There are no blockers; merged !24648 and the pipeline is green.", + "Unfortunately the build broke on flaky DNS; retried and it is green now.", + "It seems the cache was stale — cleared it and the page renders.", + "I notice the staging DB drifted from prod; resynced via the script.", + "Sorry state machine had a missing transition; added PENDING->DONE.", +]) +def test_parse_response_keeps_hedged_summaries(good): + """Regression guard for the over-broad pattern: legitimate summaries that + happen to open with a hedge word ("Unfortunately", "There are no", + "It seems", "I notice", "Sorry ...") must be preserved, not silently + dropped from the memory layer.""" + result = _parse_response(_mock_claude_response(good)) + assert result.is_skip is False + + +# --- REMEMBER_REJECT_PATTERN env knob (mirrors REMEMBER_MODEL) ---------------- +from pipeline.haiku import _resolve_reject_pattern, DEFAULT_REJECT_PATTERN + + +def test_resolve_reject_pattern_default(monkeypatch): + monkeypatch.delenv("REMEMBER_REJECT_PATTERN", raising=False) + assert _resolve_reject_pattern().pattern == DEFAULT_REJECT_PATTERN + + +def test_resolve_reject_pattern_blank_falls_back(monkeypatch): + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", " ") + assert _resolve_reject_pattern().pattern == DEFAULT_REJECT_PATTERN + + +def test_resolve_reject_pattern_none_disables(monkeypatch): + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", "none") + assert _resolve_reject_pattern() is None + + +def 
test_resolve_reject_pattern_custom(monkeypatch): + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", r"^banana") + assert _resolve_reject_pattern().pattern == r"^banana" + + +def test_resolve_reject_pattern_invalid_falls_back(monkeypatch): + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", r"(unclosed") + assert _resolve_reject_pattern().pattern == DEFAULT_REJECT_PATTERN + + +def test_reject_gate_disabled_keeps_refusal(monkeypatch): + """With the gate disabled, only the literal SKIP contract applies — a + refusal is no longer rejected by the pattern.""" + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", "none") + result = _parse_response(_mock_claude_response("I cannot do that.")) + assert result.is_skip is False + + +def test_reject_gate_custom_pattern_applies(monkeypatch): + monkeypatch.setenv("REMEMBER_REJECT_PATTERN", r"^banana") + assert _parse_response(_mock_claude_response("banana split")).is_skip is True + assert _parse_response(_mock_claude_response("I cannot do that.")).is_skip is False diff --git a/tests/test_layered_config.py b/tests/test_layered_config.py index 32a09b2..2543c99 100644 --- a/tests/test_layered_config.py +++ b/tests/test_layered_config.py @@ -176,3 +176,71 @@ def test_missing_user_global_skipped(self, tmp_path): result = _run_lib(str(project), str(pipeline), str(home)) assert result.get("MERGED_SAVE_SECONDS") == "55" + + +def _run_log_env(project_dir: str, pipeline_dir: str, home_dir: str, env_extra: "dict | None" = None) -> dict: + """Source log.sh (via detect-tools + lib-memory-dir) and return the model + knobs it exports. 
REMEMBER_MODEL / REMEMBER_REJECT_PATTERN are stripped from + the base env so the config-vs-default resolution is deterministic; pass + env_extra to simulate an explicit shell override.""" + script = f""" + set -e + export PROJECT_DIR={project_dir} + export PIPELINE_DIR={pipeline_dir} + export HOME={home_dir} + source {DETECT_SCRIPT} + source {LIB_SCRIPT} + source {REPO_ROOT / "scripts" / "log.sh"} + echo "REMEMBER_MODEL=$REMEMBER_MODEL" + echo "REMEMBER_REJECT_PATTERN=$REMEMBER_REJECT_PATTERN" + """ + env = {k: v for k, v in os.environ.items() + if k not in ("REMEMBER_MODEL", "REMEMBER_REJECT_PATTERN")} + env.update(env_extra or {}) + result = subprocess.run(["bash", "-c", script], env=env, capture_output=True, text=True) + assert result.returncode == 0, f"log.sh failed:\n{result.stderr}" + parsed: dict = {} + for line in result.stdout.strip().splitlines(): + if "=" in line: + k, v = line.split("=", 1) + parsed[k] = v + return parsed + + +class TestModelConfigBridge: + """log.sh bridges config.json model/reject_pattern keys to the env vars + pipeline/haiku.py reads, with explicit shell env taking precedence.""" + + def _dirs(self, tmp_path): + project = tmp_path / "proj" + project.mkdir() + pipeline = tmp_path / "plugin" + pipeline.mkdir() + home = tmp_path / "home" + home.mkdir() + return project, pipeline, home + + def test_model_defaults_to_haiku(self, tmp_path): + project, pipeline, home = self._dirs(tmp_path) + (pipeline / "config.json").write_text(json.dumps({})) + result = _run_log_env(str(project), str(pipeline), str(home)) + assert result.get("REMEMBER_MODEL") == "haiku" + assert result.get("REMEMBER_REJECT_PATTERN") == "" + + def test_model_from_config(self, tmp_path): + project, pipeline, home = self._dirs(tmp_path) + (pipeline / "config.json").write_text( + json.dumps({"model": "sonnet", "reject_pattern": "none"}) + ) + result = _run_log_env(str(project), str(pipeline), str(home)) + assert result.get("REMEMBER_MODEL") == "sonnet" + assert 
result.get("REMEMBER_REJECT_PATTERN") == "none" + + def test_env_overrides_config(self, tmp_path): + project, pipeline, home = self._dirs(tmp_path) + (pipeline / "config.json").write_text(json.dumps({"model": "sonnet"})) + result = _run_log_env( + str(project), str(pipeline), str(home), + env_extra={"REMEMBER_MODEL": "opus"}, + ) + assert result.get("REMEMBER_MODEL") == "opus"