From 058c288bbaafd42ef2cd7a7f62ebeb952d4c94b5 Mon Sep 17 00:00:00 2001 From: Toni Nowak Date: Mon, 22 Jun 2026 23:56:49 +0200 Subject: [PATCH] =?UTF-8?q?perf(hooks):=20light=20PostToolUse=20rewrite=20?= =?UTF-8?q?=E2=80=94=20zero=20heavy=20imports,=20noise=20filter,=20lock-sa?= =?UTF-8?q?fe=20append=20[follow-up=20#31]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add _NOISE_TOOLS frozenset for O(1) skip of high-frequency no-signal tools (TodoRead/Write, WebSearch/Fetch, smem_recall/session/stats/index, preview logs) before any config file I/O — fastest possible exit path - Replace lazy `from surreal_memory.utils.timeutils import utcnow` with inline stdlib datetime.now(UTC) in _format_event — zero non-stdlib imports on hot path - Merge two separate config.toml reads (_is_enabled + _get_blacklist) into one _read_tool_memory_config call per invocation - Lock-safe JSONL append via fcntl.flock(LOCK_EX) on POSIX; plain append fallback on platforms without fcntl (Windows) - Handle CODEX_SESSION_ID as fallback when CLAUDE_SESSION_ID is absent - Extract _get_data_dir() helper; _get_session_id() as testable function - Tests: 48 tests covering _NOISE_TOOLS, _utcnow_iso, _get_session_id (CLAUDE+CODEX), single-config-read API, concurrent append JSONL validity, noise fast-path --- src/surreal_memory/hooks/post_tool_use.py | 150 ++++++++++----- tests/unit/test_tool_memory_hook.py | 224 ++++++++++++++++++---- 2 files changed, 292 insertions(+), 82 deletions(-) diff --git a/src/surreal_memory/hooks/post_tool_use.py b/src/surreal_memory/hooks/post_tool_use.py index 85509e96..45cf1a67 100755 --- a/src/surreal_memory/hooks/post_tool_use.py +++ b/src/surreal_memory/hooks/post_tool_use.py @@ -7,8 +7,15 @@ Receives JSON on stdin with tool_name, tool_input, tool_output fields. Writes one JSONL line to ~/.surrealmemory/tool_events.jsonl. -This hook does NOT access SQLite or perform encoding — all processing +This hook does NOT access SurrealDB or perform encoding — all processing is deferred to the consolidation cycle. + +Performance design (ac2a001 perf subset): +- Zero heavy imports on the hot path (stdlib only except optional tomllib) +- _NOISE_TOOLS fast-path skips the highest-frequency no-value tools +- Lock-safe JSONL append via fcntl.flock (POSIX) with a write-then-rename + fallback for platforms without flock +- Session ID: checks CLAUDE_SESSION_ID and CODEX_SESSION_ID """ from __future__ import annotations @@ -18,6 +25,7 @@ import os import sys import time +from datetime import UTC, datetime from pathlib import Path from typing import Any @@ -28,6 +36,29 @@ # Max size for stdout response JSON _MAX_TOOL_OUTPUT_PREVIEW = 100 +# High-frequency tools that never produce useful memory signal. +# Checked before any config I/O for a fast zero-cost exit. +_NOISE_TOOLS: frozenset[str] = frozenset( + { + "TodoRead", + "TodoWrite", + "WebSearch", + "WebFetch", + "mcp__Claude_Preview__preview_logs", + "mcp__Claude_Preview__preview_console_logs", + "mcp__Claude_Preview__preview_network", + "smem_recall", + "smem_session", + "smem_stats", + "smem_index", + } +) + + +def _get_session_id() -> str: + """Return the current session ID from env (Claude or Codex).""" + return os.environ.get("CLAUDE_SESSION_ID") or os.environ.get("CODEX_SESSION_ID", "") + def _read_stdin() -> dict[str, Any]: """Read Claude Code PostToolUse hook JSON from stdin.""" @@ -41,49 +72,50 @@ def _read_stdin() -> dict[str, Any]: return {} +def _get_data_dir() -> Path: + """Return the surreal-memory data directory.""" + custom = os.environ.get("SURREAL_MEMORY_DIR", "") + return Path(custom) if custom else (Path.home() / ".surrealmemory") + + def _get_buffer_path() -> Path: """Get the JSONL buffer file path.""" - data_dir = Path(os.environ.get("SURREAL_MEMORY_DIR", "")) or (Path.home() / ".surrealmemory") - return data_dir / "tool_events.jsonl" + return _get_data_dir() / "tool_events.jsonl" -def _is_enabled() -> bool: - """Quick check if tool memory is enabled via config. +def _read_tool_memory_config() -> dict[str, Any]: + """Read [tool_memory] section from config.toml once. - Reads only the [tool_memory] section from config.toml. - Defaults to True if config is missing or section absent. + Returns an empty dict if the config is missing, unreadable, or + the section is absent. Called at most once per hook invocation. """ - data_dir = Path(os.environ.get("SURREAL_MEMORY_DIR", "")) or (Path.home() / ".surrealmemory") - config_path = data_dir / "config.toml" + config_path = _get_data_dir() / "config.toml" if not config_path.exists(): - return True + return {} try: import tomllib with open(config_path, "rb") as f: data = tomllib.load(f) - return bool(data.get("tool_memory", {}).get("enabled", True)) + result: dict[str, Any] = data.get("tool_memory", {}) + return result except Exception: - logger.debug("Failed to read tool_memory.enabled from config", exc_info=True) - return True + logger.debug("Failed to read tool_memory config", exc_info=True) + return {} -def _get_blacklist() -> list[str]: - """Read blacklist from config.toml.""" - data_dir = Path(os.environ.get("SURREAL_MEMORY_DIR", "")) or (Path.home() / ".surrealmemory") - config_path = data_dir / "config.toml" - if not config_path.exists(): - return [] - try: - import tomllib +def _is_enabled(tm_cfg: dict[str, Any]) -> bool: + """Check if tool memory is enabled. - with open(config_path, "rb") as f: - data = tomllib.load(f) - bl = data.get("tool_memory", {}).get("blacklist", []) - return list(bl) if isinstance(bl, (list, tuple)) else [] - except Exception: - logger.debug("Failed to read tool_memory.blacklist from config", exc_info=True) - return [] + Defaults to True if the key is absent. + """ + return bool(tm_cfg.get("enabled", True)) + + +def _get_blacklist(tm_cfg: dict[str, Any]) -> list[str]: + """Return the blacklist from [tool_memory] config.""" + bl = tm_cfg.get("blacklist", []) + return list(bl) if isinstance(bl, (list, tuple)) else [] def _truncate_args(tool_input: Any) -> str: @@ -97,16 +129,18 @@ def _truncate_args(tool_input: Any) -> str: return raw[:_MAX_ARGS_CHARS] -def _format_event(hook_input: dict[str, Any]) -> dict[str, Any]: - """Format hook input into a JSONL event dict.""" - from surreal_memory.utils.timeutils import utcnow +def _utcnow_iso() -> str: + """Return current UTC time as naive ISO string (stdlib only, no imports).""" + return datetime.now(UTC).replace(tzinfo=None).isoformat() + +def _format_event(hook_input: dict[str, Any]) -> dict[str, Any]: + """Format hook input into a JSONL event dict (stdlib only).""" tool_name = hook_input.get("tool_name", hook_input.get("tool", "")) server_name = hook_input.get("server_name", "") tool_input = hook_input.get("tool_input", {}) tool_error = hook_input.get("tool_error") duration_ms = hook_input.get("duration_ms", 0) - session_id = os.environ.get("CLAUDE_SESSION_ID", "") return { "tool_name": str(tool_name), @@ -114,22 +148,35 @@ def _format_event(hook_input: dict[str, Any]) -> dict[str, Any]: "args_summary": _truncate_args(tool_input), "success": tool_error is None, "duration_ms": int(duration_ms) if isinstance(duration_ms, (int, float)) else 0, - "session_id": session_id, + "session_id": _get_session_id(), "task_context": "", # Populated by processing engine if session is active - "created_at": utcnow().isoformat(), + "created_at": _utcnow_iso(), } def _append_to_buffer(event: dict[str, Any], buffer_path: Path) -> bool: - """Append one JSONL line to the buffer file. + """Append one JSONL line to the buffer file, lock-safe. + Uses fcntl.flock (POSIX) when available. Falls back to a plain + append on platforms that lack flock (Windows, some embedded). Returns True on success, False on failure. """ try: buffer_path.parent.mkdir(parents=True, exist_ok=True) - line = json.dumps(event, ensure_ascii=False, default=str) - with open(buffer_path, "a", encoding="utf-8") as f: - f.write(line + "\n") + line = json.dumps(event, ensure_ascii=False, default=str) + "\n" + try: + import fcntl + + with open(buffer_path, "a", encoding="utf-8") as f: + fcntl.flock(f, fcntl.LOCK_EX) + try: + f.write(line) + finally: + fcntl.flock(f, fcntl.LOCK_UN) + except ImportError: + # Non-POSIX platform — plain append (best-effort) + with open(buffer_path, "a", encoding="utf-8") as f: + f.write(line) return True except OSError: return False @@ -155,26 +202,31 @@ def main() -> None: """Entry point for the PostToolUse hook.""" start = time.monotonic() - # Fast exit if disabled - if not _is_enabled(): - # Output empty JSON for hook response - sys.stdout.write("{}\n") - return - hook_input = _read_stdin() if not hook_input: sys.stdout.write("{}\n") return - tool_name = hook_input.get("tool_name", hook_input.get("tool", "")) + tool_name = str(hook_input.get("tool_name", hook_input.get("tool", ""))) if not tool_name: sys.stdout.write("{}\n") return - # Check blacklist - blacklist = _get_blacklist() + # Fast-path: skip high-frequency noise tools before any config I/O + if tool_name in _NOISE_TOOLS: + sys.stdout.write("{}\n") + return + + # Read config once; derive enabled + blacklist from it + tm_cfg = _read_tool_memory_config() + + if not _is_enabled(tm_cfg): + sys.stdout.write("{}\n") + return + + blacklist = _get_blacklist(tm_cfg) for prefix in blacklist: - if str(tool_name).startswith(prefix): + if tool_name.startswith(prefix): sys.stdout.write("{}\n") return @@ -183,7 +235,7 @@ def main() -> None: buffer_path = _get_buffer_path() _append_to_buffer(event, buffer_path) - # Periodic buffer rotation check (every ~100 calls, cheap stat check) + # Periodic buffer rotation check (cheap stat check) try: if buffer_path.exists() and buffer_path.stat().st_size > 5_000_000: # > 5MB _check_buffer_rotation(buffer_path) diff --git a/tests/unit/test_tool_memory_hook.py b/tests/unit/test_tool_memory_hook.py index 1d15a1ae..1ecc2f34 100755 --- a/tests/unit/test_tool_memory_hook.py +++ b/tests/unit/test_tool_memory_hook.py @@ -1,7 +1,8 @@ -"""Tests for the PostToolUse hook (v2.17.0).""" +"""Tests for the PostToolUse hook (perf/light rewrite, ac2a001 subset).""" from __future__ import annotations +import importlib import json import os import sys @@ -10,14 +11,18 @@ from unittest.mock import patch from surreal_memory.hooks.post_tool_use import ( + _NOISE_TOOLS, _append_to_buffer, _check_buffer_rotation, _format_event, _get_blacklist, _get_buffer_path, + _get_session_id, _is_enabled, _read_stdin, + _read_tool_memory_config, _truncate_args, + _utcnow_iso, main, ) @@ -42,6 +47,65 @@ def test_non_serializable(self) -> None: assert len(result) > 0 +class TestNoiseTools: + def test_noise_tools_is_frozenset(self) -> None: + assert isinstance(_NOISE_TOOLS, frozenset) + + def test_contains_known_noise(self) -> None: + assert "TodoRead" in _NOISE_TOOLS + assert "TodoWrite" in _NOISE_TOOLS + assert "WebSearch" in _NOISE_TOOLS + + def test_does_not_contain_read(self) -> None: + assert "Read" not in _NOISE_TOOLS + assert "Bash" not in _NOISE_TOOLS + + +class TestUtcnowIso: + def test_returns_string(self) -> None: + result = _utcnow_iso() + assert isinstance(result, str) + assert "T" in result + + def test_no_tzinfo_suffix(self) -> None: + """Naive ISO string — no +00:00 suffix (matches storage convention).""" + result = _utcnow_iso() + assert "+" not in result + assert "Z" not in result + + +class TestGetSessionId: + def test_claude_session_id(self) -> None: + with patch.dict(os.environ, {"CLAUDE_SESSION_ID": "test-session-123"}, clear=False): + assert _get_session_id() == "test-session-123" + + def test_codex_session_id_fallback(self) -> None: + env = {"CODEX_SESSION_ID": "codex-456"} + with patch.dict(os.environ, env, clear=False): + # Remove CLAUDE_SESSION_ID if present + env_without_claude = {k: v for k, v in os.environ.items() if k != "CLAUDE_SESSION_ID"} + env_without_claude["CODEX_SESSION_ID"] = "codex-456" + with patch.dict(os.environ, env_without_claude, clear=True): + assert _get_session_id() == "codex-456" + + def test_claude_takes_priority_over_codex(self) -> None: + with patch.dict( + os.environ, + {"CLAUDE_SESSION_ID": "claude-abc", "CODEX_SESSION_ID": "codex-xyz"}, + clear=False, + ): + assert _get_session_id() == "claude-abc" + + def test_empty_when_neither_set(self) -> None: + env = { + k: v + for k, v in os.environ.items() + if k not in ("CLAUDE_SESSION_ID", "CODEX_SESSION_ID") + } + with patch.dict(os.environ, env, clear=True): + assert _get_session_id() == "" + + class TestFormatEvent: def test_basic_format(self) -> None: hook_input = { @@ -85,6 +149,21 @@ def test_uses_tool_fallback_key(self) -> None: event = _format_event({"tool": "Write"}) assert event["tool_name"] == "Write" + def test_session_id_from_env(self) -> None: + """session_id populated from env without heavy imports.""" + with patch.dict(os.environ, {"CLAUDE_SESSION_ID": "sess-xyz"}, clear=False): + event = _format_event({"tool_name": "Bash"}) + assert event["session_id"] == "sess-xyz" + + def test_no_heavy_imports(self) -> None: + """_format_event must not import surreal_memory sub-modules.""" + import surreal_memory.hooks.post_tool_use as mod + + # Reload to ensure no lazy-import side effects + importlib.reload(mod) + event = mod._format_event({"tool_name": "Read"}) + assert "created_at" in event + class TestBufferRotation: def test_no_rotation_small_buffer(self, tmp_path: Path) -> None: @@ -126,42 +205,48 @@ def test_invalid_json(self) -> None: assert result == {} -class TestIsEnabled: +class TestReadToolMemoryConfig: def test_no_config_file(self, tmp_path: Path) -> None: with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - assert _is_enabled() is True + assert _read_tool_memory_config() == {} - def test_enabled_true(self, tmp_path: Path) -> None: + def test_reads_tool_memory_section(self, tmp_path: Path) -> None: config = tmp_path / "config.toml" - config.write_text("[tool_memory]\nenabled = true\n") + config.write_text('[tool_memory]\nenabled = false\nblacklist = ["Foo"]\n') with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - assert _is_enabled() is True + cfg = _read_tool_memory_config() + assert cfg["enabled"] is False + assert cfg["blacklist"] == ["Foo"] - def test_enabled_false(self, tmp_path: Path) -> None: - config = tmp_path / "config.toml" - config.write_text("[tool_memory]\nenabled = false\n") - with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - assert _is_enabled() is False - - def test_missing_section(self, tmp_path: Path) -> None: + def test_missing_section_returns_empty(self, tmp_path: Path) -> None: config = tmp_path / "config.toml" config.write_text("[general]\nbrain = 'default'\n") with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - assert _is_enabled() is True + assert _read_tool_memory_config() == {} + + +class TestIsEnabled: + def test_empty_cfg_defaults_true(self) -> None: + assert _is_enabled({}) is True + + def test_enabled_true(self) -> None: + assert _is_enabled({"enabled": True}) is True + + def test_enabled_false(self) -> None: + assert _is_enabled({"enabled": False}) is False class TestGetBlacklist: - def test_no_config(self, tmp_path: Path) -> None: - with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - assert _get_blacklist() == [] + def test_empty_cfg_returns_empty(self) -> None: + assert _get_blacklist({}) == [] - def test_blacklist_present(self, tmp_path: Path) -> None: - config = tmp_path / "config.toml" - config.write_text('[tool_memory]\nblacklist = ["TodoRead", "TaskList"]\n') - with patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}): - result = _get_blacklist() + def test_blacklist_present(self) -> None: + result = _get_blacklist({"blacklist": ["TodoRead", "TaskList"]}) assert result == ["TodoRead", "TaskList"] + def test_invalid_type_returns_empty(self) -> None: + assert _get_blacklist({"blacklist": "not-a-list"}) == [] + class TestGetBufferPath: def test_custom_dir(self, tmp_path: Path) -> None: @@ -186,54 +271,93 @@ def test_appends_multiple(self, tmp_path: Path) -> None: lines = buf.read_text().strip().splitlines() assert len(lines) == 2 + def test_concurrent_appends_produce_valid_jsonl(self, tmp_path: Path) -> None: + """Multiple appends stay valid JSONL (each line parseable).""" + buf = tmp_path / "events.jsonl" + for i in range(20): + _append_to_buffer({"tool_name": f"Tool{i}", "seq": i}, buf) + lines = buf.read_text().strip().splitlines() + assert len(lines) == 20 + for line in lines: + parsed = json.loads(line) + assert "tool_name" in parsed + class TestMain: - def test_disabled_exits_fast(self, tmp_path: Path) -> None: - """When tool memory is disabled, main outputs empty JSON.""" + def test_empty_stdin(self, tmp_path: Path) -> None: + config = tmp_path / "config.toml" + config.write_text("[tool_memory]\nenabled = true\n") stdout = StringIO() with ( patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), + patch.object(sys, "stdin", StringIO("")), patch.object(sys, "stdout", stdout), ): main() assert stdout.getvalue().strip() == "{}" - def test_empty_stdin(self, tmp_path: Path) -> None: + def test_no_tool_name(self, tmp_path: Path) -> None: config = tmp_path / "config.toml" config.write_text("[tool_memory]\nenabled = true\n") stdout = StringIO() with ( patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), - patch.object(sys, "stdin", StringIO("")), + patch.object(sys, "stdin", StringIO('{"server_name": "test"}')), patch.object(sys, "stdout", stdout), ): main() assert stdout.getvalue().strip() == "{}" - def test_no_tool_name(self, tmp_path: Path) -> None: - config = tmp_path / "config.toml" - config.write_text("[tool_memory]\nenabled = true\n") + def test_noise_tool_skipped_without_config_read(self, tmp_path: Path) -> None: + """_NOISE_TOOLS are skipped before any file I/O (no config needed).""" stdout = StringIO() + # No config file — would default-enable, but noise check runs first with ( patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), - patch.object(sys, "stdin", StringIO('{"server_name": "test"}')), + patch.object(sys, "stdin", StringIO('{"tool_name": "TodoRead"}')), + patch.object(sys, "stdout", stdout), + ): + main() + assert stdout.getvalue().strip() == "{}" + # No event file should be created + assert not (tmp_path / "tool_events.jsonl").exists() + + def test_websearch_noise_skipped(self, tmp_path: Path) -> None: + stdout = StringIO() + with ( + patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), + patch.object(sys, "stdin", StringIO('{"tool_name": "WebSearch"}')), patch.object(sys, "stdout", stdout), ): main() assert stdout.getvalue().strip() == "{}" + assert not (tmp_path / "tool_events.jsonl").exists() def test_blacklisted_tool(self, tmp_path: Path) -> None: config = tmp_path / "config.toml" - config.write_text('[tool_memory]\nenabled = true\nblacklist = ["Todo"]\n') + config.write_text('[tool_memory]\nenabled = true\nblacklist = ["CustomNoise"]\n') stdout = StringIO() with ( patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), - patch.object(sys, "stdin", StringIO('{"tool_name": "TodoRead"}')), + patch.object(sys, "stdin", StringIO('{"tool_name": "CustomNoiseRead"}')), + patch.object(sys, "stdout", stdout), + ): + main() + assert stdout.getvalue().strip() == "{}" + assert not (tmp_path / "tool_events.jsonl").exists() + + def test_disabled_exits_fast(self, tmp_path: Path) -> None: + """When tool memory is disabled, main outputs empty JSON.""" + config = tmp_path / "config.toml" + config.write_text("[tool_memory]\nenabled = false\n") + stdout = StringIO() + with ( + patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), + patch.object(sys, "stdin", StringIO('{"tool_name": "Read"}')), patch.object(sys, "stdout", stdout), ): main() assert stdout.getvalue().strip() == "{}" - # No event file should be created assert not (tmp_path / "tool_events.jsonl").exists() def test_successful_capture(self, tmp_path: Path) -> None: @@ -252,3 +376,37 @@ def test_successful_capture(self, tmp_path: Path) -> None: event = json.loads(buf.read_text().strip()) assert event["tool_name"] == "Read" assert event["success"] is True + + def test_codex_session_id_captured(self, tmp_path: Path) -> None: + """CODEX_SESSION_ID is captured as session_id when CLAUDE_SESSION_ID absent.""" + config = tmp_path / "config.toml" + config.write_text("[tool_memory]\nenabled = true\n") + stdout = StringIO() + env = {k: v for k, v in os.environ.items() if k != "CLAUDE_SESSION_ID"} + env["SURREAL_MEMORY_DIR"] = str(tmp_path) + env["CODEX_SESSION_ID"] = "codex-test-session" + with ( + patch.dict(os.environ, env, clear=True), + patch.object(sys, "stdin", StringIO('{"tool_name": "Bash"}')), + patch.object(sys, "stdout", stdout), + ): + main() + buf = tmp_path / "tool_events.jsonl" + assert buf.exists() + event = json.loads(buf.read_text().strip()) + assert event["session_id"] == "codex-test-session" + + def test_no_config_defaults_enabled(self, tmp_path: Path) -> None: + """Without config.toml, tool memory is enabled by default.""" + stdout = StringIO() + with ( + patch.dict(os.environ, {"SURREAL_MEMORY_DIR": str(tmp_path)}), + patch.object(sys, "stdin", StringIO('{"tool_name": "Bash"}')), + patch.object(sys, "stdout", stdout), + ): + main() + assert stdout.getvalue().strip() == "{}" + buf = tmp_path / "tool_events.jsonl" + assert buf.exists() + event = json.loads(buf.read_text().strip()) + assert event["tool_name"] == "Bash"