cppalliance · wpak-ai · May 28, 2026 · May 27, 2026 · May 28, 2026 · May 28, 2026
diff --git a/docs/architecture.md b/docs/architecture.md
@@ -15,9 +15,9 @@
          ▼                         ▼                         ▼
 ┌─────────────────┐    ┌─────────────────────┐    ┌──────────────────┐
 │ session_path    │    │ jsonl_parser        │    │ exclusion_rules  │
-│ list_projects   │    │ parse_session       │    │ load + match     │
-│ list_sessions   │    │ quick_session_info  │    └────────┬─────────┘
-│ safe_join       │    │ _parse_tool_result  │             │
+│ list_projects   │    │ session_peek        │    │ load + match     │
+│ list_sessions   │    │ tool_dispatch       │    └────────┬─────────┘
+│ safe_join       │    │ jsonl_helpers       │             │
 └────────┬────────┘    └──────────┬──────────┘             │
          │                        │                        │
          └────────────┬───────────┴────────────────────────┘
@@ -48,7 +48,7 @@
 | Layer | Responsibility | Key modules |
 |-------|----------------|-------------|
 | **Data discovery** | Resolve `~/.claude/projects/`, list projects and sessions, prevent path traversal | `utils/session_path.py` |
-| **Parsing** | JSONL → session dict (messages, metadata, tool rendering) | `utils/jsonl_parser.py` |
+| **Parsing** | JSONL → session dict (messages, metadata, tool rendering) | `utils/jsonl_parser.py`, `utils/tool_dispatch.py`, `utils/session_peek.py`, `utils/jsonl_helpers.py` |
 | **Filtering** | Exclude sensitive sessions via rules file | `utils/exclusion_rules.py` |
 | **Statistics** | Aggregates for API and exporters | `utils/session_stats.py` |
 | **Export — Markdown** | Session → YAML-frontmatter Markdown | `utils/md_exporter.py` |
@@ -71,13 +71,13 @@
 
 ## Dispatch table
 
-In `utils/jsonl_parser.py`, tool results are classified through `_parse_tool_result`, a **predicate-ordered dispatch table** (not a simple `if tool_name == ...` chain). **Order is load-bearing**: the first matching predicate wins. Tests in `tests/test_jsonl_parser.py` guard ordering regressions.
+In `utils/tool_dispatch.py`, tool results are classified by `_parse_tool_result`, which consults `_TOOL_RESULT_DISPATCH`, a **predicate-ordered dispatch table** (not a simple `if tool_name == ...` chain). **Order is load-bearing**: the first matching predicate wins. Tests in `tests/test_jsonl_parser.py` and `tests/test_real_session_fixtures.py` guard ordering regressions.
 
 When adding a new tool renderer:
 
-1. Add predicate + builder pair in the dispatch table in the correct order (specific before generic).
-2. Add or extend a JSONL fixture under `tests/fixtures/` if needed.
-3. Run `pytest tests/test_jsonl_parser.py -v`.
+1. Add a `(predicate, builder)` pair to `_TOOL_RESULT_DISPATCH` in `utils/tool_dispatch.py`, preserving existing predicate order unless you also update fixtures and ordering tests (`tests/test_jsonl_parser.py`, `tests/test_real_session_fixtures.py`). Order is **not** “specific before generic” in general — the first match wins. `_tool_result_pred_task_message` is the intentional broad-before-narrow exception (`task_id` or `message` before retrieval/completed/async).
+2. Add or extend a JSONL fixture under `tests/fixtures/` (especially for overlaps with existing predicates).
+3. Run `pytest tests/test_jsonl_parser.py tests/test_real_session_fixtures.py -v`.
 
 ## Export state machine
 

diff --git a/tests/test_jsonl_parser.py b/tests/test_jsonl_parser.py
@@ -234,6 +234,16 @@ def test_plan_result(self):
         r = _parse_tool_result({"plan": [], "filePath": "/plan.md"})
         assert r["result_type"] == "plan"
 
+    def test_plan_with_content_not_classified_as_file_write(self):
+        """plan is registered before file_write in _TOOL_RESULT_DISPATCH."""
+        r = _parse_tool_result({
+            "plan": [],
+            "filePath": "/plan.md",
+            "content": "plan body",
+        })
+        assert r["result_type"] == "plan"
+        assert r["file_path"] == "/plan.md"
+
     def test_unknown_fallback(self):
         r = _parse_tool_result({"unexpected": True})
         assert r["result_type"] == "unknown"

diff --git a/utils/jsonl_helpers.py b/utils/jsonl_helpers.py
@@ -0,0 +1,99 @@
+"""Shared content helpers for JSONL parsing and session peek."""
+
+import re
+from typing import Any
+
+from models.session import MessageDict
+
+
+def entry_message(entry: dict[str, Any]) -> dict[str, Any]:
+    m = entry.get("message")
+    return m if isinstance(m, dict) else {}
+
+
+def normalize_content(content: Any) -> list[dict[str, Any]]:
+    """Content can be a plain string, a list of strings, or a list of typed
+    blocks. Normalize everything into [{type, text}, ...] form."""
+    if isinstance(content, str):
+        return [{"type": "text", "text": content}]
+    if isinstance(content, list):
+        result = []
+        for part in content:
+            if isinstance(part, str):
+                result.append({"type": "text", "text": part})
+            elif isinstance(part, dict):
+                result.append(part)
+        return result
+    return []
+
+
+def extract_text(content_parts: Any) -> str:
+    """Grab just the text blocks out of a content array, ignore tool_use/thinking."""
+    parts = normalize_content(content_parts)
+    texts = []
+    for part in parts:
+        if part.get("type") == "text":
+            texts.append(part.get("text", ""))
+    return "\n".join(texts)
+
+
+def extract_images(content_parts: Any) -> list[dict[str, Any]]:
+    """Pull base64 image blocks out of a content array.
+    Also looks inside nested tool_result content blocks."""
+    parts = normalize_content(content_parts)
+    images = []
+    for part in parts:
+        if part.get("type") == "image":
+            source = part.get("source", {})
+            if source.get("type") == "base64" and source.get("data"):
+                images.append({
+                    "media_type": source.get("media_type", "image/png"),
+                    "data": source["data"],
+                })
+        elif part.get("type") == "tool_result":
+            # Nested content is usually a block list; string content is not normalized here.
+            nested = part.get("content", [])
+            if isinstance(nested, list):
+                for sub in nested:
+                    if isinstance(sub, dict) and sub.get("type") == "image":
+                        source = sub.get("source", {})
+                        if source.get("type") == "base64" and source.get("data"):
+                            images.append({
+                                "media_type": source.get("media_type", "image/png"),
+                                "data": source["data"],
+                            })
+    return images
+
+
+def first_title_line(text: str, max_chars: int = 100) -> str:
+    """First non-empty line after system-tag strip, truncated for session titles."""
+    return strip_system_tags(text).strip().split("\n")[0][:max_chars]
+
+
+def infer_title(messages: list[MessageDict]) -> str:
+    """Use the first line of the first real user message as the session title."""
+    for msg in messages:
+        if msg["role"] == "user" and msg.get("text"):
+            first_line = first_title_line(msg["text"])
+            if first_line:
+                return first_line
+    return "Untitled Session"
+
+
+def strip_system_tags(text: str) -> str:
+    """Strip out the internal XML tags Claude Code injects (system-reminder,
+    ide_opened_file, etc.) so exported text is clean."""
+    # Remove block tags and their content
+    for tag in (
+        "system-reminder", "ide_opened_file", "user-prompt-submit-hook",
+        "claude_background_info", "fast_mode_info", "env",
+    ):
+        text = re.sub(rf"<{tag}>[\s\S]*?</{tag}>", "", text)
+    # Strip remaining known opening/closing tags
+    text = re.sub(
+        r"</?(?:ide_selection|local-command-stdout|local-command-stderr|"
+        r"command-name|antml:\w+|function_calls|example\w*)>",
+        "",
+        text,
+    )
+    return text.strip()