Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
▼ ▼ ▼
┌─────────────────┐ ┌─────────────────────┐ ┌──────────────────┐
│ session_path │ │ jsonl_parser │ │ exclusion_rules │
│ list_projects │ │ parse_session │ │ load + match │
│ list_sessions │ │ quick_session_info │ └────────┬─────────┘
│ safe_join │ │ _parse_tool_result │ │
│ list_projects │ │ session_peek │ │ load + match │
│ list_sessions │ │ tool_dispatch │ └────────┬─────────┘
│ safe_join │ │ jsonl_helpers │ │
└────────┬────────┘ └──────────┬──────────┘ │
│ │ │
└────────────┬───────────┴────────────────────────┘
Expand Down Expand Up @@ -48,7 +48,7 @@
| Layer | Responsibility | Key modules |
|-------|----------------|-------------|
| **Data discovery** | Resolve `~/.claude/projects/`, list projects and sessions, prevent path traversal | `utils/session_path.py` |
| **Parsing** | JSONL → session dict (messages, metadata, tool rendering) | `utils/jsonl_parser.py` |
| **Parsing** | JSONL → session dict (messages, metadata, tool rendering) | `utils/jsonl_parser.py`, `utils/tool_dispatch.py`, `utils/session_peek.py`, `utils/jsonl_helpers.py` |
| **Filtering** | Exclude sensitive sessions via rules file | `utils/exclusion_rules.py` |
| **Statistics** | Aggregates for API and exporters | `utils/session_stats.py` |
| **Export — Markdown** | Session → YAML-frontmatter Markdown | `utils/md_exporter.py` |
Expand All @@ -71,13 +71,13 @@

## Dispatch table

In `utils/jsonl_parser.py`, tool results are classified through `_parse_tool_result`, a **predicate-ordered dispatch table** (not a simple `if tool_name == ...` chain). **Order is load-bearing**: the first matching predicate wins. Tests in `tests/test_jsonl_parser.py` guard ordering regressions.
In `utils/tool_dispatch.py`, tool results are classified through `_parse_tool_result`, a **predicate-ordered dispatch table** (not a simple `if tool_name == ...` chain). **Order is load-bearing**: the first matching predicate wins. Tests in `tests/test_jsonl_parser.py` and `tests/test_real_session_fixtures.py` guard ordering regressions.

When adding a new tool renderer:

1. Add predicate + builder pair in the dispatch table in the correct order (specific before generic).
2. Add or extend a JSONL fixture under `tests/fixtures/` if needed.
3. Run `pytest tests/test_jsonl_parser.py -v`.
1. Add a `(predicate, builder)` pair to `_TOOL_RESULT_DISPATCH` in `utils/tool_dispatch.py`, preserving existing predicate order unless you also update fixtures and ordering tests (`tests/test_jsonl_parser.py`, `tests/test_real_session_fixtures.py`). Order is **not** “specific before generic” in general — the first match wins. `_tool_result_pred_task_message` is the intentional broad-before-narrow exception (`task_id` or `message` before retrieval/completed/async).
2. Add or extend a JSONL fixture under `tests/fixtures/` (especially for overlaps with existing predicates).
3. Run `pytest tests/test_jsonl_parser.py tests/test_real_session_fixtures.py -v`.

## Export state machine

Expand Down
10 changes: 10 additions & 0 deletions tests/test_jsonl_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,16 @@ def test_plan_result(self):
r = _parse_tool_result({"plan": [], "filePath": "/plan.md"})
assert r["result_type"] == "plan"

def test_plan_with_content_not_classified_as_file_write(self):
    """A result carrying plan, filePath and content is still reported as a plan.

    plan is registered before file_write in _TOOL_RESULT_DISPATCH.
    """
    payload = {"plan": [], "filePath": "/plan.md", "content": "plan body"}
    result = _parse_tool_result(payload)
    assert result["result_type"] == "plan"
    assert result["file_path"] == "/plan.md"

def test_unknown_fallback(self):
r = _parse_tool_result({"unexpected": True})
assert r["result_type"] == "unknown"
Expand Down
99 changes: 99 additions & 0 deletions utils/jsonl_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Shared content helpers for JSONL parsing and session peek."""

import re
from typing import Any

from models.session import MessageDict


def entry_message(entry: dict[str, Any]) -> dict[str, Any]:
    """Return the entry's ``message`` value, or an empty dict when it is
    missing or is not a dict."""
    payload = entry.get("message")
    if not isinstance(payload, dict):
        return {}
    return payload


def normalize_content(content: Any) -> list[dict[str, Any]]:
    """Coerce message content into a list of block dicts.

    A plain string becomes a single text block. In a list, string items are
    wrapped as text blocks, dict items are passed through unchanged, and
    items of any other type are dropped. Anything that is neither a string
    nor a list yields an empty list.
    """
    if isinstance(content, str):
        return [{"type": "text", "text": content}]
    if not isinstance(content, list):
        return []
    return [
        {"type": "text", "text": item} if isinstance(item, str) else item
        for item in content
        if isinstance(item, (str, dict))
    ]


def extract_text(content_parts: Any) -> str:
    """Join the text blocks of a content value into one newline-separated string.

    The content is passed through ``normalize_content`` first. Only blocks
    whose ``type`` is ``"text"`` contribute; tool_use, thinking and any other
    block types are ignored.

    A text block whose ``text`` is missing or is not a string (for example a
    JSON ``null``) contributes an empty line instead of making the join raise
    ``TypeError``.
    """
    texts = []
    for part in normalize_content(content_parts):
        if part.get("type") != "text":
            continue
        text = part.get("text", "")
        # str.join rejects non-string items, so fall back to the same empty
        # string that a missing "text" key already produces.
        texts.append(text if isinstance(text, str) else "")
    return "\n".join(texts)


def _base64_images(blocks: list[Any]) -> list[dict[str, Any]]:
    """Return ``{media_type, data}`` dicts for the base64 image blocks in *blocks*.

    Items that are not dicts, are not ``type == "image"``, have a ``source``
    that is not a dict, or whose source is not non-empty base64 data are
    skipped. ``media_type`` defaults to ``"image/png"`` when absent.
    """
    found = []
    for block in blocks:
        if not isinstance(block, dict) or block.get("type") != "image":
            continue
        source = block.get("source")
        # Guard against "source": null / malformed values, which would
        # otherwise fail on the .get() calls below.
        if not isinstance(source, dict):
            continue
        if source.get("type") == "base64" and source.get("data"):
            found.append({
                "media_type": source.get("media_type", "image/png"),
                "data": source["data"],
            })
    return found


def extract_images(content_parts: Any) -> list[dict[str, Any]]:
    """Pull base64 image blocks out of a content value.

    The content is passed through ``normalize_content`` first. Images are
    collected from top-level ``image`` blocks and from ``image`` blocks
    nested one level inside a ``tool_result`` block's ``content`` list, in
    the order they appear.

    Returns a list of ``{"media_type": ..., "data": ...}`` dicts.
    """
    images = []
    for part in normalize_content(content_parts):
        kind = part.get("type")
        if kind == "image":
            images.extend(_base64_images([part]))
        elif kind == "tool_result":
            # Nested content is usually a block list; string content is not
            # normalized here and therefore contributes no images.
            nested = part.get("content", [])
            if isinstance(nested, list):
                images.extend(_base64_images(nested))
    return images


def first_title_line(text: str, max_chars: int = 100) -> str:
    """Return the first line of *text* once system tags and surrounding
    whitespace are removed, cut to at most *max_chars* characters."""
    cleaned = strip_system_tags(text).strip()
    first_line, _, _ = cleaned.partition("\n")
    return first_line[:max_chars]
Comment thread
clean6378-max-it marked this conversation as resolved.


def infer_title(messages: list[MessageDict]) -> str:
    """Derive a session title from the earliest user message that has one.

    Walks the messages in order, takes the title line of each user message
    with non-empty text, and returns the first title line that is not empty.
    Falls back to ``"Untitled Session"`` when no message qualifies.
    """
    title_lines = (
        first_title_line(message["text"])
        for message in messages
        if message["role"] == "user" and message.get("text")
    )
    return next((line for line in title_lines if line), "Untitled Session")


def strip_system_tags(text: str) -> str:
    """Remove the internal XML tags Claude Code injects so exported text is clean.

    Block tags (system-reminder, ide_opened_file, ...) are removed together
    with everything between the opening and closing tag. For the remaining
    known tags only the tag markers themselves are dropped and the enclosed
    text is kept. The result is stripped of surrounding whitespace.
    """
    block_tag_names = (
        "system-reminder", "ide_opened_file", "user-prompt-submit-hook",
        "claude_background_info", "fast_mode_info", "env",
    )
    cleaned = text
    # One pass per tag, in this order: each pass sees the output of the
    # previous one, so the passes are not merged into a single alternation.
    for name in block_tag_names:
        block_pattern = re.compile(rf"<{name}>[\s\S]*?</{name}>")
        cleaned = block_pattern.sub("", cleaned)

    # Drop bare opening/closing markers of the remaining known tags.
    marker_pattern = re.compile(
        r"</?(?:ide_selection|local-command-stdout|local-command-stderr|"
        r"command-name|antml:\w+|function_calls|example\w*)>"
    )
    cleaned = marker_pattern.sub("", cleaned)
    return cleaned.strip()
Loading
Loading