def _epoch_to_iso_z(value, divisor=1.0):
    """Format an epoch number as ``YYYY-MM-DDTHH:MM:SSZ`` in UTC.

    ``value`` is divided by ``divisor`` to obtain seconds (pass 1000.0 for
    milliseconds). Returns None when the number cannot be represented as a
    datetime (NaN, infinity, out-of-range or overly large values).
    """
    try:
        # float() is inside the try: JSON may carry arbitrarily large integers,
        # and float() raises OverflowError for them.
        moment = datetime.fromtimestamp(float(value) / divisor, tz=timezone.utc)
    except (OverflowError, OSError, ValueError):
        return None
    # isoformat() on an aware datetime appends "+00:00"; strip tzinfo first so
    # the result carries exactly one UTC designator ("Z").
    return moment.replace(tzinfo=None).isoformat(timespec="seconds") + "Z"


def _coerce_timestamp_to_iso_z(value):
    """Best-effort conversion of common timestamp shapes to an ISO 8601 string.

    Accepted shapes:
      * str: returned stripped and otherwise unchanged (None if blank).
      * int/float: epoch seconds, or epoch milliseconds when the magnitude
        exceeds 10_000_000_000; formatted as ``...Z``.
      * dict: the first usable value under "timestamp", "time", "created_at",
        "createdAt", "date", then "seconds", "ms", "epoch_ms".

    Returns None when nothing usable is found.
    """
    # bool is a subclass of int; True/False are never meaningful timestamps.
    if value is None or isinstance(value, bool):
        return None

    if isinstance(value, str):
        return value.strip() or None

    if isinstance(value, (int, float)):
        # Heuristic: treat very large magnitudes as milliseconds since epoch.
        divisor = 1000.0 if abs(value) > 10_000_000_000 else 1.0
        return _epoch_to_iso_z(value, divisor)

    if isinstance(value, dict):
        for key in ("timestamp", "time", "created_at", "createdAt", "date", "seconds"):
            if key in value:
                coerced = _coerce_timestamp_to_iso_z(value[key])
                if coerced:
                    return coerced
        # Keys that name their unit explicitly are always milliseconds,
        # regardless of magnitude.
        for key in ("ms", "epoch_ms"):
            if key not in value:
                continue
            raw = value[key]
            if isinstance(raw, (int, float)) and not isinstance(raw, bool):
                coerced = _epoch_to_iso_z(raw, 1000.0)
            else:
                coerced = _coerce_timestamp_to_iso_z(raw)
            if coerced:
                return coerced

    return None


# Role aliases, compared after strip() + lower().
_USER_ROLE_ALIASES = frozenset(
    {"user", "human", "me", "client", "customer", "u", "usr"}
)
_ASSISTANT_ROLE_ALIASES = frozenset(
    {"assistant", "ai", "bot", "augment", "agent", "a", "asst"}
)


def _normalize_role_to_user_or_assistant(value):
    """Map a role-like value to "user" or "assistant", or None if unrecognized.

    ``value`` may be a string alias (case-insensitive, surrounding whitespace
    ignored) or a dict carrying the role under "role", "type", "name" or
    "sender". For dicts, the keys are tried in that order and the first one
    that yields a recognized role wins. Any other role (including "system",
    "tool" and "function") yields None.
    """
    if isinstance(value, dict):
        for key in ("role", "type", "name", "sender"):
            normalized = _normalize_role_to_user_or_assistant(value.get(key))
            if normalized:
                return normalized
        return None

    if not isinstance(value, str):
        return None

    role = value.strip().lower()
    if role in _USER_ROLE_ALIASES:
        return "user"
    if role in _ASSISTANT_ROLE_ALIASES:
        return "assistant"
    return None


def _extract_text_from_maybe_rich_content(value):
    """Extract a text string from common export shapes.

    * None -> ""
    * str -> returned unchanged
    * dict -> the first non-empty text found under "text", "content",
      "message", "value" or "body"; "" if one of those keys exists but all are
      empty; otherwise the dict serialized as JSON
    * list -> non-empty item texts joined with newlines
    * anything else -> ``str(value)``
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        saw_known_key = False
        for key in ("text", "content", "message", "value", "body"):
            if key in value:
                saw_known_key = True
                text = _extract_text_from_maybe_rich_content(value[key])
                if text:
                    return text
        if saw_known_key:
            return ""
        # No recognizable text field: keep the payload visible rather than drop it.
        return json.dumps(value, ensure_ascii=False)
    if isinstance(value, list):
        texts = (_extract_text_from_maybe_rich_content(item) for item in value)
        return "\n".join(text for text in texts if text)
    return str(value)
def _iter_dict_items(items):
    """Yield the dict elements of ``items`` when it is a list; otherwise yield nothing."""
    if isinstance(items, list):
        for item in items:
            if isinstance(item, dict):
                yield item


def _iter_augment_message_dicts(data):
    """Yield message dicts from common Augment export shapes.

    Shapes handled, in yield order:
      * a bare list: every dict element is yielded as a message
      * {"data": <dict or list>}: unwrapped recursively
      * {"messages": [...]}
      * {"conversation": {"messages": [...]}}
      * {"conversations": [{"messages": [...]}, ...]} and the same under "chats"

    Non-dict entries are skipped; any other input yields nothing.
    """
    if isinstance(data, list):
        yield from _iter_dict_items(data)
        return

    if not isinstance(data, dict):
        return

    # Some exports wrap the payload in a "data" field.
    wrapped = data.get("data")
    if isinstance(wrapped, (dict, list)):
        yield from _iter_augment_message_dicts(wrapped)

    # Top-level messages list.
    yield from _iter_dict_items(data.get("messages"))

    # Single conversation wrapper.
    conversation = data.get("conversation")
    if isinstance(conversation, dict):
        yield from _iter_dict_items(conversation.get("messages"))

    # Multiple conversations/chats.
    for key in ("conversations", "chats"):
        for chat in _iter_dict_items(data.get(key)):
            yield from _iter_dict_items(chat.get("messages"))


# Message fields consulted by _parse_augment_export_data, in priority order.
_AUGMENT_ROLE_KEYS = ("role", "sender", "from", "author")
_AUGMENT_TIMESTAMP_KEYS = ("created_at", "createdAt", "timestamp", "time", "date")
_AUGMENT_TEXT_KEYS = ("content", "text", "message", "body")


def _first_truthy(candidates, default=None):
    """Return the first truthy item of ``candidates`` (consumed lazily), else ``default``."""
    for candidate in candidates:
        if candidate:
            return candidate
    return default


def _parse_augment_export_data(data):
    """Parse Augment ("Augument") export JSON into normalized loglines.

    Every message whose role resolves to "user" or "assistant" becomes one
    logline; all other messages are skipped. Each field is resolved
    independently, so an unusable value under one key (for example an empty
    "content" or an unparseable "created_at") no longer hides a usable value
    under a later key.

    Returns {"loglines": [...]} on success, or None if the data does not
    contain any user/assistant messages (i.e. does not look like an Augment
    export).
    """
    loglines = []

    # idx counts every message, including skipped ones, so the placeholder
    # timestamp reflects the message's position in the export.
    for idx, msg in enumerate(_iter_augment_message_dicts(data), start=1):
        role = _first_truthy(
            _normalize_role_to_user_or_assistant(msg.get(key))
            for key in _AUGMENT_ROLE_KEYS
        )
        if role is None:
            continue

        ts = _first_truthy(
            (_coerce_timestamp_to_iso_z(msg.get(key)) for key in _AUGMENT_TIMESTAMP_KEYS),
            default=f"unknown-{idx:04d}",
        )

        text = _first_truthy(
            (
                _extract_text_from_maybe_rich_content(msg.get(key))
                for key in _AUGMENT_TEXT_KEYS
            ),
            default="",
        )

        # Prefer Claude-style content blocks for assistant so Markdown renders correctly.
        content = [{"type": "text", "text": text}] if role == "assistant" else text

        loglines.append(
            {
                "type": role,
                "timestamp": ts,
                "message": {"role": role, "content": content},
            }
        )

    return {"loglines": loglines} if loglines else None
first["message"]["content"] == "Hello **Augment**" + + second = data["loglines"][1] + assert second["timestamp"] == "2026-01-01T12:00:02Z" + assert second["message"]["role"] == "assistant" + # Ensure assistant content is markdown-renderable (Claude-style content blocks) + assert isinstance(second["message"]["content"], list) + assert second["message"]["content"][0]["type"] == "text" + assert "print('hi')" in second["message"]["content"][0]["text"] + + @pytest.mark.parametrize( + "payload", + [ + # Minimal dict with top-level messages list + { + "messages": [ + {"role": "USER", "text": "hi", "timestamp": 1735732800}, + {"role": "ASSISTANT", "text": "hello", "timestamp": 1735732801}, + ] + }, + # Conversation wrapper, alternate keys + { + "conversation": { + "messages": [ + { + "sender": "user", + "content": "hi", + "createdAt": "2026-01-01T00:00:00Z", + }, + { + "sender": "assistant", + "message": "hello", + "createdAt": "2026-01-01T00:00:01Z", + }, + ] + } + }, + ], + ) + def test_parses_common_augment_variants(self, tmp_path, payload): + p = tmp_path / "augment.json" + p.write_text(json.dumps(payload), encoding="utf-8") + + data = parse_session_file(p) + assert "loglines" in data + assert len(data["loglines"]) == 2 + assert data["loglines"][0]["type"] == "user" + assert data["loglines"][1]["type"] == "assistant" + + +class TestAugmentHtmlGeneration: + def test_generates_html_from_augment_export(self, tmp_path): + fixture_path = Path(__file__).parent / "sample_augment_export.json" + output_dir = tmp_path / "out" + + generate_html(fixture_path, output_dir) + + index_html = (output_dir / "index.html").read_text(encoding="utf-8") + assert "Hello" in index_html + # User markdown is rendered + assert "Augment" in index_html + + # Assistant content (including code blocks) is rendered on the per-page transcript + page_html = (output_dir / "page-001.html").read_text(encoding="utf-8") + assert "print" in page_html + assert "hi" in page_html