Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ build-backend = "uv_build"

[dependency-groups]
dev = [
"black>=24.0.0",
"pytest>=9.0.2",
"pytest-httpx>=0.35.0",
"syrupy>=5.0.0",
Expand Down
204 changes: 201 additions & 3 deletions src/claude_code_transcripts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import subprocess
import tempfile
import webbrowser
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path

import click
Expand Down Expand Up @@ -556,9 +556,207 @@ def parse_session_file(filepath):
if filepath.suffix == ".jsonl":
return _parse_jsonl_file(filepath)
else:
# Standard JSON format
with open(filepath, "r", encoding="utf-8") as f:
return json.load(f)
data = json.load(f)

# If already in normalized format, return as-is
if isinstance(data, dict) and isinstance(data.get("loglines"), list):
return data

# Attempt to parse Augment ("Augument") chat exports into normalized loglines
augment_parsed = _parse_augment_export_data(data)
if augment_parsed is not None:
return augment_parsed

# Fallback: return raw JSON (may not be compatible with HTML generation)
return data


def _coerce_timestamp_to_iso_z(value):
"""Best-effort conversion of common timestamp shapes to an ISO 8601 string ending in 'Z'."""
if value is None:
return None

if isinstance(value, str):
v = value.strip()
return v or None

if isinstance(value, (int, float)):
# Heuristic: treat very large values as milliseconds since epoch
seconds = float(value) / 1000.0 if value > 10_000_000_000 else float(value)
try:
return (
datetime.fromtimestamp(seconds, tz=timezone.utc).isoformat(
timespec="seconds"
)
+ "Z"
)
except (OverflowError, OSError, ValueError):
return None

if isinstance(value, dict):
# Common shapes: {"seconds": ...}, {"ms": ...}, {"epoch_ms": ...}
for k in ("timestamp", "time", "created_at", "createdAt", "date"):
if k in value:
coerced = _coerce_timestamp_to_iso_z(value.get(k))
if coerced:
return coerced
if "seconds" in value:
return _coerce_timestamp_to_iso_z(value.get("seconds"))
if "ms" in value:
return _coerce_timestamp_to_iso_z(value.get("ms"))
if "epoch_ms" in value:
return _coerce_timestamp_to_iso_z(value.get("epoch_ms"))

return None


def _normalize_role_to_user_or_assistant(value):
if value is None:
return None

if isinstance(value, dict):
for k in ("role", "type", "name", "sender"):
if k in value:
value = value.get(k)
break

if not isinstance(value, str):
return None

role = value.strip().lower()
if role in ("user", "human", "me", "client", "customer"):
return "user"
if role in ("assistant", "ai", "bot", "augment", "agent"):
return "assistant"
if role in ("u", "usr"):
return "user"
if role in ("a", "asst"):
return "assistant"
if role in ("system", "tool", "function"):
return None
return None


def _extract_text_from_maybe_rich_content(value):
"""Extract a text string from common export shapes."""
if value is None:
return ""
if isinstance(value, str):
return value
if isinstance(value, dict):
for k in ("text", "content", "message", "value", "body"):
if k in value:
return _extract_text_from_maybe_rich_content(value.get(k))
return json.dumps(value, ensure_ascii=False)
if isinstance(value, list):
parts = []
for item in value:
t = _extract_text_from_maybe_rich_content(item)
if t:
parts.append(t)
return "\n".join(parts)
return str(value)


def _iter_augment_message_dicts(data):
"""Yield message dicts from common Augment export shapes."""
if isinstance(data, list):
for item in data:
if isinstance(item, dict):
yield item
return

if not isinstance(data, dict):
return

# Some exports wrap the payload in a "data" field
if isinstance(data.get("data"), (dict, list)):
yield from _iter_augment_message_dicts(data["data"])

# Top-level messages list
if isinstance(data.get("messages"), list):
for m in data["messages"]:
if isinstance(m, dict):
yield m

# Single conversation wrapper
conv = data.get("conversation")
if isinstance(conv, dict) and isinstance(conv.get("messages"), list):
for m in conv["messages"]:
if isinstance(m, dict):
yield m

# Multiple conversations/chats
for key in ("conversations", "chats"):
if not isinstance(data.get(key), list):
continue
for c in data[key]:
if not isinstance(c, dict):
continue
msgs = c.get("messages")
if isinstance(msgs, list):
for m in msgs:
if isinstance(m, dict):
yield m


def _parse_augment_export_data(data):
    """Parse Augment ("Augument") export JSON into normalized loglines.

    Each message found by ``_iter_augment_message_dicts`` whose role resolves
    to "user" or "assistant" becomes one logline; all other messages are
    skipped. Messages without a usable timestamp get an ``unknown-NNNN``
    placeholder, numbered by position among all messages (skipped ones
    included).

    Returns {"loglines": [...]} on success, or None if the data does not look
    like an Augment export (no messages at all, or none from user/assistant).
    """
    role_fields = ("role", "sender", "from", "author")
    time_fields = ("created_at", "createdAt", "timestamp", "time", "date")
    body_fields = ("content", "text", "message", "body")

    entries = []
    for position, message in enumerate(_iter_augment_message_dicts(data), start=1):
        # First field that normalizes to a known speaker wins.
        speaker = None
        for field in role_fields:
            speaker = _normalize_role_to_user_or_assistant(message.get(field))
            if speaker:
                break
        if speaker not in ("user", "assistant"):
            continue

        # First truthy raw timestamp wins; otherwise the last field's value is used.
        raw_time = None
        for field in time_fields:
            raw_time = message.get(field)
            if raw_time:
                break
        stamp = _coerce_timestamp_to_iso_z(raw_time) or f"unknown-{position:04d}"

        # The body comes from the first field that is present, even if empty.
        raw_body = next(
            (message[field] for field in body_fields if field in message), ""
        )
        body = _extract_text_from_maybe_rich_content(raw_body)

        # Prefer Claude-style content blocks for assistant so Markdown renders correctly
        payload = [{"type": "text", "text": body}] if speaker == "assistant" else body

        entries.append(
            {
                "type": speaker,
                "timestamp": stamp,
                "message": {"role": speaker, "content": payload},
            }
        )

    return {"loglines": entries} if entries else None


def _is_codex_cli_format(filepath):
Expand Down
35 changes: 35 additions & 0 deletions tests/sample_augment_export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"app": "Augument",
"version": "1.0",
"exported_at": "2026-01-01T12:00:00Z",
"conversation": {
"id": "conv-1",
"title": "Test conversation",
"messages": [
{
"id": "m1",
"role": "user",
"content": "Hello **Augment**",
"created_at": "2026-01-01T12:00:01Z"
},
{
"id": "m2",
"role": "assistant",
"content": "Hi there!\n\n```python\nprint('hi')\n```",
"created_at": "2026-01-01T12:00:02Z"
},
{
"id": "m3",
"role": "user",
"content": "Thanks",
"created_at": "2026-01-01T12:00:03Z"
},
{
"id": "m4",
"role": "assistant",
"content": "You're welcome.",
"created_at": "2026-01-01T12:00:04Z"
}
]
}
}
92 changes: 92 additions & 0 deletions tests/test_augment_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Tests for Augment ("Augument") export format support."""

import json
from pathlib import Path

import pytest

from claude_code_transcripts import generate_html, parse_session_file


class TestAugmentExportParsing:
    """Parsing of Augment ("Augument") exports into normalized loglines."""

    def test_parses_sample_augment_export(self):
        """The bundled fixture yields four alternating user/assistant entries."""
        sample = Path(__file__).parent / "sample_augment_export.json"
        parsed = parse_session_file(sample)

        assert "loglines" in parsed
        entry_types = [entry["type"] for entry in parsed["loglines"]]
        assert entry_types == ["user", "assistant", "user", "assistant"]

        user_entry, assistant_entry = parsed["loglines"][:2]

        assert user_entry["timestamp"] == "2026-01-01T12:00:01Z"
        user_message = user_entry["message"]
        assert user_message["role"] == "user"
        assert user_message["content"] == "Hello **Augment**"

        assert assistant_entry["timestamp"] == "2026-01-01T12:00:02Z"
        assistant_message = assistant_entry["message"]
        assert assistant_message["role"] == "assistant"
        # Ensure assistant content is markdown-renderable (Claude-style content blocks)
        blocks = assistant_message["content"]
        assert isinstance(blocks, list)
        assert blocks[0]["type"] == "text"
        assert "print('hi')" in blocks[0]["text"]

    @pytest.mark.parametrize(
        "payload",
        [
            # Minimal dict with top-level messages list
            {
                "messages": [
                    {"role": "USER", "text": "hi", "timestamp": 1735732800},
                    {"role": "ASSISTANT", "text": "hello", "timestamp": 1735732801},
                ]
            },
            # Conversation wrapper, alternate keys
            {
                "conversation": {
                    "messages": [
                        {
                            "sender": "user",
                            "content": "hi",
                            "createdAt": "2026-01-01T00:00:00Z",
                        },
                        {
                            "sender": "assistant",
                            "message": "hello",
                            "createdAt": "2026-01-01T00:00:01Z",
                        },
                    ]
                }
            },
        ],
    )
    def test_parses_common_augment_variants(self, tmp_path, payload):
        """Alternate key names and wrappers still produce a user/assistant pair."""
        export_file = tmp_path / "augment.json"
        export_file.write_text(json.dumps(payload), encoding="utf-8")

        parsed = parse_session_file(export_file)

        assert "loglines" in parsed
        entries = parsed["loglines"]
        assert len(entries) == 2
        assert entries[0]["type"] == "user"
        assert entries[1]["type"] == "assistant"


class TestAugmentHtmlGeneration:
    """End-to-end HTML generation from the bundled Augment export fixture."""

    def test_generates_html_from_augment_export(self, tmp_path):
        """Both the index and the first transcript page contain fixture content."""
        sample = Path(__file__).parent / "sample_augment_export.json"
        destination = tmp_path / "out"

        generate_html(sample, destination)

        index_markup = destination.joinpath("index.html").read_text(encoding="utf-8")
        assert "Hello" in index_markup
        # User markdown is rendered
        assert "<strong>Augment</strong>" in index_markup

        # Assistant content (including code blocks) is rendered on the per-page transcript
        transcript_markup = destination.joinpath("page-001.html").read_text(
            encoding="utf-8"
        )
        for fragment in ("print", "hi"):
            assert fragment in transcript_markup