Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions docs/session-pipeline-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,54 @@ During any Claude Code session working with Zuhn:
- **Explicit intent only** — the hook only reads `/tmp/`, never scrapes other directories
- **No auto-extraction** — `extract-session` must be run manually
- **`.claude/` files are local** — never commit personal `.claude/` config to the public repo

---

## Automatic capture (SessionEnd hook)

Phase 8 above is *explicit-intent* capture (Claude manually queues insights). The
**session-capture hook** adds *automatic* capture — it records the whole session
at exit, so nothing has to be queued by hand. This is safe now because the
**quality gate exists**: the gate (not manual intent) is the filter that keeps
junk out, so capture can be automatic while quality stays enforced.

### How it works

1. `capture-session.ts` runs on `SessionEnd`. It reads the session transcript,
strips noise (tool output, `thinking`, subagent sidechains, framework
injections) down to user prompts + assistant prose, and writes one `session`
source to `sources/session/`. **No LLM, no extraction** — just capture.
2. `npm run autoknowledge` (manual or scheduled) extracts stanced insights from
those sources. Run it with a stricter gate so the noisier session input must
clear a higher bar:
`ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional npm run autoknowledge`
3. The gate admits only sharp, novel session-insights; dedup prevents re-capture.

### Setup

```bash
cp templates/hooks/session-capture.sh .claude/hooks/session-capture.sh
chmod +x .claude/hooks/session-capture.sh
```

Register a `SessionEnd` hook in `.claude/settings.json` by adding this entry inside the top-level `"hooks"` object:

```json
"SessionEnd": [
{
"matcher": "",
"hooks": [
{ "type": "command", "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/session-capture.sh\"" }
]
}
]
```

### Design note — supersedes Phase 8's constraint *for capture only*

- **Capture is automated; extraction is not.** The hook only *captures* (writes a
source); insight extraction stays the deliberate, gated `autoknowledge` step —
so Phase 8's "no auto-extraction" still holds.
- **The gate replaces manual intent as the quality filter.** Phase 8 required
explicit intent because there was no automatic quality control; the gate now
provides it, so passive transcript capture is safe.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
"quality-score": "tsx scripts/quality-score.ts",
"gate": "tsx scripts/insight-gate.ts",
"sync-claude-mem": "tsx scripts/sync-claude-mem.ts",
"capture-session": "tsx scripts/capture-session.ts",
"ask": "tsx scripts/ask.ts",
"autoknowledge": "tsx scripts/autoknowledge.ts",
"wake": "tsx scripts/wake.ts",
Expand Down
102 changes: 102 additions & 0 deletions scripts/capture-session.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env npx tsx
/**
* capture-session.ts — Claude Code SessionEnd hook → Zuhn "session" source.
*
* Reads the hook's stdin JSON ({ session_id, transcript_path, ... }), parses the
* transcript into a clean conversation, and writes one Zuhn session source. It
* does NOT extract insights — that stays the deliberate, gated autoknowledge
* step (so Phase 8's "no auto-extraction" holds; only capture is automated).
*
* Designed to be a safe hook: never throws, never blocks the session, exits 0.
*
* Hook usage (settings.json SessionEnd): the hook JSON is piped in on stdin.
* Manual/testing:
* npx tsx scripts/capture-session.ts --transcript <path.jsonl> --session <id> --dry-run
*/

import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import matter from "gray-matter";

import { buildTranscriptSource } from "./lib/transcript";
import { KB_ROOT } from "./lib/kb-root";

// Directory (under KB_ROOT) where captured session source files are written.
const SESSION_DIR = join(KB_ROOT, "sources", "session");

/**
 * The fields this script reads from the SessionEnd hook's stdin JSON.
 * Both are optional because the payload is external input and may omit them.
 */
interface HookInput {
// Identifier of the session being captured.
session_id?: string;
// Filesystem path of the session transcript that gets read and parsed.
transcript_path?: string;
}

/**
 * Synchronously read everything available on stdin (fd 0) as UTF-8.
 *
 * @returns The stdin contents, or an empty string if the read fails.
 */
function readStdin(): string {
  let piped = "";
  try {
    piped = readFileSync(0, "utf-8");
  } catch {
    // An unreadable stdin is treated the same as "nothing was piped in".
  }
  return piped;
}

/**
 * Work out which transcript to capture and under which session id.
 *
 * Two input modes are supported:
 * - Manual/testing: `--transcript <path>` in `argv`, optionally with
 *   `--session <id>`; when `--session` is absent the id is `"manual"`.
 * - Hook mode: a JSON object on stdin carrying `transcript_path` and
 *   `session_id`.
 *
 * The stdin payload is external input, so it is narrowed from `unknown`
 * rather than cast: anything that is not a JSON object yields `{}`, and a
 * field is only returned when it is actually a string.
 *
 * @param argv - The process argument vector to scan for flags.
 * @returns The resolved values; either may be missing. Never throws.
 */
function resolveInput(argv: string[]): { transcriptPath?: string; sessionId?: string } {
  const transcriptFlag = argv.indexOf("--transcript");
  if (transcriptFlag !== -1) {
    const sessionFlag = argv.indexOf("--session");
    return {
      transcriptPath: argv[transcriptFlag + 1],
      sessionId: sessionFlag !== -1 ? argv[sessionFlag + 1] : "manual",
    };
  }

  // Hook mode: JSON on stdin. Guard against a TTY (no pipe) so manual runs
  // without --transcript don't block waiting on fd 0.
  if (process.stdin.isTTY) return {};

  const raw = readStdin().trim();
  if (!raw) return {};

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return {}; // malformed JSON — treat as "no input"
  }

  // Valid JSON that is not an object (null, number, string, ...) carries no fields.
  if (typeof parsed !== "object" || parsed === null) return {};

  const { transcript_path, session_id } = parsed as Record<keyof HookInput, unknown>;
  return {
    transcriptPath: typeof transcript_path === "string" ? transcript_path : undefined,
    sessionId: typeof session_id === "string" ? session_id : undefined,
  };
}

/**
 * Entry point: capture one session transcript as a session source file.
 *
 * Steps:
 * 1. Resolve the transcript path and session id from argv or stdin.
 * 2. Read the transcript and build the source via `buildTranscriptSource`.
 * 3. Write `<slug>.md` into SESSION_DIR unless it already exists.
 *
 * With `--dry-run` nothing is written; each decision is logged instead.
 * Every failure path returns quietly, because this runs as a hook and must
 * never break the session.
 */
function main(): void {
  const dryRun = process.argv.includes("--dry-run");
  const { transcriptPath, sessionId } = resolveInput(process.argv);

  // Graceful no-op on missing/odd input — a hook must never break the session.
  if (!transcriptPath || !existsSync(transcriptPath)) {
    if (dryRun) console.log(`No transcript (${transcriptPath ?? "none"}) — nothing to capture.`);
    return;
  }

  let src;
  try {
    const jsonl = readFileSync(transcriptPath, "utf-8");
    // Fall back to the transcript path as the identifier when no session id was given.
    src = buildTranscriptSource(jsonl, sessionId ?? transcriptPath);
  } catch (err) {
    if (dryRun) {
      // A thrown value is not guaranteed to be an Error, so narrow before reading .message.
      const reason = err instanceof Error ? err.message : String(err);
      console.log(`Could not read/parse transcript: ${reason}`);
    }
    return; // never throw out of a hook
  }

  if (!src) {
    if (dryRun) console.log("Transcript had no usable conversation — skipping.");
    return;
  }

  const outPath = join(SESSION_DIR, `${src.slug}.md`);
  if (existsSync(outPath)) {
    if (dryRun) console.log(`Already captured: ${src.id} → ${outPath}`);
    return; // idempotent
  }

  if (dryRun) {
    console.log(`WOULD write ${src.id} "${src.title}" (${src.data.word_count} words) → ${outPath}`);
    return;
  }

  try {
    mkdirSync(SESSION_DIR, { recursive: true });
    // "wx" makes the write fail if the file appeared after the existsSync check
    // above, so an existing capture is never overwritten.
    writeFileSync(outPath, matter.stringify(src.body, src.data), { encoding: "utf-8", flag: "wx" });
    console.log(`Zuhn: captured session ${src.id} — run 'npm run autoknowledge' to extract + gate.`);
  } catch {
    // Swallow — a capture failure must not surface as a session error.
  }
}

main();
107 changes: 107 additions & 0 deletions scripts/lib/transcript.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import { describe, expect, it } from "vitest";

import {
buildTranscriptSource,
parseTranscript,
renderConversation,
DEFAULT_BODY_CAP,
type Turn,
} from "./transcript";

// Test helper: serialize each value as JSON and join them one per line,
// producing a transcript-shaped JSONL string.
function jsonl(objs: unknown[]): string {
  const lines: string[] = [];
  for (const entry of objs) {
    lines.push(JSON.stringify(entry));
  }
  return lines.join("\n");
}

// Shared transcript fixture. Only the first two entries are expected to
// survive parsing (one user prompt, one assistant text block); every other
// entry is a kind of noise the tests below expect to be dropped.
const FIXTURE = jsonl([
{ type: "user", isSidechain: false, message: { role: "user", content: "First real prompt" } },
{
type: "assistant",
message: {
role: "assistant",
content: [
// Only the "text" block is expected in the parsed output.
{ type: "thinking", thinking: "internal reasoning" },
{ type: "text", text: "Assistant reply" },
{ type: "tool_use", name: "Bash", input: {} },
],
},
},
// tool_result (user, array content, no text block) → dropped
{ type: "user", message: { role: "user", content: [{ type: "tool_result", content: "huge output" }] } },
// subagent → dropped
{ type: "user", isSidechain: true, message: { role: "user", content: "sidechain prompt" } },
// non-conversation line → dropped
{ type: "attachment", foo: 1 },
// framework injection → dropped
{ type: "user", message: { role: "user", content: "<system-reminder>noise</system-reminder>" } },
// NOTE(review): jsonl() JSON-stringifies this, so the emitted line is a valid
// JSON string literal rather than malformed JSON; it is a non-object line.
"not valid json",
]);

// parseTranscript: turns transcript JSONL into an ordered list of
// { role, text } turns, per the expectations asserted below.
describe("parseTranscript", () => {
it("keeps only real user prompts + assistant text, dropping all noise", () => {
// FIXTURE mixes two real turns with several noise entries; only the two remain.
const turns = parseTranscript(FIXTURE);
expect(turns).toEqual<Turn[]>([
{ role: "user", text: "First real prompt" },
{ role: "assistant", text: "Assistant reply" },
]);
});

// NOTE(review): this case supplies only a text block. Skipping of
// thinking/tool_use blocks is exercised by the FIXTURE case above, not here.
it("extracts text blocks from array content but skips tool/thinking blocks", () => {
const t = parseTranscript(
jsonl([
{ type: "user", message: { role: "user", content: [{ type: "text", text: "typed with attachment" }] } },
])
);
expect(t).toEqual([{ role: "user", text: "typed with attachment" }]);
});

it("tolerates malformed lines and empty input", () => {
// Unparseable lines, blank lines and the empty string all yield no turns.
expect(parseTranscript("not json\n\n{bad")).toEqual([]);
expect(parseTranscript("")).toEqual([]);
});
});

// renderConversation: renders turns as labelled text, optionally capped in length.
describe("renderConversation", () => {
it("labels turns and caps to the tail with a truncation note", () => {
// Two 100-character turns rendered with a cap of 50 must be truncated.
const long: Turn[] = [
{ role: "user", text: "X".repeat(100) },
{ role: "assistant", text: "Y".repeat(100) },
];
const out = renderConversation(long, 50);
// The output may exceed the cap only by the length of the truncation note.
expect(out.length).toBeLessThanOrEqual(50 + "_[earlier turns truncated]_\n\n".length);
expect(out).toContain("earlier turns truncated");
expect(out.endsWith("Y")).toBe(true); // tail kept
});

it("formats user/assistant labels when under the cap", () => {
// No cap argument is passed here, so the default applies.
const out = renderConversation([{ role: "user", text: "hi" }, { role: "assistant", text: "yo" }]);
expect(out).toBe("**User:** hi\n\n**Assistant:** yo");
expect(DEFAULT_BODY_CAP).toBeGreaterThan(0);
});
});

// buildTranscriptSource: builds a "session" source (id, slug, title, data, body)
// from transcript JSONL, or returns null when no conversation remains.
describe("buildTranscriptSource", () => {
it("builds a 'session' source titled by the first user prompt, salted by session id", () => {
// The third argument is expected to surface as data.date_ingested.
const src = buildTranscriptSource(FIXTURE, "sess-xyz12345", "2026-05-26");
expect(src).not.toBeNull();
expect(src!.title).toBe("First real prompt");
// Id shape: "SRC-", six digits, "-", four uppercase hex characters.
expect(src!.id).toMatch(/^SRC-\d{6}-[A-F0-9]{4}$/);
expect(src!.slug).toContain("sessxyz1"); // first 8 alphanumerics of session id
expect(src!.data.type).toBe("session");
expect(src!.data.date_ingested).toBe("2026-05-26");
expect(src!.body).toContain("**User:** First real prompt");
expect(src!.body).toContain("**Assistant:** Assistant reply");
});

it("returns null when there's no usable conversation", () => {
// Empty input and input holding only non-conversation lines both give null.
expect(buildTranscriptSource("", "s")).toBeNull();
expect(buildTranscriptSource(jsonl([{ type: "attachment" }, { type: "system" }]), "s")).toBeNull();
});

it("is deterministic for a given session id", () => {
// Same transcript, session id and date must give the same id and slug.
const a = buildTranscriptSource(FIXTURE, "sess-1", "2026-05-26");
const b = buildTranscriptSource(FIXTURE, "sess-1", "2026-05-26");
expect(a!.id).toBe(b!.id);
expect(a!.slug).toBe(b!.slug);
});
});
Loading
Loading