gorajing · gorajing · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/docs/session-pipeline-setup.md b/docs/session-pipeline-setup.md
@@ -54,3 +54,54 @@ During any Claude Code session working with Zuhn:
 - **Explicit intent only** — the hook only reads `/tmp/`, never scrapes other directories
 - **No auto-extraction** — `extract-session` must be run manually
 - **`.claude/` files are local** — never commit personal `.claude/` config to the public repo
+
+---
+
+## Automatic capture (SessionEnd hook)
+
+Phase 8 above is *explicit-intent* capture (Claude manually queues insights). The
+**session-capture hook** adds *automatic* capture — it records the whole session
+at exit, so nothing has to be queued by hand. This is safe now because the
+**quality gate exists**: the gate (not manual intent) is the filter that keeps
+junk out, so capture can be automatic while quality stays enforced.
+
+### How it works
+
+1. `capture-session.ts` runs on `SessionEnd`. It reads the session transcript,
+   strips noise (tool output, `thinking`, subagent sidechains, framework
+   injections) down to user prompts + assistant prose, and writes one `session`
+   source to `sources/session/`. **No LLM, no extraction** — just capture.
+2. `npm run autoknowledge` (run manually or on a schedule) extracts stanced
+   insights from those sources. Because session input is noisier, tighten the
+   gate for this run so insights have to clear a higher bar:
+   `ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional npm run autoknowledge`
+3. The gate admits only sharp, novel session-insights; dedup prevents re-capture.
+
+### Setup
+
+```bash
+cp templates/hooks/session-capture.sh .claude/hooks/session-capture.sh
+chmod +x .claude/hooks/session-capture.sh
+```
+
+Register a `SessionEnd` hook under the top-level `"hooks"` key in `.claude/settings.json`:
+
+```json
+"SessionEnd": [
+  {
+    "matcher": "",
+    "hooks": [
+      { "type": "command", "command": "bash \"$CLAUDE_PROJECT_DIR/.claude/hooks/session-capture.sh\"" }
+    ]
+  }
+]
+```
+
+### Design note — supersedes Phase 8's constraint *for capture only*
+
+- **Capture is automated; extraction is not.** The hook only *captures* (writes a
+  source) and never starts extraction; that stays the deliberate, gated
+  `autoknowledge` step, so Phase 8's "no auto-extraction" still holds.
+- **The gate replaces manual intent as the quality filter.** Phase 8 required
+  explicit intent because there was no automatic quality control; the gate now
+  provides it, so passive transcript capture is safe.
diff --git a/package.json b/package.json
@@ -24,6 +24,7 @@
     "quality-score": "tsx scripts/quality-score.ts",
     "gate": "tsx scripts/insight-gate.ts",
     "sync-claude-mem": "tsx scripts/sync-claude-mem.ts",
+    "capture-session": "tsx scripts/capture-session.ts",
     "ask": "tsx scripts/ask.ts",
     "autoknowledge": "tsx scripts/autoknowledge.ts",
     "wake": "tsx scripts/wake.ts",

diff --git a/scripts/capture-session.ts b/scripts/capture-session.ts
@@ -0,0 +1,102 @@
+#!/usr/bin/env npx tsx
+/**
+ * capture-session.ts — Claude Code SessionEnd hook → Zuhn "session" source.
+ *
+ * Reads the hook's stdin JSON ({ session_id, transcript_path, ... }), parses the
+ * transcript into a clean conversation, and writes one Zuhn session source. It
+ * does NOT extract insights — that stays the deliberate, gated autoknowledge
+ * step (so Phase 8's "no auto-extraction" holds; only capture is automated).
+ *
+ * Designed to be a safe hook: never throws, never blocks the session, exits 0.
+ *
+ * Hook usage (settings.json SessionEnd): the hook JSON is piped in on stdin.
+ * Manual/testing:
+ *   npx tsx scripts/capture-session.ts --transcript <path.jsonl> --session <id> --dry-run
+ */
+
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
+import matter from "gray-matter";
+
+import { buildTranscriptSource } from "./lib/transcript";
+import { KB_ROOT } from "./lib/kb-root";
+
+const SESSION_DIR = join(KB_ROOT, "sources", "session"); // directory the captured session sources are written to
+/** The fields this script reads from the hook's stdin JSON; either may be absent. */
+interface HookInput {
+  session_id?: string; // handed on as `sessionId`
+  transcript_path?: string; // handed on as `transcriptPath`; main() reads this file
+}
+
+/** Read all of stdin (fd 0) as UTF-8; any read failure yields "". */
+function readStdin(): string {
+  let piped = "";
+  try { piped = readFileSync(0, "utf-8"); }
+  catch { /* unreadable stdin is treated as empty input */ }
+  return piped;
+}
+
+/** Resolve transcript path + session id: CLI flags win, else the hook JSON piped on stdin. */
+function resolveInput(argv: string[]): { transcriptPath?: string; sessionId?: string } {
+  const transcriptAt = argv.indexOf("--transcript");
+  if (transcriptAt >= 0) {
+    // Manual mode: each value follows its flag; the session id defaults to "manual".
+    const sessionAt = argv.indexOf("--session");
+    return { transcriptPath: argv[transcriptAt + 1], sessionId: sessionAt >= 0 ? argv[sessionAt + 1] : "manual" };
+  }
+  // Hook mode: a TTY means nothing was piped, and reading fd 0 there would block.
+  const payload = process.stdin.isTTY ? "" : readStdin().trim();
+  if (payload === "") return {};
+  try {
+    const { transcript_path, session_id } = JSON.parse(payload) as HookInput;
+    return { transcriptPath: transcript_path, sessionId: session_id };
+  } catch {
+    return {}; // unparseable payload → behave as if there were no input
+  }
+}
+
+/**
+ * Hook entry point: resolve the transcript, build one session source, write it once.
+ * Every bail-out is a quiet return whose reason is printed only under --dry-run.
+ */
+function main(): void {
+  const dryRun = process.argv.includes("--dry-run");
+  // Messages are thunks so nothing is formatted (or can fail) on normal runs.
+  const report = (message: () => string): void => {
+    if (dryRun) console.log(message());
+  };
+  const { transcriptPath, sessionId } = resolveInput(process.argv);
+
+  if (!transcriptPath || !existsSync(transcriptPath)) {
+    return report(() => `No transcript (${transcriptPath ?? "none"}) — nothing to capture.`);
+  }
+
+  let src;
+  try {
+    const rawTranscript = readFileSync(transcriptPath, "utf-8");
+    src = buildTranscriptSource(rawTranscript, sessionId ?? transcriptPath);
+  } catch (err) {
+    return report(() => `Could not read/parse transcript: ${(err as Error).message}`); // never rethrow from a hook
+  }
+  if (!src) return report(() => "Transcript had no usable conversation — skipping.");
+
+  const path = join(SESSION_DIR, `${src.slug}.md`);
+  if (existsSync(path)) {
+    if (dryRun) console.log(`Already captured: ${src.id} → ${path}`);
+    return; // idempotent: an existing capture is left untouched
+  }
+  if (dryRun) {
+    console.log(`WOULD write ${src.id}  "${src.title}"  (${src.data.word_count} words) → ${path}`);
+    return;
+  }
+
+  try {
+    mkdirSync(SESSION_DIR, { recursive: true });
+    writeFileSync(path, matter.stringify(src.body, src.data), "utf-8");
+    console.log(`Zuhn: captured session ${src.id} — run 'npm run autoknowledge' to extract + gate.`);
+  } catch {
+    // Deliberately swallowed — a capture failure must not surface as a session error.
+  }
+}
+
+main();
diff --git a/scripts/lib/transcript.test.ts b/scripts/lib/transcript.test.ts
@@ -0,0 +1,107 @@
+import { describe, expect, it } from "vitest";
+
+import {
+  buildTranscriptSource,
+  parseTranscript,
+  renderConversation,
+  DEFAULT_BODY_CAP,
+  type Turn,
+} from "./transcript";
+
+function jsonl(objs: unknown[]): string { // builds transcript JSONL: one JSON-encoded value per line
+  const encoded = objs.map((value) => JSON.stringify(value));
+  return encoded.join("\n");
+}
+
+const FIXTURE = jsonl([ // one kept user/assistant exchange, then lines the parser is expected to drop
+  { type: "user", isSidechain: false, message: { role: "user", content: "First real prompt" } },
+  {
+    type: "assistant",
+    message: {
+      role: "assistant",
+      content: [
+        { type: "thinking", thinking: "internal reasoning" },
+        { type: "text", text: "Assistant reply" },
+        { type: "tool_use", name: "Bash", input: {} },
+      ],
+    },
+  },
+  // tool_result (user, array content, no text block) → dropped
+  { type: "user", message: { role: "user", content: [{ type: "tool_result", content: "huge output" }] } },
+  // subagent → dropped
+  { type: "user", isSidechain: true, message: { role: "user", content: "sidechain prompt" } },
+  // non-conversation line → dropped
+  { type: "attachment", foo: 1 },
+  // framework injection → dropped
+  { type: "user", message: { role: "user", content: "<system-reminder>noise</system-reminder>" } },
+  "not valid json", // NB: jsonl() stringifies this into a *valid* JSON string line (a non-object) → dropped
+]) + "\n{not valid json"; // a genuinely malformed raw line, appended after serialization → dropped
+
+describe("parseTranscript", () => {
+  it("keeps only real user prompts + assistant text, dropping all noise", () => {
+    expect(parseTranscript(FIXTURE)).toEqual<Turn[]>([
+      { role: "user", text: "First real prompt" },
+      { role: "assistant", text: "Assistant reply" },
+    ]);
+  });
+
+  it("extracts text blocks from array content but skips tool/thinking blocks", () => {
+    const userLine = { type: "user", message: { role: "user", content: [{ type: "text", text: "typed with attachment" }] } };
+    const mixed = [{ type: "thinking", thinking: "hidden" }, { type: "text", text: "kept prose" }, { type: "tool_use", name: "Bash", input: {} }];
+    const assistantLine = { type: "assistant", message: { role: "assistant", content: mixed } }; // carries the blocks to skip
+    expect(parseTranscript(jsonl([userLine, assistantLine]))).toEqual<Turn[]>([
+      { role: "user", text: "typed with attachment" },
+      { role: "assistant", text: "kept prose" },
+    ]);
+  });
+
+  it("tolerates malformed lines and empty input", () => {
+    for (const raw of ["not json\n\n{bad", ""]) expect(parseTranscript(raw)).toEqual([]);
+  });
+});
+
+describe("renderConversation", () => {
+  it("labels turns and caps to the tail with a truncation note", () => {
+    // Two 100-character turns rendered against a 50-character cap.
+    const cap = 50;
+    const truncationNote = "_[earlier turns truncated]_\n\n";
+    const oversized: Turn[] = [{ role: "user", text: "X".repeat(100) }, { role: "assistant", text: "Y".repeat(100) }];
+    const rendered = renderConversation(oversized, cap);
+    expect(rendered.length).toBeLessThanOrEqual(cap + truncationNote.length);
+    expect(rendered).toContain("earlier turns truncated");
+    expect(rendered.endsWith("Y")).toBe(true); // the end of the conversation is what survives
+  });
+
+  it("formats user/assistant labels when under the cap", () => {
+    const short: Turn[] = [{ role: "user", text: "hi" }, { role: "assistant", text: "yo" }];
+    expect(renderConversation(short)).toBe("**User:** hi\n\n**Assistant:** yo");
+    expect(DEFAULT_BODY_CAP).toBeGreaterThan(0);
+  });
+});
+
+describe("buildTranscriptSource", () => {
+  it("builds a 'session' source titled by the first user prompt, salted by session id", () => {
+    const built = buildTranscriptSource(FIXTURE, "sess-xyz12345", "2026-05-26");
+    expect(built).not.toBeNull();
+    const { title, id, slug, data, body } = built!;
+    expect(title).toBe("First real prompt");
+    expect(id).toMatch(/^SRC-\d{6}-[A-F0-9]{4}$/);
+    expect(slug).toContain("sessxyz1"); // "sess-xyz12345" with non-alphanumerics removed, first 8 chars
+    expect(data.type).toBe("session");
+    expect(data.date_ingested).toBe("2026-05-26");
+    expect(body).toContain("**User:** First real prompt");
+    expect(body).toContain("**Assistant:** Assistant reply");
+  });
+
+  it("returns null when there's no usable conversation", () => {
+    const onlyNoise = jsonl([{ type: "attachment" }, { type: "system" }]);
+    for (const input of ["", onlyNoise]) expect(buildTranscriptSource(input, "s")).toBeNull();
+  });
+
+  it("is deterministic for a given session id", () => {
+    const [first, second] = [1, 2].map(() => buildTranscriptSource(FIXTURE, "sess-1", "2026-05-26"));
+    expect(first!.id).toBe(second!.id);
+    expect(first!.slug).toBe(second!.slug);
+  });
+});