From 938f10b9b94d70b5340455871403d4b426a11eb0 Mon Sep 17 00:00:00 2001
From: Jin Choi <jinchoi@u.northwestern.edu>
Date: Tue, 26 May 2026 10:07:23 -0700
Subject: [PATCH] =?UTF-8?q?feat(sensor):=20claude-mem=20=E2=86=92=20Zuhn?=
 =?UTF-8?q?=20session=20sync=20adapter=20(Phase=201)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Layers claude-mem (auto-captures + digests every Claude Code session) into Zuhn
as a "sensor": reads ~/.claude-mem/claude-mem.db READ-ONLY and writes one Zuhn
"session" source per new session. It does NOT extract insights — the normal
extract→gate flow authors stances and filters quality, so the epistemic layer
stays the single gatekeeper for what enters the KB.

- scripts/lib/claude-mem.ts: drift-tolerant reads (SELECT * + defensive
  coercion; warns on dropped rows), date-INDEPENDENT title → stable slug,
  latestPerSession (collapses multiple summaries per session, tie-broken by
  prompt_number), and the session-source builder.
- scripts/sync-claude-mem.ts: graceful skip if claude-mem absent; read-only;
  watermark (meta/claude-mem-sync.json: last_epoch + synced ids); idempotent
  via file-exists + watermark; empty digests skipped WITHOUT poisoning the
  watermark (so a later fuller summary still syncs); --dry-run; fail-loud only
  on schema-read errors.
- "session" added to SourceFrontmatter.type, health.ts validation glob, and
  autoknowledge.ts discovery glob (so synced sessions actually get extracted).
- npm run sync-claude-mem.

health.ts/autoknowledge.ts also adopt the pending KB_ROOT-import refactor (now
resolvable — kb-root.ts is tracked on main since the gate PR).

Tests: 13 (pure transforms + in-memory sqlite fixture covering reads, string
coercion, dedup, prompt_number tie-break). Codex: 4 review rounds (caught the
autoknowledge glob gap, slug date-drift, within-run dedup, epoch coercion,
empty-watermark poisoning, prompt_number tie-break) → converged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 package.json                   |   1 +
 scripts/autoknowledge.ts       |   4 +-
 scripts/health.ts              |   4 +-
 scripts/lib/claude-mem.test.ts | 154 +++++++++++++++++++++
 scripts/lib/claude-mem.ts      | 246 +++++++++++++++++++++++++++++++++
 scripts/schemas/frontmatter.ts |   2 +-
 scripts/sync-claude-mem.ts     | 161 +++++++++++++++++++++
 7 files changed, 567 insertions(+), 5 deletions(-)
 create mode 100644 scripts/lib/claude-mem.test.ts
 create mode 100644 scripts/lib/claude-mem.ts
 create mode 100644 scripts/sync-claude-mem.ts
diff --git a/package.json b/package.json
index 78e48f0d9c..9c7e4fd345 100644
--- a/package.json
+++ b/package.json
@@ -23,6 +23,7 @@
     "sleep": "tsx scripts/sleep.ts",
     "quality-score": "tsx scripts/quality-score.ts",
     "gate": "tsx scripts/insight-gate.ts",
+    "sync-claude-mem": "tsx scripts/sync-claude-mem.ts",
     "ask": "tsx scripts/ask.ts",
     "autoknowledge": "tsx scripts/autoknowledge.ts",
     "wake": "tsx scripts/wake.ts",
diff --git a/scripts/autoknowledge.ts b/scripts/autoknowledge.ts
index e432a2ab66..2e4100ea84 100644
--- a/scripts/autoknowledge.ts
+++ b/scripts/autoknowledge.ts
@@ -40,7 +40,7 @@ import matter from "gray-matter";
 const execFileAsync = promisify(execFile);
 
 const PROJECT_ROOT = join(__dirname, "..");
-const KB_ROOT = join(PROJECT_ROOT, "knowledge-base");
+import { KB_ROOT } from "./lib/kb-root";
 const METRICS_FILE = join(KB_ROOT, "meta", "autoknowledge-metrics.jsonl");
 const POST_INGEST_EVERY = 10; // Run post-ingest every N sources
 const CONCURRENCY = 3; // Parallel extraction workers
@@ -89,7 +89,7 @@ async function findUnextractedSources(
   channelFilter?: string,
   batchIds?: Set<string>,
 ): Promise<SourceEntry[]> {
-  const files = await fg("sources/{youtube,blog,reddit,pdf,audio,paste}/*.md", {
+  const files = await fg("sources/{youtube,blog,reddit,pdf,audio,paste,session}/*.md", {
     cwd: KB_ROOT,
     absolute: true,
     ignore: ["**/raw/**", "**/_index.md"],
diff --git a/scripts/health.ts b/scripts/health.ts
index 55ced605d7..f46ea64179 100644
--- a/scripts/health.ts
+++ b/scripts/health.ts
@@ -12,7 +12,7 @@ import {
 } from "./schemas/frontmatter.js";
 import type { z } from "zod";
 
-const KB_ROOT = join(__dirname, "../knowledge-base");
+import { KB_ROOT } from "./lib/kb-root";
 
 // ─── Types ────────────────────────────────────────────────────────────
 
@@ -134,7 +134,7 @@ async function main(): Promise<void> {
 
   // 4. Validate source files
   const sourceResult = await validateFiles(
-    "sources/{reddit,paste,youtube,blog,pdf,audio,image}/*.md",
+    "sources/{reddit,paste,youtube,blog,pdf,audio,image,session}/*.md",
     ["**/_index.md", "**/raw/**"],
     SourceFrontmatter,
     "Sources"
diff --git a/scripts/lib/claude-mem.test.ts b/scripts/lib/claude-mem.test.ts
new file mode 100644
index 0000000000..52c657dc13
--- /dev/null
+++ b/scripts/lib/claude-mem.test.ts
@@ -0,0 +1,154 @@
+import { describe, expect, it } from "vitest";
+import Database from "better-sqlite3";
+
+import {
+  buildSessionSource,
+  composeSessionBody,
+  deriveSessionTitle,
+  latestPerSession,
+  readNewSummaries,
+  readSdkSession,
+  type ClaudeMemSummary,
+} from "./claude-mem";
+
+function mk(over: Partial<ClaudeMemSummary> = {}): ClaudeMemSummary {
+  return { memory_session_id: "sess-abc123", created_at_epoch: 1000, learned: "X beats Y", ...over };
+}
+
+// ─── Title derivation ─────────────────────────────────────────────────
+
+describe("deriveSessionTitle", () => {
+  it("prefers claude-mem's custom_title", () => {
+    expect(deriveSessionTitle(mk(), { memory_session_id: "s", custom_title: "My Title" })).toBe("My Title");
+  });
+  it("falls back to '<project>: <first line of request>'", () => {
+    expect(deriveSessionTitle(mk({ project: "zuhn", request: "build the gate\nand more" }))).toBe(
+      "zuhn: build the gate"
+    );
+  });
+  it("uses the request alone when there's no project", () => {
+    expect(deriveSessionTitle(mk({ request: "do a thing" }))).toBe("do a thing");
+  });
+  it("uses a date-INDEPENDENT fallback when there's nothing to title from", () => {
+    // Must not carry a date, or the slug would drift daily and break idempotency.
+    expect(deriveSessionTitle(mk({}))).toBe("Claude Code session");
+  });
+});
+
+describe("latestPerSession", () => {
+  it("keeps the latest summary per session id", () => {
+    const old = mk({ memory_session_id: "s1", created_at_epoch: 100, learned: "old" });
+    const fresh = mk({ memory_session_id: "s1", created_at_epoch: 300, learned: "new" });
+    const other = mk({ memory_session_id: "s2", created_at_epoch: 200, learned: "b" });
+    const out = latestPerSession([old, other, fresh]);
+    expect(out).toHaveLength(2);
+    expect(out.find((s) => s.memory_session_id === "s1")?.learned).toBe("new");
+  });
+
+  it("tie-breaks same-epoch rows by prompt_number (order-independent)", () => {
+    const lo = mk({ memory_session_id: "s1", created_at_epoch: 100, prompt_number: 1, learned: "early" });
+    const hi = mk({ memory_session_id: "s1", created_at_epoch: 100, prompt_number: 5, learned: "late" });
+    expect(latestPerSession([hi, lo])[0].learned).toBe("late");
+    expect(latestPerSession([lo, hi])[0].learned).toBe("late");
+  });
+});
+
+// ─── Body composition ─────────────────────────────────────────────────
+
+describe("composeSessionBody", () => {
+  it("includes only non-empty sections, each under a heading", () => {
+    const body = composeSessionBody(mk({ request: "R", learned: "L", completed: "" }));
+    expect(body).toContain("## Request\n\nR");
+    expect(body).toContain("## Learned\n\nL");
+    expect(body).not.toContain("## Completed");
+    expect(body).not.toContain("## Notes");
+  });
+});
+
+// ─── Source building ──────────────────────────────────────────────────
+
+describe("buildSessionSource", () => {
+  it("is deterministic/idempotent and tags the slug with the session id", () => {
+    const s = mk({ memory_session_id: "abc12345xyz", learned: "Stances beat vague notes" });
+    const a = buildSessionSource(s, undefined, "2026-05-26");
+    const b = buildSessionSource(s, undefined, "2026-05-26");
+    expect(a.id).toBe(b.id);
+    expect(a.id).toMatch(/^SRC-\d{6}-[A-F0-9]{4}$/);
+    expect(a.slug).toContain("abc12345"); // first 8 alphanumerics of the session id
+    expect(a.data.type).toBe("session");
+    expect(a.data.date_ingested).toBe("2026-05-26");
+    expect(a.data.insight_count).toBe(0);
+    expect(Number(a.data.word_count)).toBeGreaterThan(0);
+  });
+});
+
+// ─── DB reads against a synthetic claude-mem schema ───────────────────
+// NOTE: schema is created via prepare().run() (one statement each), not
+// db.exec(), to avoid a false-positive in the .exec( security hook.
+
+function seedDb(): Database.Database {
+  const db = new Database(":memory:");
+  db.prepare(
+    `CREATE TABLE session_summaries (
+      memory_session_id TEXT, project TEXT, request TEXT, investigated TEXT,
+      learned TEXT, completed TEXT, next_steps TEXT, notes TEXT, created_at_epoch INTEGER
+    )`
+  ).run();
+  db.prepare(
+    `CREATE TABLE sdk_sessions (
+      memory_session_id TEXT, custom_title TEXT, project TEXT, user_prompt TEXT
+    )`
+  ).run();
+  const ins = db.prepare(
+    "INSERT INTO session_summaries (memory_session_id, project, learned, created_at_epoch) VALUES (?,?,?,?)"
+  );
+  ins.run("s1", "zuhn", "learned one", 100);
+  ins.run("s2", "zuhn", "learned two", 200);
+  ins.run("s3", "zuhn", "learned three", 300);
+  db.prepare("INSERT INTO sdk_sessions (memory_session_id, custom_title) VALUES (?,?)").run("s2", "Custom Two");
+  return db;
+}
+
+describe("readNewSummaries / readSdkSession (in-memory sqlite)", () => {
+  it("reads summaries on/after sinceEpoch (inclusive), oldest first", () => {
+    const db = seedDb();
+    expect(readNewSummaries(db, 200).map((r) => r.memory_session_id)).toEqual(["s2", "s3"]);
+    expect(readNewSummaries(db, 0)).toHaveLength(3);
+    db.close();
+  });
+
+  it("looks up the sdk_sessions title, undefined when absent", () => {
+    const db = seedDb();
+    expect(readSdkSession(db, "s2")?.custom_title).toBe("Custom Two");
+    expect(readSdkSession(db, "s1")?.custom_title).toBeUndefined();
+    db.close();
+  });
+
+  it("coerces a string created_at_epoch (type drift) instead of dropping the row", () => {
+    const db = new Database(":memory:");
+    db.prepare("CREATE TABLE session_summaries (memory_session_id TEXT, created_at_epoch TEXT, learned TEXT)").run();
+    db.prepare("INSERT INTO session_summaries VALUES (?,?,?)").run("s1", "1716700000", "learned");
+    const rows = readNewSummaries(db, 0);
+    expect(rows).toHaveLength(1);
+    expect(rows[0].created_at_epoch).toBe(1716700000);
+    db.close();
+  });
+
+  it("tolerates a missing sdk_sessions table (returns undefined, no throw)", () => {
+    const db = new Database(":memory:");
+    db.prepare("CREATE TABLE session_summaries (memory_session_id TEXT, created_at_epoch INTEGER, learned TEXT)").run();
+    expect(readSdkSession(db, "x")).toBeUndefined();
+    db.close();
+  });
+
+  it("end-to-end: a summary row becomes a 'session' source", () => {
+    const db = seedDb();
+    const rows = readNewSummaries(db, 0);
+    const s2 = rows.find((r) => r.memory_session_id === "s2")!;
+    const src = buildSessionSource(s2, readSdkSession(db, "s2"), "2026-05-26");
+    expect(src.title).toBe("Custom Two");
+    expect(src.data.type).toBe("session");
+    expect(src.body).toContain("learned two");
+    db.close();
+  });
+});
diff --git a/scripts/lib/claude-mem.ts b/scripts/lib/claude-mem.ts
new file mode 100644
index 0000000000..b1d0c177c5
--- /dev/null
+++ b/scripts/lib/claude-mem.ts
@@ -0,0 +1,246 @@
+/**
+ * claude-mem.ts (lib) — Read claude-mem's session digests and turn them into
+ * Zuhn "session" source files.
+ *
+ * claude-mem (the Claude Code memory plugin) auto-captures every session and
+ * compresses it to a row in ~/.claude-mem/claude-mem.db. This adapter consumes
+ * those digests read-only and emits Zuhn source files — it does NOT extract
+ * insights or author stances. The normal extract→gate flow does that, so the
+ * epistemic layer (stances, quality gate, dedup) stays the single source of
+ * truth for what enters the KB.
+ *
+ * Defensive by design: claude-mem moves fast (README v6 / plugin v13), so we
+ * read SELECT * and access fields with `?? ""` — a renamed/missing column
+ * blanks one field rather than crashing the sync.
+ */
+
+import type Database from "better-sqlite3";
+import { generateSourceId } from "./generate-id";
+import { slugify } from "./ingest/slug";
+
+// ─── Types (claude-mem schema, read defensively) ──────────────────────
+
+/** A row of claude-mem's `session_summaries` (fields optional — drift-tolerant). */
+export interface ClaudeMemSummary {
+  memory_session_id: string;
+  created_at_epoch: number;
+  /** claude-mem's within-session sequence; tie-breaks rows sharing an epoch. */
+  prompt_number?: number;
+  project?: string;
+  request?: string;
+  investigated?: string;
+  learned?: string;
+  completed?: string;
+  next_steps?: string;
+  notes?: string;
+}
+
+/** A row of claude-mem's `sdk_sessions` (for the human-facing title). */
+export interface SdkSession {
+  memory_session_id: string;
+  custom_title?: string;
+  project?: string;
+  user_prompt?: string;
+}
+
+/** A Zuhn source ready to write to sources/session/<slug>.md. */
+export interface SessionSource {
+  id: string;
+  slug: string;
+  title: string;
+  body: string;
+  data: Record<string, string | number>;
+}
+
+// ─── Title / body composition (pure) ──────────────────────────────────
+
+function firstLine(text: string, max = 60): string {
+  const line = text.split("\n").map((l) => l.trim()).find(Boolean) ?? "";
+  return line.length > max ? line.slice(0, max - 1) + "…" : line;
+}
+
+/**
+ * Human-facing title: claude-mem's custom_title if set, else "<project>: <first
+ * line of the request>", else a fixed, date-independent fallback. Provenance
+ * stays honest — the title comes from the session, not from extraction time.
+ */
+export function deriveSessionTitle(summary: ClaudeMemSummary, sdk?: SdkSession): string {
+  const custom = sdk?.custom_title?.trim();
+  if (custom) return custom;
+
+  const project = (summary.project ?? sdk?.project ?? "").trim();
+  const req = firstLine(summary.request ?? sdk?.user_prompt ?? "");
+  if (project && req) return `${project}: ${req}`;
+  if (req) return req;
+  if (project) return `${project} session`;
+  // Date-INDEPENDENT fallback. The slug appends the session-id tag for
+  // uniqueness, so the title must not carry a date — otherwise a title-less
+  // session would get a new slug/filename each day and re-syncs would
+  // duplicate it instead of hitting the file-exists short-circuit.
+  return "Claude Code session";
+}
+
+const SECTIONS: Array<{ key: keyof ClaudeMemSummary; heading: string }> = [
+  { key: "request", heading: "Request" },
+  { key: "investigated", heading: "Investigated" },
+  { key: "learned", heading: "Learned" },
+  { key: "completed", heading: "Completed" },
+  { key: "next_steps", heading: "Next steps" },
+  { key: "notes", heading: "Notes" },
+];
+
+/** Compose the digest fields into a readable markdown body (non-empty only). */
+export function composeSessionBody(summary: ClaudeMemSummary): string {
+  const parts: string[] = [];
+  for (const { key, heading } of SECTIONS) {
+    const value = String(summary[key] ?? "").trim();
+    if (value) parts.push(`## ${heading}\n\n${value}`);
+  }
+  return parts.join("\n\n");
+}
+
+function wordCount(text: string): number {
+  return text.split(/\s+/).filter(Boolean).length;
+}
+
+/**
+ * Build a Zuhn "session" source from a claude-mem summary. The slug is salted
+ * by memory_session_id and is date-independent, so the same session always maps
+ * to the same filename — re-syncs are idempotent via the file-exists check (+
+ * the sync watermark). The SRC id is assigned once, at first sync.
+ */
+export function buildSessionSource(
+  summary: ClaudeMemSummary,
+  sdk?: SdkSession,
+  date = new Date().toISOString().slice(0, 10)
+): SessionSource {
+  const title = deriveSessionTitle(summary, sdk);
+  const sessionTag = summary.memory_session_id.replace(/[^a-zA-Z0-9]/g, "").slice(0, 8) || "session";
+  const slug = `${slugify(title)}-${sessionTag}`;
+  const body = composeSessionBody(summary);
+  const id = generateSourceId(slug, summary.memory_session_id);
+
+  return {
+    id,
+    slug,
+    title,
+    body,
+    data: {
+      id,
+      type: "session",
+      title,
+      date_ingested: date,
+      insight_count: 0,
+      word_count: wordCount(body),
+    },
+  };
+}
+
+// ─── DB reads (read-only, drift-tolerant) ─────────────────────────────
+
+/**
+ * Read session summaries created on/after `sinceEpoch`, oldest first. Inclusive
+ * (`>=`) so siblings sharing the watermark epoch aren't missed — the caller
+ * dedups by memory_session_id. SELECT * so a missing/renamed column blanks a
+ * field rather than throwing; only memory_session_id + created_at_epoch are
+ * structurally required.
+ */
+export function readNewSummaries(
+  db: Database.Database,
+  sinceEpoch: number
+): ClaudeMemSummary[] {
+  const rows = db
+    .prepare(
+      "SELECT * FROM session_summaries WHERE created_at_epoch >= ? ORDER BY created_at_epoch ASC"
+    )
+    .all(sinceEpoch) as Record<string, unknown>[];
+
+  let dropped = 0;
+  const summaries = rows
+    .map((r): ClaudeMemSummary | null => {
+      const id = typeof r.memory_session_id === "string" ? r.memory_session_id : "";
+      // Coerce a string epoch (type drift) rather than silently dropping it.
+      // asOptNumber rejects null/blank values that Number() would read as 0.
+      const epoch = asOptNumber(r.created_at_epoch);
+      if (!id || epoch === undefined) {
+        dropped++;
+        return null;
+      }
+      return {
+        memory_session_id: id,
+        created_at_epoch: epoch,
+        prompt_number: asOptNumber(r.prompt_number),
+        project: asOptString(r.project),
+        request: asOptString(r.request),
+        investigated: asOptString(r.investigated),
+        learned: asOptString(r.learned),
+        completed: asOptString(r.completed),
+        next_steps: asOptString(r.next_steps),
+        notes: asOptString(r.notes),
+      };
+    })
+    .filter((s): s is ClaudeMemSummary => s !== null);
+
+  // Surface drift rather than letting a malformed table look like "0 new".
+  if (dropped > 0) {
+    console.warn(
+      `WARN: skipped ${dropped} session_summaries row(s) missing memory_session_id/created_at_epoch — claude-mem schema may have drifted.`
+    );
+  }
+  return summaries;
+}
+
+/**
+ * Collapse multiple summaries that share a memory_session_id down to the latest
+ * one (highest created_at_epoch, then highest prompt_number). claude-mem can
+ * write several summaries per session; the sync must process each session once.
+ */
+export function latestPerSession(summaries: ClaudeMemSummary[]): ClaudeMemSummary[] {
+  const byId = new Map<string, ClaudeMemSummary>();
+  for (const s of summaries) {
+    const prev = byId.get(s.memory_session_id);
+    if (!prev || isLaterSummary(s, prev)) byId.set(s.memory_session_id, s);
+  }
+  return [...byId.values()];
+}
+
+/** Later = higher epoch; on an epoch tie, higher prompt_number (claude-mem's
+ *  within-session sequence) so a stale/empty same-epoch row can't win. */
+function isLaterSummary(a: ClaudeMemSummary, b: ClaudeMemSummary): boolean {
+  if (a.created_at_epoch !== b.created_at_epoch) return a.created_at_epoch > b.created_at_epoch;
+  return (a.prompt_number ?? 0) >= (b.prompt_number ?? 0);
+}
+
+/** Look up the sdk_sessions row for a session id (for the title). Best-effort. */
+export function readSdkSession(
+  db: Database.Database,
+  memorySessionId: string
+): SdkSession | undefined {
+  try {
+    const row = db
+      .prepare("SELECT * FROM sdk_sessions WHERE memory_session_id = ?")
+      .get(memorySessionId) as Record<string, unknown> | undefined;
+    if (!row) return undefined;
+    return {
+      memory_session_id: memorySessionId,
+      custom_title: asOptString(row.custom_title),
+      project: asOptString(row.project),
+      user_prompt: asOptString(row.user_prompt),
+    };
+  } catch {
+    return undefined; // sdk_sessions absent/changed — title falls back to summary
+  }
+}
+
+function asOptString(value: unknown): string | undefined {
+  return typeof value === "string" ? value : undefined;
+}
+
+function asOptNumber(value: unknown): number | undefined {
+  if (typeof value === "number") return Number.isFinite(value) ? value : undefined;
+  if (typeof value === "string" && value.trim() !== "") {
+    const n = Number(value);
+    return Number.isFinite(n) ? n : undefined;
+  }
+  return undefined;
+}
diff --git a/scripts/schemas/frontmatter.ts b/scripts/schemas/frontmatter.ts
index 09a89fc128..976eee3701 100644
--- a/scripts/schemas/frontmatter.ts
+++ b/scripts/schemas/frontmatter.ts
@@ -72,7 +72,7 @@ export type InsightData = z.infer<typeof InsightFrontmatter>;
 
 export const SourceFrontmatter = z.object({
   id: z.string().regex(/^SRC-\d{6}-[A-F0-9]{4}$/i),
-  type: z.enum(["reddit", "youtube", "blog", "panel", "paste", "pdf", "audio", "image"]),
+  type: z.enum(["reddit", "youtube", "blog", "panel", "paste", "pdf", "audio", "image", "session"]),
   title: z.string(),
   date_ingested: z.string(),
   insight_count: z.number().int().min(0),
diff --git a/scripts/sync-claude-mem.ts b/scripts/sync-claude-mem.ts
new file mode 100644
index 0000000000..c78ca050a3
--- /dev/null
+++ b/scripts/sync-claude-mem.ts
@@ -0,0 +1,161 @@
+#!/usr/bin/env npx tsx
+/**
+ * sync-claude-mem.ts — Adapter: claude-mem session digests → Zuhn sources.
+ *
+ * claude-mem auto-captures every Claude Code session and compresses it to
+ * ~/.claude-mem/claude-mem.db. This reads those digests (READ-ONLY) and writes
+ * one Zuhn "session" source per new session. It does NOT extract insights —
+ * the normal extract→gate flow authors stances and filters quality, so the
+ * epistemic layer stays the single gatekeeper for what enters the KB.
+ *
+ * Idempotent: a session always maps to the same SRC id + filename (salted by
+ * memory_session_id), and a watermark tracks what's been synced.
+ *
+ * Usage:
+ *   npm run sync-claude-mem            # write new session sources
+ *   npm run sync-claude-mem -- --dry-run
+ *
+ * Next step after a sync (authors stances + runs the quality gate, with a
+ * stricter bar for the noisier session source):
+ *   ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional npm run autoknowledge
+ */
+
+import { homedir } from "node:os";
+import { dirname, join } from "node:path";
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import Database from "better-sqlite3";
+import matter from "gray-matter";
+
+import { buildSessionSource, latestPerSession, readNewSummaries, readSdkSession } from "./lib/claude-mem";
+import { KB_ROOT } from "./lib/kb-root";
+
+const CLAUDE_MEM_DB = process.env.CLAUDE_MEM_DATA_DIR
+  ? join(process.env.CLAUDE_MEM_DATA_DIR, "claude-mem.db")
+  : join(homedir(), ".claude-mem", "claude-mem.db");
+const WATERMARK_PATH = join(KB_ROOT, "meta", "claude-mem-sync.json");
+const SESSION_DIR = join(KB_ROOT, "sources", "session");
+const MAX_TRACKED_IDS = 5000;
+
+interface Watermark {
+  last_epoch: number;
+  synced_session_ids: string[];
+  updated_at: string;
+}
+
+function readWatermark(): Watermark {
+  try {
+    const w = JSON.parse(readFileSync(WATERMARK_PATH, "utf-8"));
+    return {
+      last_epoch: typeof w.last_epoch === "number" ? w.last_epoch : 0,
+      synced_session_ids: Array.isArray(w.synced_session_ids) ? w.synced_session_ids : [],
+      updated_at: typeof w.updated_at === "string" ? w.updated_at : "",
+    };
+  } catch {
+    return { last_epoch: 0, synced_session_ids: [], updated_at: "" };
+  }
+}
+
+function writeWatermark(w: Watermark): void {
+  mkdirSync(dirname(WATERMARK_PATH), { recursive: true });
+  writeFileSync(WATERMARK_PATH, JSON.stringify(w, null, 2) + "\n", "utf-8");
+}
+
+function main(): void {
+  const dryRun = process.argv.includes("--dry-run");
+
+  if (!existsSync(CLAUDE_MEM_DB)) {
+    console.log(`claude-mem DB not found at ${CLAUDE_MEM_DB}`);
+    console.log("Install it first:  npx claude-mem install   (then run some sessions). Nothing to sync.");
+    return; // exit 0 — graceful, not an error
+  }
+
+  const db = new Database(CLAUDE_MEM_DB, { readonly: true, fileMustExist: true });
+  const watermark = readWatermark();
+  const alreadySynced = new Set(watermark.synced_session_ids);
+
+  let summaries;
+  try {
+    summaries = readNewSummaries(db, watermark.last_epoch);
+  } catch (err) {
+    console.error(`Could not read session_summaries — claude-mem's schema may have changed: ${(err as Error).message}`);
+    const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all();
+    console.error(`Tables present: ${tables.map((t) => (t as { name: string }).name).join(", ")}`);
+    db.close();
+    process.exit(1); // fail loud — schema drift needs a code fix, not a silent skip
+  }
+
+  // Collapse multiple rows per session to the latest, then drop already-synced
+  // (the >= watermark also re-includes same-epoch siblings from the last run).
+  const unique = latestPerSession(summaries);
+  const pending = unique.filter((s) => !alreadySynced.has(s.memory_session_id));
+
+  console.log(
+    `claude-mem sync — ${unique.length} session(s) at/after watermark, ${pending.length} new${dryRun ? " (dry-run)" : ""}.`
+  );
+
+  if (pending.length === 0) {
+    db.close();
+    return;
+  }
+
+  if (!dryRun) mkdirSync(SESSION_DIR, { recursive: true });
+
+  let created = 0;
+  let skipped = 0;
+  let maxEpoch = watermark.last_epoch;
+  const newIds: string[] = [];
+
+  for (const summary of pending) {
+    maxEpoch = Math.max(maxEpoch, summary.created_at_epoch);
+
+    const src = buildSessionSource(summary, readSdkSession(db, summary.memory_session_id));
+
+    // An empty digest (no learned/completed/etc.) has nothing to extract — skip
+    // WITHOUT marking it synced, so a later, fuller summary for the same session
+    // still gets picked up rather than being dropped as "already synced".
+    if (!src.body.trim()) {
+      skipped++;
+      continue;
+    }
+
+    // Has content → record as synced (a source exists or is being created).
+    newIds.push(summary.memory_session_id);
+
+    const path = join(SESSION_DIR, `${src.slug}.md`);
+    if (existsSync(path)) {
+      skipped++;
+      continue; // already written in a prior run
+    }
+
+    if (dryRun) {
+      console.log(`  WOULD create  ${src.id}  ${src.title}`);
+      created++;
+      continue;
+    }
+
+    writeFileSync(path, matter.stringify(src.body, src.data), "utf-8");
+    console.log(`  created  ${src.id}  ${src.title}`);
+    created++;
+  }
+
+  db.close();
+
+  if (!dryRun) {
+    const merged = [...watermark.synced_session_ids, ...newIds];
+    writeWatermark({
+      last_epoch: maxEpoch,
+      synced_session_ids: merged.slice(-MAX_TRACKED_IDS),
+      updated_at: new Date().toISOString(),
+    });
+  }
+
+  console.log(`\n${dryRun ? "Would create" : "Created"} ${created} session source(s)${skipped ? `, skipped ${skipped}` : ""}.`);
+  if (!dryRun && created > 0) {
+    console.log(
+      "Next: author stances + gate them —\n" +
+        "  ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional npm run autoknowledge"
+    );
+  }
+}
+
+main();