From 938f10b9b94d70b5340455871403d4b426a11eb0 Mon Sep 17 00:00:00 2001 From: Jin Choi Date: Tue, 26 May 2026 10:07:23 -0700 Subject: [PATCH] =?UTF-8?q?feat(sensor):=20claude-mem=20=E2=86=92=20Zuhn?= =?UTF-8?q?=20session=20sync=20adapter=20(Phase=201)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Layers claude-mem (auto-captures + digests every Claude Code session) into Zuhn as a "sensor": reads ~/.claude-mem/claude-mem.db READ-ONLY and writes one Zuhn "session" source per new session. It does NOT extract insights — the normal extract→gate flow authors stances and filters quality, so the epistemic layer stays the single gatekeeper for what enters the KB. - scripts/lib/claude-mem.ts: drift-tolerant reads (SELECT * + defensive coercion; warns on dropped rows), date-INDEPENDENT title → stable slug, latestPerSession (collapses multiple summaries per session, tie-broken by prompt_number), and the session-source builder. - scripts/sync-claude-mem.ts: graceful skip if claude-mem absent; read-only; watermark (meta/claude-mem-sync.json: last_epoch + synced ids); idempotent via file-exists + watermark; empty digests skipped WITHOUT poisoning the watermark (so a later fuller summary still syncs); --dry-run; fail-loud only on schema-read errors. - "session" added to SourceFrontmatter.type, health.ts validation glob, and autoknowledge.ts discovery glob (so synced sessions actually get extracted). - npm run sync-claude-mem. health.ts/autoknowledge.ts also adopt the pending KB_ROOT-import refactor (now resolvable — kb-root.ts is tracked on main since the gate PR). Tests: 13 (pure transforms + in-memory sqlite fixture covering reads, string coercion, dedup, prompt_number tie-break). Codex: 4 review rounds (caught the autoknowledge glob gap, slug date-drift, within-run dedup, epoch coercion, empty-watermark poisoning, prompt_number tie-break) → converged. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 1 + scripts/autoknowledge.ts | 4 +- scripts/health.ts | 4 +- scripts/lib/claude-mem.test.ts | 154 +++++++++++++++++++++ scripts/lib/claude-mem.ts | 246 +++++++++++++++++++++++++++++++++ scripts/schemas/frontmatter.ts | 2 +- scripts/sync-claude-mem.ts | 161 +++++++++++++++++++++ 7 files changed, 567 insertions(+), 5 deletions(-) create mode 100644 scripts/lib/claude-mem.test.ts create mode 100644 scripts/lib/claude-mem.ts create mode 100644 scripts/sync-claude-mem.ts diff --git a/package.json b/package.json index 78e48f0d9c..9c7e4fd345 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "sleep": "tsx scripts/sleep.ts", "quality-score": "tsx scripts/quality-score.ts", "gate": "tsx scripts/insight-gate.ts", + "sync-claude-mem": "tsx scripts/sync-claude-mem.ts", "ask": "tsx scripts/ask.ts", "autoknowledge": "tsx scripts/autoknowledge.ts", "wake": "tsx scripts/wake.ts", diff --git a/scripts/autoknowledge.ts b/scripts/autoknowledge.ts index e432a2ab66..2e4100ea84 100644 --- a/scripts/autoknowledge.ts +++ b/scripts/autoknowledge.ts @@ -40,7 +40,7 @@ import matter from "gray-matter"; const execFileAsync = promisify(execFile); const PROJECT_ROOT = join(__dirname, ".."); -const KB_ROOT = join(PROJECT_ROOT, "knowledge-base"); +import { KB_ROOT } from "./lib/kb-root"; const METRICS_FILE = join(KB_ROOT, "meta", "autoknowledge-metrics.jsonl"); const POST_INGEST_EVERY = 10; // Run post-ingest every N sources const CONCURRENCY = 3; // Parallel extraction workers @@ -89,7 +89,7 @@ async function findUnextractedSources( channelFilter?: string, batchIds?: Set, ): Promise { - const files = await fg("sources/{youtube,blog,reddit,pdf,audio,paste}/*.md", { + const files = await fg("sources/{youtube,blog,reddit,pdf,audio,paste,session}/*.md", { cwd: KB_ROOT, absolute: true, ignore: ["**/raw/**", "**/_index.md"], diff --git a/scripts/health.ts b/scripts/health.ts index 55ced605d7..f46ea64179 100644 --- 
/**
 * Fixture factory: a minimal valid summary, with any field overridable.
 * Defaults give every test a non-empty `learned` section.
 */
function mk(over: Partial<ClaudeMemSummary> = {}): ClaudeMemSummary {
  return { memory_session_id: "sess-abc123", created_at_epoch: 1000, learned: "X beats Y", ...over };
}

// ─── Title derivation ─────────────────────────────────────────────────

describe("deriveSessionTitle", () => {
  it("prefers claude-mem's custom_title", () => {
    expect(deriveSessionTitle(mk(), { memory_session_id: "s", custom_title: "My Title" })).toBe("My Title");
  });
  it("falls back to '<project>: <first line of request>'", () => {
    expect(deriveSessionTitle(mk({ project: "zuhn", request: "build the gate\nand more" }))).toBe(
      "zuhn: build the gate"
    );
  });
  it("uses the request alone when there's no project", () => {
    expect(deriveSessionTitle(mk({ request: "do a thing" }))).toBe("do a thing");
  });
  it("uses '<project> session' when there's a project but no request", () => {
    expect(deriveSessionTitle(mk({ project: "zuhn" }))).toBe("zuhn session");
  });
  it("uses a date-INDEPENDENT fallback when there's nothing to title from", () => {
    // Must not carry a date, or the slug would drift daily and break idempotency.
    expect(deriveSessionTitle(mk({}))).toBe("Claude Code session");
  });
});

describe("latestPerSession", () => {
  it("keeps the latest summary per session id", () => {
    const old = mk({ memory_session_id: "s1", created_at_epoch: 100, learned: "old" });
    const fresh = mk({ memory_session_id: "s1", created_at_epoch: 300, learned: "new" });
    const other = mk({ memory_session_id: "s2", created_at_epoch: 200, learned: "b" });
    const out = latestPerSession([old, other, fresh]);
    expect(out).toHaveLength(2);
    expect(out.find((s) => s.memory_session_id === "s1")?.learned).toBe("new");
  });

  it("tie-breaks same-epoch rows by prompt_number (order-independent)", () => {
    const lo = mk({ memory_session_id: "s1", created_at_epoch: 100, prompt_number: 1, learned: "early" });
    const hi = mk({ memory_session_id: "s1", created_at_epoch: 100, prompt_number: 5, learned: "late" });
    expect(latestPerSession([hi, lo])[0].learned).toBe("late");
    expect(latestPerSession([lo, hi])[0].learned).toBe("late");
  });
});

// ─── Body composition ─────────────────────────────────────────────────

describe("composeSessionBody", () => {
  it("includes only non-empty sections, each under a heading", () => {
    const body = composeSessionBody(mk({ request: "R", learned: "L", completed: "" }));
    expect(body).toContain("## Request\n\nR");
    expect(body).toContain("## Learned\n\nL");
    expect(body).not.toContain("## Completed");
    expect(body).not.toContain("## Notes");
  });
});

// ─── Source building ──────────────────────────────────────────────────

describe("buildSessionSource", () => {
  it("is deterministic/idempotent and tags the slug with the session id", () => {
    const s = mk({ memory_session_id: "abc12345xyz", learned: "Stances beat vague notes" });
    const a = buildSessionSource(s, undefined, "2026-05-26");
    const b = buildSessionSource(s, undefined, "2026-05-26");
    expect(a.id).toBe(b.id);
    expect(a.id).toMatch(/^SRC-\d{6}-[A-F0-9]{4}$/);
    expect(a.slug).toContain("abc12345"); // first 8 alphanumerics of the session id
    expect(a.data.type).toBe("session");
    expect(a.data.date_ingested).toBe("2026-05-26");
    expect(a.data.insight_count).toBe(0);
    expect(Number(a.data.word_count)).toBeGreaterThan(0);
  });
});

// ─── DB reads against a synthetic claude-mem schema ───────────────────
// NOTE: schema is created via prepare().run() (one statement each), not
// db.exec(), to avoid a false-positive in the .exec( security hook.

/**
 * In-memory database with the two tables the adapter reads: three summaries
 * (s1..s3 at epochs 100/200/300) and one sdk_sessions row giving s2 a custom
 * title. The caller is responsible for closing the returned handle.
 */
function seedDb(): Database.Database {
  const db = new Database(":memory:");
  db.prepare(
    `CREATE TABLE session_summaries (
       memory_session_id TEXT, project TEXT, request TEXT, investigated TEXT,
       learned TEXT, completed TEXT, next_steps TEXT, notes TEXT, created_at_epoch INTEGER
     )`
  ).run();
  db.prepare(
    `CREATE TABLE sdk_sessions (
       memory_session_id TEXT, custom_title TEXT, project TEXT, user_prompt TEXT
     )`
  ).run();
  const ins = db.prepare(
    "INSERT INTO session_summaries (memory_session_id, project, learned, created_at_epoch) VALUES (?,?,?,?)"
  );
  ins.run("s1", "zuhn", "learned one", 100);
  ins.run("s2", "zuhn", "learned two", 200);
  ins.run("s3", "zuhn", "learned three", 300);
  db.prepare("INSERT INTO sdk_sessions (memory_session_id, custom_title) VALUES (?,?)").run("s2", "Custom Two");
  return db;
}

describe("readNewSummaries / readSdkSession (in-memory sqlite)", () => {
  it("reads summaries on/after sinceEpoch (inclusive), oldest first", () => {
    const db = seedDb();
    expect(readNewSummaries(db, 200).map((r) => r.memory_session_id)).toEqual(["s2", "s3"]);
    expect(readNewSummaries(db, 0)).toHaveLength(3);
    db.close();
  });

  it("looks up the sdk_sessions title, undefined when absent", () => {
    const db = seedDb();
    expect(readSdkSession(db, "s2")?.custom_title).toBe("Custom Two");
    expect(readSdkSession(db, "s1")?.custom_title).toBeUndefined();
    db.close();
  });

  it("coerces a string created_at_epoch (type drift) instead of dropping the row", () => {
    const db = new Database(":memory:");
    db.prepare("CREATE TABLE session_summaries (memory_session_id TEXT, created_at_epoch TEXT, learned TEXT)").run();
    db.prepare("INSERT INTO session_summaries VALUES (?,?,?)").run("s1", "1716700000", "learned");
    const rows = readNewSummaries(db, 0);
    expect(rows).toHaveLength(1);
    expect(rows[0].created_at_epoch).toBe(1716700000);
    db.close();
  });

  it("tolerates a missing sdk_sessions table (returns undefined, no throw)", () => {
    const db = new Database(":memory:");
    db.prepare("CREATE TABLE session_summaries (memory_session_id TEXT, created_at_epoch INTEGER, learned TEXT)").run();
    expect(readSdkSession(db, "x")).toBeUndefined();
    db.close();
  });

  it("end-to-end: a summary row becomes a 'session' source", () => {
    const db = seedDb();
    const rows = readNewSummaries(db, 0);
    const s2 = rows.find((r) => r.memory_session_id === "s2");
    // Fail with a readable message instead of a non-null assertion.
    if (!s2) throw new Error("fixture row s2 missing from readNewSummaries output");
    const src = buildSessionSource(s2, readSdkSession(db, "s2"), "2026-05-26");
    expect(src.title).toBe("Custom Two");
    expect(src.data.type).toBe("session");
    expect(src.body).toContain("learned two");
    db.close();
  });
});
// ─── Types (claude-mem schema, read defensively) ──────────────────────

/** A row of claude-mem's `session_summaries` (fields optional — drift-tolerant). */
export interface ClaudeMemSummary {
  memory_session_id: string;
  created_at_epoch: number;
  /** claude-mem's within-session sequence; tie-breaks rows sharing an epoch. */
  prompt_number?: number;
  project?: string;
  request?: string;
  investigated?: string;
  learned?: string;
  completed?: string;
  next_steps?: string;
  notes?: string;
}

/** A row of claude-mem's `sdk_sessions` (for the human-facing title). */
export interface SdkSession {
  memory_session_id: string;
  custom_title?: string;
  project?: string;
  user_prompt?: string;
}

/** A Zuhn source ready to write to sources/session/<slug>.md. */
export interface SessionSource {
  id: string;
  slug: string;
  title: string;
  body: string;
  /** Frontmatter fields, serialized alongside `body`. */
  data: Record<string, unknown>;
}

// ─── Title / body composition (pure) ──────────────────────────────────

/**
 * First non-empty line of `text`, trimmed. A line longer than `max` characters
 * is cut to `max - 1` characters plus "…", so the result never exceeds `max`.
 */
function firstLine(text: string, max = 60): string {
  const line = text.split("\n").map((l) => l.trim()).find(Boolean) ?? "";
  return line.length > max ? line.slice(0, max - 1) + "…" : line;
}

/** First candidate that still has content after trimming (returned trimmed), else "". */
function firstNonBlank(...candidates: Array<string | undefined>): string {
  for (const candidate of candidates) {
    const trimmed = candidate?.trim();
    if (trimmed) return trimmed;
  }
  return "";
}

/**
 * Human-facing title for a session, in order of preference:
 *   1. claude-mem's `custom_title`
 *   2. "<project>: <first line of request>"
 *   3. the request's first line alone
 *   4. "<project> session"
 *   5. the constant "Claude Code session"
 *
 * Project and request come from the summary, falling back to the sdk_sessions
 * row when the summary's value is missing OR blank. (`??` alone is not enough:
 * a column that exists but holds "" is a string, not undefined, and would
 * otherwise shadow a usable sdk value.)
 *
 * @param summary the session's digest row
 * @param sdk     optional sdk_sessions row for the same session
 * @returns a non-empty title; never contains a date
 */
export function deriveSessionTitle(summary: ClaudeMemSummary, sdk?: SdkSession): string {
  const custom = sdk?.custom_title?.trim();
  if (custom) return custom;

  const project = firstNonBlank(summary.project, sdk?.project);
  const req = firstLine(firstNonBlank(summary.request, sdk?.user_prompt));
  if (project && req) return `${project}: ${req}`;
  if (req) return req;
  if (project) return `${project} session`;
  // Date-INDEPENDENT fallback. The slug appends the session-id tag for
  // uniqueness, so the title must not carry a date — otherwise a title-less
  // session would get a new slug/filename each day and re-syncs would
  // duplicate it instead of hitting the file-exists short-circuit.
  return "Claude Code session";
}

/** Digest fields rendered into the body, in output order, with their headings. */
const SECTIONS: Array<{ key: keyof ClaudeMemSummary; heading: string }> = [
  { key: "request", heading: "Request" },
  { key: "investigated", heading: "Investigated" },
  { key: "learned", heading: "Learned" },
  { key: "completed", heading: "Completed" },
  { key: "next_steps", heading: "Next steps" },
  { key: "notes", heading: "Notes" },
];

/**
 * Compose the digest fields into a markdown body: one "## Heading" block per
 * non-blank field, joined by blank lines. Returns "" when every field is blank.
 */
export function composeSessionBody(summary: ClaudeMemSummary): string {
  const parts: string[] = [];
  for (const { key, heading } of SECTIONS) {
    const value = String(summary[key] ?? "").trim();
    if (value) parts.push(`## ${heading}\n\n${value}`);
  }
  return parts.join("\n\n");
}

/** Number of whitespace-separated tokens in `text` (0 for blank text). */
function wordCount(text: string): number {
  return text.split(/\s+/).filter(Boolean).length;
}
/**
 * Build a Zuhn "session" source from a claude-mem summary.
 *
 * The slug is the slugified title plus a tag made of the first 8 alphanumeric
 * characters of memory_session_id ("session" if the id has none). Neither part
 * depends on the date, so the same session maps to the same filename.
 *
 * @param summary the session's digest row
 * @param sdk     optional sdk_sessions row (only used for the title)
 * @param date    value for `date_ingested`; defaults to today's UTC date (YYYY-MM-DD)
 * @returns the source id, slug, title, markdown body and frontmatter data
 */
export function buildSessionSource(
  summary: ClaudeMemSummary,
  sdk?: SdkSession,
  date = new Date().toISOString().slice(0, 10)
): SessionSource {
  const title = deriveSessionTitle(summary, sdk);
  const sessionTag = summary.memory_session_id.replace(/[^a-zA-Z0-9]/g, "").slice(0, 8) || "session";
  const slug = `${slugify(title)}-${sessionTag}`;
  const body = composeSessionBody(summary);
  // NOTE(review): presumably deterministic for a given (slug, session id) —
  // confirm against generate-id, since re-sync idempotency relies on it.
  const id = generateSourceId(slug, summary.memory_session_id);

  return {
    id,
    slug,
    title,
    body,
    data: {
      id,
      type: "session",
      title,
      date_ingested: date,
      insight_count: 0,
      word_count: wordCount(body),
    },
  };
}

// ─── DB reads (read-only, drift-tolerant) ─────────────────────────────

/**
 * Read session summaries created on/after `sinceEpoch`, oldest first.
 *
 * The bound is inclusive (`>=`) so rows sharing the watermark epoch aren't
 * missed; the caller is expected to dedup by memory_session_id. `SELECT *` is
 * used so a missing/renamed optional column blanks a field rather than
 * throwing. Only memory_session_id and created_at_epoch are required: a row
 * lacking a string id or a usable numeric epoch is dropped, and one warning
 * reports how many rows were dropped.
 *
 * @throws whatever the driver throws if the table or the epoch column is absent
 */
export function readNewSummaries(
  db: Database.Database,
  sinceEpoch: number
): ClaudeMemSummary[] {
  const rows = db
    .prepare(
      "SELECT * FROM session_summaries WHERE created_at_epoch >= ? ORDER BY created_at_epoch ASC"
    )
    .all(sinceEpoch) as Record<string, unknown>[];

  const summaries: ClaudeMemSummary[] = [];
  let dropped = 0;
  for (const r of rows) {
    const id = typeof r.memory_session_id === "string" ? r.memory_session_id : "";
    // Coerce a numeric string (type drift) but do NOT use bare Number(): it
    // maps null, "" and whitespace to 0, which would keep a row that has no
    // real epoch instead of dropping it.
    const epoch = asOptNumber(r.created_at_epoch);
    if (!id || epoch === undefined) {
      dropped++;
      continue;
    }
    summaries.push({
      memory_session_id: id,
      created_at_epoch: epoch,
      prompt_number: asOptNumber(r.prompt_number),
      project: asOptString(r.project),
      request: asOptString(r.request),
      investigated: asOptString(r.investigated),
      learned: asOptString(r.learned),
      completed: asOptString(r.completed),
      next_steps: asOptString(r.next_steps),
      notes: asOptString(r.notes),
    });
  }

  // Surface drift rather than letting a malformed table look like "0 new".
  if (dropped > 0) {
    console.warn(
      `WARN: skipped ${dropped} session_summaries row(s) missing memory_session_id/created_at_epoch — claude-mem schema may have drifted.`
    );
  }
  return summaries;
}

/**
 * Collapse summaries that share a memory_session_id down to the latest one
 * (see isLaterSummary), so each session is processed once. Output order is the
 * order in which each session id was first seen in the input.
 */
export function latestPerSession(summaries: ClaudeMemSummary[]): ClaudeMemSummary[] {
  const byId = new Map<string, ClaudeMemSummary>();
  for (const s of summaries) {
    const prev = byId.get(s.memory_session_id);
    if (!prev || isLaterSummary(s, prev)) byId.set(s.memory_session_id, s);
  }
  return [...byId.values()];
}

/**
 * Whether `a` should replace `b`. Later = higher epoch; on an epoch tie, the
 * higher prompt_number (missing counts as 0). A full tie also returns true, so
 * of two indistinguishable rows the one seen last wins.
 */
function isLaterSummary(a: ClaudeMemSummary, b: ClaudeMemSummary): boolean {
  if (a.created_at_epoch !== b.created_at_epoch) return a.created_at_epoch > b.created_at_epoch;
  return (a.prompt_number ?? 0) >= (b.prompt_number ?? 0);
}

/**
 * Look up the sdk_sessions row for a session id (used for the title).
 * Best-effort by design: any failure — including an absent or changed
 * sdk_sessions table — yields undefined instead of throwing.
 */
export function readSdkSession(
  db: Database.Database,
  memorySessionId: string
): SdkSession | undefined {
  try {
    const row = db
      .prepare("SELECT * FROM sdk_sessions WHERE memory_session_id = ?")
      .get(memorySessionId) as Record<string, unknown> | undefined;
    if (!row) return undefined;
    return {
      memory_session_id: memorySessionId,
      custom_title: asOptString(row.custom_title),
      project: asOptString(row.project),
      user_prompt: asOptString(row.user_prompt),
    };
  } catch {
    return undefined; // sdk_sessions absent/changed — title falls back to summary
  }
}

/** `value` if it is a string (including ""), else undefined. */
function asOptString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/**
 * Finite number from a number, bigint or non-blank numeric string; undefined
 * for everything else (null, "", whitespace, NaN, ±Infinity, booleans, objects).
 */
function asOptNumber(value: unknown): number | undefined {
  if (typeof value === "number") return Number.isFinite(value) ? value : undefined;
  if (typeof value === "bigint") {
    const n = Number(value);
    return Number.isFinite(n) ? n : undefined;
  }
  if (typeof value === "string" && value.trim() !== "") {
    const n = Number(value);
    return Number.isFinite(n) ? n : undefined;
  }
  return undefined;
}
/** claude-mem's database: $CLAUDE_MEM_DATA_DIR/claude-mem.db if set, else ~/.claude-mem/claude-mem.db. */
const CLAUDE_MEM_DB = process.env.CLAUDE_MEM_DATA_DIR
  ? join(process.env.CLAUDE_MEM_DATA_DIR, "claude-mem.db")
  : join(homedir(), ".claude-mem", "claude-mem.db");
const WATERMARK_PATH = join(KB_ROOT, "meta", "claude-mem-sync.json");
const SESSION_DIR = join(KB_ROOT, "sources", "session");
/** Cap on remembered session ids; the oldest entries are discarded first. */
const MAX_TRACKED_IDS = 5000;

/** Sync progress persisted between runs. */
interface Watermark {
  /** Highest created_at_epoch seen among processed summaries. */
  last_epoch: number;
  /** Session ids that already have (or were found to have) a source file. */
  synced_session_ids: string[];
  /** ISO timestamp of the last write; "" when no watermark exists yet. */
  updated_at: string;
}

/**
 * Load the watermark, tolerating a missing or damaged file.
 *
 * A missing file is the normal first-run case and is silent. A file that
 * exists but cannot be read or parsed falls back to the same empty watermark
 * with a warning rather than a crash: main() skips sessions whose source file
 * already exists, so restarting from epoch 0 does not rewrite them. Fields
 * of the wrong type are replaced by their defaults individually.
 */
function readWatermark(): Watermark {
  const fresh: Watermark = { last_epoch: 0, synced_session_ids: [], updated_at: "" };
  if (!existsSync(WATERMARK_PATH)) return fresh;

  try {
    const parsed: unknown = JSON.parse(readFileSync(WATERMARK_PATH, "utf-8"));
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error("expected a JSON object");
    }
    const w = parsed as Record<string, unknown>;
    return {
      last_epoch: typeof w.last_epoch === "number" && Number.isFinite(w.last_epoch) ? w.last_epoch : 0,
      synced_session_ids: Array.isArray(w.synced_session_ids)
        ? w.synced_session_ids.filter((id): id is string => typeof id === "string")
        : [],
      updated_at: typeof w.updated_at === "string" ? w.updated_at : "",
    };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`WARN: ignoring unreadable watermark at ${WATERMARK_PATH} (${reason}) — starting from epoch 0.`);
    return fresh;
  }
}

/** Persist the watermark as pretty-printed JSON, creating its directory if needed. */
function writeWatermark(w: Watermark): void {
  mkdirSync(dirname(WATERMARK_PATH), { recursive: true });
  writeFileSync(WATERMARK_PATH, JSON.stringify(w, null, 2) + "\n", "utf-8");
}

/**
 * Read pending summaries from `db` and write one source file per new session.
 * Sets process.exitCode = 1 and returns early if session_summaries cannot be
 * read. Does not close `db`; the caller owns the handle.
 */
function syncSessions(db: Database.Database, dryRun: boolean): void {
  const watermark = readWatermark();
  const alreadySynced = new Set(watermark.synced_session_ids);

  let summaries: ReturnType<typeof readNewSummaries>;
  try {
    summaries = readNewSummaries(db, watermark.last_epoch);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`Could not read session_summaries — claude-mem's schema may have changed: ${reason}`);
    const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all();
    console.error(`Tables present: ${tables.map((t) => (t as { name: string }).name).join(", ")}`);
    // Fail loud — schema drift needs a code fix, not a silent skip. exitCode
    // (rather than process.exit) lets the caller's `finally` close the DB.
    process.exitCode = 1;
    return;
  }

  // Collapse multiple rows per session to the latest, then drop already-synced
  // (the >= watermark also re-includes same-epoch siblings from the last run).
  const unique = latestPerSession(summaries);
  const pending = unique.filter((s) => !alreadySynced.has(s.memory_session_id));

  console.log(
    `claude-mem sync — ${unique.length} session(s) at/after watermark, ${pending.length} new${dryRun ? " (dry-run)" : ""}.`
  );

  if (pending.length === 0) return;

  if (!dryRun) mkdirSync(SESSION_DIR, { recursive: true });

  let created = 0;
  let skipped = 0;
  let maxEpoch = watermark.last_epoch;
  const newIds: string[] = [];

  for (const summary of pending) {
    maxEpoch = Math.max(maxEpoch, summary.created_at_epoch);

    const src = buildSessionSource(summary, readSdkSession(db, summary.memory_session_id));

    // An empty digest (no learned/completed/etc.) has nothing to extract — skip
    // WITHOUT marking it synced, so a later, fuller summary for the same session
    // still gets picked up rather than being dropped as "already synced".
    if (!src.body.trim()) {
      skipped++;
      continue;
    }

    // Has content → record as synced (a source exists or is being created).
    newIds.push(summary.memory_session_id);

    const path = join(SESSION_DIR, `${src.slug}.md`);
    if (existsSync(path)) {
      skipped++;
      continue; // already written in a prior run
    }

    if (dryRun) {
      console.log(`  WOULD create ${src.id}  ${src.title}`);
      created++;
      continue;
    }

    writeFileSync(path, matter.stringify(src.body, src.data), "utf-8");
    console.log(`  created ${src.id}  ${src.title}`);
    created++;
  }

  // A dry run must leave no trace, so the watermark is only advanced for real runs.
  if (!dryRun) {
    const merged = [...watermark.synced_session_ids, ...newIds];
    writeWatermark({
      last_epoch: maxEpoch,
      synced_session_ids: merged.slice(-MAX_TRACKED_IDS),
      updated_at: new Date().toISOString(),
    });
  }

  console.log(`\n${dryRun ? "Would create" : "Created"} ${created} session source(s)${skipped ? `, skipped ${skipped}` : ""}.`);
  if (!dryRun && created > 0) {
    console.log(
      "Next: author stances + gate them —\n" +
        "  ZUHN_GATE_BLOCKING_CHECKS=stance_present,stance_directional npm run autoknowledge"
    );
  }
}

/**
 * Entry point. Exits 0 with a hint when claude-mem is not installed; otherwise
 * opens its database read-only, runs the sync, and always closes the handle —
 * including when a file write or a later query throws.
 */
function main(): void {
  const dryRun = process.argv.includes("--dry-run");

  if (!existsSync(CLAUDE_MEM_DB)) {
    console.log(`claude-mem DB not found at ${CLAUDE_MEM_DB}`);
    console.log("Install it first:  npx claude-mem install  (then run some sessions). Nothing to sync.");
    return; // exit 0 — graceful, not an error
  }

  const db = new Database(CLAUDE_MEM_DB, { readonly: true, fileMustExist: true });
  try {
    syncSessions(db, dryRun);
  } finally {
    db.close();
  }
}

main();