From 9fa18ffec4716e4a8948d2f2bf37c71183d7a3bb Mon Sep 17 00:00:00 2001 From: Ed Heltzel <402910+edheltzel@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:17:22 -0400 Subject: [PATCH 1/4] feat(export): add recall export with JSON, Markdown, SQL dump, and SQLite backup formats - recall export --format json|markdown|sql|sqlite with file/dir/stdout output contract - explicit provenance on every provenance-bearing row; NULL exported as 'unknown' - manifest with version, schema, counts, provenance counts incl. unknown - --backup: timestamped SQL dump to ~/.agents/Recall/backups/, never overwrites - keyset-paginated reads sized by shared SQLITE_SAFE_CHUNK_SIZE for large exports --- docs/cli-reference.md | 46 +++++ src/commands/export.ts | 168 +++++++++++++++++++ src/index.ts | 24 ++- src/lib/export.ts | 275 ++++++++++++++++++++++++++++++ tests/commands/export.test.ts | 308 ++++++++++++++++++++++++++++++++++ tests/lib/export.test.ts | 81 +++++++++ 6 files changed, 901 insertions(+), 1 deletion(-) create mode 100644 src/commands/export.ts create mode 100644 src/lib/export.ts create mode 100644 tests/commands/export.test.ts create mode 100644 tests/lib/export.test.ts diff --git a/docs/cli-reference.md b/docs/cli-reference.md index 43b47da..cf36184 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -252,6 +252,52 @@ Suite B (token efficiency) compares the v2 wake-up bundle against v1 and the CLAUDE.md baseline. Results are written to `benchmarks/results/` as JSONL plus a human-readable `.md` alongside. See `benchmarks/README.md` for methodology. +## Export & Backup + +Portable and disaster-recovery exports of the memory database. 
+ +```bash +recall export # JSON export to stdout (summary on stderr) +recall export --format markdown # Human-readable Markdown export +recall export --format sql --output dump.sql # Textual SQL dump (schema + INSERTs) +recall export --format sqlite --output copy.db # Full database backup (VACUUM INTO) +recall export --output exports/ # Directory mode: artifact + manifest.json +recall export --backup # Timestamped SQL dump to ~/.agents/Recall/backups/ +``` + +Formats: + +- **json / markdown** — app-level export of the durable memory tables + (`sessions`, `messages`, `decisions`, `learnings`, `breadcrumbs`, + `loa_entries`). Every row of a provenance-bearing table carries an explicit + `provenance` field; legacy `NULL` provenance is exported as the literal + `unknown` — never omitted, never guessed (see Record Provenance above). + Embeddings are excluded. +- **sql** — textual SQL dump (CREATE TABLE + INSERT statements) of the same + durable tables. Restorable into an empty database; one-command restore is + intentionally not provided. +- **sqlite** — binary database backup via `VACUUM INTO`: the full DB including + embeddings and internal tables. Always requires `--output`. + +Output contract: + +- No `--output`: export data goes to stdout, the manifest summary to stderr — + stdout stays clean for piping. +- `--output <file>`: writes a single export file and prints the manifest + summary to stdout. +- `--output <dir>`: writes the export artifact plus `manifest.json` into the + directory. Directory exports always write `manifest.json`. + +The manifest records the Recall version, timestamp, schema version +(`PRAGMA user_version`), format, included tables, row counts per table, +provenance counts per table (including `unknown`), and whether embeddings were +included. 
+ +`recall export --backup` writes a timestamped SQL dump to +`~/.agents/Recall/backups/` (creating the directory if needed), never +overwrites an existing file (a `-N` suffix is added on collision), and prints +the output path. + ## Admin ```bash diff --git a/src/commands/export.ts b/src/commands/export.ts new file mode 100644 index 0000000..fdc426d --- /dev/null +++ b/src/commands/export.ts @@ -0,0 +1,168 @@ +// recall export — portable and disaster-recovery exports (issue #43). +// +// Formats: +// - json / markdown: app-level export of the durable memory tables with an +// explicit provenance field per provenance-bearing row (NULL → 'unknown'). +// Embeddings are excluded. +// - sql: textual SQL dump (schema + INSERTs) of the durable tables. +// - sqlite: database backup via VACUUM INTO — full DB including embeddings +// and internal tables. Binary, so it always requires --output. +// +// Output contract: +// - no --output: export data goes to stdout, manifest summary to stderr — +// stdout stays clean for piping. +// - --output <file>: data written to the file, manifest summary to stdout. +// - --output <dir>: artifact plus manifest.json written into the directory, +// manifest summary to stdout. Directory exports always write manifest.json. +// - --backup: timestamped SQL dump into ~/.agents/Recall/backups/ (created if +// needed), never overwrites an existing file, prints the output path. 
+ +import { existsSync, mkdirSync, statSync, writeFileSync } from 'fs'; +import { dirname, join, resolve } from 'path'; +import { Database } from 'bun:sqlite'; +import { getDb } from '../db/connection.js'; +import { + EXPORT_FORMATS, + EXPORT_TABLES, + buildManifest, + collectExportData, + defaultBackupDir, + listPhysicalTables, + renderJsonExport, + renderMarkdownExport, + renderSqlDump, + resolveNonClobbering, + timestampSlug, + type ExportFormat, + type ExportManifest, +} from '../lib/export.js'; + +export interface ExportOptions { + format?: string; + output?: string; + backup?: boolean; + /** Test seam — the CLI always uses defaultBackupDir(). */ + backupDir?: string; + /** Test seam — the CLI always uses the current time. */ + now?: Date; +} + +const ARTIFACT_EXT: Record = { + json: 'json', + markdown: 'md', + sql: 'sql', + sqlite: 'db', +}; + +function manifestSummary(manifest: ExportManifest, paths: string[]): string { + const lines: string[] = []; + lines.push(`Recall export — format: ${manifest.format}`); + for (const p of paths) lines.push(` Output: ${p}`); + lines.push(` Recall ${manifest.recall_version} | schema v${manifest.schema_version} | ${manifest.created_at}`); + lines.push(` Rows: ${manifest.tables.map(t => `${t}=${manifest.counts[t]}`).join(', ')}`); + const provenance = Object.entries(manifest.provenance_counts) + .map(([table, hist]) => `${table} { ${Object.entries(hist).map(([k, v]) => `${k}=${v}`).join(', ')} }`) + .join('; '); + lines.push(` Provenance: ${provenance}`); + lines.push(` Embeddings included: ${manifest.includes_embeddings ? 'yes' : 'no'}`); + return lines.join('\n'); +} + +function renderAppExport(db: Database, format: 'json' | 'markdown', createdAt: Date): { content: string; manifest: ExportManifest } { + const manifest = buildManifest(db, format, [...EXPORT_TABLES], { includesEmbeddings: false, createdAt }); + const data = collectExportData(db); + const content = format === 'json' + ? 
renderJsonExport(manifest, data) + : renderMarkdownExport(manifest, data); + return { content, manifest }; +} + +function renderSqlExport(db: Database, createdAt: Date): { content: string; manifest: ExportManifest } { + const manifest = buildManifest(db, 'sql', [...EXPORT_TABLES], { includesEmbeddings: false, createdAt }); + return { content: renderSqlDump(db, manifest), manifest }; +} + +function runBackup(options: ExportOptions): void { + if (options.output) { + throw new Error('--backup writes to the backup directory; do not combine it with --output'); + } + if (options.format && options.format !== 'sql') { + throw new Error(`--backup always writes a SQL dump; do not combine it with --format ${options.format}`); + } + const db = getDb(); + const now = options.now ?? new Date(); + const dir = options.backupDir ?? defaultBackupDir(); + mkdirSync(dir, { recursive: true }); + const target = resolveNonClobbering(join(dir, `recall-backup-${timestampSlug(now)}.sql`)); + const { content, manifest } = renderSqlExport(db, now); + writeFileSync(target, content); + console.log(manifestSummary(manifest, [target])); +} + +export function runExport(options: ExportOptions): void { + if (options.backup) { + runBackup(options); + return; + } + + const format = (options.format ?? 'json') as ExportFormat; + if (!(EXPORT_FORMATS as readonly string[]).includes(format)) { + throw new Error(`Unknown format '${options.format}'. Supported: ${EXPORT_FORMATS.join(', ')}`); + } + + const db = getDb(); + const now = options.now ?? new Date(); + + // Resolve output mode: none (stdout), file, or directory. + const output = options.output ? resolve(options.output) : undefined; + const isDir = options.output !== undefined + && (options.output.endsWith('/') || (existsSync(output!) 
&& statSync(output!).isDirectory())); + + if (format === 'sqlite') { + if (!output) { + throw new Error('--format sqlite produces a binary database backup and requires --output '); + } + // Manifest first — VACUUM INTO copies the live DB, so counts match it. + const manifest = buildManifest(db, 'sqlite', listPhysicalTables(db), { includesEmbeddings: true, createdAt: now }); + let target: string; + if (isDir) { + mkdirSync(output, { recursive: true }); + target = resolveNonClobbering(join(output, `recall-export-${timestampSlug(now)}.db`)); + writeFileSync(join(output, 'manifest.json'), JSON.stringify(manifest, null, 2) + '\n'); + } else { + if (existsSync(output)) { + throw new Error(`Refusing to overwrite existing database backup at ${output}`); + } + mkdirSync(dirname(output), { recursive: true }); + target = output; + } + db.prepare('VACUUM INTO ?').run(target); + console.log(manifestSummary(manifest, isDir ? [target, join(output, 'manifest.json')] : [target])); + return; + } + + const { content, manifest } = format === 'sql' + ? renderSqlExport(db, now) + : renderAppExport(db, format, now); + + if (!output) { + // Piping contract: stdout carries only the export data. 
+ process.stdout.write(content); + console.error(manifestSummary(manifest, [])); + return; + } + + if (isDir) { + mkdirSync(output, { recursive: true }); + const artifact = resolveNonClobbering(join(output, `recall-export-${timestampSlug(now)}.${ARTIFACT_EXT[format]}`)); + const manifestPath = join(output, 'manifest.json'); + writeFileSync(artifact, content); + writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n'); + console.log(manifestSummary(manifest, [artifact, manifestPath])); + return; + } + + mkdirSync(dirname(output), { recursive: true }); + writeFileSync(output, content); + console.log(manifestSummary(manifest, [output])); +} diff --git a/src/index.ts b/src/index.ts index e36e94b..9555a05 100644 --- a/src/index.ts +++ b/src/index.ts @@ -29,6 +29,7 @@ import { runBenchmark, listBenchmarks, reportLatestBenchmark } from './commands/ import { runOnboard } from './commands/onboard.js'; import { runMigrate } from './commands/migrate.js'; import { runPath } from './commands/path.js'; +import { runExport } from './commands/export.js'; import { closeDb } from './db/connection.js'; const program = new Command(); @@ -634,6 +635,27 @@ program closeDb(); }); +// recall export — portable + disaster-recovery exports (issue #43) +program + .command('export') + .description('Export memory to JSON, Markdown, SQL dump, or SQLite backup') + .option('-f, --format ', 'Format: json, markdown, sql, sqlite (default: json)') + .option('-o, --output ', 'Output file or directory (directory exports also write manifest.json)') + .option('--backup', 'Write a timestamped SQL dump to ~/.agents/Recall/backups/ (never overwrites)') + .action((options) => { + try { + runExport({ + format: options.format, + output: options.output, + backup: options.backup + }); + } catch (err) { + console.error(`Export failed: ${err instanceof Error ? 
err.message : String(err)}`); + process.exitCode = 1; + } + closeDb(); + }); + // Default command: recall → hybrid search (Phase 3: best of both worlds) program .arguments('[query]') @@ -644,7 +666,7 @@ program .option('-k, --keyword', 'Use keyword search only (FTS5)') .option('-v, --vector', 'Use vector search only (semantic)') .action(async (query, options) => { - if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'provenance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path'].includes(query)) { + if (query && !['init', 'add', 'search', 'recent', 'show', 'stats', 'import', 'import-conversations', 'loa', 'telos', 'docs', 'dump', 'embed', 'semantic', 'hybrid', 'doctor', 'importance', 'provenance', 'pin', 'unpin', 'decision', 'prune', 'cluster', 'import-legacy', 'benchmark', 'onboard', 'migrate', 'path', 'export'].includes(query)) { if (options.keyword) { // FTS5 only runSearch(query, { diff --git a/src/lib/export.ts b/src/lib/export.ts new file mode 100644 index 0000000..4f68163 --- /dev/null +++ b/src/lib/export.ts @@ -0,0 +1,275 @@ +// recall export — core export logic (issue #43). +// +// Pure(ish) building blocks for the export command: table row collection, +// manifest construction, and per-format renderers (JSON, Markdown, SQL dump). +// Everything here takes an open Database handle and plain data so it stays +// unit-testable (issue #44 phase 2 adds property tests over these functions). +// +// Provenance contract (ADR-0001, issue #42): JSON/Markdown exports carry an +// explicit `provenance` field for every row of a provenance-bearing table. +// Legacy NULL provenance is rendered as the literal string 'unknown' — never +// omitted, never guessed (matches the search/recent display contract). 
+// +// Bind-count note (see src/lib/chunk.ts): export reads bind a fixed number of +// parameters per statement — keyset pagination (`WHERE id > ? LIMIT ?`) — so +// bind counts never scale with selected rows and chunked() IN-lists are not +// needed. The shared SQLITE_SAFE_CHUNK_SIZE constant is reused as the batch +// size so large exports stream in bounded batches instead of one giant read. + +import { Database } from 'bun:sqlite'; +import { existsSync } from 'fs'; +import { homedir } from 'os'; +import { join, extname, dirname, basename } from 'path'; +import { SQLITE_SAFE_CHUNK_SIZE } from './chunk.js'; +import { getMigrationVersion } from '../db/migrations.js'; +import { VERSION } from '../version.js'; + +/** Durable memory tables included in app-level (JSON/Markdown/SQL) exports. */ +export const EXPORT_TABLES = [ + 'sessions', + 'messages', + 'decisions', + 'learnings', + 'breadcrumbs', + 'loa_entries', +] as const; +export type ExportTable = typeof EXPORT_TABLES[number]; + +/** Subset of EXPORT_TABLES carrying the provenance column (migration 8→9). */ +export const PROVENANCE_TABLES = [ + 'messages', + 'decisions', + 'learnings', + 'breadcrumbs', + 'loa_entries', +] as const; + +export const EXPORT_FORMATS = ['json', 'markdown', 'sql', 'sqlite'] as const; +export type ExportFormat = typeof EXPORT_FORMATS[number]; + +export type ExportRow = Record; +export type ExportData = Record; + +export interface ExportManifest { + recall_version: string; + created_at: string; + schema_version: number; + format: ExportFormat; + tables: string[]; + counts: Record; + provenance_counts: Record>; + includes_embeddings: boolean; +} + +/** Default disaster-recovery backup directory (install layout, AGENTS.md). */ +export function defaultBackupDir(): string { + return join(homedir(), '.agents', 'Recall', 'backups'); +} + +/** UTC timestamp slug for export artifact filenames (YYYYMMDD-HHMMSS). 
*/ +export function timestampSlug(date: Date): string { + return date.toISOString().replace(/[-:]/g, '').replace('T', '-').slice(0, 15); +} + +/** + * Return `path` if free, otherwise the first `name-N.ext` variant that does + * not exist. Used by --backup, which must never overwrite an existing file. + */ +export function resolveNonClobbering(path: string): string { + if (!existsSync(path)) return path; + const ext = extname(path); + const stem = join(dirname(path), basename(path, ext)); + for (let i = 1; i < 1000; i++) { + const candidate = `${stem}-${i}${ext}`; + if (!existsSync(candidate)) return candidate; + } + throw new Error(`Could not find a non-clobbering name for ${path}`); +} + +/** + * Normalize a row for app-level export: provenance-bearing tables get an + * explicit provenance field with NULL rendered as 'unknown'. Tables without + * the column (sessions) are returned unchanged — no field is invented. + */ +export function toExportRow(table: string, row: ExportRow): ExportRow { + if (!(PROVENANCE_TABLES as readonly string[]).includes(table)) return row; + return { ...row, provenance: row.provenance ?? 'unknown' }; +} + +/** + * Read every row of a durable table in bounded batches via keyset pagination. + * Fixed two-parameter bind per statement regardless of table size. + */ +export function collectTableRows( + db: Database, + table: ExportTable, + batchSize: number = SQLITE_SAFE_CHUNK_SIZE +): ExportRow[] { + const stmt = db.prepare(`SELECT * FROM ${table} WHERE id > ? ORDER BY id LIMIT ?`); + const rows: ExportRow[] = []; + let lastId = 0; + for (;;) { + const batch = stmt.all(lastId, batchSize) as ExportRow[]; + if (batch.length === 0) break; + rows.push(...batch); + lastId = batch[batch.length - 1].id as number; + } + return rows; +} + +/** Collect all durable tables, with export-row normalization applied. 
*/ +export function collectExportData(db: Database): ExportData { + const data: ExportData = {}; + for (const table of EXPORT_TABLES) { + data[table] = collectTableRows(db, table).map(row => toExportRow(table, row)); + } + return data; +} + +/** + * Per-table provenance histograms for the manifest. NULL is counted under the + * explicit 'unknown' key, which is always present (even at zero) so consumers + * never have to infer it. + */ +export function buildProvenanceCounts(db: Database): Record> { + const counts: Record> = {}; + for (const table of PROVENANCE_TABLES) { + const rows = db.prepare( + `SELECT COALESCE(provenance, 'unknown') AS p, COUNT(*) AS c FROM ${table} GROUP BY COALESCE(provenance, 'unknown')` + ).all() as Array<{ p: string; c: number }>; + const histogram: Record = { unknown: 0 }; + for (const row of rows) histogram[row.p] = row.c; + counts[table] = histogram; + } + return counts; +} + +export function buildManifest( + db: Database, + format: ExportFormat, + tables: string[], + options: { includesEmbeddings: boolean; createdAt: Date } +): ExportManifest { + const counts: Record = {}; + for (const table of tables) { + counts[table] = (db.prepare(`SELECT COUNT(*) AS c FROM ${table}`).get() as { c: number }).c; + } + return { + recall_version: VERSION, + created_at: options.createdAt.toISOString(), + schema_version: getMigrationVersion(db), + format, + tables, + counts, + provenance_counts: buildProvenanceCounts(db), + includes_embeddings: options.includesEmbeddings, + }; +} + +/** + * Physical user-data tables present in the database file — what a SQLite + * backup actually carries. Excludes SQLite internals, FTS virtual tables, + * and their shadow tables (derived indexes, rebuildable). 
+ */ +export function listPhysicalTables(db: Database): string[] { + const rows = db.prepare( + `SELECT name, sql FROM sqlite_master WHERE type = 'table' AND name NOT LIKE 'sqlite_%' ORDER BY name` + ).all() as Array<{ name: string; sql: string | null }>; + const virtual = new Set( + rows.filter(r => /CREATE VIRTUAL TABLE/i.test(r.sql ?? '')).map(r => r.name) + ); + return rows + .filter(r => { + if (virtual.has(r.name)) return false; + for (const v of virtual) { + if (r.name.startsWith(`${v}_`)) return false; + } + return true; + }) + .map(r => r.name); +} + +// --------------------------------------------------------------------------- +// Renderers +// --------------------------------------------------------------------------- + +export function renderJsonExport(manifest: ExportManifest, data: ExportData): string { + return JSON.stringify({ manifest, tables: data }, null, 2) + '\n'; +} + +function markdownValue(value: unknown): string { + if (value === null || value === undefined) return '_null_'; + const s = String(value); + if (!s.includes('\n')) return s; + // Multi-line values as an indented blockquote — safe for arbitrary content. + return '\n' + s.split('\n').map(line => ` > ${line}`).join('\n'); +} + +export function renderMarkdownExport(manifest: ExportManifest, data: ExportData): string { + const lines: string[] = []; + lines.push('# Recall Memory Export'); + lines.push(''); + lines.push(`- **Recall version:** ${manifest.recall_version}`); + lines.push(`- **Created:** ${manifest.created_at}`); + lines.push(`- **Schema version:** ${manifest.schema_version}`); + lines.push(`- **Tables:** ${manifest.tables.join(', ')}`); + lines.push(`- **Embeddings included:** ${manifest.includes_embeddings ? 'yes' : 'no'}`); + lines.push(''); + for (const table of manifest.tables) { + const rows = data[table] ?? 
[]; + lines.push(`## ${table} (${rows.length} rows)`); + lines.push(''); + for (const row of rows) { + lines.push(`### ${table} #${row.id}`); + lines.push(''); + for (const [key, value] of Object.entries(row)) { + if (key === 'id') continue; + lines.push(`- **${key}:** ${markdownValue(value)}`); + } + lines.push(''); + } + } + return lines.join('\n'); +} + +/** SQL-literal quoting for dump output. */ +export function sqlQuote(value: unknown): string { + if (value === null || value === undefined) return 'NULL'; + if (typeof value === 'number') return Number.isFinite(value) ? String(value) : 'NULL'; + if (typeof value === 'bigint') return value.toString(); + if (value instanceof Uint8Array) return `X'${Buffer.from(value).toString('hex')}'`; + return `'${String(value).replace(/'/g, "''")}'`; +} + +/** + * Textual SQL dump of the durable tables: CREATE TABLE statements from + * sqlite_master plus one INSERT per row. Restorable into an empty database; + * one-command restore is intentionally out of scope (issue #43). 
+ */ +export function renderSqlDump(db: Database, manifest: ExportManifest): string { + const lines: string[] = []; + lines.push('-- Recall SQL export'); + lines.push(`-- recall_version: ${manifest.recall_version}`); + lines.push(`-- created_at: ${manifest.created_at}`); + lines.push(`-- schema_version: ${manifest.schema_version}`); + lines.push(`-- tables: ${manifest.tables.join(', ')}`); + lines.push('PRAGMA foreign_keys=OFF;'); + lines.push('BEGIN TRANSACTION;'); + for (const table of manifest.tables) { + const master = db.prepare( + `SELECT sql FROM sqlite_master WHERE type = 'table' AND name = ?` + ).get(table) as { sql: string | null } | undefined; + if (master?.sql) { + lines.push(`${master.sql};`); + } + const columns = (db.prepare(`PRAGMA table_info(${table})`).all() as Array<{ name: string }>) + .map(c => c.name); + const columnList = columns.map(c => `"${c}"`).join(', '); + for (const row of collectTableRows(db, table as ExportTable)) { + const values = columns.map(c => sqlQuote(row[c])).join(', '); + lines.push(`INSERT INTO "${table}" (${columnList}) VALUES (${values});`); + } + } + lines.push('COMMIT;'); + return lines.join('\n') + '\n'; +} diff --git a/tests/commands/export.test.ts b/tests/commands/export.test.ts new file mode 100644 index 0000000..9afed09 --- /dev/null +++ b/tests/commands/export.test.ts @@ -0,0 +1,308 @@ +// recall export — issue #43 acceptance criteria. +// +// Behavior under test: +// - JSON/Markdown exports carry an explicit provenance field on every +// provenance-bearing row; legacy NULL renders as 'unknown', never omitted +// - sessions rows (no provenance column) get no invented field +// - manifest carries version, schema, counts, and provenance counts incl. 
unknown +// - SQL dump and SQLite backup creation; backup includes embeddings, app +// formats exclude them +// - --backup writes a timestamped dump, creates the directory, never overwrites +// - --output file vs directory contract; directory exports write manifest.json +// - no --output keeps stdout data-only (piping contract) +// - large exports span multiple read batches without dropping or duplicating rows + +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; +import { Database } from 'bun:sqlite'; +import { existsSync, mkdtempSync, readFileSync, readdirSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { setupTestDb, teardownTestDb } from '../helpers/setup'; +import { runExport } from '../../src/commands/export'; +import { EXPORT_TABLES, PROVENANCE_TABLES } from '../../src/lib/export'; +import { SQLITE_SAFE_CHUNK_SIZE } from '../../src/lib/chunk'; +import { + createSession, + addMessage, + addMessagesBatch, + addDecision, + addLearning, + addBreadcrumb, + createLoaEntry, +} from '../../src/lib/memory'; + +let dbPath: string; +let outDir: string; +const NOW = new Date('2026-01-02T03:04:05Z'); +const originalLog = console.log; +const originalError = console.error; + +beforeEach(() => { + dbPath = setupTestDb(); + outDir = mkdtempSync(join(tmpdir(), 'recall-export-out-')); + console.log = () => {}; +}); + +afterEach(() => { + console.log = originalLog; + console.error = originalError; + teardownTestDb(); + rmSync(outDir, { recursive: true, force: true }); +}); + +/** One known-provenance and one legacy (NULL) row across the durable tables. 
*/ +function seed(): void { + createSession({ session_id: 's1', started_at: '2026-01-01T00:00:00Z', project: 'demo' }); + addMessage({ session_id: 's1', timestamp: '2026-01-01T00:00:01Z', role: 'user', content: 'known message', provenance: 'verbatim' }); + addMessage({ session_id: 's1', timestamp: '2026-01-01T00:00:02Z', role: 'assistant', content: "legacy 'quoted' message\nsecond line" }); + addDecision({ session_id: 's1', decision: 'known decision', status: 'active', provenance: 'user_authored' }); + addDecision({ session_id: 's1', decision: 'legacy decision', status: 'active' }); + addLearning({ session_id: 's1', problem: 'legacy problem', solution: 'fix' }); + addBreadcrumb({ session_id: 's1', content: 'known crumb', importance: 5, provenance: 'extracted' }); + createLoaEntry({ title: 'legacy entry', fabric_extract: 'extract body' }); +} + +function readJsonExport(file: string): any { + return JSON.parse(readFileSync(file, 'utf-8')); +} + +describe('JSON export provenance', () => { + test('every provenance-bearing row carries an explicit field; NULL renders as unknown', () => { + seed(); + const file = join(outDir, 'export.json'); + runExport({ format: 'json', output: file, now: NOW }); + + const doc = readJsonExport(file); + expect(Object.keys(doc.tables).sort()).toEqual([...EXPORT_TABLES].sort()); + + for (const table of PROVENANCE_TABLES) { + for (const row of doc.tables[table]) { + expect(typeof row.provenance).toBe('string'); + } + } + + const legacy = doc.tables.messages.find((m: any) => m.content.startsWith('legacy')); + const known = doc.tables.messages.find((m: any) => m.content.startsWith('known')); + expect(legacy.provenance).toBe('unknown'); + expect(known.provenance).toBe('verbatim'); + expect(doc.tables.decisions.map((d: any) => d.provenance).sort()).toEqual(['unknown', 'user_authored']); + expect(doc.tables.loa_entries[0].provenance).toBe('unknown'); + + // sessions has no provenance column — no field is invented + for (const row of 
doc.tables.sessions) { + expect('provenance' in row).toBe(false); + } + + // embeddings excluded from app-level export + expect(doc.tables.embeddings).toBeUndefined(); + expect(doc.manifest.includes_embeddings).toBe(false); + }); +}); + +describe('Markdown export provenance', () => { + test('renders provenance per row, including explicit unknown', () => { + seed(); + const file = join(outDir, 'export.md'); + runExport({ format: 'markdown', output: file, now: NOW }); + + const md = readFileSync(file, 'utf-8'); + expect(md).toContain('# Recall Memory Export'); + expect(md).toContain('## messages (2 rows)'); + expect(md).toContain('**provenance:** verbatim'); + expect(md).toContain('**provenance:** unknown'); + expect(md).toContain('**Embeddings included:** no'); + }); +}); + +describe('manifest', () => { + test('directory export writes manifest.json with counts and provenance counts incl. unknown', () => { + seed(); + runExport({ format: 'json', output: outDir, now: NOW }); + + const manifestPath = join(outDir, 'manifest.json'); + expect(existsSync(manifestPath)).toBe(true); + const manifest = JSON.parse(readFileSync(manifestPath, 'utf-8')); + + expect(manifest.recall_version).toBeTruthy(); + expect(manifest.created_at).toBe(NOW.toISOString()); + expect(manifest.schema_version).toBeGreaterThanOrEqual(9); + expect(manifest.format).toBe('json'); + expect(manifest.counts).toEqual({ + sessions: 1, + messages: 2, + decisions: 2, + learnings: 1, + breadcrumbs: 1, + loa_entries: 1, + }); + expect(manifest.provenance_counts.messages).toEqual({ unknown: 1, verbatim: 1 }); + expect(manifest.provenance_counts.decisions).toEqual({ unknown: 1, user_authored: 1 }); + expect(manifest.provenance_counts.learnings).toEqual({ unknown: 1 }); + // unknown is explicit even at zero + expect(manifest.provenance_counts.breadcrumbs).toEqual({ unknown: 0, extracted: 1 }); + expect(manifest.provenance_counts.loa_entries).toEqual({ unknown: 1 }); + 
expect(manifest.includes_embeddings).toBe(false); + + // artifact written alongside the manifest + const artifacts = readdirSync(outDir).filter(f => /^recall-export-.*\.json$/.test(f)); + expect(artifacts.length).toBe(1); + }); + + test('file export writes a single file and no manifest.json', () => { + seed(); + const file = join(outDir, 'single.json'); + runExport({ format: 'json', output: file, now: NOW }); + expect(existsSync(file)).toBe(true); + expect(existsSync(join(outDir, 'manifest.json'))).toBe(false); + }); +}); + +describe('SQL dump', () => { + test('creates a textual dump of the durable tables, excluding embeddings', () => { + seed(); + const file = join(outDir, 'dump.sql'); + runExport({ format: 'sql', output: file, now: NOW }); + + const sql = readFileSync(file, 'utf-8'); + expect(sql).toContain('CREATE TABLE sessions'); + expect(sql).toContain('INSERT INTO "messages"'); + expect(sql).toContain("''quoted''"); // escaped single quotes + expect(sql).toContain('BEGIN TRANSACTION;'); + expect(sql).toContain('COMMIT;'); + expect(sql).not.toContain('INSERT INTO "embeddings"'); + expect(sql).not.toContain('CREATE TABLE embeddings'); + }); +}); + +describe('SQLite backup format', () => { + test('copies the full database including embeddings', () => { + seed(); + const raw = new Database(dbPath); + raw.prepare( + `INSERT INTO embeddings (source_table, source_id, model, dimensions, embedding) VALUES ('messages', 1, 'test-model', 2, ?)` + ).run(new Uint8Array([1, 2])); + raw.close(); + + const copyPath = join(outDir, 'copy.db'); + runExport({ format: 'sqlite', output: copyPath, now: NOW }); + + const copy = new Database(copyPath, { readonly: true }); + try { + const embeddings = copy.prepare('SELECT COUNT(*) AS c FROM embeddings').get() as { c: number }; + const messages = copy.prepare('SELECT COUNT(*) AS c FROM messages').get() as { c: number }; + expect(embeddings.c).toBe(1); + expect(messages.c).toBe(2); + } finally { + copy.close(); + } + }); + + 
test('refuses to overwrite an existing backup file', () => { + seed(); + const copyPath = join(outDir, 'copy.db'); + runExport({ format: 'sqlite', output: copyPath, now: NOW }); + expect(() => runExport({ format: 'sqlite', output: copyPath, now: NOW })).toThrow(/Refusing to overwrite/); + }); + + test('requires --output', () => { + seed(); + expect(() => runExport({ format: 'sqlite', now: NOW })).toThrow(/requires --output/); + }); + + test('directory export writes the backup plus manifest.json with embeddings included', () => { + seed(); + runExport({ format: 'sqlite', output: outDir, now: NOW }); + const manifest = JSON.parse(readFileSync(join(outDir, 'manifest.json'), 'utf-8')); + expect(manifest.format).toBe('sqlite'); + expect(manifest.includes_embeddings).toBe(true); + expect(manifest.tables).toContain('embeddings'); + const artifacts = readdirSync(outDir).filter(f => /^recall-export-.*\.db$/.test(f)); + expect(artifacts.length).toBe(1); + }); +}); + +describe('--backup', () => { + test('writes a timestamped SQL dump, creates the directory, and prints the path', () => { + seed(); + const backupDir = join(outDir, 'nested', 'backups'); + const logs: string[] = []; + console.log = (msg: unknown) => { logs.push(String(msg)); }; + + runExport({ backup: true, backupDir, now: NOW }); + + const expected = join(backupDir, 'recall-backup-20260102-030405.sql'); + expect(existsSync(expected)).toBe(true); + expect(logs.join('\n')).toContain(expected); + expect(readFileSync(expected, 'utf-8')).toContain('INSERT INTO "messages"'); + }); + + test('never overwrites an existing backup file', () => { + seed(); + const backupDir = join(outDir, 'backups'); + runExport({ backup: true, backupDir, now: NOW }); + const first = join(backupDir, 'recall-backup-20260102-030405.sql'); + const firstContent = readFileSync(first, 'utf-8'); + + // Same timestamp → must pick a new name, not clobber + runExport({ backup: true, backupDir, now: NOW }); + const second = join(backupDir, 
'recall-backup-20260102-030405-1.sql'); + expect(existsSync(second)).toBe(true); + expect(readFileSync(first, 'utf-8')).toBe(firstContent); + }); + + test('rejects conflicting flags', () => { + seed(); + expect(() => runExport({ backup: true, output: join(outDir, 'x.sql'), now: NOW })).toThrow(/--output/); + expect(() => runExport({ backup: true, format: 'json', backupDir: join(outDir, 'b'), now: NOW })).toThrow(/SQL dump/); + // --format sql is the backup format and is allowed + runExport({ backup: true, format: 'sql', backupDir: join(outDir, 'b'), now: NOW }); + }); +}); + +describe('stdout piping contract', () => { + test('without --output, stdout carries only the export data', () => { + seed(); + const chunks: string[] = []; + const errors: string[] = []; + const originalWrite = process.stdout.write; + (process.stdout as any).write = (chunk: unknown) => { chunks.push(String(chunk)); return true; }; + console.error = (msg: unknown) => { errors.push(String(msg)); }; + try { + runExport({ format: 'json', now: NOW }); + } finally { + process.stdout.write = originalWrite; + } + + // stdout parses as pure JSON — no manifest text mixed in + const doc = JSON.parse(chunks.join('')); + expect(doc.manifest.format).toBe('json'); + // summary went to stderr + expect(errors.join('\n')).toContain('Recall export'); + }); +}); + +describe('large export', () => { + test('spans multiple read batches without dropping or duplicating rows', () => { + seed(); + const bulk = SQLITE_SAFE_CHUNK_SIZE * 2 + 50; + addMessagesBatch( + Array.from({ length: bulk }, (_, i) => ({ + session_id: 's1', + timestamp: `2026-01-01T01:00:00.${String(i).padStart(3, '0')}Z`, + role: 'user' as const, + content: `bulk ${i}`, + provenance: 'verbatim' as const, + })) + ); + + const file = join(outDir, 'large.json'); + runExport({ format: 'json', output: file, now: NOW }); + + const doc = readJsonExport(file); + const ids = doc.tables.messages.map((m: any) => m.id); + expect(ids.length).toBe(bulk + 2); // 
bulk + two seeded messages + expect(new Set(ids).size).toBe(ids.length); // no duplicates + const sorted = [...ids].sort((a, b) => a - b); + expect(ids).toEqual(sorted); // keyset order preserved + }); +}); diff --git a/tests/lib/export.test.ts b/tests/lib/export.test.ts new file mode 100644 index 0000000..6006dab --- /dev/null +++ b/tests/lib/export.test.ts @@ -0,0 +1,81 @@ +// recall export — pure helper functions (issue #43). +// +// Behavior under test: +// - sqlQuote produces valid SQL literals (NULL, numbers, escaped strings, blobs) +// - resolveNonClobbering never returns an existing path +// - toExportRow renders NULL provenance as explicit 'unknown' on +// provenance-bearing tables and invents nothing on sessions +// - timestampSlug yields filesystem-safe UTC slugs + +import { describe, test, expect } from 'bun:test'; +import { mkdtempSync, rmSync, writeFileSync } from 'fs'; +import { tmpdir } from 'os'; +import { join } from 'path'; +import { + resolveNonClobbering, + sqlQuote, + timestampSlug, + toExportRow, +} from '../../src/lib/export'; + +describe('sqlQuote', () => { + test('renders NULL for null and undefined', () => { + expect(sqlQuote(null)).toBe('NULL'); + expect(sqlQuote(undefined)).toBe('NULL'); + }); + + test('renders numbers bare and non-finite as NULL', () => { + expect(sqlQuote(42)).toBe('42'); + expect(sqlQuote(4.5)).toBe('4.5'); + expect(sqlQuote(Infinity)).toBe('NULL'); + }); + + test('escapes single quotes in strings', () => { + expect(sqlQuote("it's a 'test'")).toBe("'it''s a ''test'''"); + }); + + test('renders blobs as hex literals', () => { + expect(sqlQuote(new Uint8Array([0xde, 0xad]))).toBe("X'dead'"); + }); +}); + +describe('resolveNonClobbering', () => { + test('returns the path unchanged when free, suffixed when taken', () => { + const dir = mkdtempSync(join(tmpdir(), 'recall-export-')); + try { + const path = join(dir, 'backup.sql'); + expect(resolveNonClobbering(path)).toBe(path); + + writeFileSync(path, ''); + 
expect(resolveNonClobbering(path)).toBe(join(dir, 'backup-1.sql')); + + writeFileSync(join(dir, 'backup-1.sql'), ''); + expect(resolveNonClobbering(path)).toBe(join(dir, 'backup-2.sql')); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); + +describe('toExportRow', () => { + test('renders NULL provenance as explicit unknown', () => { + const row = toExportRow('messages', { id: 1, content: 'hi', provenance: null }); + expect(row.provenance).toBe('unknown'); + }); + + test('preserves known provenance values', () => { + const row = toExportRow('decisions', { id: 1, decision: 'x', provenance: 'user_authored' }); + expect(row.provenance).toBe('user_authored'); + }); + + test('does not invent provenance on sessions', () => { + const row = toExportRow('sessions', { id: 1, session_id: 's1' }); + expect('provenance' in row).toBe(false); + }); +}); + +describe('timestampSlug', () => { + test('produces a filesystem-safe UTC slug', () => { + expect(timestampSlug(new Date('2026-06-10T19:22:33.456Z'))).toBe('20260610-192233'); + }); +}); From a757727c4370907e1223559345a323f46ad2a5f8 Mon Sep 17 00:00:00 2001 From: Ed Heltzel <402910+edheltzel@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:22:34 -0400 Subject: [PATCH 2/4] refactor(export): single canonical PROVENANCE_TABLES in types; tidy sqlite dir-mode paths - hoist the provenance-bearing table list to types/index.ts (was duplicated between lib/export.ts and the provenance backfill's BACKFILL_TABLES) - name manifest.json path once in the sqlite directory branch --- src/commands/export.ts | 8 ++++++-- src/commands/provenance.ts | 8 ++++---- src/lib/export.ts | 12 ++++-------- src/types/index.ts | 5 +++++ 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/commands/export.ts b/src/commands/export.ts index fdc426d..60aef10 100644 --- a/src/commands/export.ts +++ b/src/commands/export.ts @@ -125,19 +125,23 @@ export function runExport(options: ExportOptions): void { // Manifest first — 
VACUUM INTO copies the live DB, so counts match it. const manifest = buildManifest(db, 'sqlite', listPhysicalTables(db), { includesEmbeddings: true, createdAt: now }); let target: string; + const paths: string[] = []; if (isDir) { mkdirSync(output, { recursive: true }); target = resolveNonClobbering(join(output, `recall-export-${timestampSlug(now)}.db`)); - writeFileSync(join(output, 'manifest.json'), JSON.stringify(manifest, null, 2) + '\n'); + const manifestPath = join(output, 'manifest.json'); + writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n'); + paths.push(target, manifestPath); } else { if (existsSync(output)) { throw new Error(`Refusing to overwrite existing database backup at ${output}`); } mkdirSync(dirname(output), { recursive: true }); target = output; + paths.push(target); } db.prepare('VACUUM INTO ?').run(target); - console.log(manifestSummary(manifest, isDir ? [target, join(output, 'manifest.json')] : [target])); + console.log(manifestSummary(manifest, paths)); return; } diff --git a/src/commands/provenance.ts b/src/commands/provenance.ts index db0cde1..56f128a 100644 --- a/src/commands/provenance.ts +++ b/src/commands/provenance.ts @@ -31,9 +31,9 @@ // variables — bulk UPDATEs with literal predicates — so no chunking applies. import { getDb } from '../db/connection.js'; +import { PROVENANCE_TABLES } from '../types/index.js'; -const BACKFILL_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'] as const; -type BackfillTable = typeof BACKFILL_TABLES[number]; +type BackfillTable = typeof PROVENANCE_TABLES[number]; export interface ProvenanceBackfillOptions { dryRun?: boolean; @@ -93,8 +93,8 @@ export function runProvenanceBackfill(options: ProvenanceBackfillOptions = {}): const dryRun = options.dryRun ?? true; const target = options.table ?? 'all'; - if (target !== 'all' && !(BACKFILL_TABLES as readonly string[]).includes(target)) { - console.error(`Unknown table: ${target}. 
Use one of: ${BACKFILL_TABLES.join(', ')}, all`); + if (target !== 'all' && !(PROVENANCE_TABLES as readonly string[]).includes(target)) { + console.error(`Unknown table: ${target}. Use one of: ${PROVENANCE_TABLES.join(', ')}, all`); process.exitCode = 1; return []; } diff --git a/src/lib/export.ts b/src/lib/export.ts index 4f68163..57b9caf 100644 --- a/src/lib/export.ts +++ b/src/lib/export.ts @@ -35,14 +35,10 @@ export const EXPORT_TABLES = [ ] as const; export type ExportTable = typeof EXPORT_TABLES[number]; -/** Subset of EXPORT_TABLES carrying the provenance column (migration 8→9). */ -export const PROVENANCE_TABLES = [ - 'messages', - 'decisions', - 'learnings', - 'breadcrumbs', - 'loa_entries', -] as const; +// Subset of EXPORT_TABLES carrying the provenance column — canonical list +// lives in types/index.ts; re-exported here as part of the export surface. +export { PROVENANCE_TABLES } from '../types/index.js'; +import { PROVENANCE_TABLES } from '../types/index.js'; export const EXPORT_FORMATS = ['json', 'markdown', 'sql', 'sqlite'] as const; export type ExportFormat = typeof EXPORT_FORMATS[number]; diff --git a/src/types/index.ts b/src/types/index.ts index e7635b3..b973ad3 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -9,6 +9,11 @@ export const PROVENANCE_VALUES = ['user_authored', 'verbatim', 'extracted', 'derived'] as const; export type Provenance = typeof PROVENANCE_VALUES[number]; +// Tables carrying the provenance column (migration 8→9). Single source of +// truth — consumed by the export renderers and the provenance backfill. 
+export const PROVENANCE_TABLES = ['messages', 'decisions', 'learnings', 'breadcrumbs', 'loa_entries'] as const; +export type ProvenanceTable = typeof PROVENANCE_TABLES[number]; + export interface Session { id?: number; session_id: string; From c40733aa3805b9f2d6736b171dabcc83b3838c07 Mon Sep 17 00:00:00 2001 From: Ed Heltzel <402910+edheltzel@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:35:38 -0400 Subject: [PATCH 3/4] fix(export): write sqlite dir-mode manifest only after VACUUM succeeds; unify text-format rendering Review findings from the pre-PR pass: - a failed VACUUM INTO no longer leaves a manifest.json describing a backup that was never created - renderAppExport/renderSqlExport collapsed into renderTextExport; ARTIFACT_EXT narrowed to the formats that reach it - document why --format has no Commander default and the id-PK invariant behind keyset pagination; docs note trailing-slash rule for new dirs --- docs/cli-reference.md | 4 +++- src/commands/export.ts | 33 ++++++++++++++++----------------- src/index.ts | 2 ++ src/lib/export.ts | 2 ++ 4 files changed, 23 insertions(+), 18 deletions(-) diff --git a/docs/cli-reference.md b/docs/cli-reference.md index cf36184..c3d38ff 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -286,7 +286,9 @@ Output contract: - `--output <file>`: writes a single export file and prints the manifest summary to stdout. - `--output <dir>`: writes the export artifact plus `manifest.json` into the - directory. Directory exports always write `manifest.json`. + directory. Directory exports always write `manifest.json`. A path that does + not exist yet is treated as a file — add a trailing slash (`exports/`) to + request a new directory.
The manifest records the Recall version, timestamp, schema version (`PRAGMA user_version`), format, included tables, row counts per table, diff --git a/src/commands/export.ts b/src/commands/export.ts index 60aef10..71a4deb 100644 --- a/src/commands/export.ts +++ b/src/commands/export.ts @@ -47,11 +47,11 @@ export interface ExportOptions { now?: Date; } -const ARTIFACT_EXT: Record = { +// sqlite takes its own branch in runExport and never reaches this map. +const ARTIFACT_EXT: Record<'json' | 'markdown' | 'sql', string> = { json: 'json', markdown: 'md', sql: 'sql', - sqlite: 'db', }; function manifestSummary(manifest: ExportManifest, paths: string[]): string { @@ -68,8 +68,11 @@ function manifestSummary(manifest: ExportManifest, paths: string[]): string { return lines.join('\n'); } -function renderAppExport(db: Database, format: 'json' | 'markdown', createdAt: Date): { content: string; manifest: ExportManifest } { +function renderTextExport(db: Database, format: 'json' | 'markdown' | 'sql', createdAt: Date): { content: string; manifest: ExportManifest } { const manifest = buildManifest(db, format, [...EXPORT_TABLES], { includesEmbeddings: false, createdAt }); + if (format === 'sql') { + return { content: renderSqlDump(db, manifest), manifest }; + } const data = collectExportData(db); const content = format === 'json' ? 
renderJsonExport(manifest, data) @@ -77,11 +80,6 @@ function renderAppExport(db: Database, format: 'json' | 'markdown', createdAt: D return { content, manifest }; } -function renderSqlExport(db: Database, createdAt: Date): { content: string; manifest: ExportManifest } { - const manifest = buildManifest(db, 'sql', [...EXPORT_TABLES], { includesEmbeddings: false, createdAt }); - return { content: renderSqlDump(db, manifest), manifest }; -} - function runBackup(options: ExportOptions): void { if (options.output) { throw new Error('--backup writes to the backup directory; do not combine it with --output'); @@ -94,7 +92,7 @@ function runBackup(options: ExportOptions): void { const dir = options.backupDir ?? defaultBackupDir(); mkdirSync(dir, { recursive: true }); const target = resolveNonClobbering(join(dir, `recall-backup-${timestampSlug(now)}.sql`)); - const { content, manifest } = renderSqlExport(db, now); + const { content, manifest } = renderTextExport(db, 'sql', now); writeFileSync(target, content); console.log(manifestSummary(manifest, [target])); } @@ -125,29 +123,30 @@ export function runExport(options: ExportOptions): void { // Manifest first — VACUUM INTO copies the live DB, so counts match it. 
const manifest = buildManifest(db, 'sqlite', listPhysicalTables(db), { includesEmbeddings: true, createdAt: now }); let target: string; - const paths: string[] = []; if (isDir) { mkdirSync(output, { recursive: true }); target = resolveNonClobbering(join(output, `recall-export-${timestampSlug(now)}.db`)); - const manifestPath = join(output, 'manifest.json'); - writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n'); - paths.push(target, manifestPath); } else { if (existsSync(output)) { throw new Error(`Refusing to overwrite existing database backup at ${output}`); } mkdirSync(dirname(output), { recursive: true }); target = output; - paths.push(target); } db.prepare('VACUUM INTO ?').run(target); + const paths = [target]; + if (isDir) { + // Manifest written only after the backup exists — a failed VACUUM must + // not leave a manifest describing a backup that was never created. + const manifestPath = join(output, 'manifest.json'); + writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n'); + paths.push(manifestPath); + } console.log(manifestSummary(manifest, paths)); return; } - const { content, manifest } = format === 'sql' - ? renderSqlExport(db, now) - : renderAppExport(db, format, now); + const { content, manifest } = renderTextExport(db, format, now); if (!output) { // Piping contract: stdout carries only the export data. diff --git a/src/index.ts b/src/index.ts index 9555a05..4040e17 100644 --- a/src/index.ts +++ b/src/index.ts @@ -636,6 +636,8 @@ program }); // recall export — portable + disaster-recovery exports (issue #43) +// --format has no Commander default: runExport defaults to json, and --backup +// must be able to tell an explicitly passed --format from an omitted one. 
program .command('export') .description('Export memory to JSON, Markdown, SQL dump, or SQLite backup') diff --git a/src/lib/export.ts b/src/lib/export.ts index 57b9caf..f708069 100644 --- a/src/lib/export.ts +++ b/src/lib/export.ts @@ -95,6 +95,8 @@ export function toExportRow(table: string, row: ExportRow): ExportRow { /** * Read every row of a durable table in bounded batches via keyset pagination. * Fixed two-parameter bind per statement regardless of table size. + * Relies on every EXPORT_TABLES table having an INTEGER PRIMARY KEY `id` + * (schema.ts) — a future table without one cannot use this pagination. */ export function collectTableRows( db: Database, From 320ee04d23aa6c9f1c1c5660108e0505309ad6e1 Mon Sep 17 00:00:00 2001 From: Ed Heltzel <402910+edheltzel@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:54:22 -0400 Subject: [PATCH 4/4] =?UTF-8?q?fix(export):=20address=20PR=20#59=20review?= =?UTF-8?q?=20=E2=80=94=20atomic=20no-overwrite=20writes,=20VACUUM=20clean?= =?UTF-8?q?up,=20round-trip=20test,=20changelog?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - --backup and dir-mode text artifacts now claim their filename atomically (writeFileSync flag 'wx', advance to next -N candidate on EEXIST) — four concurrent same-second backups verified to produce four distinct files; resolveNonClobbering retained only for the VACUUM INTO target, where SQLite itself refuses an existing file - a failed VACUUM INTO now unlinks its partial output so a retry is not blocked by the refuse-to-overwrite guard on a corrupt remnant - round-trip test pins that SQL dumps keep NULL provenance (never the 'unknown' display literal) and restore cleanly into an empty database - CHANGELOG: Unreleased entry for recall export - fix two comment mis-citations (search-only display contract; rendering rule comes from #43, not ADR-0001) --- CHANGELOG.md | 5 ++++ src/commands/export.ts | 22 ++++++++++----- src/lib/export.ts | 50 
+++++++++++++++++++++++++++-------- tests/commands/export.test.ts | 42 +++++++++++++++++++++++++++++ tests/lib/export.test.ts | 28 +++++++++++++++++++- 5 files changed, 129 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44745ac..4a11335 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ while MCP tool names (`memory_search`, `memory_add`, etc.) remain stable. ### Added +- **`recall export`** — portable and disaster-recovery exports (#43): JSON, + Markdown, SQL dump, and SQLite (`VACUUM INTO`) formats with a manifest + (counts + provenance counts including explicit `unknown`), a stdout/file/ + directory output contract, and `--backup` writing timestamped, never- + overwritten SQL dumps to `~/.agents/Recall/backups/`. - Added macOS-primary GitHub Actions CI with Ubuntu portability smoke coverage and deterministic release/tag version consistency checks. diff --git a/src/commands/export.ts b/src/commands/export.ts index 71a4deb..6a0e93d 100644 --- a/src/commands/export.ts +++ b/src/commands/export.ts @@ -17,7 +17,7 @@ // - --backup: timestamped SQL dump into ~/.agents/Recall/backups/ (created if // needed), never overwrites an existing file, prints the output path. -import { existsSync, mkdirSync, statSync, writeFileSync } from 'fs'; +import { existsSync, mkdirSync, statSync, unlinkSync, writeFileSync } from 'fs'; import { dirname, join, resolve } from 'path'; import { Database } from 'bun:sqlite'; import { getDb } from '../db/connection.js'; @@ -33,6 +33,7 @@ import { renderSqlDump, resolveNonClobbering, timestampSlug, + writeNonClobbering, type ExportFormat, type ExportManifest, } from '../lib/export.js'; @@ -91,9 +92,8 @@ function runBackup(options: ExportOptions): void { const now = options.now ?? new Date(); const dir = options.backupDir ?? 
defaultBackupDir(); mkdirSync(dir, { recursive: true }); - const target = resolveNonClobbering(join(dir, `recall-backup-${timestampSlug(now)}.sql`)); const { content, manifest } = renderTextExport(db, 'sql', now); - writeFileSync(target, content); + const target = writeNonClobbering(join(dir, `recall-backup-${timestampSlug(now)}.sql`), content); console.log(manifestSummary(manifest, [target])); } @@ -133,7 +133,15 @@ export function runExport(options: ExportOptions): void { mkdirSync(dirname(output), { recursive: true }); target = output; } - db.prepare('VACUUM INTO ?').run(target); + try { + db.prepare('VACUUM INTO ?').run(target); + } catch (err) { + // SQLite documents that an interrupted VACUUM INTO can leave a partial + // output file the application must delete. Without this cleanup a + // retry hits the refuse-to-overwrite guard on a corrupt remnant. + try { unlinkSync(target); } catch { /* nothing was created */ } + throw err; + } const paths = [target]; if (isDir) { // Manifest written only after the backup exists — a failed VACUUM must @@ -157,9 +165,11 @@ export function runExport(options: ExportOptions): void { if (isDir) { mkdirSync(output, { recursive: true }); - const artifact = resolveNonClobbering(join(output, `recall-export-${timestampSlug(now)}.${ARTIFACT_EXT[format]}`)); + const artifact = writeNonClobbering( + join(output, `recall-export-${timestampSlug(now)}.${ARTIFACT_EXT[format]}`), + content + ); const manifestPath = join(output, 'manifest.json'); - writeFileSync(artifact, content); writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n'); console.log(manifestSummary(manifest, [artifact, manifestPath])); return; diff --git a/src/lib/export.ts b/src/lib/export.ts index f708069..2a072bb 100644 --- a/src/lib/export.ts +++ b/src/lib/export.ts @@ -5,10 +5,11 @@ // Everything here takes an open Database handle and plain data so it stays // unit-testable (issue #44 phase 2 adds property tests over these functions). 
// -// Provenance contract (ADR-0001, issue #42): JSON/Markdown exports carry an -// explicit `provenance` field for every row of a provenance-bearing table. -// Legacy NULL provenance is rendered as the literal string 'unknown' — never -// omitted, never guessed (matches the search/recent display contract). +// Provenance contract (issue #43; storage semantics from issue #42/ADR-0001): +// JSON/Markdown exports carry an explicit `provenance` field for every row of +// a provenance-bearing table. Legacy NULL provenance is rendered as the +// literal string 'unknown' — never omitted, never guessed (the same rendering +// search.ts uses for its display contract). // // Bind-count note (see src/lib/chunk.ts): export reads bind a fixed number of // parameters per statement — keyset pagination (`WHERE id > ? LIMIT ?`) — so @@ -17,7 +18,7 @@ // size so large exports stream in bounded batches instead of one giant read. import { Database } from 'bun:sqlite'; -import { existsSync } from 'fs'; +import { existsSync, writeFileSync } from 'fs'; import { homedir } from 'os'; import { join, extname, dirname, basename } from 'path'; import { SQLITE_SAFE_CHUNK_SIZE } from './chunk.js'; @@ -67,21 +68,48 @@ export function timestampSlug(date: Date): string { return date.toISOString().replace(/[-:]/g, '').replace('T', '-').slice(0, 15); } +const MAX_NAME_CANDIDATES = 1000; + +/** Candidate n for a non-clobbering name: the path itself, then name-N.ext. */ +function nthCandidate(path: string, n: number): string { + if (n === 0) return path; + const ext = extname(path); + return `${join(dirname(path), basename(path, ext))}-${n}${ext}`; +} + /** * Return `path` if free, otherwise the first `name-N.ext` variant that does - * not exist. Used by --backup, which must never overwrite an existing file. + * not exist. Check-then-act: only safe when the subsequent writer itself + * refuses an existing file (VACUUM INTO does — a lost race fails loudly + * instead of overwriting). 
Text writes must use writeNonClobbering instead. */ export function resolveNonClobbering(path: string): string { - if (!existsSync(path)) return path; - const ext = extname(path); - const stem = join(dirname(path), basename(path, ext)); - for (let i = 1; i < 1000; i++) { - const candidate = `${stem}-${i}${ext}`; + for (let i = 0; i < MAX_NAME_CANDIDATES; i++) { + const candidate = nthCandidate(path, i); if (!existsSync(candidate)) return candidate; } throw new Error(`Could not find a non-clobbering name for ${path}`); } +/** + * Atomically write `content` to `path`, or to the first `name-N.ext` variant + * not yet taken. O_EXCL (flag 'wx') makes the claim atomic: concurrent + * writers racing for the same name each land on a distinct file — no + * check-then-act window. Returns the path actually written. + */ +export function writeNonClobbering(path: string, content: string): string { + for (let i = 0; i < MAX_NAME_CANDIDATES; i++) { + const candidate = nthCandidate(path, i); + try { + writeFileSync(candidate, content, { flag: 'wx' }); + return candidate; + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err; + } + } + throw new Error(`Could not find a non-clobbering name for ${path}`); +} + /** * Normalize a row for app-level export: provenance-bearing tables get an * explicit provenance field with NULL rendered as 'unknown'. 
Tables without diff --git a/tests/commands/export.test.ts b/tests/commands/export.test.ts index 9afed09..b7e0274 100644 --- a/tests/commands/export.test.ts +++ b/tests/commands/export.test.ts @@ -172,6 +172,48 @@ describe('SQL dump', () => { expect(sql).not.toContain('INSERT INTO "embeddings"'); expect(sql).not.toContain('CREATE TABLE embeddings'); }); + + test('keeps NULL provenance and restores cleanly into an empty database', () => { + seed(); + const file = join(outDir, 'roundtrip.sql'); + runExport({ format: 'sql', output: file, now: NOW }); + const sql = readFileSync(file, 'utf-8'); + + // The dump must never contain the display literal 'unknown' — the schema + // CHECK constraint would reject it on restore. NULL is the on-disk truth. + expect(sql).not.toContain("'unknown'"); + + const restored = new Database(':memory:'); + try { + restored.exec(sql); + + // Counts match the seeded source for every durable table + const counts: Record = {}; + for (const table of EXPORT_TABLES) { + counts[table] = (restored.prepare(`SELECT COUNT(*) AS c FROM ${table}`).get() as { c: number }).c; + } + expect(counts).toEqual({ + sessions: 1, + messages: 2, + decisions: 2, + learnings: 1, + breadcrumbs: 1, + loa_entries: 1, + }); + + // Legacy NULL restores as NULL; known values survive verbatim + const messages = restored.prepare('SELECT content, provenance FROM messages ORDER BY id').all() as Array<{ content: string; provenance: string | null }>; + const legacy = messages.find(m => m.content.startsWith('legacy'))!; + const known = messages.find(m => m.content.startsWith('known'))!; + expect(legacy.provenance).toBeNull(); + expect(known.provenance).toBe('verbatim'); + + // Quote + newline content round-trips byte-for-byte + expect(legacy.content).toBe("legacy 'quoted' message\nsecond line"); + } finally { + restored.close(); + } + }); }); describe('SQLite backup format', () => { diff --git a/tests/lib/export.test.ts b/tests/lib/export.test.ts index 6006dab..a9f12bf 100644 --- 
a/tests/lib/export.test.ts +++ b/tests/lib/export.test.ts @@ -8,7 +8,7 @@ // - timestampSlug yields filesystem-safe UTC slugs import { describe, test, expect } from 'bun:test'; -import { mkdtempSync, rmSync, writeFileSync } from 'fs'; +import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'fs'; import { tmpdir } from 'os'; import { join } from 'path'; import { @@ -16,6 +16,7 @@ import { sqlQuote, timestampSlug, toExportRow, + writeNonClobbering, } from '../../src/lib/export'; describe('sqlQuote', () => { @@ -57,6 +58,31 @@ describe('resolveNonClobbering', () => { }); }); +describe('writeNonClobbering', () => { + test('writes atomically with O_EXCL and never disturbs existing files', () => { + const dir = mkdtempSync(join(tmpdir(), 'recall-export-')); + try { + const path = join(dir, 'backup.sql'); + expect(writeNonClobbering(path, 'first')).toBe(path); + + // Same target again: lands on -1, original content untouched + expect(writeNonClobbering(path, 'second')).toBe(join(dir, 'backup-1.sql')); + expect(readFileSync(path, 'utf-8')).toBe('first'); + + // Pre-claimed -2 (a racing writer) is skipped, not truncated + writeFileSync(join(dir, 'backup-2.sql'), 'racer'); + expect(writeNonClobbering(path, 'third')).toBe(join(dir, 'backup-3.sql')); + expect(readFileSync(join(dir, 'backup-2.sql'), 'utf-8')).toBe('racer'); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + test('propagates non-EEXIST errors', () => { + expect(() => writeNonClobbering(join(tmpdir(), 'no-such-dir-xyz', 'f.sql'), 'x')).toThrow(); + }); +}); + describe('toExportRow', () => { test('renders NULL provenance as explicit unknown', () => { const row = toExportRow('messages', { id: 1, content: 'hi', provenance: null });