From 75407861236417f1beb50b3253601ad8c43ebec8 Mon Sep 17 00:00:00 2001
From: Ben Scholtens
Date: Thu, 30 Apr 2026 12:35:07 -0400
Subject: [PATCH 1/4] Add notebook schema versioning ladder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces a per-file `metadata.schemaVersion` on `.codex` and `.source`
notebooks, plus a shared migration ladder run at four points so files
always reach the current schema before any merge or render:

1. Activation: scan every notebook and bring it to current (no
   completion flag — the per-file version is the truth).
2. Save: serializer stamps `schemaVersion` on every write.
3. Merge: `resolveCodexCustomMerge` brings both ours and theirs to
   current before merging, then stamps the result. Replaces the ad-hoc
   `needsEditHistoryMigration` / `migrateEditHistoryInContent` helpers,
   which become the v0 → v1 ladder step.
4. Post-sync (clean fast-forward): `executeSyncInBackground` walks
   `syncResult.changedFiles ∪ newFiles` and brings each touched notebook
   to current before downstream helpers (index rebuild, webview refresh)
   read them.

`bringNotebookToCurrent(notebook, ctx)` is the single entry point and is
idempotent. It refuses to migrate downward when a file's version exceeds
`CURRENT_SCHEMA_VERSION`, so a teammate on a newer build won't
accidentally regress everyone else's files.

The v0 → v1 step lifts the legacy `cellValue` + missing `editMap` shape
into the modern `value` + `editMap = ["value"]` form. Future schema bumps
will append further steps.

Made-with: Cursor --- src/extension.ts | 2 + src/projectManager/syncManager.ts | 40 +++++ src/projectManager/utils/merge/resolvers.ts | 102 +++---------- src/projectManager/utils/migrationUtils.ts | 69 +++++++++ src/projectManager/utils/schema/file.ts | 38 +++++ src/projectManager/utils/schema/index.ts | 98 +++++++++++++ .../utils/schema/migrations/v0_to_v1.ts | 27 ++++ src/serializer.ts | 11 +- src/test/suite/schemaLadder.test.ts | 137 ++++++++++++++++++ types/index.d.ts | 7 + 10 files changed, 445 insertions(+), 86 deletions(-) create mode 100644 src/projectManager/utils/schema/file.ts create mode 100644 src/projectManager/utils/schema/index.ts create mode 100644 src/projectManager/utils/schema/migrations/v0_to_v1.ts create mode 100644 src/test/suite/schemaLadder.test.ts diff --git a/src/extension.ts b/src/extension.ts index effac6234..b3ca80294 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -17,6 +17,7 @@ import { migration_verseRangeLabelsAndPositions, migration_cellIdsToUuid, migration_recoverTempFilesAndMergeDuplicates, + migration_normalizeAllNotebooksToCurrentSchema, } from "./projectManager/utils/migrationUtils"; import { createIndexWithContext } from "./activationHelpers/contextAware/contentIndexes/indexes"; import { StatusBarItem } from "vscode"; @@ -899,6 +900,7 @@ export async function activate(context: vscode.ExtensionContext) { await migration_addGlobalReferences(context); await migration_cellIdsToUuid(context); await migration_recoverTempFilesAndMergeDuplicates(context); + await migration_normalizeAllNotebooksToCurrentSchema(context); } // Remove leftover files from features that have been removed diff --git a/src/projectManager/syncManager.ts b/src/projectManager/syncManager.ts index 8e5b6f527..aa22a4b15 100644 --- a/src/projectManager/syncManager.ts +++ b/src/projectManager/syncManager.ts @@ -12,6 +12,7 @@ import { checkRemoteUpdatingRequired } from "../utils/remoteUpdatingManager"; import { markPendingUpdateRequired, clearPendingUpdate, 
readLocalProjectSettings } from "../utils/localProjectSettings"; import { isDatabaseReady } from "../utils/sqliteDatabaseFactory"; import { isOnline } from "../utils/connectivityChecker"; +import { bringNotebookToCurrentForFile } from "./utils/schema/file"; const DEBUG_SYNC_MANAGER = false; @@ -1226,6 +1227,45 @@ export class SyncManager { this.currentSyncStage = "Finishing up..."; this.notifySyncStatusListeners(); + // Schema normalization: bring any .codex/.source files that arrived in this + // sync up to CURRENT_SCHEMA_VERSION before the rest of the post-sync helpers + // (index rebuild, webview refresh) read them. This handles the clean + // fast-forward case where files came down without going through + // resolveCodexCustomMerge — the merge resolver path already calls the + // ladder, so this only does work when there were no conflicts. + try { + if (workspaceFolders && workspaceFolders.length > 0) { + const wsRoot = workspaceFolders[0].uri; + const touched = new Set([ + ...syncResult.changedFiles, + ...syncResult.newFiles, + ]); + const notebookPaths = Array.from(touched).filter( + (p) => p.endsWith(".codex") || p.endsWith(".source") + ); + if (notebookPaths.length > 0) { + let author = "anonymous"; + try { + const authApi = await getAuthApi(); + const userInfo = await authApi?.getUserInfo(); + if (userInfo?.username) author = userInfo.username; + } catch (_) { /* ignore */ } + + let migrated = 0; + for (const relPath of notebookPaths) { + const uri = vscode.Uri.joinPath(wsRoot, relPath); + const result = await bringNotebookToCurrentForFile(uri, { author }); + if (result.migrated) migrated++; + } + if (migrated > 0) { + debug(`Schema-normalized ${migrated}/${notebookPaths.length} synced notebook(s)`); + } + } + } + } catch (error) { + console.error("[SyncManager] Error during post-sync schema normalization:", error); + } + // Check if comments.json was affected by the sync - if so, run targeted repair const commentsWasChanged = 
syncResult.changedFiles.includes('.project/comments.json') || syncResult.newFiles.includes('.project/comments.json') || diff --git a/src/projectManager/utils/merge/resolvers.ts b/src/projectManager/utils/merge/resolvers.ts index 0643d709f..3286fb3b9 100644 --- a/src/projectManager/utils/merge/resolvers.ts +++ b/src/projectManager/utils/merge/resolvers.ts @@ -11,12 +11,12 @@ import { normalizeProjectSwapInfo } from "../../../utils/projectSwapManager"; import { ProjectSwapInfo, ProjectSwapEntry, ProjectSwapUserEntry, RemoteUpdatingEntry } from "../../../../types"; import { NotebookCommentThread, NotebookComment, CustomNotebookCellData, CustomNotebookMetadata } from "../../../../types"; import { CommentsMigrator } from "../../../utils/commentsMigrationUtils"; -import { CodexCell } from "@/utils/codexNotebookUtils"; import { CodexCellTypes, EditType } from "../../../../types/enums"; import { EditHistory, ValidationEntry, FileEditHistory, ProjectEditHistory, ProjectUserVersionEntry } from "../../../../types/index.d"; import { EditMapUtils, deduplicateFileMetadataEdits } from "../../../utils/editMapUtils"; import { normalizeAttachmentUrl } from "@/utils/pathUtils"; import { formatJsonForNotebookFile } from "../../../utils/notebookFileFormattingUtils"; +import { bringNotebookToCurrent, CURRENT_SCHEMA_VERSION } from "../schema"; import { ORPHANED_PROJECT_FILES } from "../../../utils/fileUtils"; import { buildCellPositionContextMap, @@ -632,31 +632,6 @@ export async function resolveConflictFile( } } -/** - * Helper function to check if content contains old format edits that need migration - */ -function needsEditHistoryMigration(content: string): boolean { - try { - const notebook = JSON.parse(content); - const cells: CodexCell[] = notebook.cells || []; - - for (const cell of cells) { - if (cell.metadata?.edits && cell.metadata.edits.length > 0) { - for (const edit of cell.metadata.edits) { - // Check if this is an old format edit (has cellValue but no editMap) - if ((edit 
as any).cellValue !== undefined && !edit.editMap) { - return true; - } - } - } - } - return false; - } catch (error) { - debugLog("Error checking for migration need:", error); - return false; - } -} - /** * Helper function to resolve metadata conflicts using edit history * This function determines the latest edit for each metadata field and applies it @@ -983,44 +958,6 @@ function applyEditToCell(cell: CustomNotebookCellData, edit: EditHistory): void } } -/** - * Helper function to migrate old format edits to new format in-place - */ -function migrateEditHistoryInContent(content: string): string { - try { - const notebook = JSON.parse(content); - const cells: CodexCell[] = notebook.cells || []; - let hasChanges = false; - - for (const cell of cells) { - if (cell.metadata?.edits && cell.metadata.edits.length > 0) { - for (const edit of cell.metadata.edits as any) { - // Check if this is an old format edit (has cellValue but no editMap) - if (edit.cellValue !== undefined && !edit.editMap) { - // Migrate old format to new format - edit.value = edit.cellValue; // Move cellValue to value - edit.editMap = ["value"]; // Set editMap to point to value - delete edit.cellValue; // Remove old property - hasChanges = true; - - debugLog(`Migrated edit in cell ${cell.metadata.id}: converted cellValue to value with editMap`); - } - } - } - } - - if (hasChanges) { - debugLog("Edit history migration completed for content"); - return JSON.stringify(notebook, null, 2); - } - - return content; - } catch (error) { - debugLog("Error migrating edit history in content:", error); - return content; - } -} - function mergeTwoCellsUsingResolverLogic( ourCell: CustomNotebookCellData, theirCell: CustomNotebookCellData @@ -1134,7 +1071,6 @@ export async function resolveCodexCustomMerge( debugLog({ ourContent: ourContent.slice(0, 1000), theirContent: theirContent.slice(0, 1000) }); debugLog("Starting resolveCodexCustomMerge"); - // Check if content needs migration and migrate if necessary if 
(!ourContent) { debugLog("No our content, returning their content"); return theirContent; @@ -1144,26 +1080,19 @@ export async function resolveCodexCustomMerge( return ourContent; } - // Migrate content if needed - let migratedOurContent = ourContent; - let migratedTheirContent = theirContent; - - const ourNeedsMigration = needsEditHistoryMigration(ourContent); - const theirNeedsMigration = needsEditHistoryMigration(theirContent); - - if (ourNeedsMigration) { - debugLog("Migrating our content edit history format"); - migratedOurContent = migrateEditHistoryInContent(ourContent); - } - - if (theirNeedsMigration) { - debugLog("Migrating their content edit history format"); - migratedTheirContent = migrateEditHistoryInContent(theirContent); - } - debugLog("Parsing notebook content"); - const ourNotebook = JSON.parse(migratedOurContent); - const theirNotebook = JSON.parse(migratedTheirContent); + const ourNotebook = JSON.parse(ourContent); + const theirNotebook = JSON.parse(theirContent); + + // Bring both sides up to CURRENT_SCHEMA_VERSION before merging so the merge + // logic only ever sees one shape. Files already at the current version + // short-circuit to a no-op. The schema ladder folds in the legacy + // cellValue → value/editMap transform that used to live as a one-shot + // helper here; future schema bumps will append further steps. + const mergeAuthorForLadder = process.env.CODEX_MERGE_USER || await getCurrentUserName(); + await bringNotebookToCurrent(ourNotebook, { author: mergeAuthorForLadder }); + await bringNotebookToCurrent(theirNotebook, { author: mergeAuthorForLadder }); + const ourCells: CustomNotebookCellData[] = ourNotebook.cells; const theirCells: CustomNotebookCellData[] = theirNotebook.cells; @@ -1282,6 +1211,11 @@ export async function resolveCodexCustomMerge( } } + // Stamp the merged notebook at the current schema version. 
Both sides were + // brought to current at the top of this function, so the merged output is + // guaranteed to be at current too. + mergedMetadata.schemaVersion = CURRENT_SCHEMA_VERSION; + // Return the full notebook structure with merged cells and metadata // (formatted consistently for `.codex`/`.source` file writes) return formatJsonForNotebookFile( diff --git a/src/projectManager/utils/migrationUtils.ts b/src/projectManager/utils/migrationUtils.ts index ecbc059aa..0bf8557f8 100644 --- a/src/projectManager/utils/migrationUtils.ts +++ b/src/projectManager/utils/migrationUtils.ts @@ -21,6 +21,7 @@ import bibleData from "../../../webviews/codex-webviews/src/assets/bible-books-l import { resolveCodexCustomMerge, mergeDuplicateCellsUsingResolverLogic } from "./merge/resolvers"; import { atomicWriteUriText } from "../../utils/notebookSafeSaveUtils"; import { normalizeNotebookFileText, formatJsonForNotebookFile } from "../../utils/notebookFileFormattingUtils"; +import { bringNotebookToCurrentForFile } from "./schema/file"; // FIXME: move notebook format migration here @@ -4087,3 +4088,71 @@ export const migration_recoverTempFilesAndMergeDuplicates = async (context?: vsc console.error("Error running temp files recovery and duplicate merge migration:", error); } }; + +/** + * Activation-time pass: scan every `.codex` and `.source` notebook in the workspace + * and bring it up to `CURRENT_SCHEMA_VERSION` via the shared schema migration ladder. + * + * There is no completion flag — the per-file `metadata.schemaVersion` field IS the + * truth, so the activation pass is just a fast read-only scan on a settled project + * (no writes when every file is already current). On first run after upgrade, it + * does the work; subsequent runs are nearly free. 
+ */ +export const migration_normalizeAllNotebooksToCurrentSchema = async ( + _context?: vscode.ExtensionContext +) => { + try { + const workspaceFolders = vscode.workspace.workspaceFolders; + if (!workspaceFolders || workspaceFolders.length === 0) { + return; + } + + const files = await vscode.workspace.findFiles("**/*.{codex,source}"); + if (files.length === 0) { + return; + } + + let author = "anonymous"; + try { + const authApi = await getAuthApi(); + const userInfo = await authApi?.getUserInfo(); + if (userInfo?.username) { + author = userInfo.username; + } + } catch (_) { /* ignore */ } + + let migratedFiles = 0; + let scannedFiles = 0; + let aheadOfClientFiles = 0; + + await vscode.window.withProgress( + { + location: vscode.ProgressLocation.Notification, + title: "Checking notebook schema versions...", + cancellable: false, + }, + async (progress) => { + for (let i = 0; i < files.length; i++) { + const file = files[i]; + progress.report({ + message: `Processing file ${i + 1}/${files.length}`, + increment: 100 / files.length, + }); + + const result = await bringNotebookToCurrentForFile(file, { author }); + scannedFiles++; + if (result.migrated) migratedFiles++; + if (result.aheadOfClient) aheadOfClientFiles++; + } + } + ); + + if (migratedFiles > 0 || aheadOfClientFiles > 0) { + debug( + `Schema normalization scan complete: ${migratedFiles}/${scannedFiles} migrated, ${aheadOfClientFiles} ahead of client.` + ); + } + } catch (error) { + console.error("Error running schema normalization migration:", error); + } +}; diff --git a/src/projectManager/utils/schema/file.ts b/src/projectManager/utils/schema/file.ts new file mode 100644 index 000000000..18abdfb47 --- /dev/null +++ b/src/projectManager/utils/schema/file.ts @@ -0,0 +1,38 @@ +import * as vscode from "vscode"; +import { atomicWriteUriText } from "../../../utils/notebookSafeSaveUtils"; +import { formatJsonForNotebookFile } from "../../../utils/notebookFileFormattingUtils"; +import { + 
bringNotebookToCurrent, + BringToCurrentResult, + SchemaMigrationContext, +} from "./index"; + +/** + * Reads a notebook URI, runs the schema ladder, and atomically writes it back + * iff the ladder reported a change. Used by both the activation-time normalization + * pass and the post-sync hook so they share identical semantics. + * + * Returns `migrated: false` when the file is already at the current schema version + * or when the file is at a version newer than this client understands (in which + * case the file is left untouched). + */ +export async function bringNotebookToCurrentForFile( + uri: vscode.Uri, + ctx: SchemaMigrationContext +): Promise { + try { + const data = await vscode.workspace.fs.readFile(uri); + const text = new TextDecoder().decode(data); + const notebook = JSON.parse(text); + + const result = await bringNotebookToCurrent(notebook, ctx); + if (result.migrated) { + const newContent = formatJsonForNotebookFile(notebook); + await atomicWriteUriText(uri, newContent); + } + return result; + } catch (error) { + console.error(`[schema] Failed to migrate ${uri.fsPath}:`, error); + return { migrated: false, from: -1, to: -1, aheadOfClient: false, error }; + } +} diff --git a/src/projectManager/utils/schema/index.ts b/src/projectManager/utils/schema/index.ts new file mode 100644 index 000000000..f198f1681 --- /dev/null +++ b/src/projectManager/utils/schema/index.ts @@ -0,0 +1,98 @@ +import { migrate_v0_to_v1 } from "./migrations/v0_to_v1"; + +/** + * Notebook schema versioning. + * + * Every `.codex` and `.source` notebook on disk carries `metadata.schemaVersion: number` + * (files written before this system existed are treated as v0). The `migrations` map + * is a ladder: `migrations[N]` takes a notebook **at v(N-1)** and produces v(N) in place. + * + * Going v0 → vK runs `migrations[1]`, `migrations[2]`, ..., `migrations[K]` in order. + * + * Single entry point: `bringNotebookToCurrent(notebook, ctx)`. 
Idempotent — when the + * notebook is already at `CURRENT_SCHEMA_VERSION` it short-circuits and returns + * `migrated: false` so callers can skip the disk write. + */ + +export const CURRENT_SCHEMA_VERSION = 1; + +/** Context passed to every ladder step (e.g. for deterministic id generation). */ +export interface SchemaMigrationContext { + /** Username to attribute synthesized edits to when the source edit lacks an author. */ + author: string; +} + +export type SchemaMigration = ( + notebook: any, + ctx: SchemaMigrationContext +) => Promise | void; + +/** + * Ladder registry. Add new entries here when the on-disk shape changes: + * migrations[2] = migrate_v1_to_v2; // bumps CURRENT_SCHEMA_VERSION too. + */ +const migrations: Record = { + 1: migrate_v0_to_v1, +}; + +/** Reads `metadata.schemaVersion` defensively; missing/non-numeric → 0. */ +export function getSchemaVersion(notebook: any): number { + const raw = notebook?.metadata?.schemaVersion; + const n = typeof raw === "number" ? raw : Number(raw); + return Number.isFinite(n) && n >= 0 ? n : 0; +} + +export interface BringToCurrentResult { + migrated: boolean; + from: number; + to: number; + /** + * True when the file is at a version newer than this build understands. Callers + * should treat the file as opaque and avoid rewriting it. Merge resolution can + * still proceed best-effort — unknown fields pass through untouched. + */ + aheadOfClient: boolean; +} + +/** + * Brings a parsed notebook to `CURRENT_SCHEMA_VERSION` in place. + * + * - No-op when already current → `migrated: false`, no field writes. + * - When the file is ahead of this client (`schemaVersion > CURRENT_SCHEMA_VERSION`), + * logs a warning and returns `aheadOfClient: true` without modifying the notebook. + * We never downgrade — there's no inverse ladder. 
+ */ +export async function bringNotebookToCurrent( + notebook: any, + ctx: SchemaMigrationContext +): Promise { + const from = getSchemaVersion(notebook); + + if (from > CURRENT_SCHEMA_VERSION) { + console.warn( + `[schema] Notebook reports schemaVersion=${from} but client only understands up to ${CURRENT_SCHEMA_VERSION}. Leaving file untouched.` + ); + return { migrated: false, from, to: from, aheadOfClient: true }; + } + + if (from >= CURRENT_SCHEMA_VERSION) { + return { migrated: false, from, to: from, aheadOfClient: false }; + } + + for (let v = from + 1; v <= CURRENT_SCHEMA_VERSION; v++) { + const step = migrations[v]; + if (!step) { + throw new Error( + `[schema] Missing ladder step for version ${v}. Cannot bring notebook from v${from} to v${CURRENT_SCHEMA_VERSION}.` + ); + } + await step(notebook, ctx); + } + + if (!notebook.metadata) { + notebook.metadata = {}; + } + notebook.metadata.schemaVersion = CURRENT_SCHEMA_VERSION; + + return { migrated: true, from, to: CURRENT_SCHEMA_VERSION, aheadOfClient: false }; +} diff --git a/src/projectManager/utils/schema/migrations/v0_to_v1.ts b/src/projectManager/utils/schema/migrations/v0_to_v1.ts new file mode 100644 index 000000000..7280357cb --- /dev/null +++ b/src/projectManager/utils/schema/migrations/v0_to_v1.ts @@ -0,0 +1,27 @@ +import type { SchemaMigration } from "../index"; + +/** + * v0 → v1: Lift the legacy edit shape into the modern editMap-based shape. + * + * Some old `.codex` files carry edits with a `cellValue` field and no `editMap` + * (the pre-editMap design). This step rewrites them to the modern shape: + * `{ cellValue: X }` → `{ value: X, editMap: ["value"] }`. + * + * This is a pure data transform — no edit-id generation, no activeEditId logic, + * and no INITIAL_IMPORT synthesis. Future schema bumps will append further steps + * (e.g. v1 → v2) on top of this baseline. 
+ */ +export const migrate_v0_to_v1: SchemaMigration = (notebook) => { + for (const cell of (notebook.cells || [])) { + const edits = cell.metadata?.edits; + if (!Array.isArray(edits) || edits.length === 0) continue; + + for (const edit of edits as any[]) { + if (edit.cellValue !== undefined && !edit.editMap) { + edit.value = edit.cellValue; + edit.editMap = ["value"]; + delete edit.cellValue; + } + } + } +}; diff --git a/src/serializer.ts b/src/serializer.ts index e76406c03..a5ea1616b 100644 --- a/src/serializer.ts +++ b/src/serializer.ts @@ -4,6 +4,7 @@ import * as vscode from "vscode"; import { TextDecoder, TextEncoder } from "util"; import { CodexNotebookAsJSONData, CustomNotebookCellData } from "../types"; import { formatJsonForNotebookFile } from "./utils/notebookFileFormattingUtils"; +import { CURRENT_SCHEMA_VERSION } from "./projectManager/utils/schema"; export interface CodexNotebookDocument extends vscode.NotebookDocument { cells: CustomNotebookCellData[]; @@ -77,10 +78,16 @@ export class CodexContentSerializer implements vscode.NotebookSerializer { token: vscode.CancellationToken ): Promise { debug("Serializing notebook data", { cellCount: data.cells.length }); - // Map the Notebook data into the format we want to save the Notebook data as + // Stamp the on-disk schema version on every save. Activation, merge, and + // post-sync hooks all keep this in sync; setting it here guarantees that + // any file the editor writes is at the current version. 
+ const stampedMetadata = { + ...(data.metadata || {}), + schemaVersion: CURRENT_SCHEMA_VERSION, + }; const contents: RawNotebookData = { cells: [], - metadata: data.metadata, + metadata: stampedMetadata, }; for (const cell of data.cells) { debug("Processing cell for serialization", { id: cell.metadata?.id, kind: cell.kind }); diff --git a/src/test/suite/schemaLadder.test.ts b/src/test/suite/schemaLadder.test.ts new file mode 100644 index 000000000..24d959438 --- /dev/null +++ b/src/test/suite/schemaLadder.test.ts @@ -0,0 +1,137 @@ +import * as assert from "assert"; +import { bringNotebookToCurrent, CURRENT_SCHEMA_VERSION, getSchemaVersion } from "../../projectManager/utils/schema"; +import { resolveCodexCustomMerge } from "../../projectManager/utils/merge/resolvers"; +import { EditType, CodexCellTypes } from "../../../types/enums"; + +const ctx = { author: "test-author" }; + +const buildV0Notebook = () => ({ + cells: [ + // Modern shape — has editMap already, no transform needed. + { + kind: 2, + languageId: "html", + value: "Hello world", + metadata: { + id: "cell-text-1", + type: CodexCellTypes.TEXT, + edits: [ + { + editMap: ["value"], + value: "Hello world", + timestamp: 1_000, + type: EditType.INITIAL_IMPORT, + author: "alice", + }, + ], + }, + }, + // Legacy shape: edit has cellValue + no editMap. Should be rewritten by v0 → v1. + { + kind: 2, + languageId: "html", + value: "Legacy text", + metadata: { + id: "cell-text-legacy", + type: CodexCellTypes.TEXT, + edits: [ + { + cellValue: "Legacy text", + timestamp: 2_000, + type: EditType.USER_EDIT, + author: "bob", + } as any, + ], + }, + }, + // Milestone with empty edits — must be left alone. 
+ { + kind: 2, + languageId: "html", + value: "1 John 1", + metadata: { + id: "cell-milestone", + type: CodexCellTypes.MILESTONE, + edits: [], + }, + }, + ], + metadata: { + id: "notebook-test", + edits: [], + }, +}); + +suite("schema ladder", () => { + test("v0 → current: stamps version, rewrites legacy cellValue, leaves modern shape and milestones alone", async () => { + const notebook = buildV0Notebook(); + assert.strictEqual(getSchemaVersion(notebook), 0, "starts at v0"); + + const result = await bringNotebookToCurrent(notebook, ctx); + assert.strictEqual(result.migrated, true); + assert.strictEqual(result.from, 0); + assert.strictEqual(result.to, CURRENT_SCHEMA_VERSION); + assert.strictEqual(notebook.metadata.schemaVersion, CURRENT_SCHEMA_VERSION); + + const [textCell, legacyCell, milestoneCell] = notebook.cells; + + // Modern-shape edit: untouched (already had editMap). + const textEdit = textCell.metadata.edits[0] as any; + assert.deepStrictEqual(textEdit.editMap, ["value"]); + assert.strictEqual(textEdit.value, "Hello world"); + + // Legacy-shape edit: cellValue lifted to value/editMap. + const legacyEdit = legacyCell.metadata.edits[0] as any; + assert.deepStrictEqual(legacyEdit.editMap, ["value"], "legacy cellValue lifted to editMap=['value']"); + assert.strictEqual(legacyEdit.value, "Legacy text", "legacy value preserved"); + assert.strictEqual(legacyEdit.cellValue, undefined, "legacy cellValue field removed"); + + // Milestone: untouched. 
+ assert.deepStrictEqual(milestoneCell.metadata.edits, [], "milestone untouched"); + assert.strictEqual(milestoneCell.value, "1 John 1"); + }); + + test("idempotent: a notebook already at current schema is byte-identical after a second pass", async () => { + const notebook = buildV0Notebook(); + await bringNotebookToCurrent(notebook, ctx); + + const before = JSON.stringify(notebook); + const second = await bringNotebookToCurrent(notebook, ctx); + const after = JSON.stringify(notebook); + + assert.strictEqual(second.migrated, false, "second pass reports migrated: false"); + assert.strictEqual(before, after, "no fields mutated on the second pass"); + }); + + test("ahead-of-client: notebook with future version is left alone and reported", async () => { + const future = { metadata: { schemaVersion: CURRENT_SCHEMA_VERSION + 99 }, cells: [] }; + const before = JSON.stringify(future); + const result = await bringNotebookToCurrent(future, ctx); + const after = JSON.stringify(future); + + assert.strictEqual(result.migrated, false); + assert.strictEqual(result.aheadOfClient, true); + assert.strictEqual(before, after, "future-version notebook untouched"); + }); + + test("resolveCodexCustomMerge: v0 inputs produce a current-version output", async () => { + const ours = JSON.stringify(buildV0Notebook()); + const theirs = JSON.stringify(buildV0Notebook()); + + const merged = await resolveCodexCustomMerge(ours, theirs); + const mergedNotebook = JSON.parse(merged); + assert.strictEqual( + mergedNotebook.metadata.schemaVersion, + CURRENT_SCHEMA_VERSION, + "merge output is stamped at the current schema version" + ); + + // The legacy edit on both sides should have been rewritten before the merge ran. 
+ const legacyCell = mergedNotebook.cells.find((c: any) => c.metadata?.id === "cell-text-legacy"); + const legacyEdit = legacyCell.metadata.edits.find( + (e: any) => Array.isArray(e.editMap) && e.editMap[0] === "value" + ); + assert.ok(legacyEdit, "legacy edit was lifted into editMap form"); + assert.strictEqual(legacyEdit.cellValue, undefined, "legacy cellValue field gone"); + }); +}); diff --git a/types/index.d.ts b/types/index.d.ts index 5271e2902..5187a945f 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -755,6 +755,13 @@ export type CustomNotebookCellData = Omit & export interface CustomNotebookMetadata { id: string; + /** + * On-disk schema version for this notebook. Files predating the schema-versioning + * system are treated as v0. The merge resolver, save serializer, post-sync hook, + * and activation-time normalization pass all run a shared migration ladder to + * bring notebooks up to `CURRENT_SCHEMA_VERSION` before any merge or render. + */ + schemaVersion?: number; textDirection?: "ltr" | "rtl"; textDirectionSource?: "global" | "local"; // Track whether text direction was set globally or locally perf?: any; From 2345d6f8f7bb95f4e9075d3e18d37afe4aac8deb Mon Sep 17 00:00:00 2001 From: Ben Scholtens Date: Thu, 30 Apr 2026 13:22:34 -0400 Subject: [PATCH 2/4] Tighten types in schema migration code Replaces the `any`-typed `notebook`/`edit`/`cell` parameters in the schema ladder with structural interfaces co-located in `src/projectManager/utils/schema/index.ts`: - `SchemaNotebook` / `SchemaNotebookMetadata` - `SchemaCell` / `SchemaCellMetadata` - `SchemaEdit` (with `validatedBy: ValidationEntry[]`) Each interface has explicit fields the ladder reads/writes plus an `[key: string]: unknown` index signature so unrelated fields a future migration step might add (e.g. `generationId`) round-trip untouched. `SchemaMigration`, `getSchemaVersion`, `bringNotebookToCurrent`, and `bringNotebookToCurrentForFile` all accept `SchemaNotebook` directly. 
No runtime behavior change. Made-with: Cursor --- src/projectManager/utils/schema/file.ts | 5 +- src/projectManager/utils/schema/index.ts | 78 ++++++++++++++++++- .../utils/schema/migrations/v0_to_v1.ts | 4 +- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/projectManager/utils/schema/file.ts b/src/projectManager/utils/schema/file.ts index 18abdfb47..1c6d06b68 100644 --- a/src/projectManager/utils/schema/file.ts +++ b/src/projectManager/utils/schema/file.ts @@ -5,6 +5,7 @@ import { bringNotebookToCurrent, BringToCurrentResult, SchemaMigrationContext, + SchemaNotebook, } from "./index"; /** @@ -19,11 +20,11 @@ import { export async function bringNotebookToCurrentForFile( uri: vscode.Uri, ctx: SchemaMigrationContext -): Promise { +): Promise { try { const data = await vscode.workspace.fs.readFile(uri); const text = new TextDecoder().decode(data); - const notebook = JSON.parse(text); + const notebook: SchemaNotebook = JSON.parse(text); const result = await bringNotebookToCurrent(notebook, ctx); if (result.migrated) { diff --git a/src/projectManager/utils/schema/index.ts b/src/projectManager/utils/schema/index.ts index f198f1681..9b22181df 100644 --- a/src/projectManager/utils/schema/index.ts +++ b/src/projectManager/utils/schema/index.ts @@ -1,3 +1,4 @@ +import type { ValidationEntry } from "../../../../types"; import { migrate_v0_to_v1 } from "./migrations/v0_to_v1"; /** @@ -16,6 +17,75 @@ import { migrate_v0_to_v1 } from "./migrations/v0_to_v1"; export const CURRENT_SCHEMA_VERSION = 1; +/* ── Structural notebook types used by the ladder ───────────────────────────── */ + +/** + * Edit-history entry as it might appear on disk. + * + * The schema ladder operates on un-normalized notebooks — files may be at v0, + * v1, or future shapes. This interface is intentionally a structural superset + * that covers every variant the ladder needs to read or rewrite, with an index + * signature so unrelated fields a future step might add (e.g. 
`generationId`) + * round-trip untouched. + */ +export interface SchemaEdit { + /** Modern: deterministic SHA-256 / UUID id. */ + id?: string; + /** Modern: path into the cell/metadata tree the edit applies to. */ + editMap?: readonly string[]; + value?: unknown; + /** + * Legacy (pre-editMap): the value lived here without an editMap. v0 → v1 + * rewrites this into `value` + `editMap = ["value"]`. + */ + cellValue?: unknown; + timestamp?: number; + type?: string; + author?: string; + /** Preview-only edits (e.g. LLM previews) aren't applied to cell.value. */ + preview?: boolean; + validatedBy?: ValidationEntry[]; + /** Pass-through for fields a future ladder step might add. */ + [key: string]: unknown; +} + +export interface SchemaCellMetadata { + id?: string; + type?: string; + /** Optional pointer at the edit whose value matches `cell.value`. */ + activeEditId?: string; + edits?: SchemaEdit[]; + [key: string]: unknown; +} + +export interface SchemaCell { + kind?: number; + languageId?: string; + /** + * Modern: a string. Future ladder steps may flatten/transform other shapes + * back to a string here. + */ + value?: unknown; + metadata?: SchemaCellMetadata; + [key: string]: unknown; +} + +export interface SchemaNotebookMetadata { + /** On-disk schema version. Missing → 0. */ + schemaVersion?: number; + /** File-level edits (e.g. metadata.fontSize edits). */ + edits?: SchemaEdit[]; + [key: string]: unknown; +} + +export interface SchemaNotebook { + cells?: SchemaCell[]; + metadata?: SchemaNotebookMetadata; + [key: string]: unknown; +} + +/* ── Ladder ────────────────────────────────────────────────────────────────── */ + /** Context passed to every ladder step (e.g. for deterministic id generation). */ export interface SchemaMigrationContext { /** Username to attribute synthesized edits to when the source edit lacks an author. 
*/ @@ -23,7 +93,7 @@ export interface SchemaMigrationContext { } export type SchemaMigration = ( - notebook: any, + notebook: SchemaNotebook, ctx: SchemaMigrationContext ) => Promise<void> | void; @@ -36,8 +106,8 @@ const migrations: Record<number, SchemaMigration> = { }; /** Reads `metadata.schemaVersion` defensively; missing/non-numeric → 0. */ -export function getSchemaVersion(notebook: any): number { - const raw = notebook?.metadata?.schemaVersion; +export function getSchemaVersion(notebook: SchemaNotebook): number { + const raw = notebook.metadata?.schemaVersion; const n = typeof raw === "number" ? raw : Number(raw); return Number.isFinite(n) && n >= 0 ? n : 0; } @@ -63,7 +133,7 @@ export interface BringToCurrentResult { * We never downgrade — there's no inverse ladder. */ export async function bringNotebookToCurrent( - notebook: any, + notebook: SchemaNotebook, ctx: SchemaMigrationContext ): Promise<BringToCurrentResult> { const from = getSchemaVersion(notebook); diff --git a/src/projectManager/utils/schema/migrations/v0_to_v1.ts b/src/projectManager/utils/schema/migrations/v0_to_v1.ts index 7280357cb..33738c6e3 100644 --- a/src/projectManager/utils/schema/migrations/v0_to_v1.ts +++ b/src/projectManager/utils/schema/migrations/v0_to_v1.ts @@ -12,11 +12,11 @@ import type { SchemaMigration } from "../index"; * (e.g. v1 → v2) on top of this baseline. */ export const migrate_v0_to_v1: SchemaMigration = (notebook) => { - for (const cell of (notebook.cells || [])) { + for (const cell of notebook.cells ??
[]) { const edits = cell.metadata?.edits; if (!Array.isArray(edits) || edits.length === 0) continue; - for (const edit of edits as any[]) { + for (const edit of edits) { if (edit.cellValue !== undefined && !edit.editMap) { edit.value = edit.cellValue; edit.editMap = ["value"]; From 91152cd46bdbd7f105e335e21783cffb8392b52e Mon Sep 17 00:00:00 2001 From: Ben Scholtens Date: Thu, 30 Apr 2026 13:45:56 -0400 Subject: [PATCH 3/4] Document schema-type layer boundary; type the resolver bridge - Add `src/projectManager/utils/schema/README.md` describing the three type layers (pre-ladder SchemaNotebook, ladder internals, post-ladder canonical CodexNotebookAsJSONData) and where each one is the right call. Includes a "how to add a new version" recipe so future contributors don't have to reverse-engineer the wiring. - In `resolveCodexCustomMerge`, type the parsed-JSON locals as `SchemaNotebook` (pre-ladder) and explicitly cast through `unknown` to the canonical `CustomNotebookCellData[]` / `CustomNotebookMetadata` after `bringNotebookToCurrent` returns. This makes the layer transition visible in code and matches the boundary the README describes. No runtime behavior change. 
Made-with: Cursor --- src/projectManager/utils/merge/resolvers.ts | 25 +++-- src/projectManager/utils/schema/README.md | 100 ++++++++++++++++++++ 2 files changed, 118 insertions(+), 7 deletions(-) create mode 100644 src/projectManager/utils/schema/README.md diff --git a/src/projectManager/utils/merge/resolvers.ts b/src/projectManager/utils/merge/resolvers.ts index 3286fb3b9..cd09a2591 100644 --- a/src/projectManager/utils/merge/resolvers.ts +++ b/src/projectManager/utils/merge/resolvers.ts @@ -16,7 +16,7 @@ import { EditHistory, ValidationEntry, FileEditHistory, ProjectEditHistory, Proj import { EditMapUtils, deduplicateFileMetadataEdits } from "../../../utils/editMapUtils"; import { normalizeAttachmentUrl } from "@/utils/pathUtils"; import { formatJsonForNotebookFile } from "../../../utils/notebookFileFormattingUtils"; -import { bringNotebookToCurrent, CURRENT_SCHEMA_VERSION } from "../schema"; +import { bringNotebookToCurrent, CURRENT_SCHEMA_VERSION, SchemaNotebook } from "../schema"; import { ORPHANED_PROJECT_FILES } from "../../../utils/fileUtils"; import { buildCellPositionContextMap, @@ -1081,8 +1081,12 @@ export async function resolveCodexCustomMerge( } debugLog("Parsing notebook content"); - const ourNotebook = JSON.parse(ourContent); - const theirNotebook = JSON.parse(theirContent); + // Pre-ladder: shape is "anything that might be on disk" (could be v0/v1/legacy). + // We type as SchemaNotebook here so the migration ladder sees the structural + // superset; once `bringNotebookToCurrent` returns we know the notebook is at + // CURRENT_SCHEMA_VERSION and we narrow to the canonical types for the merge. + const ourNotebook: SchemaNotebook = JSON.parse(ourContent); + const theirNotebook: SchemaNotebook = JSON.parse(theirContent); // Bring both sides up to CURRENT_SCHEMA_VERSION before merging so the merge // logic only ever sees one shape. 
Files already at the current version @@ -1093,12 +1097,19 @@ export async function resolveCodexCustomMerge( await bringNotebookToCurrent(ourNotebook, { author: mergeAuthorForLadder }); await bringNotebookToCurrent(theirNotebook, { author: mergeAuthorForLadder }); - const ourCells: CustomNotebookCellData[] = ourNotebook.cells; - const theirCells: CustomNotebookCellData[] = theirNotebook.cells; + // Post-ladder: the notebooks now match CURRENT_SCHEMA_VERSION, so the rest of + // the merge can read them through the canonical types. The casts are honest — + // the ladder runtime-validates the shape; TypeScript just doesn't see that. + // We route through `unknown` because the canonical types are stricter than + // the structural SchemaNotebook (e.g. CustomNotebookMetadata requires `id`). + const ourCells = (ourNotebook.cells ?? []) as unknown as CustomNotebookCellData[]; + const theirCells = (theirNotebook.cells ?? []) as unknown as CustomNotebookCellData[]; // Extract and merge file-level metadata - const ourMetadata: CustomNotebookMetadata = ourNotebook.metadata || {}; - const theirMetadata: CustomNotebookMetadata = theirNotebook.metadata || {}; + const ourMetadata: CustomNotebookMetadata = + (ourNotebook.metadata as unknown as CustomNotebookMetadata) || ({} as CustomNotebookMetadata); + const theirMetadata: CustomNotebookMetadata = + (theirNotebook.metadata as unknown as CustomNotebookMetadata) || ({} as CustomNotebookMetadata); // Initialize edits arrays if they don't exist if (!ourMetadata.edits) { diff --git a/src/projectManager/utils/schema/README.md b/src/projectManager/utils/schema/README.md new file mode 100644 index 000000000..ec9843869 --- /dev/null +++ b/src/projectManager/utils/schema/README.md @@ -0,0 +1,100 @@ +# Notebook schema versioning + +This module owns the on-disk schema for `.codex` and `.source` notebooks. +Every file carries `metadata.schemaVersion: number`; files that predate the +versioning system are treated as **v0**. 
A migration ladder brings any +older notebook up to `CURRENT_SCHEMA_VERSION` before it's merged or +rendered. + +``` +disk file (any version) ──► bringNotebookToCurrent() ──► canonical shape + (SchemaNotebook) (CodexNotebookAsJSONData) +``` + +## When to import these types — the layer boundary + +> **The schema types describe what's on disk. The canonical types describe +> what code expects. The ladder is the only function that turns one into +> the other.** + +There are three layers, each with its own type vocabulary: + +| Layer | What it represents | Use these types | +|-------|--------------------|-----------------| +| **Pre-ladder** (raw JSON, unknown version) | A notebook just parsed off disk that might be v0, v1, or some legacy shape | `SchemaNotebook` / `SchemaCell` / `SchemaCellMetadata` / `SchemaEdit` (this folder) | +| **Ladder internals** | The migration steps themselves | `SchemaMigration` (this folder) | +| **Post-ladder** (canonical, normalized) | A notebook the rest of the codebase reads/writes — guaranteed at `CURRENT_SCHEMA_VERSION` | `CodexNotebookAsJSONData` / `CustomNotebookCellData` / `CustomNotebookMetadata` / `EditHistory` (from `types/index.d.ts`) | + +The hand-off happens whenever `bringNotebookToCurrent` returns. After +that point the notebook conforms to the canonical types. + +### Where the schema types are the right call + +Only inside the migration boundary: + +- This folder (`src/projectManager/utils/schema/`). +- Call sites that **parse raw JSON and then run the ladder** before + handing the data to the rest of the system. The merge resolver + (`src/projectManager/utils/merge/resolvers.ts`) is the canonical + example — it parses both branches as `SchemaNotebook`, runs + `bringNotebookToCurrent` on each, then casts to the canonical types + for the actual merge. +- Tests that synthesize "old-shaped" notebook fixtures. 
+ +### Where the schema types are *not* the right call + +- Anywhere a notebook has already been normalized — the editor at + runtime, webview message handlers, the serializer, `CodexCellDocument`, + etc. There the contract is "this is the current schema"; using + `SchemaNotebook` would relax that contract and force redundant + null-checks for fields the canonical types guarantee. +- Webviews. They never see un-normalized data; the extension host is + the boundary. +- Re-exports from `types/index.d.ts`. Keeping the schema types + co-located here keeps the boundary obvious. If you find yourself + reaching for `SchemaNotebook` outside the ladder, you almost + certainly want a canonical type instead. + +## Adding a new schema version + +When the on-disk shape changes: + +1. Create `migrations/v<N>_to_v<N+1>.ts` exporting a `SchemaMigration` + that mutates the parsed notebook in place. The function should + never invent state that didn't exist on disk (e.g. don't synthesize + edit history for cells that arrived with `edits: []`). +2. Register it in `index.ts`: + + ```ts + const migrations: Record<number, SchemaMigration> = { + 1: migrate_v0_to_v1, + // ... + N + 1: migrate_v<N>_to_v<N+1>, + }; + ``` + +3. Bump `CURRENT_SCHEMA_VERSION` in `index.ts`. +4. Add a test in `src/test/suite/schemaLadder.test.ts` covering the new + step in isolation and the full v0 → current walk. + +The ladder runs in four places automatically: + +- **Activation** (`migration_normalizeAllNotebooksToCurrentSchema`): + scans every notebook on extension start. +- **Save** (`src/serializer.ts`): stamps `schemaVersion` on every write. +- **Merge** (`resolveCodexCustomMerge`): brings both ours and theirs to + current before merging. +- **Post-sync** (`SyncManager.executeSyncInBackground`): walks the + files touched by the latest sync. + +You don't need to wire your new step into any of those — registering +it in the migrations map is enough. + +## Forward-compat + +If a notebook arrives with `schemaVersion > CURRENT_SCHEMA_VERSION` +(e.g.
a teammate on a newer build pushed it), `bringNotebookToCurrent` +logs a warning and returns `aheadOfClient: true` without touching the +file. The merge resolver still runs best-effort — unknown fields pass +through opaquely, but the activation pass and the post-sync hook leave +the file alone. There is no inverse ladder; we never downgrade. From 8d6aab3575229902d396d71ad4a1f921e65d1ae1 Mon Sep 17 00:00:00 2001 From: Ben Scholtens Date: Thu, 30 Apr 2026 14:13:48 -0400 Subject: [PATCH 4/4] Add cursor rule for notebook schema types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the layer-boundary doc in src/projectManager/utils/schema/README.md as a Cursor rule so the AI agent reaches for the right type family by default — SchemaNotebook inside the migration boundary, canonical types (CodexNotebookAsJSONData / CustomNotebookCellData / etc.) everywhere else. Same shape as the existing types.mdc. Made-with: Cursor --- .cursor/rules/notebook-schema-types.mdc | 59 +++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 .cursor/rules/notebook-schema-types.mdc diff --git a/.cursor/rules/notebook-schema-types.mdc b/.cursor/rules/notebook-schema-types.mdc new file mode 100644 index 000000000..9971c7a7f --- /dev/null +++ b/.cursor/rules/notebook-schema-types.mdc @@ -0,0 +1,59 @@ +--- +description: Notebook schema types — which type family to use, and where the migration-ladder boundary lives. +globs: +alwaysApply: true +--- + +# Notebook schema types: layer boundary + +The on-disk shape of `.codex` and `.source` notebooks is owned by +[src/projectManager/utils/schema/](mdc:src/projectManager/utils/schema/index.ts) and +brought up to `CURRENT_SCHEMA_VERSION` by `bringNotebookToCurrent()`. There +are **two type families** and the migration ladder is the only function +that turns one into the other. 
+ +## Pre-ladder — `SchemaNotebook` and friends + +Use the structural types from [src/projectManager/utils/schema/index.ts](mdc:src/projectManager/utils/schema/index.ts): +`SchemaNotebook`, `SchemaCell`, `SchemaCellMetadata`, `SchemaEdit`. + +They describe **anything that might be on disk** (v0/v1/legacy +shapes), so they're permissive: optional fields, `unknown` values, +`[key: string]: unknown` index signatures. Use them **only** when: + +- Writing a migration step in [src/projectManager/utils/schema/migrations/](mdc:src/projectManager/utils/schema/migrations/). +- Parsing raw JSON immediately before calling `bringNotebookToCurrent`. +- Synthesizing "old-shaped" fixtures in tests. + +The merge resolver is the canonical example: + +```typescript +const ourNotebook: SchemaNotebook = JSON.parse(ourContent); +await bringNotebookToCurrent(ourNotebook, { author }); +// After the ladder returns, narrow to canonical types: +const ourCells = (ourNotebook.cells ?? []) as unknown as CustomNotebookCellData[]; +``` + +## Post-ladder / runtime — canonical types + +Everywhere else — editor at runtime, webviews, serializer downstream of +save, `CodexCellDocument`, message handlers, indexers, exporters — use +the canonical types from [types/index.d.ts](mdc:types/index.d.ts): +`CodexNotebookAsJSONData`, `CustomNotebookCellData`, +`CustomNotebookMetadata`, `EditHistory`. + +If you find yourself reaching for `SchemaNotebook` outside the migration +boundary, you almost certainly want a canonical type instead. Webviews +never see un-normalized data; the extension host is the boundary. + +Don't re-export the schema types from `types/index.d.ts` — keeping them +co-located in `schema/` keeps the boundary obvious. + +## Adding a new schema version + +See [src/projectManager/utils/schema/README.md](mdc:src/projectManager/utils/schema/README.md) +for the full recipe. 
Short version: drop `v<N>_to_v<N+1>.ts` into +`schema/migrations/`, register it in the `migrations` map in +`schema/index.ts`, bump `CURRENT_SCHEMA_VERSION`, and extend +[src/test/suite/schemaLadder.test.ts](mdc:src/test/suite/schemaLadder.test.ts). +The ladder runs automatically at activation, save, merge, and post-sync.