diff --git a/src/exportHandler/audioAttachmentUtils.ts b/src/exportHandler/audioAttachmentUtils.ts new file mode 100644 index 000000000..e67cbb0ec --- /dev/null +++ b/src/exportHandler/audioAttachmentUtils.ts @@ -0,0 +1,183 @@ +/** + * Shared audio-attachment helpers used by both the export pipeline + * (`audioExporter.ts`) and the export view's pre-flight scan + * (`projectManager/utils/exportViewUtils.ts`). + * + * Centralizing the predicate guarantees the Step 1 inline counts can never + * disagree with the actual export behavior, since both paths consult the same + * function. + */ + +export interface AudioAttachmentCandidate { + id: string; + url: string; + updatedAt?: number; + start?: number; + end?: number; +} + +export interface AudioPick { + id: string; + url: string; + start?: number; + end?: number; +} + +export type CellAudioState = + | "ready" + | "selection-missing" + | "none-selected" + | "none"; + +export interface AudioPickOutcome { + /** + * `ready` — `selectedAudioId` matched a valid candidate; export this take. + * `selection-missing` — `selectedAudioId` was set but the referenced + * attachment is gone (deleted / missing / unknown). We refuse + * to silently substitute another take. The user needs to pick + * again or re-record. + * `none-selected` — no `selectedAudioId` is set, but at least one + * non-deleted, non-missing audio take exists. We refuse to + * auto-pick — the user must explicitly choose a take. + * `none` — the cell has no usable audio attachments at all. + */ + state: CellAudioState; + /** Populated only when `state === "ready"`. */ + pick?: AudioPick; +} + +/** + * Walks a cell's audio attachments and decides whether an exportable take + * exists. We refuse to silently fall back to a different recording when the + * user's selected take is missing — that would export audio they never + * validated. + */ +export function pickAudioAttachment(cell: unknown): AudioPickOutcome { + const meta = (cell as { metadata?: Record } | undefined)?.metadata; + if (!meta || typeof meta !== "object") return { state: "none" }; + + const attachments = (meta as { attachments?: Record }).attachments; + if (!attachments || typeof attachments !== "object") return { state: "none" }; + + const selectedId = + typeof (meta as { selectedAudioId?: unknown }).selectedAudioId === "string" + ? ((meta as { selectedAudioId?: string }).selectedAudioId as string) + : undefined; + + const candidates: AudioAttachmentCandidate[] = []; + for (const [attId, attVal] of Object.entries(attachments)) { + if (!attVal || typeof attVal !== "object") continue; + const att = attVal as { + type?: string; + isDeleted?: boolean; + isMissing?: boolean; + url?: string; + updatedAt?: number; + startTime?: number; + endTime?: number; + }; + if (att.type !== "audio") continue; + if (att.isDeleted) continue; + if (!att.url || typeof att.url !== "string") continue; + // Note: we deliberately ignore `isMissing` here. The flag is a stale + // hint from the last migration scan; the resolution path (playback + // or `resolveAudioBytes` in audioExporter.ts) attempts the fetch + // end-to-end at access time. If it fails, the caller surfaces the + // failure as `audio-file-missing` then. + candidates.push({ + id: attId, + url: att.url, + updatedAt: att.updatedAt, + start: att.startTime, + end: att.endTime, + }); + } + + if (candidates.length === 0) return { state: "none" }; + + if (selectedId) { + const selected = candidates.find((c) => c.id === selectedId); + if (selected) { + return { + state: "ready", + pick: { + id: selected.id, + url: selected.url, + start: selected.start, + end: selected.end, + }, + }; + } + // selectedAudioId set but the referenced take is gone — surface as + // selection-missing rather than silently substituting an unapproved take. + return { state: "selection-missing" }; + } + + // No explicit selection but valid takes exist. We refuse to auto-pick — + // the user has to explicitly choose a take before export. + return { state: "none-selected" }; +} + +/** + * Quick categorical state for a cell's audio readiness. Used by the Step 1 + * pre-flight to count cells without doing the full attachment pick. + */ +export function getCellAudioState(cell: unknown): CellAudioState { + return pickAudioAttachment(cell).state; +} + +/** + * Returns true when a cell is an audio recording target. Mirrors the predicate + * used by `computeDialogueLineNumbers` in `audioExporter.ts` so chapter-start + * milestones and paratext (book intros, headings, etc.) are never counted as + * "missing audio" — users don't record audio for those. + */ +export function isExportableCell(cell: unknown): boolean { + const c = cell as { + kind?: number; + metadata?: { + type?: string; + data?: { merged?: boolean; deleted?: boolean; }; + }; + } | undefined; + if (!c) return false; + if (c.kind !== 2 && c.kind !== 1) return false; + const data = c.metadata?.data; + if (data?.merged) return false; + if (data?.deleted) return false; + const type = c.metadata?.type; + if (type === "paratext" || type === "milestone") return false; + return true; +} + +/** + * Returns true when we can produce a meaningful, human-readable identifier + * for this cell in the export progress UI — either via globalReferences + * (Bible), a user-set cellLabel, or non-empty text content. Cells that fail + * this check are omitted from missing-audio reporting because the user has + * no way to act on a row labelled with an opaque UUID or line number. + */ +export function isLabelableCell(cell: unknown): boolean { + const c = cell as { + value?: unknown; + metadata?: { + cellLabel?: unknown; + data?: { globalReferences?: unknown }; + }; + } | undefined; + if (!c) return false; + + const globalRefs = c.metadata?.data?.globalReferences; + if (Array.isArray(globalRefs) && globalRefs.length > 0) { + const first = globalRefs[0]; + if (typeof first === "string" && first.trim()) return true; + } + + const cellLabel = c.metadata?.cellLabel; + if (typeof cellLabel === "string" && cellLabel.trim()) return true; + + const raw = typeof c.value === "string" ? c.value : ""; + if (raw && raw.replace(/<[^>]+>/g, "").trim()) return true; + + return false; +} diff --git a/src/exportHandler/audioExporter.ts b/src/exportHandler/audioExporter.ts index 78dd82571..edf094593 100644 --- a/src/exportHandler/audioExporter.ts +++ b/src/exportHandler/audioExporter.ts @@ -6,6 +6,12 @@ import { promisify } from "util"; import * as os from "os"; import * as fs from "fs"; import { getFFmpegPath } from "../utils/ffmpegManager"; +import { isLfsPointerContent, parsePointerContent } from "../utils/lfsHelpers"; +import { getCachedLfsBytes, setCachedLfsBytes } from "../utils/mediaCache"; +import { getMediaFilesStrategy } from "../utils/localProjectSettings"; +import type { ExportProgressReporter, ExportMissingReason } from "./exportProgress"; +import { pickAudioAttachment, isExportableCell, type AudioPick, type AudioPickOutcome } from "./audioAttachmentUtils"; +import { formatCellDisplayLabel } from "./cellLabelUtils"; import { CodexCellTypes } from "../../types/enums"; const execAsync = promisify(exec); @@ -42,7 +48,13 @@ function sanitizeFileComponent(input: string): string { .replace(/_+/g, "_"); } -// REMOVE: This doesn't seem to be used anywhere +function sanitizeFolderName(input: string): string { + return input + .replace(/[<>:"/\\|?*]/g, "") + .replace(/\s+/g, " ") + .trim(); +} + /** * Parses a cell reference ID (from globalReferences) to extract book, chapter, and verse. * Falls back to parsing cellId if globalReferences not available (legacy support). @@ -79,41 +91,23 @@ function parseCellIdToBookChapterVerse(cell: any, cellId: string): { book: strin } } -// REMOVE: This doesn't seem to be used anywhere -function toBookChapterVerseBasename(cell: any, cellId: string): string { - const { book, chapter, verse } = parseCellIdToBookChapterVerse(cell, cellId); - const safePad = (n: number | undefined) => (typeof n === "number" && Number.isFinite(n) ? String(n) : "0").padStart(3, "0"); - const chapStr = safePad(chapter); - const verseStr = safePad(verse); - return sanitizeFileComponent(`${book}_${chapStr}_${verseStr}`); +/** + * Builds the chapter/verse segment for an export filename. + * Returns e.g. "C1_V25" when both are available, "C1" for chapter only, or "" if neither. + */ +function formatChapterVerseSuffix(chapter?: number, verse?: number): string { + if (chapter !== undefined && Number.isFinite(chapter)) { + if (verse !== undefined && Number.isFinite(verse)) { + return `C${chapter}_V${verse}`; + } + return `C${chapter}`; + } + return ""; } -// REMOVE: This doesn't seem to be used anywhere -function formatTimeRangeSuffix(start?: number, end?: number): string { - if (start === undefined && end === undefined) return ""; - const coerce = (v: any): number | undefined => { - if (v === undefined || v === null) return undefined; - const num = typeof v === "number" ? v : Number(v); - if (!Number.isFinite(num)) return undefined; - return num; - }; - const fmt = (v: number | undefined) => { - if (v === undefined) return ""; - // Truncate to milliseconds (no rounding up) and format like SRT/VTT but filename-safe: HH-MM-SS_mmm - const totalMs = Math.floor(v * 1000); - const hours = Math.floor(totalMs / 3600000); - const minutes = Math.floor((totalMs % 3600000) / 60000); - const seconds = Math.floor((totalMs % 60000) / 1000); - const millis = totalMs % 1000; - const pad2 = (n: number) => String(n).padStart(2, "0"); - const pad3 = (n: number) => String(n).padStart(3, "0"); - return `${pad2(hours)}-${pad2(minutes)}-${pad2(seconds)}_${pad3(millis)}`; - }; - const s = fmt(coerce(start)); - const e = fmt(coerce(end)); - if (!s && !e) return ""; - return `_${s || ""}-${e || ""}`; -} +// `formatCellDisplayLabel` and `extractCellTextSnippet` were extracted to +// `./cellLabelUtils.ts` so the export wizard's pre-flight scan can reuse the +// same identifiers — see that file for the rules and rationale. function getTargetLanguageCode(): string { const projectConfig = vscode.workspace.getConfiguration("codex-project-manager"); @@ -122,6 +116,42 @@ function getTargetLanguageCode(): string { return sanitizeFileComponent(String(code).toLowerCase()); } +/** + * Builds a mapping from cell ID to its milestone folder name. + * Folder names follow the pattern "N - milestone name" (e.g. "1 - Genesis 1"). + * If the milestone value is purely numeric, the folder is just the sequential number (e.g. "1"). + */ +function buildCellMilestoneMap(cells: CodexNotebookAsJSONData["cells"]): Map { + const map = new Map(); + let milestoneSeq = 0; + let currentFolderName: string | null = null; + + for (const cell of cells) { + const isMilestone = cell?.metadata?.type === "milestone"; + const data = cell?.metadata?.data; + const isDeleted = !!(data && data.deleted); + + if (isMilestone && !isDeleted) { + milestoneSeq++; + const milestoneValue = typeof cell?.value === "string" ? cell.value.trim() : ""; + const isNumericOnly = /^\d+$/.test(milestoneValue); + currentFolderName = isNumericOnly || !milestoneValue + ? `${milestoneSeq}` + : `${milestoneSeq} - ${milestoneValue}`; + continue; + } + + if (!currentFolderName) continue; + + const cellId: string | undefined = cell?.metadata?.id; + if (cellId) { + map.set(cellId, currentFolderName); + } + } + + return map; +} + function computeDialogueLineNumbers( cells: CodexNotebookAsJSONData["cells"] ): Map { @@ -372,8 +402,9 @@ async function convertToWav( throw new Error("FFmpeg not available"); } const tempDir = os.tmpdir(); - const tempInputPath = `${tempDir}/codex-audio-input-${Date.now()}${originalExt}`; - const tempOutputPath = `${tempDir}/codex-audio-output-${Date.now()}.wav`; + const uniqueId = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + const tempInputPath = `${tempDir}/codex-audio-input-${uniqueId}${originalExt}`; + const tempOutputPath = `${tempDir}/codex-audio-output-${uniqueId}.wav`; try { fs.writeFileSync(tempInputPath, Buffer.from(inputBytes)); @@ -437,55 +468,148 @@ async function prepareAudioForExport( return { bytes: original, ext }; } +const EXPORT_CONCURRENCY = 30; + +/** + * Runs async tasks with a sliding-window concurrency pool. + * Keeps exactly `concurrency` tasks active at all times — as soon as one + * finishes, the next pending task starts immediately. + */ +export async function runWithConcurrencyPool( + items: T[], + concurrency: number, + processor: (item: T, index: number) => Promise, + onProgress?: (completed: number, total: number) => void +): Promise>> { + const results: Array> = new Array(items.length); + let nextIndex = 0; + let completedCount = 0; + + const runWorker = async (): Promise => { + let idx = nextIndex++; + while (idx < items.length) { + try { + const value = await processor(items[idx], idx); + results[idx] = { status: "fulfilled", value }; + } catch (reason: any) { + results[idx] = { status: "rejected", reason }; + } + + completedCount++; + onProgress?.(completedCount, items.length); + idx = nextIndex++; + } + }; + + const workerCount = Math.min(concurrency, items.length); + await Promise.all(Array.from({ length: workerCount }, () => runWorker())); + return results; +} + +function predictOutputExt(originalExt: string, includeTimestamps: boolean): string { + if (!includeTimestamps) return originalExt; + const lower = originalExt.toLowerCase(); + if (lower === ".webm" || lower === ".m4a") return ".wav"; + return originalExt; +} + async function readNotebook(uri: vscode.Uri): Promise { const bytes = await vscode.workspace.fs.readFile(uri); return JSON.parse(Buffer.from(bytes).toString()); } -function isActiveCell(cell: any): boolean { - const data = cell?.metadata?.data; - const isMerged = !!(data && data.merged); - const isDeleted = !!(data && data.deleted); - return !isMerged && !isDeleted; +function pickAudioAttachmentForCell(cell: any): AudioPickOutcome { + return pickAudioAttachment(cell); +} + +async function pathExists(uri: vscode.Uri): Promise { + try { await vscode.workspace.fs.stat(uri); return true; } catch { return false; } } -function pickAudioAttachmentForCell(cell: any): { id: string; url: string; start?: number; end?: number; } | null { - const attachments = cell?.metadata?.attachments || {}; - if (!attachments || typeof attachments !== "object") return null; - const selectedId: string | undefined = cell?.metadata?.selectedAudioId; - - const candidates: Array<{ id: string; url: string; updatedAt?: number; start?: number; end?: number; isDeleted?: boolean; isMissing?: boolean; }> - = []; - for (const [attId, attVal] of Object.entries(attachments)) { - if (!attVal || typeof attVal !== "object") continue; - if (attVal.type !== "audio") continue; - if (attVal.isDeleted) continue; - if (attVal.isMissing) continue; - if (!attVal.url || typeof attVal.url !== "string") continue; - candidates.push({ id: attId, url: attVal.url, updatedAt: attVal.updatedAt, start: attVal.startTime, end: attVal.endTime }); +type ResolveResult = + | { data: Uint8Array; error?: undefined; } + | { data?: undefined; error: string; }; + +/** + * Reads audio bytes from disk, resolving LFS pointers on-the-fly via the + * Frontier API when the file is a stub. Falls back to the pointers/ directory + * if the files/ entry doesn't exist at all. + */ +async function resolveAudioBytes( + absoluteSrc: vscode.Uri, + workspaceFolderUri: vscode.Uri, + frontierApi: { downloadLFSFile: (projectPath: string, oid: string, size: number) => Promise; } | null +): Promise { + const projectPath = workspaceFolderUri.fsPath; + + // Helper: download from LFS with cache support + const downloadFromPointer = async (pointerText: string): Promise => { + const pointer = parsePointerContent(pointerText); + if (!pointer) { + return { error: "Invalid LFS pointer format" }; + } + + // Check in-memory cache first + const cached = getCachedLfsBytes(pointer.oid); + if (cached) { + debug("Using cached LFS bytes for export"); + return { data: cached }; + } + + if (!frontierApi) { + return { error: "Frontier API not available — cannot stream audio for export" }; + } + + const lfsData = await frontierApi.downloadLFSFile(projectPath, pointer.oid, pointer.size); + setCachedLfsBytes(pointer.oid, lfsData); + return { data: lfsData }; + }; + + // Try reading the file at absoluteSrc + if (await pathExists(absoluteSrc)) { + const rawBytes = await vscode.workspace.fs.readFile(absoluteSrc); + + if (!isLfsPointerContent(rawBytes)) { + return { data: rawBytes }; + } + + // It's a pointer — resolve via LFS + const pointerText = Buffer.from(rawBytes).toString("utf-8"); + return downloadFromPointer(pointerText); } - if (candidates.length === 0) return null; - if (selectedId) { - const selected = candidates.find(c => c.id === selectedId); - if (selected) return selected; + + // files/ entry doesn't exist — try falling back to pointers/ directory + const fsPath = absoluteSrc.fsPath; + const normalizedPath = fsPath.replace(/\\/g, "/"); + let pointerPath: string | null = null; + + if (normalizedPath.includes("/.project/attachments/files/")) { + pointerPath = normalizedPath.replace("/.project/attachments/files/", "/.project/attachments/pointers/"); + } else if (normalizedPath.includes(".project/attachments/files/")) { + pointerPath = normalizedPath.replace(".project/attachments/files/", ".project/attachments/pointers/"); } - // fallback to most recently updated - candidates.sort((a, b) => (b.updatedAt || 0) - (a.updatedAt || 0)); - return candidates[0]; -} -async function pathExists(uri: vscode.Uri): Promise { - try { await vscode.workspace.fs.stat(uri); return true; } catch { return false; } + if (pointerPath) { + const pointerUri = vscode.Uri.file(pointerPath); + if (await pathExists(pointerUri)) { + const pointerBytes = await vscode.workspace.fs.readFile(pointerUri); + const pointerText = Buffer.from(pointerBytes).toString("utf-8"); + return downloadFromPointer(pointerText); + } + } + + return { error: "Audio file not found" }; } export async function exportAudioAttachments( userSelectedPath: string, filesToExport: string[], + reporter: ExportProgressReporter, options?: ExportAudioOptions ): Promise { const workspaceFolders = vscode.workspace.workspaceFolders; if (!workspaceFolders || workspaceFolders.length === 0) { - vscode.window.showErrorMessage("No project folder found. Please open a project first."); + reporter.error("No project folder found. Please open a project first."); return; } const workspaceFolder = workspaceFolders[0]; @@ -497,146 +621,480 @@ export async function exportAudioAttachments( const selectedFiles = filesToExport.map((p) => vscode.Uri.file(p)); debug(`Files to export: ${filesToExport.length}`, filesToExport); if (selectedFiles.length === 0) { - vscode.window.showInformationMessage("No files selected for export."); + reporter.error("No files selected for export."); return; } - return vscode.window.withProgress( - { - location: vscode.ProgressLocation.Notification, - title: "Exporting Audio Attachments", - cancellable: false, - }, - async (progress) => { - const increment = 100 / selectedFiles.length; - let copiedCount = 0; - let missingCount = 0; - - for (const [index, file] of selectedFiles.entries()) { - progress.report({ message: `Processing ${basename(file.fsPath)} (${index + 1}/${selectedFiles.length})`, increment }); - - const bookCode = basename(file.fsPath).split(".")[0] || "BOOK"; - const bookFolder = vscode.Uri.joinPath(exportDir, sanitizeFileComponent(bookCode)); - await vscode.workspace.fs.createDirectory(bookFolder); - - let notebook: CodexNotebookAsJSONData; - try { - notebook = await readNotebook(file); - debug(`Successfully read notebook: ${file.fsPath}`); - } catch (e) { - debug(`Failed to read notebook: ${file.fsPath}`, e); - missingCount++; - continue; - } + // Determine if we may need to stream audio from LFS + const mediaStrategy = await getMediaFilesStrategy(workspaceFolder.uri); + const mayNeedStreaming = mediaStrategy === "stream-only" || mediaStrategy === "stream-and-save"; - const langCode = getTargetLanguageCode(); - const dialogueMap = computeDialogueLineNumbers(notebook.cells); + // Obtain the Frontier API for LFS downloads (may be null if not available) + let frontierApi: { downloadLFSFile: (projectPath: string, oid: string, size: number) => Promise; } | null = null; + if (mayNeedStreaming) { + // Enforce version gates before attempting any LFS operations + try { + const { ensureAllVersionGatesForMedia } = await import("../utils/versionGate"); + const allowed = await ensureAllVersionGatesForMedia(true); + if (!allowed) { + reporter.error( + "Audio export requires a compatible version of Frontier. Please update and try again." + ); + return; + } + } catch (gateErr) { + debug("Version gate check failed:", gateErr); + } - debug(`Processing notebook with ${notebook.cells.length} cells`); + try { + const { getAuthApi } = await import("../extension"); + const api = getAuthApi(); + if (api?.downloadLFSFile) { + frontierApi = api; + } + } catch { + // Frontier not available — will be handled per-file + } - for (const cell of notebook.cells) { - // Accept both Code cells (kind 2) and Markup cells (kind 1) - consistent with other exporters - if (cell.kind !== 2 && cell.kind !== 1) { - debug(`Skipping cell with kind ${cell.kind}`); - continue; - } - if (cell?.metadata?.type === CodexCellTypes.MILESTONE) { - debug(`Skipping milestone cell: ${cell?.metadata?.id}`); - continue; - } - if (!isActiveCell(cell)) { - debug(`Skipping inactive cell: ${cell?.metadata?.id}`); - continue; - } - const cellId: string | undefined = cell?.metadata?.id; - if (!cellId) { - debug(`Skipping cell with no ID`); - continue; - } + if (!frontierApi) { + reporter.error( + "Cannot export audio in streaming mode: Frontier authentication is not available. " + + "Please ensure you are online and signed in, or switch to Auto Download mode first." + ); + return; + } + } - const pick = pickAudioAttachmentForCell(cell); - if (!pick) { - // Log detailed info about why no audio was found - const attachments = cell?.metadata?.attachments; - if (!attachments || Object.keys(attachments).length === 0) { - debug(`Cell ${cellId}: No attachments found`); - } else { - const attKeys = Object.keys(attachments); - debug(`Cell ${cellId}: Has ${attKeys.length} attachments but none are valid audio:`, - attKeys.map(k => ({ - id: k, - type: attachments[k]?.type, - isDeleted: attachments[k]?.isDeleted, - isMissing: attachments[k]?.isMissing, - hasUrl: !!attachments[k]?.url - })) - ); - } - continue; - } + let copiedCount = 0; + let missingCount = 0; + let streamFailCount = 0; + let notRecordedCount = 0; + let noneSelectedCount = 0; + let selectionMissingCount = 0; + + for (const [index, file] of selectedFiles.entries()) { + reporter.report({ + stage: "processing", + message: `Processing ${basename(file.fsPath)} (${index + 1}/${selectedFiles.length})`, + file: basename(file.fsPath), + current: index + 1, + total: selectedFiles.length, + }); + + const bookCode = basename(file.fsPath).split(".")[0] || "BOOK"; + const bookFolder = vscode.Uri.joinPath(exportDir, sanitizeFileComponent(bookCode)); + await vscode.workspace.fs.createDirectory(bookFolder); + + let notebook: CodexNotebookAsJSONData; + try { + notebook = await readNotebook(file); + debug(`Successfully read notebook: ${file.fsPath}`); + } catch (e) { + debug(`Failed to read notebook: ${file.fsPath}`, e); + missingCount++; + continue; + } - debug(`Cell ${cellId}: Found audio attachment ${pick.id} with URL: ${pick.url}`); + const dialogueMap = computeDialogueLineNumbers(notebook.cells); + debug(`Processing notebook with ${notebook.cells.length} cells`); + + // Build milestone folder mapping: cellId -> milestone folder name + const cellMilestoneFolder = buildCellMilestoneMap(notebook.cells); + + // Count audio cells for per-book progress. Paratext and + // milestone cells (e.g. chapter headers, intros) are not + // recording targets, so they're filtered out by + // `isExportableCell` — they would otherwise show up under + // "no audio recorded" purely as noise. + const audioCells: Array<{ cell: any; cellId: string; pick: AudioPick; }> = []; + for (const cell of notebook.cells) { + if (!isExportableCell(cell)) continue; + const cellId: string | undefined = cell?.metadata?.id; + if (!cellId) continue; + const outcome = pickAudioAttachmentForCell(cell); + if (outcome.state === "ready" && outcome.pick) { + audioCells.push({ cell, cellId, pick: outcome.pick }); + continue; + } + const label = formatCellDisplayLabel(cell, cellId, bookCode); + if (!label) { + // No identifier we can present to the user — omit + // entirely rather than reporting a row they can't act on. + continue; + } + if (outcome.state === "selection-missing") { + // The user explicitly chose a take but the attachment + // is gone (deleted, missing, or unknown). We refuse to + // substitute a different take they never approved. + reporter.fileMissing( + label, + "audio-file-missing", + "The audio file you selected for this cell cannot be found. Open the cell to choose another take or re-record." + ); + selectionMissingCount++; + continue; + } + if (outcome.state === "none-selected") { + // There are valid takes on this cell but the user has + // never picked one (or their previous pick was cleared + // when its take was deleted). We refuse to auto-pick. + reporter.fileMissing( + label, + "no-audio-selected", + "Audio is recorded for this cell but no take has been selected. Open the cell to choose which take to export." + ); + noneSelectedCount++; + continue; + } + // No usable attachment at all — Tier 1 informational. + reporter.fileMissing(label, "no-audio-recorded"); + notRecordedCount++; + } - // Resolve absolute source path (attachment urls are workspace-relative POSIX in this project) - const srcPath = pick.url; - const absoluteSrc = srcPath.startsWith("/") || srcPath.match(/^[A-Za-z]:\\/) - ? vscode.Uri.file(srcPath) - : vscode.Uri.joinPath(workspaceFolder.uri, srcPath); + // Snapshot every audio attachment currently flagged + // `isMissing=true`. If the resolver succeeds for one of them + // below, we'll clear the flag on disk so the next pre-flight + // scan and the audio-history "MISSING" badge converge to + // reality without waiting for the migration scan to re-run. + // + // Why per-file: we mutate `notebook` in memory and write the + // whole `.codex` back if anything changed; doing this once at + // end-of-file (not per attachment) keeps the write count low. + const wasMissingBefore = new Map>(); + for (const cell of notebook.cells) { + const cellId: string | undefined = cell?.metadata?.id; + if (!cellId) continue; + const attachments = (cell?.metadata?.attachments ?? {}) as Record; + for (const [attId, attVal] of Object.entries(attachments)) { + if (attVal?.type !== "audio") continue; + if (attVal?.isMissing !== true) continue; + let set = wasMissingBefore.get(cellId); + if (!set) { + set = new Set(); + wasMissingBefore.set(cellId, set); + } + set.add(attId); + } + } + // Tracks (cellId -> attachmentIds) whose bytes were successfully + // resolved + written during this file's pass. Used after the + // inner loop to decide which `isMissing=true` flags to clear. + const resolvedCells = new Map>(); + + // Phase 1: Pre-compute export tasks with unique destination paths + type AudioExportTask = { + cellId: string; + /** The attachmentId actually picked for this task — used to + * scope the post-export `isMissing` clear so we only touch + * the take that actually resolved. */ + attachmentId: string; + /** + * Human-readable label for the missing-files UI. Null when + * the cell has no identifier we can present (see + * `formatCellDisplayLabel`); in that case the audio is still + * exported but per-cell failure rows are suppressed. + */ + cellLabel: string | null; + absoluteSrc: vscode.Uri; + destUri: vscode.Uri; + targetFolder: vscode.Uri; + originalExt: string; + start?: number; + end?: number; + }; - debug(`Cell ${cellId}: Resolved absolute path: ${absoluteSrc.fsPath}`); + const tasks: AudioExportTask[] = []; + const assignedPaths = new Set(); - if (!(await pathExists(absoluteSrc))) { - debug(`Cell ${cellId}: Audio file does not exist at path: ${absoluteSrc.fsPath}`); - missingCount++; - continue; - } + for (const { cell, cellId, pick } of audioCells) { + const srcPath = pick.url; + const absoluteSrc = srcPath.startsWith("/") || srcPath.match(/^[A-Za-z]:\\/) + ? vscode.Uri.file(srcPath) + : vscode.Uri.joinPath(workspaceFolder.uri, srcPath); - // Build destination filename: __