From b8ce2de88f12f79ff4ff4057f956ebdba7827a22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= Date: Fri, 29 May 2026 15:31:16 +0200 Subject: [PATCH 01/12] Milestone Audio Export Option Third step in the exporter only for audio in bible files to select only specific milestone (chapters) to download during the export. --- .tmp-gen3-swap-snippet.xml | 72 + sharedUtils/milestoneIndexUtils.ts | 91 ++ src/exportHandler/audioExporter.ts | 62 +- src/exportHandler/exportHandler.ts | 10 +- src/projectManager/projectExportView.ts | 316 ++++- .../utils/bibleSwapCompatibility.ts | 249 ++++ src/projectManager/utils/exportViewUtils.ts | 10 +- .../importers/biblica/bibleSwap.test.ts | 495 +++++++ .../importers/biblica/bibleSwap.ts | 1160 +++++++++++++++++ 9 files changed, 2413 insertions(+), 52 deletions(-) create mode 100644 .tmp-gen3-swap-snippet.xml create mode 100644 sharedUtils/milestoneIndexUtils.ts create mode 100644 src/projectManager/utils/bibleSwapCompatibility.ts create mode 100644 webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts create mode 100644 webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts diff --git a/.tmp-gen3-swap-snippet.xml b/.tmp-gen3-swap-snippet.xml new file mode 100644 index 000000000..a06dd67c3 --- /dev/null +++ b/.tmp-gen3-swap-snippet.xml @@ -0,0 +1,72 @@ +>14 + + + + + + 14 + + + Então, Deus disse: ― Haja lumi­nares no firma­mento do céu para fazer sepa­ração entre o dia e a noite. Sejam eles sinais para mar­car tempos deter­mi­nados, dias e anos, + + + 14 + + + + + + 15 + + + + + + 15 + + + e sirvam de lumi­nares no firma­mento do céu para ilu­mi­nar a terra. + + + E assim foi. + + + + + + 15 + + + + + + 16 + + + + + + 16 + + + Deus fez os dois grandes lumi­nares: o maior para gover­nar o dia e o menor para gover­nar a noite; fez também as estrelas. 
+ + + 16 + + + + + + 17 + + + + + + 17 + + + Deus os colo­cou no firma­mento do céu para ilu­mi­nar a terra, + + = 0) { + milestones[currentMilestoneIndex].cellCount = currentMilestoneCellCount; + } + currentMilestoneIndex++; + currentMilestoneCellCount = 0; + milestones.push({ + index: currentMilestoneIndex, + cellIndex: i, + value: cell.value || String(currentMilestoneIndex + 1), + cellCount: 0, + }); + } + continue; + } + + if (cellType !== CodexCellTypes.MILESTONE && cellType !== "paratext") { + const isDeleted = cell.metadata?.data?.deleted === true; + if (!isDeleted) { + totalContentCells++; + if (currentMilestoneIndex >= 0) { + currentMilestoneCellCount++; + } + } + } + } + + if (currentMilestoneIndex >= 0) { + milestones[currentMilestoneIndex].cellCount = currentMilestoneCellCount; + } + + if (milestones.length === 0) { + return [{ + index: 0, + cellIndex: 0, + value: "1", + cellCount: totalContentCells, + }]; + } + + return milestones; +} + +/** + * Returns the milestone index for a cell at the given position while iterating cells in order. + * Pass the current milestone index from the previous cell; returns updated index when a milestone cell is seen. + */ +export function advanceMilestoneIndexForCell( + cell: NotebookCell, + currentMilestoneIndex: number +): number { + if ( + cell.metadata?.type === CodexCellTypes.MILESTONE && + cell.metadata?.data?.deleted !== true + ) { + return currentMilestoneIndex + 1; + } + return currentMilestoneIndex; +} + +/** + * Effective milestone index for a content cell given the current milestone tracker (-1 if none yet). + */ +export function effectiveMilestoneIndex(currentMilestoneIndex: number): number { + return currentMilestoneIndex >= 0 ? 
currentMilestoneIndex : 0; +} diff --git a/src/exportHandler/audioExporter.ts b/src/exportHandler/audioExporter.ts index edf094593..388daae45 100644 --- a/src/exportHandler/audioExporter.ts +++ b/src/exportHandler/audioExporter.ts @@ -13,6 +13,10 @@ import type { ExportProgressReporter, ExportMissingReason } from "./exportProgre import { pickAudioAttachment, isExportableCell, type AudioPick, type AudioPickOutcome } from "./audioAttachmentUtils"; import { formatCellDisplayLabel } from "./cellLabelUtils"; import { CodexCellTypes } from "../../types/enums"; +import { + advanceMilestoneIndexForCell, + effectiveMilestoneIndex, +} from "../../sharedUtils/milestoneIndexUtils"; const execAsync = promisify(exec); @@ -32,6 +36,7 @@ function debug(...args: any[]) { type ExportAudioOptions = { includeTimestamps?: boolean; + selectedMilestonesByFile?: Record<string, number[]>; }; type AudioCellData = { @@ -698,17 +703,13 @@ export async function exportAudioAttachments( const dialogueMap = computeDialogueLineNumbers(notebook.cells); debug(`Processing notebook with ${notebook.cells.length} cells`); - // Build milestone folder mapping: cellId -> milestone folder name - const cellMilestoneFolder = buildCellMilestoneMap(notebook.cells); - - // Count audio cells for per-book progress. Paratext and - // milestone cells (e.g. chapter headers, intros) are not - // recording targets, so they're filtered out by - // `isExportableCell` — they would otherwise show up under - // "no audio recorded" purely as noise.
- const audioCells: Array<{ cell: any; cellId: string; pick: AudioPick; }> = []; - for (const cell of notebook.cells) { - if (!isExportableCell(cell)) continue; + const milestoneFilter = options?.selectedMilestonesByFile?.[file.fsPath]; + let currentMilestoneIndex = -1; + + for (const cell of notebook.cells) { + currentMilestoneIndex = advanceMilestoneIndexForCell(cell, currentMilestoneIndex); + + if (!isExportableCell(cell)) continue; const cellId: string | undefined = cell?.metadata?.id; if (!cellId) continue; const outcome = pickAudioAttachmentForCell(cell); @@ -722,6 +723,13 @@ export async function exportAudioAttachments( // entirely rather than reporting a row they can't act on. continue; } + if ( + milestoneFilter && + milestoneFilter.length > 0 && + !milestoneFilter.includes(effectiveMilestoneIndex(currentMilestoneIndex)) + ) { + continue; + } if (outcome.state === "selection-missing") { // The user explicitly chose a take but the attachment // is gone (deleted, missing, or unknown). We refuse to @@ -812,11 +820,11 @@ export async function exportAudioAttachments( ? vscode.Uri.file(srcPath) : vscode.Uri.joinPath(workspaceFolder.uri, srcPath); - const timeFromCell = (cell?.metadata?.data || {}) as AudioCellData; - // Use ?? so a literal 0 for audioStartTime/audioEndTime is preferred - // over the cell timestamps, instead of falling through. - const start = timeFromCell.audioStartTime ?? timeFromCell.startTime; - const end = timeFromCell.audioEndTime ?? timeFromCell.endTime; + const timeFromCell = (cell?.metadata?.data || {}) as AudioCellData; + // Use ?? so a literal 0 for audioStartTime/audioEndTime is preferred + // over the cell timestamps, instead of falling through. + const start = timeFromCell.audioStartTime ?? timeFromCell.startTime; + const end = timeFromCell.audioEndTime ?? 
timeFromCell.endTime; const originalExt = extname(absoluteSrc.fsPath) || ".wav"; const labelRaw = cell?.metadata?.cellLabel || "unlabeled"; const label = sanitizeFileComponent(String(labelRaw).toLowerCase()); @@ -854,17 +862,17 @@ export async function exportAudioAttachments( const cellLabel = formatCellDisplayLabel(cell, cellId, bookCode); - tasks.push({ - cellId, - attachmentId: pick.id, - cellLabel, - absoluteSrc, - destUri, - targetFolder, - originalExt, - start, - end, - }); + tasks.push({ + cellId, + attachmentId: pick.id, + cellLabel, + absoluteSrc, + destUri, + targetFolder, + originalExt, + start, + end, + }); } // Pre-create all target directories in parallel diff --git a/src/exportHandler/exportHandler.ts b/src/exportHandler/exportHandler.ts index ac6dbd195..5596848ae 100644 --- a/src/exportHandler/exportHandler.ts +++ b/src/exportHandler/exportHandler.ts @@ -255,6 +255,8 @@ export interface ExportOptions { removeIds?: boolean; includeAudio?: boolean; includeTimestamps?: boolean; + /** Per-file list of 0-based milestone indices to include when exporting audio */ + selectedMilestonesByFile?: Record<string, number[]>; } // IDML Round-trip export: Uses idmlExporter or biblicaExporter based on filename @@ -1793,7 +1795,10 @@ export async function exportCodexContent( break; case CodexExportFormat.AUDIO: { const { exportAudioAttachments } = await import("./audioExporter"); - exportPromises.push(exportAudioAttachments(wrapperPath, filesToExport, childReporter, { includeTimestamps: options?.includeTimestamps })); + exportPromises.push(exportAudioAttachments(wrapperPath, filesToExport, childReporter, { + includeTimestamps: options?.includeTimestamps, + selectedMilestonesByFile: options?.selectedMilestonesByFile, + })); break; } case CodexExportFormat.SUBTITLES_VTT_WITH_STYLES: @@ -1828,8 +1833,9 @@ export async function exportCodexContent( if (includeAudio) { const { exportAudioAttachments } = await import("./audioExporter"); exportPromises.push( -
exportAudioAttachments(audioPath, filesToExport, childReporter, { + exportAudioAttachments(audioPath, filesToExport, childReporter, { includeTimestamps: options?.includeTimestamps, + selectedMilestonesByFile: options?.selectedMilestonesByFile, }) ); } diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index ea5dc49d9..b34fb0774 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -4,7 +4,7 @@ import * as fs from "fs"; import * as vscode from "vscode"; import { safePostMessageToPanel } from "../utils/webviewUtils"; import { EXPORT_OPTIONS_BY_FILE_TYPE } from "../../sharedUtils/exportOptionsEligibility"; -import { groupCodexFilesByImporterType, type FileGroup } from "./utils/exportViewUtils"; +import { groupCodexFilesByImporterType, type FileGroup, BIBLE_MILESTONE_EXPORT_GROUP_KEYS } from "./utils/exportViewUtils"; import { readCodexNotebookFromUri } from "../exportHandler/exportHandlerUtils"; import { compareHtmlStructure } from "../../sharedUtils/htmlStructureUtils"; import { getMediaFilesStrategy } from "../utils/localProjectSettings"; @@ -365,6 +365,7 @@ function getWebviewContent( const groupsJson = JSON.stringify(fileGroups); const exportOptionsConfigJson = JSON.stringify(EXPORT_OPTIONS_BY_FILE_TYPE); const initialExportFolderJson = JSON.stringify(initialExportFolder); + const bibleMilestoneGroupKeysJson = JSON.stringify([...BIBLE_MILESTONE_EXPORT_GROUP_KEYS]); return ` @@ -480,6 +481,45 @@ function getWebviewContent( background-color: var(--vscode-editor-background); border-top: 1px solid var(--vscode-input-border); } + .milestone-file-group { + border: 1px solid var(--vscode-input-border); + border-radius: 4px; + margin-bottom: 12px; + overflow: hidden; + } + .milestone-file-header { + display: flex; + align-items: center; + gap: 8px; + padding: 12px; + background-color: var(--vscode-editor-inactiveSelectionBackground); + } + .milestone-file-header h4 { + margin: 0; + flex: 1; +
font-size: 0.95em; + } + .milestone-list { + padding: 8px 12px 12px 32px; + display: flex; + flex-direction: column; + gap: 6px; + background-color: var(--vscode-editor-background); + border-top: 1px solid var(--vscode-input-border); + } + .milestone-item { + display: flex; + align-items: center; + gap: 8px; + } + .milestone-item label { + cursor: pointer; + user-select: none; + } + .milestone-select-all { + font-size: 0.85em; + color: var(--vscode-descriptionForeground); + } .file-item { display: flex; align-items: center; @@ -1282,8 +1322,19 @@ function getWebviewContent( - +
+
+

Select Milestones

+

+ Choose which chapters (milestones) to include in the audio export. All milestones are selected by default. +

+
+
+
+ + +

Select Export Location

@@ -1372,10 +1423,13 @@ function getWebviewContent(

2
3
+
+
4
+
@@ -1458,6 +1512,7 @@ function getWebviewContent( const vscode = acquireVsCodeApi(); const fileGroups = ${groupsJson}; const exportOptionsConfig = ${exportOptionsConfigJson}; + const bibleMilestoneGroupKeys = new Set(${bibleMilestoneGroupKeysJson}); const isStreamOnly = ${JSON.stringify(isStreamOnly)}; let currentStep = 1; let selectedFormat = null; @@ -1465,6 +1520,153 @@ function getWebviewContent( let exportPath = ${initialExportFolderJson}; let selectedFiles = new Set(); let selectedGroupKey = null; + /** @type {Record>} */ + let selectedMilestonesByFile = {}; + + function shouldShowMilestoneStep() { + if (!selectedAudioMode) return false; + if (!selectedGroupKey || !bibleMilestoneGroupKeys.has(selectedGroupKey)) return false; + for (const path of selectedFiles) { + const f = fileLookup[path]; + if (f && f.milestones && f.milestones.length > 0) return true; + } + return false; + } + + function getTotalStepCount() { + return shouldShowMilestoneStep() ? 4 : 3; + } + + function getProgressDisplayStep(step) { + if (!shouldShowMilestoneStep() && step === 4) return 3; + return step; + } + + /** @type {string[]} Maps milestone UI file index to codex path */ + let milestoneFilePaths = []; + + function initMilestoneSelection() { + selectedMilestonesByFile = {}; + milestoneFilePaths = []; + for (const path of selectedFiles) { + const f = fileLookup[path]; + if (f && f.milestones && f.milestones.length > 0) { + selectedMilestonesByFile[path] = new Set(f.milestones.map(m => m.index)); + milestoneFilePaths.push(path); + } + } + renderMilestoneSelection(); + updateStep3Button(); + } + + function renderMilestoneSelection() { + const container = document.getElementById('milestoneGroupsContainer'); + if (!container) return; + if (milestoneFilePaths.length === 0) { + container.innerHTML = '

No milestones found in the selected files.

'; + return; + } + container.innerHTML = milestoneFilePaths.map((filePath, fileIdx) => { + const f = fileLookup[filePath]; + const selectedSet = selectedMilestonesByFile[filePath] || new Set(); + const allSelected = f.milestones.every(m => selectedSet.has(m.index)); + const selectAllId = 'milestone-select-all-' + fileIdx; + const milestonesHtml = f.milestones.map((m, mIdx) => { + const cbId = 'milestone-' + fileIdx + '-' + mIdx; + const checked = selectedSet.has(m.index) ? 'checked' : ''; + const label = ((m.value || String(m.index + 1)).replace(/<[^>]*>/g, '').trim()) || String(m.index + 1); + return \` +
+ + +
+ \`; + }).join(''); + return \` +
+
+

\${f.displayName}

+ +
+
\${milestonesHtml}
+
+ \`; + }).join(''); + } + + function onMilestoneCheckboxChange(fileIdx, milestoneIndex) { + const filePath = milestoneFilePaths[fileIdx]; + if (!filePath) return; + if (!selectedMilestonesByFile[filePath]) { + selectedMilestonesByFile[filePath] = new Set(); + } + const cb = document.querySelector('input[data-file-idx="' + fileIdx + '"][data-milestone-index="' + milestoneIndex + '"]'); + if (cb && cb.checked) { + selectedMilestonesByFile[filePath].add(milestoneIndex); + } else { + selectedMilestonesByFile[filePath].delete(milestoneIndex); + } + syncMilestoneSelectAllCheckbox(fileIdx); + updateStep3Button(); + } + + function onMilestoneSelectAllChange(fileIdx) { + const filePath = milestoneFilePaths[fileIdx]; + const f = filePath ? fileLookup[filePath] : null; + if (!f || !f.milestones) return; + const selectAllCb = document.querySelector('.milestone-file-group[data-file-idx="' + fileIdx + '"] input[data-file-idx]:not([data-milestone-index])'); + const shouldSelectAll = selectAllCb && selectAllCb.checked; + if (!selectedMilestonesByFile[filePath]) { + selectedMilestonesByFile[filePath] = new Set(); + } + if (shouldSelectAll) { + f.milestones.forEach(m => selectedMilestonesByFile[filePath].add(m.index)); + } else { + selectedMilestonesByFile[filePath].clear(); + } + renderMilestoneSelection(); + updateStep3Button(); + } + + function syncMilestoneSelectAllCheckbox(fileIdx) { + const filePath = milestoneFilePaths[fileIdx]; + const f = filePath ? 
fileLookup[filePath] : null; + if (!f || !f.milestones) return; + const selectedSet = selectedMilestonesByFile[filePath] || new Set(); + const allSelected = f.milestones.every(m => selectedSet.has(m.index)); + const selectAllCb = document.querySelector('.milestone-file-group[data-file-idx="' + fileIdx + '"] input[data-file-idx]:not([data-milestone-index])'); + if (selectAllCb) selectAllCb.checked = allSelected; + } + + function updateStep3Button() { + const btn = document.getElementById('nextStep3'); + if (!btn) return; + let anySelected = false; + for (const path of selectedFiles) { + const set = selectedMilestonesByFile[path]; + if (set && set.size > 0) { + anySelected = true; + break; + } + } + btn.disabled = !anySelected; + } + + function buildSelectedMilestonesPayload() { + if (!shouldShowMilestoneStep()) return undefined; + const payload = {}; + for (const path of selectedFiles) { + const set = selectedMilestonesByFile[path]; + if (set && set.size > 0) { + payload[path] = Array.from(set).sort((a, b) => a - b); + } + } + return Object.keys(payload).length > 0 ? 
payload : undefined; + } // Build a path→file lookup so Step 2 can check audio-only status const fileLookup = {}; @@ -2149,6 +2351,7 @@ function getWebviewContent( const back = document.getElementById('btnBack'); const next1 = document.getElementById('nextStep1'); const next2 = document.getElementById('nextStep2'); + const next3 = document.getElementById('nextStep3'); const exportBtn = document.getElementById('exportButton'); if (currentStep === 1) { if (cancel) cancel.classList.add('visible'); @@ -2157,40 +2360,96 @@ function getWebviewContent( if (back) back.classList.add('visible'); if (next2) next2.classList.add('visible'); } else if (currentStep === 3) { + if (back) back.classList.add('visible'); + if (next3) next3.classList.add('visible'); + } else if (currentStep === 4) { if (back) back.classList.add('visible'); if (exportBtn) exportBtn.classList.add('visible'); } } + function updateProgressBarVisuals(activeStep) { + const showMilestones = shouldShowMilestoneStep(); + const circle3 = document.getElementById('progressCircle3'); + const circle4 = document.getElementById('progressCircle4'); + const line3 = document.getElementById('progressLine3'); + if (circle4) circle4.style.display = showMilestones ? '' : 'none'; + if (line3) line3.style.display = showMilestones ? '' : 'none'; + + const resetCircle = (circle, label) => { + if (!circle) return; + circle.classList.remove('active', 'completed'); + circle.textContent = String(label); + }; + + resetCircle(document.getElementById('progressCircle1'), 1); + resetCircle(document.getElementById('progressCircle2'), 2); + resetCircle(circle3, showMilestones ? 
3 : 3); + resetCircle(circle4, 4); + + const setCircleState = (circleId, state) => { + const circle = document.getElementById(circleId); + if (!circle) return; + circle.classList.remove('active', 'completed'); + if (state === 'completed') { + circle.classList.add('completed'); + circle.innerHTML = ''; + } else if (state === 'active') { + circle.classList.add('active'); + } + }; + + if (showMilestones) { + for (let step = 1; step <= 4; step++) { + const state = step < activeStep ? 'completed' : (step === activeStep ? 'active' : 'idle'); + if (state !== 'idle') setCircleState('progressCircle' + step, state); + } + document.querySelectorAll('[id^="progressLine"]').forEach((line, i) => { + line.classList.remove('completed'); + if (i + 1 < activeStep) line.classList.add('completed'); + }); + } else { + const circleSteps = [ + { circle: 1, step: 1 }, + { circle: 2, step: 2 }, + { circle: 3, step: 4 }, + ]; + for (const { circle, step } of circleSteps) { + const state = step < activeStep ? 'completed' : (step === activeStep ? 'active' : 'idle'); + if (state !== 'idle') setCircleState('progressCircle' + circle, state); + } + const line1 = document.getElementById('progressLine1'); + const line2 = document.getElementById('progressLine2'); + if (line1) line1.classList.toggle('completed', activeStep > 1); + if (line2) line2.classList.toggle('completed', activeStep >= 4); + } + + const compact = document.getElementById('progressCompact'); + if (compact) { + compact.textContent = 'Step ' + getProgressDisplayStep(activeStep) + ' of ' + getTotalStepCount(); + } + } + function goBack() { - goToStep(currentStep - 1); + if (currentStep === 4) { + goToStep(shouldShowMilestoneStep() ? 
3 : 2); + } else { + goToStep(currentStep - 1); + } } function goToStep(n) { const prevStep = currentStep; document.querySelectorAll('.step-panel').forEach(p => p.classList.remove('active')); document.getElementById('step' + n).classList.add('active'); - document.querySelectorAll('[id^="progressCircle"]').forEach((circle, i) => { - circle.classList.remove('active', 'completed'); - if (i + 1 < n) { - circle.classList.add('completed'); - circle.innerHTML = ''; - } else { - circle.textContent = String(i + 1); - if (i + 1 === n) circle.classList.add('active'); - } - }); - document.querySelectorAll('[id^="progressLine"]').forEach((line, i) => { - line.classList.remove('completed'); - if (i + 1 < n) line.classList.add('completed'); - }); - const compact = document.getElementById('progressCompact'); - if (compact) compact.textContent = 'Step ' + n + ' of 3'; + updateProgressBarVisuals(n); currentStep = n; updateButtonVisibility(); if (n === 2) { initStep2Options(prevStep === 1); - } else if (n === 3) { + } else if (n === 3 && shouldShowMilestoneStep()) { + initMilestoneSelection(); + } else if (n === 4) { updateExportButton(); } } @@ -2198,6 +2457,11 @@ function getWebviewContent( function goToStep1() { goToStep(1); } function goToStep2() { goToStep(2); } function goToStep3() { goToStep(3); } + function goToStep4() { goToStep(4); } + + function advanceToNextStepAfterFormat() { + goToStep(shouldShowMilestoneStep() ? 
3 : 4); + } function updateStep2Button() { const btn = document.getElementById('nextStep2'); @@ -2448,9 +2712,12 @@ function getWebviewContent( }); return; } - goToStep(3); + advanceToNextStepAfterFormat(); } + window.onMilestoneCheckboxChange = onMilestoneCheckboxChange; + window.onMilestoneSelectAllChange = onMilestoneSelectAllChange; + window.addEventListener('message', event => { const message = event.data; if (message.command === 'updateExportPath') { @@ -2504,7 +2771,7 @@ function getWebviewContent( pendingSubtitleOverlapCheck = false; updateStep2Button(); if (message.proceed) { - goToStep(3); + advanceToNextStepAfterFormat(); } } }); @@ -2513,6 +2780,7 @@ function getWebviewContent( renderFileGroups(); setupCellListPopover(); updateStep1Button(); + updateProgressBarVisuals(1); if (exportPath) { const pathEl = document.getElementById('exportPath'); if (pathEl) pathEl.textContent = exportPath; @@ -2675,6 +2943,10 @@ function getWebviewContent( options.includeAudio = true; options.includeTimestamps = selectedAudioMode === 'audio-timestamps'; } + const selectedMilestones = buildSelectedMilestonesPayload(); + if (selectedMilestones) { + options.selectedMilestonesByFile = selectedMilestones; + } // Optimistically switch UI to the in-panel exporting screen so // the user does not see Cancel / Back / Export anymore. The host // also broadcasts exportStarted, which is idempotent. diff --git a/src/projectManager/utils/bibleSwapCompatibility.ts b/src/projectManager/utils/bibleSwapCompatibility.ts new file mode 100644 index 000000000..cfbfd95aa --- /dev/null +++ b/src/projectManager/utils/bibleSwapCompatibility.ts @@ -0,0 +1,249 @@ +/** + * Bible Swap Compatibility (host-side) + * ==================================== + * + * Parses a selected Bible IDML file and each selected Study Bible `.codex` + * notebook's original IDML and produces a compatibility report (book / + * chapter / verse overlap). 
Used by the export webview to show the user + * how well the chosen Bible file aligns with their Study Bibles before + * they commit to a Bible Swap export. + */ + +import * as vscode from "vscode"; +import JSZip from "jszip"; +import { basename } from "path"; + +import { + buildBibleVerseIndex, + BibleVerseIndex, + listVerseKeys, + SKIPPED_BOOK_CODES, +} from "../../../webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap"; +import { readCodexNotebookFromUri } from "../../exportHandler/exportHandlerUtils"; +import { resolveOriginalFileUri } from "../../providers/NewSourceUploader/originalFileUtils"; + +export interface BibleSwapCompatibilityReport { + bibleFileName: string; + booksFound: number; // books in both Bible and at least one Study notebook + booksExpected: number; // distinct books across all Study notebooks + chaptersFound: number; // (book,chapter) pairs in both + chaptersExpected: number; + versesMatched: number; // (book,chapter,verse) triples in both + versesExpected: number; + psaSkipped: boolean; // true if the selected Study notebooks reference PSA + perBookMismatches: Array<{ book: string; missing: number; extra: number }>; +} + +/** + * Read a Bible IDML file's bytes, unzip it, find the largest `Stories/*.xml`, + * and build a verse index. + */ +export async function buildBibleIndexFromUri( + uri: vscode.Uri +): Promise { + const data = await vscode.workspace.fs.readFile(uri); + return buildBibleIndexFromBytes(data); +} + +export async function buildBibleIndexFromBytes( + data: Uint8Array +): Promise { + if (data.length < 4 || data[0] !== 0x50 || data[1] !== 0x4b) { + throw new Error( + "Selected file is not a valid IDML (ZIP) archive. Expected a .idml file." + ); + } + const zip = await JSZip.loadAsync(data); + const storyXml = await readLargestStoryXml(zip); + if (!storyXml) { + throw new Error( + "No Stories/*.xml entries found inside the IDML. The file may be empty or corrupted." 
+ ); + } + return buildBibleVerseIndex(storyXml); +} + +/** + * Pull the largest XML file under `Stories/` from a loaded IDML ZIP and + * return its contents as a UTF-8 string. Per the analysis doc, the main + * Story XML is always the largest one in the folder. + */ +async function readLargestStoryXml(zip: JSZip): Promise { + let bestKey: string | null = null; + let bestSize = -1; + for (const name of Object.keys(zip.files)) { + if (!name.startsWith("Stories/") || !name.endsWith(".xml")) continue; + const file = zip.files[name]; + if (file.dir) continue; + // `_data.uncompressedSize` isn't part of the public types but it's + // available on the internal JSZip object. Fall back to reading the + // file if not present. + const size = + (file as unknown as { _data?: { uncompressedSize?: number } })._data + ?.uncompressedSize ?? -1; + if (size > bestSize) { + bestSize = size; + bestKey = name; + } + } + if (!bestKey) { + // Slow fallback: read every Stories XML, take the longest text. + let bestText: string | null = null; + for (const name of Object.keys(zip.files)) { + if (!name.startsWith("Stories/") || !name.endsWith(".xml")) continue; + const file = zip.file(name); + if (!file) continue; + const text = await file.async("text"); + if (!bestText || text.length > bestText.length) { + bestText = text; + } + } + return bestText; + } + const file = zip.file(bestKey); + if (!file) return null; + return file.async("text"); +} + +/** + * Walk the selected `.codex` notebooks: load each one's original IDML and + * extract its verse set. Returns the aggregated set as a verse-index-style + * map (innerXml/shape are unused on the Study side here, just need the keys). 
+ */ +async function buildStudyVerseSetFromCodexFiles( + filesToExport: string[] +): Promise>> { + // Result: book -> set of "chapter|verse" + const result = new Map>(); + const workspaceFolders = vscode.workspace.workspaceFolders; + if (!workspaceFolders || workspaceFolders.length === 0) return result; + const workspaceFolder = workspaceFolders[0]; + + for (const filePath of filesToExport) { + try { + const uri = vscode.Uri.file(filePath); + const notebook = await readCodexNotebookFromUri(uri); + const meta = notebook.metadata as unknown as + | { originalFileName?: string; originalName?: string } + | undefined; + const originalFileName = + meta?.originalFileName || + meta?.originalName || + `${basename(filePath).split(".")[0]}.idml`; + const originalUri = await resolveOriginalFileUri( + workspaceFolder, + originalFileName + ); + const data = await vscode.workspace.fs.readFile(originalUri); + if (data.length < 4 || data[0] !== 0x50 || data[1] !== 0x4b) continue; + const zip = await JSZip.loadAsync(data); + const storyXml = await readLargestStoryXml(zip); + if (!storyXml) continue; + const studyIndex = buildBibleVerseIndex(storyXml); + for (const key of listVerseKeys(studyIndex)) { + const [book, chapter, verse] = key.split("|"); + let set = result.get(book); + if (!set) { + set = new Set(); + result.set(book, set); + } + set.add(`${chapter}|${verse}`); + } + } catch (err) { + console.warn( + `[BibleSwapCompatibility] Could not read original IDML for ${filePath}:`, + err + ); + } + } + return result; +} + +/** + * Compute a compatibility report between a chosen Bible IDML and the set of + * `.codex` files the user has selected for export. 
+ */ +export async function analyzeBibleSwapCompatibility( + bibleIdmlPath: string, + filesToExport: string[] +): Promise { + const bibleUri = vscode.Uri.file(bibleIdmlPath); + const bibleFileName = basename(bibleIdmlPath); + + const [bibleIndex, studyByBook] = await Promise.all([ + buildBibleIndexFromUri(bibleUri), + buildStudyVerseSetFromCodexFiles(filesToExport), + ]); + + // Pre-bucket the Bible index by book for cheap lookups. + const bibleByBook = new Map>(); // book -> "chapter|verse" + for (const key of listVerseKeys(bibleIndex)) { + const [book, chapter, verse] = key.split("|"); + let set = bibleByBook.get(book); + if (!set) { + set = new Set(); + bibleByBook.set(book, set); + } + set.add(`${chapter}|${verse}`); + } + + let booksExpected = 0; + let booksFound = 0; + const chapterSetExpected = new Set(); // "book|chapter" + const chapterSetFound = new Set(); + let versesExpected = 0; + let versesMatched = 0; + let psaSkipped = false; + const perBookMismatches: Array<{ book: string; missing: number; extra: number }> = []; + + for (const [book, studyVerses] of studyByBook.entries()) { + booksExpected++; + if (SKIPPED_BOOK_CODES.has(book)) { + psaSkipped = true; + // PSA is excluded from the matched/expected totals so the + // "% match" number isn't artificially dragged down by a book + // we deliberately don't swap. 
+ booksExpected--; + continue; + } + + const bibleVerses = bibleByBook.get(book); + if (bibleVerses && bibleVerses.size > 0) booksFound++; + + let missing = 0; + for (const cv of studyVerses) { + versesExpected++; + const [chapter] = cv.split("|"); + chapterSetExpected.add(`${book}|${chapter}`); + if (bibleVerses && bibleVerses.has(cv)) { + versesMatched++; + chapterSetFound.add(`${book}|${chapter}`); + } else { + missing++; + } + } + let extra = 0; + if (bibleVerses) { + for (const cv of bibleVerses) { + if (!studyVerses.has(cv)) extra++; + } + } + if (missing > 0 || extra > 0) { + perBookMismatches.push({ book, missing, extra }); + } + } + + perBookMismatches.sort((a, b) => b.missing + b.extra - (a.missing + a.extra)); + + return { + bibleFileName, + booksFound, + booksExpected, + chaptersFound: chapterSetFound.size, + chaptersExpected: chapterSetExpected.size, + versesMatched, + versesExpected, + psaSkipped, + perBookMismatches, + }; +} diff --git a/src/projectManager/utils/exportViewUtils.ts b/src/projectManager/utils/exportViewUtils.ts index 0cee77117..54ebe6f27 100644 --- a/src/projectManager/utils/exportViewUtils.ts +++ b/src/projectManager/utils/exportViewUtils.ts @@ -1,11 +1,12 @@ import * as vscode from "vscode"; -import { CodexNotebookAsJSONData } from "../../../types"; +import { CodexNotebookAsJSONData, MilestoneInfo } from "../../../types"; import { getCellAudioState, isExportableCell, isLabelableCell, } from "../../exportHandler/audioAttachmentUtils"; import { formatCellDisplayLabel } from "../../exportHandler/cellLabelUtils"; +import { extractMilestonesFromCells } from "../../../sharedUtils/milestoneIndexUtils"; export { EXPORT_OPTIONS_BY_FILE_TYPE, @@ -82,8 +83,12 @@ export interface FileGroupEntry { hasTranslations: boolean; hasAudio: boolean; audioStats?: NotebookAudioStats; + milestones: MilestoneInfo[]; } +/** File types that support milestone selection during bible audio export */ +export const BIBLE_MILESTONE_EXPORT_GROUP_KEYS = new 
Set(["usfm", "ebible", "paratext"]); + export interface FileGroup { groupKey: string; displayName: string; @@ -389,6 +394,7 @@ export async function groupCodexFilesByImporterType( const audioStats = hasAudio ? analyzeNotebookAudioStats(notebook, bookCode) : undefined; + const milestones = extractMilestonesFromCells(notebook.cells); if (!groupsMap.has(groupKey)) { groupsMap.set(groupKey, []); @@ -400,6 +406,7 @@ export async function groupCodexFilesByImporterType( hasTranslations, hasAudio, audioStats, + milestones, }); } catch { const name = uri.fsPath.split(/[/\\]/).pop() || ""; @@ -412,6 +419,7 @@ export async function groupCodexFilesByImporterType( displayName: name.replace(/\.codex$/i, "") || name, hasTranslations: false, hasAudio: false, + milestones: [{ index: 0, cellIndex: 0, value: "1", cellCount: 0 }], }); } } diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts new file mode 100644 index 000000000..e77a04882 --- /dev/null +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts @@ -0,0 +1,495 @@ +/** + * Tests for Bible Swap (hybrid block-swap + content-only fallback). + */ + +import { describe, it, expect } from "vitest"; +import { + buildBibleVerseIndex, + applyBibleSwapToStudyXml, + verseKey, +} from "./bibleSwap"; + +const wrapStory = (paragraphsXml: string) => + `` + + `` + + `` + + paragraphsXml + + ``; + +const bookMarker = (code: string) => + `` + + `` + + `${code}`; + +const noStyleCsr = (inner: string) => + `` + + `${inner}`; + +const verseMarkerCsr = (n: string) => + `` + + `${n}`; + +const chapterMarkerCsr = (n: string) => + `` + + `${n}:`; + +const chapterParagraph = (n: string) => + `` + + chapterMarkerCsr(n) + + ``; + +const simpleVerseParagraph = ( + style: string, + chapter: string | null, + verse: string, + body: string +) => + `` + + (chapter ? 
chapterMarkerCsr(chapter) : "") + + verseMarkerCsr(verse) + + noStyleCsr(body) + + verseMarkerCsr(verse) + + ``; + +/** Study/Bible share identical CSR skeleton in one paragraph → block swap. */ +const identicalSkeletonVerse = (bodyStudy: string, bodyBible: string) => { + const versePara = (body: string) => + `` + + chapterMarkerCsr("1") + + verseMarkerCsr("1") + + noStyleCsr(body) + + verseMarkerCsr("1") + + ``; + return { + study: bookMarker("GEN") + versePara(bodyStudy), + bible: bookMarker("GEN") + versePara(bodyBible), + }; +}; + +describe("buildBibleVerseIndex", () => { + it("indexes verse text and structure signature", () => { + const xml = wrapStory( + bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "No princípio") + ); + const idx = buildBibleVerseIndex(xml); + const v = idx.get(verseKey("GEN", "1", "1")); + expect(v?.text).toContain("No princípio"); + expect(v?.structureSig).toContain("meta:v"); + expect(v?.blockXml).toContain("No princípio"); + }); + + it("extracts book code from polluted meta:bk content", () => { + const polluted = + bookMarker("[PT] GEN") + simpleVerseParagraph("text%3ap", "1", "1", "x"); + const idx = buildBibleVerseIndex(wrapStory(polluted)); + expect(idx.get(verseKey("GEN", "1", "1"))).toBeDefined(); + }); + + it("concatenates cross-paragraph verse text (GEN 1:3 pattern)", () => { + const paraOpen = + `` + + verseMarkerCsr("3") + + noStyleCsr("Deus disse:") + + ``; + const paraMid = + `` + + noStyleCsr("Haja luz.") + + ``; + const paraClose = + `` + + noStyleCsr("E houve luz.") + + verseMarkerCsr("3") + + ``; + const xml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + paraOpen + paraMid + paraClose); + const idx = buildBibleVerseIndex(xml); + const v = idx.get(verseKey("GEN", "1", "3"))!; + expect(v.text).toContain("Deus disse:"); + expect(v.text).toContain("Haja luz."); + expect(v.text).toContain("E houve luz."); + expect(v.singleParagraph).toBe(false); + }); +}); + +describe("applyBibleSwapToStudyXml — 
structure-preserving swap", () => { + it("replaces verse text when structure signatures match without replacing Study CSRs", () => { + const { study, bible } = identicalSkeletonVerse("ENGLISH", "PORTUGUÊS"); + const idx = buildBibleVerseIndex(wrapStory(bible)); + const entry = idx.get(verseKey("GEN", "1", "1")); + expect(entry?.structureSig).toBeTruthy(); + + const { xml, stats } = applyBibleSwapToStudyXml(wrapStory(study), idx); + + expect(stats.replacedCount).toBe(1); + expect(stats.blockSwapCount).toBe(1); + expect(stats.contentOnlyCount).toBe(0); + expect(xml).toContain("PORTUGUÊS"); + expect(xml).not.toContain("ENGLISH"); + }); + + it("preserves Study CharacterStyleRange attributes such as Tracking", () => { + const verseWithTracking = (body: string) => + bookMarker("GEN") + + `` + + chapterMarkerCsr("2") + + verseMarkerCsr("10") + + `` + + `${body}` + + verseMarkerCsr("10") + + ``; + + const studyXml = wrapStory(verseWithTracking("A river watered the garden.")); + const bibleXml = wrapStory(verseWithTracking("Um rio fluía do Éden.")); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.blockSwapCount).toBe(1); + expect(xml).toContain('Tracking="-15"'); + expect(xml).toContain("Um rio fluía"); + expect(xml).not.toContain("A river watered"); + }); + + it("preserves styled divine-name runs (nd) while swapping text", () => { + const verseWithNd = (lord: string, rest: string) => + bookMarker("GEN") + + `` + + chapterMarkerCsr("2") + + verseMarkerCsr("15") + + `` + + `The ` + + `` + + `${lord}` + + `` + + ` ${rest}` + + verseMarkerCsr("15") + + ``; + + const studyXml = wrapStory( + verseWithNd("Lord", "God put the man in the Garden of Eden.") + ); + const bibleXml = wrapStory( + verseWithNd("SENHOR", "Deus tomou o homem e o colocou no jardim do Éden.") + ); + const idx = buildBibleVerseIndex(bibleXml); + const { xml } = applyBibleSwapToStudyXml(studyXml, idx); + + 
expect(xml).toContain('AppliedCharacterStyle="CharacterStyle/nd"'); + expect(xml).toContain("SENHOR"); + expect(xml).toContain("Deus tomou o homem"); + expect(xml).not.toContain("Lord"); + expect(xml).not.toContain("God put the man"); + }); +}); + +describe("applyBibleSwapToStudyXml — content-only fallback", () => { + it("uses content-only when Study and Bible verse structures differ", () => { + // Study: single paragraph, all of verse 3 in one block + const studyPara = + `` + + verseMarkerCsr("3") + + noStyleCsr("God said, 'Let there be light.' And there was light.") + + verseMarkerCsr("3") + + ``; + // Bible: three paragraphs (quoted speech split) — different structure + const bibleParas = + `` + + verseMarkerCsr("3") + + noStyleCsr("Deus disse:") + + `` + + `` + + noStyleCsr("Haja luz.") + + `` + + `` + + noStyleCsr("E houve luz.") + + verseMarkerCsr("3") + + ``; + + const studyXml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + studyPara); + const bibleXml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + bibleParas); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.replacedCount).toBe(1); + expect(stats.blockSwapCount).toBe(0); + expect(stats.contentOnlyCount).toBe(1); + expect(xml).toContain("Deus disse:"); + expect(xml).toContain("Haja luz."); + expect(xml).not.toContain("God said"); + // Study still has a single text%3ap paragraph for verse 3 (Bible's extra paras are not copied) + const studyVerse3Paras = (xml.match(/ParagraphStyle\/text%3ap">/g) || []).length; + expect(studyVerse3Paras).toBeGreaterThanOrEqual(1); + }); + + it("distributes multi-line poetry across Study paragraph slots instead of clearing them", () => { + const poetryVerse = (lines: string[], closingVerse = "23") => { + const speech = lines[0]; + const poetryLines = lines.slice(1); + let xml = + bookMarker("GEN") + + chapterParagraph("2") + + `` + + verseMarkerCsr("23") + + noStyleCsr(speech) + + 
``; + for (const line of poetryLines) { + xml += + `` + + noStyleCsr(line) + + ``; + } + xml += + `` + + verseMarkerCsr(closingVerse) + + ``; + return xml; + }; + + const studyLines = [ + "The man said,", + "\t\t'Her bones and flesh!'", + "\t\tShe shall be called 'woman',", + "\t\tfor she was taken out of man.'", + ]; + const bibleLines = [ + "Então, o homem disse:", + "\t\t\"Esta, por fim, é osso dos meus ossos", + "\t\te carne da minha carne! Ela será chamada 'mulher',", + "\t\tporque do homem foi tirada\".", + ]; + + const studyXml = wrapStory(poetryVerse(studyLines)); + const bibleXml = wrapStory(poetryVerse(bibleLines)); + const idx = buildBibleVerseIndex(bibleXml); + const entry = idx.get(verseKey("GEN", "2", "23"))!; + expect(entry.segments.filter((s) => s.trim()).length).toBe(4); + + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.replacedCount).toBe(1); + expect(xml).toContain("Então, o homem disse:"); + expect(xml).toContain("Esta, por fim"); + expect(xml).toContain("Ela será chamada"); + expect(xml).toContain("porque do homem foi tirada"); + expect(xml).not.toContain("The man said"); + expect(xml).not.toContain("Her bones"); + + // Each poetry paragraph should still have prose (no empty ¶ slots). + const q1Contents = [...xml.matchAll(/text%3aq1">[\s\S]*?([^<]*)<\/Content>/g)].map( + (m) => m[1] + ); + expect(q1Contents.length).toBe(3); + expect(q1Contents.every((c) => c.trim().length > 0)).toBe(true); + }); + + it("uses paragraph-aligned mapping for multi-paragraph poetry (GEN 3:14 pattern)", () => { + const poetryVerse14 = (lang: "en" | "pt") => { + const intro = + lang === "en" + ? "So the Lord God spoke to the snake. He said, 'Because you have done this," + : "Então, o Senhor Deus declarou à serpente:"; + const q1a = lang === "en" ? "\t\t'You are set apart from all livestock" : "\t\t\"Por ter feito isso,"; + const q2a = + lang === "en" + ? "\t\tand all wild animals." 
+ : "\t\tmaldita é você entre todos os animais de rebanho"; + const q2b = lang === "en" ? "\t\tI am putting a curse on you." : "\t\te entre todos os animais do campo!"; + const q1b = lang === "en" ? "\t\tYou will crawl on your belly." : "\t\tVocê rastejará sobre o seu ventre"; + const q2c = + lang === "en" ? "\t\tall the days of your life." : "\t\te comerá pó todos os dias da sua vida."; + + return ( + bookMarker("GEN") + + `` + + chapterMarkerCsr("3") + + verseMarkerCsr("14") + + noStyleCsr(intro) + + `` + + `` + + `
` + + `
` + + `` + + noStyleCsr(q1a) + + `` + + `` + + noStyleCsr(q2a) + + noStyleCsr(q2b) + + `` + + `` + + noStyleCsr(q1b) + + `` + + `` + + noStyleCsr(q2c) + + verseMarkerCsr("14") + + `` + ); + }; + + const studyXml = wrapStory(poetryVerse14("en")); + const bibleXml = wrapStory(poetryVerse14("pt")); + const idx = buildBibleVerseIndex(bibleXml); + const entry = idx.get(verseKey("GEN", "3", "14"))!; + expect(entry.paragraphSig).toContain("text:p"); + expect(entry.paragraphSig).toContain("text:q1"); + expect(entry.paragraphChunks.length).toBeGreaterThan(2); + + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + expect(stats.blockSwapCount).toBe(1); + expect(xml).toContain("Então, o Senhor"); + expect(xml).toContain("maldita é você"); + expect(xml).toContain("Você rastejará"); + expect(xml).not.toContain("You are set apart"); + expect(xml).not.toContain("all wild animals"); + }); + + it("uses Bible paragraph layout when Study alternates q1/q2 but Bible consolidates lines (GEN 8:22 pattern)", () => { + const studyVerse22 = + bookMarker("GEN") + + `` + + chapterMarkerCsr("8") + + verseMarkerCsr("21") + + noStyleCsr("Verse twenty-one text.") + + verseMarkerCsr("21") + + `` + + `` + + `
` + + `
` + + `` + + verseMarkerCsr("22") + + noStyleCsr("'As long as the earth lasts,") + + `` + + `` + + noStyleCsr("\t\tthere will always be a time to plant") + + noStyleCsr("\t\tand a time to gather the crops.") + + `` + + `` + + noStyleCsr("\t\tAs long as the earth lasts,") + + `` + + `` + + noStyleCsr("\t\tthere will always be cold and heat.") + + `` + + `` + + noStyleCsr("\t\tThere will always be summer and winter,") + + `` + + `` + + noStyleCsr("\t\tday and night.'") + + verseMarkerCsr("22") + + ``; + + const bibleVerse22 = + bookMarker("GEN") + + `` + + chapterMarkerCsr("8") + + verseMarkerCsr("21") + + noStyleCsr("Versículo vinte e um.") + + verseMarkerCsr("21") + + `` + + `` + + `
` + + `
` + + `` + + verseMarkerCsr("22") + + `` + + `“Enquanto durar a terra,
` + + `\t\tjamais cessarão
` + + `\t\tplantio e colheita,
` + + `\t\tfrio e calor,
` + + `\t\tverão e inverno,
` + + `\t\tdia e noite”.` + + `
` + + verseMarkerCsr("22") + + `
`; + + const studyXml = wrapStory(studyVerse22); + const bibleXml = wrapStory(bibleVerse22); + const idx = buildBibleVerseIndex(bibleXml); + const entry = idx.get(verseKey("GEN", "8", "22"))!; + expect(entry.paragraphChunks.length).toBeGreaterThanOrEqual(1); + + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + expect(stats.replacedCount).toBeGreaterThanOrEqual(1); + expect(stats.blockSwapCount).toBeGreaterThanOrEqual(1); + expect(xml).toContain("Enquanto durar a terra"); + expect(xml).toContain("jamais cessarão"); + expect(xml).toContain("plantio e colheita"); + expect(xml).not.toContain("As long as the earth lasts"); + expect(xml).not.toContain("there will always be cold"); + + const q2Count = (xml.match(/text%3aq2/g) || []).length; + expect(q2Count).toBe(0); + }); + + it("handles polluted meta:bk and still swaps verses", () => { + const studyXml = wrapStory( + `` + + `` + + `[PT] GEN` + + simpleVerseParagraph("text%3ap", "1", "1", "ENGLISH") + ); + const bibleXml = wrapStory( + bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "PORTUGUÊS") + ); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.missingFromBible).toEqual([]); + expect(stats.replacedCount).toBe(1); + expect(xml).toContain("PORTUGUÊS"); + expect(xml).not.toContain("ENGLISH"); + }); +}); + +describe("applyBibleSwapToStudyXml — PSA & extras", () => { + it("does not modify PSA verses", () => { + const studyXml = wrapStory( + bookMarker("PSA") + simpleVerseParagraph("text%3ap", "1", "1", "Blessed") + ); + const bibleXml = wrapStory( + bookMarker("PSA") + simpleVerseParagraph("text%3ap", "1", "1", "Bem-aventurado") + ); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.replacedCount).toBe(0); + expect(stats.skippedPsa).toBeGreaterThan(0); + expect(xml).toContain("Blessed"); + expect(xml).not.toContain("Bem-aventurado"); 
+ }); + + it("appends extra Bible verses at end of chapter", () => { + const studyXml = wrapStory( + bookMarker("GEN") + + simpleVerseParagraph("text%3ap", "1", "1", "verse one") + ); + const bibleXml = wrapStory( + bookMarker("GEN") + + simpleVerseParagraph("text%3ap", "1", "1", "TRANSLATED 1") + + simpleVerseParagraph("text%3ap", null, "2", "EXTRA 2") + ); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.extraInBibleAppended.length).toBe(1); + expect(xml).toContain("EXTRA 2"); + }); +}); + +describe("applyBibleSwapToStudyXml — intro preservation", () => { + it("does not touch intro paragraphs", () => { + const studyXml = wrapStory( + bookMarker("GEN") + + `` + + noStyleCsr("INTRO TEXT") + + `` + + simpleVerseParagraph("text%3ap", "1", "1", "REAL VERSE") + ); + const bibleXml = wrapStory( + bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "TRADUZIDO") + ); + const idx = buildBibleVerseIndex(bibleXml); + const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); + + expect(stats.replacedCount).toBe(1); + expect(xml).toContain("INTRO TEXT"); + expect(xml).toContain("TRADUZIDO"); + expect(xml).not.toContain("REAL VERSE"); + }); +}); diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts new file mode 100644 index 000000000..1b622e41c --- /dev/null +++ b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts @@ -0,0 +1,1160 @@ +/** + * Bible Swap — replace Study Bible verse content with a translated Bible IDML. + * + * Hybrid strategy (per BIBLE_TEXT_REPLACEMENT_APPROACH.md): + * 1. **Bible paragraph-layout swap** — when poetry verses use different + * paragraph counts (e.g. Study alternates text:q1/q2 but Bible keeps all + * lines in one text:q1 CSR), replace the Study verse's full paragraph span + * with the Bible's so tabs and `
` line breaks stay intact. + * 2. **Paragraph-aligned swap** — when a multi-paragraph verse uses the same + * paragraph-style sequence in Study and Bible (e.g. text:p → b_poetry → + * text:q1 → text:q2), map Bible text per paragraph so poetry tabs/indents + * stay correct instead of flattening the whole verse. + * 3. **Structure-preserving swap** — when CSR skeletons match in a single block, + * replace only `` while keeping Study CharacterStyleRange tags. + * 4. **Content-only fallback** — otherwise distribute Bible text across Study + * prose slots (nd, no-style, source serif, …). + * + * Always skips: intro/meta/title paragraphs, Psalms (PSA), footnote content. + */ + +// --------------------------------------------------------------------------- +// Public types +// --------------------------------------------------------------------------- + +export type VerseKey = `${string}|${string}|${string}`; + +/** Prose text grouped by paragraph within a cross-paragraph verse. */ +export interface ParagraphChunkEntry { + paragraphStyle: string; + proseSegments: string[]; +} + +export interface VerseEntry { + /** Concatenated plain text from `[No character style]` content nodes. */ + text: string; + /** + * One entry per `` inside `[No character style]` for this verse, + * in document order. Preserves line breaks / poetry layout across paragraphs. + */ + segments: string[]; + /** + * Plain text from every prose CSR in the verse (nd, no-style, source serif, …), + * in document order — used when structure signatures match. + */ + proseSegments: string[]; + /** Structural fingerprint of the verse's CSR block (for block-swap matching). */ + structureSig: string; + /** Raw XML: all top-level CSRs from opening `meta:v` through closing `meta:v`. */ + blockXml: string; + /** True when the verse block lives entirely inside one ParagraphStyleRange. */ + singleParagraph: boolean; + /** Ordered paragraph styles from opening through closing `meta:v`. 
*/ + paragraphSig: string; + /** Per-paragraph prose segments (for paragraph-aligned swap). */ + paragraphChunks: ParagraphChunkEntry[]; + /** All `ParagraphStyleRange` XML from first through last paragraph of this verse. */ + verseSpanXml: string; +} + +export type BibleVerseIndex = Map; + +export interface SwapStats { + replacedCount: number; + blockSwapCount: number; + contentOnlyCount: number; + skippedPsa: number; + missingFromBible: Array<{ book: string; chapter: string; verse: string }>; + extraInBibleAppended: Array<{ book: string; chapter: string; verse: string }>; +} + +export const SKIPPED_BOOK_CODES: ReadonlySet = new Set(["PSA"]); + +const PSA_BOOK_CODE = "PSA"; + +const NO_STYLE_RE = + /CharacterStyle\/\$ID\/\[No character style\]|CharacterStyle\/\$ID\/%5BNo character style%5D/; + +// --------------------------------------------------------------------------- +// Keys & helpers +// --------------------------------------------------------------------------- + +export const verseKey = (book: string, chapter: string, verse: string): VerseKey => + `${book}|${chapter}|${verse}`; + +export const listVerseKeys = (index: BibleVerseIndex): VerseKey[] => + Array.from(index.keys()); + +const digitsOnly = (s: string): string => s.replace(/\D/g, ""); + +const xmlEscape = (s: string): string => + s + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); + +// --------------------------------------------------------------------------- +// Depth-tracking XML iterators (no DOMParser — works in Node + webview) +// --------------------------------------------------------------------------- + +interface TopLevelElement { + fullStart: number; + fullEnd: number; + bodyStart: number; + bodyEnd: number; + appliedParagraphStyle?: string; + appliedCharacterStyle?: string; +} + +/** + * Iterate top-level `...` elements inside a region, + * correctly handling nesting (e.g. Footnote inside a CSR). 
+ */ +function* iterateTopLevelElements( + xml: string, + regionStart: number, + regionEnd: number, + tagName: string +): IterableIterator { + const openRe = new RegExp(`<${tagName}\\b`, "g"); + openRe.lastIndex = regionStart; + + while (true) { + const openMatch = openRe.exec(xml); + if (!openMatch || openMatch.index >= regionEnd) break; + + const openStart = openMatch.index; + const openTagEnd = xml.indexOf(">", openStart); + if (openTagEnd === -1 || openTagEnd >= regionEnd) break; + + const closeTag = ``; + let depth = 1; + let pos = openTagEnd + 1; + + while (depth > 0 && pos < regionEnd) { + const nextOpen = xml.indexOf(`<${tagName}`, pos); + const nextClose = xml.indexOf(closeTag, pos); + if (nextClose === -1) break; + + if (nextOpen !== -1 && nextOpen < nextClose && nextOpen < regionEnd) { + depth++; + pos = nextOpen + tagName.length + 1; + } else { + depth--; + if (depth === 0) { + const fullEnd = nextClose + closeTag.length; + const openTag = xml.slice(openStart, openTagEnd + 1); + const styleMatch = + openTag.match(/AppliedParagraphStyle="([^"]+)"/) ?? + openTag.match(/AppliedCharacterStyle="([^"]+)"/); + yield { + fullStart: openStart, + fullEnd, + bodyStart: openTagEnd + 1, + bodyEnd: nextClose, + appliedParagraphStyle: openTag.includes("ParagraphStyleRange") + ? styleMatch?.[1] + : undefined, + appliedCharacterStyle: openTag.includes("CharacterStyleRange") + ? 
styleMatch?.[1] + : undefined, + }; + openRe.lastIndex = fullEnd; + break; + } + pos = nextClose + closeTag.length; + } + } + if (depth > 0) break; + } +} + +interface ParagraphInfo { + fullStart: number; + fullEnd: number; + bodyStart: number; + bodyEnd: number; + appliedParagraphStyle: string; +} + +function* iterateParagraphs(xml: string): IterableIterator { + for (const el of iterateTopLevelElements(xml, 0, xml.length, "ParagraphStyleRange")) { + if (el.appliedParagraphStyle) { + yield { + fullStart: el.fullStart, + fullEnd: el.fullEnd, + bodyStart: el.bodyStart, + bodyEnd: el.bodyEnd, + appliedParagraphStyle: el.appliedParagraphStyle, + }; + } + } +} + +interface CsrInfo { + fullStart: number; + fullEnd: number; + absBodyStart: number; + absBodyEnd: number; + appliedCharacterStyle: string; + xml: string; +} + +function* iterateCsrAbs( + xml: string, + regionStart: number, + regionEnd: number +): IterableIterator { + for (const el of iterateTopLevelElements(xml, regionStart, regionEnd, "CharacterStyleRange")) { + if (!el.appliedCharacterStyle) continue; + yield { + fullStart: el.fullStart, + fullEnd: el.fullEnd, + absBodyStart: el.bodyStart, + absBodyEnd: el.bodyEnd, + appliedCharacterStyle: el.appliedCharacterStyle, + xml: xml.slice(el.fullStart, el.fullEnd), + }; + } +} + +interface ContentMatch { + absStart: number; + absEnd: number; + absInnerStart: number; + absInnerEnd: number; +} + +function* iterateContentAbs( + xml: string, + regionStart: number, + regionEnd: number +): IterableIterator { + const fnProbe = xml.indexOf(" | null = null; + if (hasFootnote) { + footnoteRanges = []; + for (const fn of iterateTopLevelElements(xml, regionStart, regionEnd, "Footnote")) { + footnoteRanges.push([fn.fullStart, fn.fullEnd]); + } + } + + const re = /([\s\S]*?)<\/Content>/g; + re.lastIndex = regionStart; + let m: RegExpExecArray | null; + while ((m = re.exec(xml)) !== null) { + if (m.index >= regionEnd) break; + const absStart = m.index; + const absEnd = absStart + 
m[0].length; + if (absEnd > regionEnd) break; + if (footnoteRanges) { + let inFootnote = false; + for (const [s, e] of footnoteRanges) { + if (absStart >= s && absEnd <= e) { + inFootnote = true; + break; + } + } + if (inFootnote) continue; + } + yield { + absStart, + absEnd, + absInnerStart: absStart + "".length, + absInnerEnd: absEnd - "".length, + }; + } +} + +// --------------------------------------------------------------------------- +// Style classification +// --------------------------------------------------------------------------- + +function isBookMarkerParagraphStyle(style: string): boolean { + return /(?:^|\/)meta%3abk(?:_|$|\b)/.test(style) || /(?:^|\/)meta:bk/.test(style); +} + +/** Paragraph styles that may contain biblical verse text. */ +function isReplaceableParagraphStyle(style: string): boolean { + if (isBookMarkerParagraphStyle(style)) return false; + if (/(?:^|\/)intro%3a|(?:^|\/)intro:/.test(style)) return false; + if (/(?:^|\/)meta%3a|(?:^|\/)meta:/.test(style)) return false; + if (/(?:^|\/)title%3a|(?:^|\/)title:/.test(style)) return false; + if (/(?:^|\/)notes%3a|(?:^|\/)notes:/.test(style)) return false; + return ( + /(?:^|\/)text%3a|(?:^|\/)text:/.test(style) || + /(?:^|\/)b(?:_|$|\b)/.test(style) || + /(?:^|\/)b_/.test(style) + ); +} + +function isChapterMarkerStyle(style: string): boolean { + return /meta%3ac|meta:c/.test(style); +} + +function isVerseMarkerStyle(style: string): boolean { + return /meta%3av|meta:v/.test(style); +} + +/** Verse-number, spacing, drop-cap, notes, and hidden marker CSRs — never replace text. 
*/ +function isMarkerOrStructuralStyle(style: string): boolean { + if (isChapterMarkerStyle(style) || isVerseMarkerStyle(style)) return true; + if (/(?:^|\/)notes%3a|(?:^|\/)notes:/.test(style)) return true; + if (/(?:^|\/)meta%3a|(?:^|\/)meta:/.test(style)) return true; + return ( + /(?:^|\/)cv%3av(?:_|$|\b)/.test(style) || + /(?:^|\/)cv:v(?:_|$|\b)/.test(style) || + /(?:^|\/)cv%3av_sp/.test(style) || + /(?:^|\/)cv:v_sp/.test(style) || + /(?:^|\/)cv%3adc/.test(style) || + /(?:^|\/)cv:dc/.test(style) || + /(?:^|\/)#base\.hidden/.test(style) + ); +} + +function isNoCharacterStyle(style: string): boolean { + return NO_STYLE_RE.test(style); +} + +function extractProseSegmentsFromCsrList(csrXmlList: string[]): string[] { + const segments: string[] = []; + for (const csrXml of csrXmlList) { + const styleMatch = csrXml.match(/AppliedCharacterStyle="([^"]+)"/); + if (isMarkerOrStructuralStyle(styleMatch?.[1] ?? "")) continue; + const text = collectContentText(csrXml, 0, csrXml.length); + segments.push(text); + } + return segments; +} + +/** + * Extract canonical SBL book code from `meta:bk` content. The notes export + * pipeline may prefix translated text (e.g. "[PT] GEN"); we must still find GEN. + */ +function extractBookCode(rawText: string): string { + if (!rawText) return ""; + const trimmed = rawText.replace(/\s+/g, " ").trim(); + const m = trimmed.match(/\b(?:[1-3][A-Z]{2}|[A-Z]{3})\b/); + return m ? m[0] : trimmed; +} + +function collectContentText(xml: string, start: number, end: number): string { + let text = ""; + for (const c of iterateContentAbs(xml, start, end)) { + text += xml.slice(c.absInnerStart, c.absInnerEnd); + } + return text; +} + +// --------------------------------------------------------------------------- +// Verse block structure (for hybrid block swap) +// --------------------------------------------------------------------------- + +/** Normalize a character style path for comparison. 
*/ +function normalizeCharStyle(style: string): string { + return style + .replace(/^CharacterStyle\//, "") + .replace(/%3a/g, ":") + .replace(/%5B/g, "[") + .replace(/%5D/g, "]"); +} + +/** Token describing one CSR's role in a verse block (ignores actual prose). */ +function csrStructureToken(csrXml: string): string { + const styleMatch = csrXml.match(/AppliedCharacterStyle="([^"]+)"/); + const style = normalizeCharStyle(styleMatch?.[1] ?? "?"); + + if (isNoCharacterStyle(styleMatch?.[1] ?? "")) { + if (/<\?ACE\s/.test(csrXml) || /<\?ACE\s*\?>/.test(csrXml)) return `${style}:ace`; + const hasBr = //.test(csrXml); + const text = collectContentText(csrXml, 0, csrXml.length).replace(/\s+/g, " ").trim(); + if (hasBr && !text) return `${style}:br`; + if (!text) return `${style}:empty`; + return `${style}:text`; + } + const text = collectContentText(csrXml, 0, csrXml.length).replace(/\s+/g, " ").trim(); + return text ? `${style}:marker` : `${style}:empty`; +} + +function buildStructureSig(csrXmlList: string[]): string { + return csrXmlList.map(csrStructureToken).join("|"); +} + +function normalizeParagraphStyle(style: string): string { + return style + .replace(/^ParagraphStyle\//, "") + .replace(/%3a/g, ":") + .replace(/%5B/g, "[") + .replace(/%5D/g, "]"); +} + +interface ParagraphChunkState { + paragraphStyle: string; + paragraphStart: number; + proseContents: ContentMatch[]; + proseSegments: string[]; +} + +function buildParagraphSig(chunks: Array<{ paragraphStyle: string }>): string { + return chunks.map((c) => normalizeParagraphStyle(c.paragraphStyle)).join("|"); +} + +function isPoetryParagraphSig(sig: string): boolean { + return /text:q[12]/.test(sig); +} + +function countProseLinesInChunks(chunks: Array<{ proseSegments: string[] }>): number { + return chunks.reduce( + (sum, c) => sum + c.proseSegments.filter((s) => s.trim().length > 0).length, + 0 + ); +} + +function maxProseLinesInOneChunk(chunks: Array<{ proseSegments: string[] }>): number { + return 
chunks.reduce( + (max, c) => Math.max(max, c.proseSegments.filter((s) => s.trim().length > 0).length), + 0 + ); +} + +/** + * Study and Bible both have poetry (text:q1/q2) but different paragraph layouts — + * e.g. GEN 8:22 Study uses many alternating q1/q2 paras while Bible keeps every + * line in one q1 CSR with `
` between `` nodes. + */ +function shouldUseBibleVerseSpanLayout( + study: { + paragraphSig: string; + singleParagraph: boolean; + paragraphChunks: ParagraphChunkState[]; + }, + bible: VerseEntry +): boolean { + if (study.singleParagraph || bible.singleParagraph) return false; + if (study.paragraphSig === bible.paragraphSig) return false; + if (!isPoetryParagraphSig(study.paragraphSig) || !isPoetryParagraphSig(bible.paragraphSig)) { + return false; + } + + const studyParas = study.paragraphChunks.length; + const bibleParas = bible.paragraphChunks.length; + const bibleMaxLines = maxProseLinesInOneChunk(bible.paragraphChunks); + const studyLines = countProseLinesInChunks(study.paragraphChunks); + const bibleLines = countProseLinesInChunks(bible.paragraphChunks); + + if (bibleMaxLines < 2) return false; + + // Bible consolidated into fewer paragraphs than Study. + if (bibleParas < studyParas) return true; + + // Same paragraph count but Bible packs many lines into one CSR (Br-separated). + const bibleConsolidated = bible.paragraphChunks.some( + (c) => c.proseSegments.filter((s) => s.trim().length > 0).length >= 3 + ); + const studySpread = study.paragraphChunks.every( + (c) => c.proseSegments.filter((s) => s.trim().length > 0).length <= 2 + ); + if (bibleConsolidated && studySpread && bibleMaxLines >= 3) return true; + + // Line counts differ enough that weight-based splitting would break words. + if (bibleParas !== studyParas && Math.abs(bibleLines - studyLines) >= 2) return true; + + return false; +} + +/** + * Poetry verses are often preceded by a `b_poetry` spacer paragraph before the + * opening `meta:v`. Include it in the verse span so layout swap keeps the break. 
+ */ +function findPoetryLeadInStart(storyXml: string, verseParagraphStart: number): number { + const before = storyXml.lastIndexOf("", before); + if (openEnd === -1 || openEnd >= verseParagraphStart) return verseParagraphStart; + const openTag = storyXml.slice(before, openEnd + 1); + const styleMatch = openTag.match(/AppliedParagraphStyle="([^"]+)"/); + if (!styleMatch) return verseParagraphStart; + const style = normalizeParagraphStyle(styleMatch[1]); + if (/(?:^|\/)b_poetry(?:_|$|\b)/.test(style) || /(?:^|\/)b(?:_|$|\b)/.test(style)) { + return before; + } + return verseParagraphStart; +} + +function ensureParagraphChunk( + openVerse: OpenVerseState, + para: ParagraphInfo +): ParagraphChunkState { + const last = openVerse.paragraphChunks[openVerse.paragraphChunks.length - 1]; + if (!last || last.paragraphStart !== para.fullStart) { + const chunk: ParagraphChunkState = { + paragraphStyle: para.appliedParagraphStyle, + paragraphStart: para.fullStart, + proseContents: [], + proseSegments: [], + }; + openVerse.paragraphChunks.push(chunk); + return chunk; + } + return last; +} + +interface OpenVerseState { + book: string; + chapter: string; + verse: string; + csrXmlList: string[]; + blockStart: number; + blockEnd: number; + paragraphStart: number; + paragraphEnd: number; + noStyleContents: ContentMatch[]; + proseContents: ContentMatch[]; + paragraphChunks: ParagraphChunkState[]; +} + +interface WalkCallbacks { + onBook?: (book: string) => void; + onChapter?: (book: string, chapter: string) => void; + onVerseOpen?: (v: { book: string; chapter: string; verse: string }) => void; + /** Fired when entering the PSA book (verses are not swapped). 
*/ + onEnterPsa?: () => void; + onVerseClose?: (entry: { + book: string; + chapter: string; + verse: string; + text: string; + segments: string[]; + structureSig: string; + blockXml: string; + singleParagraph: boolean; + noStyleContents: ContentMatch[]; + proseContents: ContentMatch[]; + paragraphChunks: ParagraphChunkState[]; + paragraphStart: number; + paragraphEnd: number; + verseSpanXml: string; + csrXmlList: string[]; + blockStart: number; + blockEnd: number; + }) => void; +} + +/** + * Stream through a Story XML, tracking book / chapter / verse state and + * firing callbacks at verse boundaries. + */ +function walkStory(storyXml: string, callbacks: WalkCallbacks): void { + let currentBook = ""; + let currentChapter = ""; + let openVerse: OpenVerseState | null = null; + let inPsa = false; + + const closeVerse = () => { + if (!openVerse) return; + const { + book, + chapter, + verse, + csrXmlList, + blockStart, + blockEnd, + paragraphStart, + paragraphEnd, + } = openVerse; + if (!inPsa && book && chapter && verse) { + const segments: string[] = []; + const textParts: string[] = []; + for (const c of openVerse.noStyleContents) { + const t = storyXml.slice(c.absInnerStart, c.absInnerEnd); + segments.push(t); + if (t.trim()) textParts.push(t); + } + callbacks.onVerseClose?.({ + book, + chapter, + verse, + text: textParts.join(" ").replace(/\s+/g, " ").trim(), + segments, + structureSig: buildStructureSig(csrXmlList), + blockXml: storyXml.slice(blockStart, blockEnd), + singleParagraph: paragraphStart === paragraphEnd, + noStyleContents: [...openVerse.noStyleContents], + proseContents: [...openVerse.proseContents], + paragraphChunks: openVerse.paragraphChunks.map((c) => ({ + paragraphStyle: c.paragraphStyle, + paragraphStart: c.paragraphStart, + proseContents: [...c.proseContents], + proseSegments: [...c.proseSegments], + })), + paragraphStart, + paragraphEnd, + verseSpanXml: storyXml.slice(paragraphStart, paragraphEnd), + csrXmlList: [...csrXmlList], + blockStart, + 
blockEnd, + }); + } + openVerse = null; + }; + + for (const para of iterateParagraphs(storyXml)) { + if (isBookMarkerParagraphStyle(para.appliedParagraphStyle)) { + closeVerse(); + let bookRaw = ""; + for (const c of iterateContentAbs(storyXml, para.bodyStart, para.bodyEnd)) { + bookRaw += storyXml.slice(c.absInnerStart, c.absInnerEnd); + } + const code = extractBookCode(bookRaw); + if (code) { + currentBook = code; + currentChapter = ""; + inPsa = code === PSA_BOOK_CODE; + if (inPsa) callbacks.onEnterPsa?.(); + callbacks.onBook?.(code); + } + continue; + } + + if (!isReplaceableParagraphStyle(para.appliedParagraphStyle)) { + continue; + } + + for (const csr of iterateCsrAbs(storyXml, para.bodyStart, para.bodyEnd)) { + if (isChapterMarkerStyle(csr.appliedCharacterStyle)) { + const cnum = digitsOnly(collectContentText(storyXml, csr.absBodyStart, csr.absBodyEnd)); + if (cnum && cnum !== currentChapter) { + closeVerse(); + currentChapter = cnum; + callbacks.onChapter?.(currentBook, currentChapter); + } + continue; + } + + if (isVerseMarkerStyle(csr.appliedCharacterStyle)) { + const vnum = collectContentText(storyXml, csr.absBodyStart, csr.absBodyEnd).trim(); + if (!/^\d+$/.test(vnum)) continue; + + if (!openVerse) { + // Opening marker + const paragraphStart = findPoetryLeadInStart(storyXml, para.fullStart); + openVerse = { + book: currentBook, + chapter: currentChapter, + verse: vnum, + csrXmlList: [csr.xml], + blockStart: csr.fullStart, + blockEnd: csr.fullEnd, + paragraphStart, + paragraphEnd: para.fullEnd, + noStyleContents: [], + proseContents: [], + paragraphChunks: [], + }; + callbacks.onVerseOpen?.({ + book: currentBook, + chapter: currentChapter, + verse: vnum, + }); + } else if (openVerse.verse === vnum) { + // Closing marker (same verse number) + openVerse.csrXmlList.push(csr.xml); + openVerse.blockEnd = csr.fullEnd; + closeVerse(); + } else { + // New verse opens before previous closed — force-close previous + closeVerse(); + openVerse = { + book: 
currentBook, + chapter: currentChapter, + verse: vnum, + csrXmlList: [csr.xml], + blockStart: csr.fullStart, + blockEnd: csr.fullEnd, + paragraphStart: para.fullStart, + paragraphEnd: para.fullEnd, + noStyleContents: [], + proseContents: [], + paragraphChunks: [], + }; + callbacks.onVerseOpen?.({ + book: currentBook, + chapter: currentChapter, + verse: vnum, + }); + } + continue; + } + + if (openVerse) { + const verseParagraphChunk = ensureParagraphChunk(openVerse, para); + openVerse.csrXmlList.push(csr.xml); + openVerse.blockEnd = csr.fullEnd; + openVerse.paragraphEnd = para.fullEnd; + + if (!isMarkerOrStructuralStyle(csr.appliedCharacterStyle)) { + for (const c of iterateContentAbs(storyXml, csr.absBodyStart, csr.absBodyEnd)) { + if (isProseContentSlot(storyXml, c)) { + const text = storyXml.slice(c.absInnerStart, c.absInnerEnd); + openVerse.proseContents.push(c); + verseParagraphChunk.proseContents.push(c); + verseParagraphChunk.proseSegments.push(text); + } + } + } + + if (isNoCharacterStyle(csr.appliedCharacterStyle)) { + for (const c of iterateContentAbs(storyXml, csr.absBodyStart, csr.absBodyEnd)) { + openVerse.noStyleContents.push(c); + } + } + } + } + } + closeVerse(); +} + +// --------------------------------------------------------------------------- +// Build Bible verse index +// --------------------------------------------------------------------------- + +export function buildBibleVerseIndex(bibleStoryXml: string): BibleVerseIndex { + const index: BibleVerseIndex = new Map(); + + walkStory(bibleStoryXml, { + onVerseClose: (entry) => { + if (!entry.book || !entry.chapter || !entry.verse || !entry.text) return; + const key = verseKey(entry.book, entry.chapter, entry.verse); + index.set(key, { + text: entry.text, + segments: entry.segments, + proseSegments: extractProseSegmentsFromCsrList(entry.csrXmlList), + structureSig: entry.structureSig, + blockXml: entry.blockXml, + singleParagraph: entry.singleParagraph, + paragraphSig: 
buildParagraphSig(entry.paragraphChunks), + paragraphChunks: entry.paragraphChunks.map((c) => ({ + paragraphStyle: c.paragraphStyle, + proseSegments: c.proseSegments, + })), + verseSpanXml: entry.verseSpanXml, + }); + }, + }); + + return index; +} + +// --------------------------------------------------------------------------- +// Apply swap to Study Bible +// --------------------------------------------------------------------------- + +interface Splice { + absStart: number; + absEnd: number; + replacement: string; +} + +function upsertSplice(map: Map, sp: Splice): void { + map.set(sp.absStart, sp); +} + +/** True when this `` carries verse prose (not ACE, not inter-verse spacing). */ +function isProseContentSlot(storyXml: string, c: ContentMatch): boolean { + const raw = storyXml.slice(c.absInnerStart, c.absInnerEnd); + if (!raw.trim()) return false; + if (/^<\?ACE/.test(raw.trim())) return false; + // Single space between verses in the same paragraph — keep original. + if (raw === " " || raw === "\u2009" || raw === "\u00A0") return false; + return true; +} + +function slotWhitespace(studyXml: string, slot: ContentMatch): { leading: string; trailing: string } { + const orig = studyXml.slice(slot.absInnerStart, slot.absInnerEnd); + return { + leading: orig.match(/^(\s*)/)?.[1] ?? "", + trailing: orig.match(/(\s*)$/)?.[1] ?? "", + }; +} + +/** + * Split one string across N slots using the Study's original line lengths as + * weights (used when the Bible has one segment but Study has several lines). + * Preserves each slot's leading/trailing whitespace (poetry tabs). + */ +function splitTextByStudyWeights( + studyXml: string, + proseSlots: ContentMatch[], + text: string +): string[] { + if (proseSlots.length === 0) return []; + if (proseSlots.length === 1) { + const { leading, trailing } = slotWhitespace(studyXml, proseSlots[0]); + const body = /^\s/.test(text) ? 
text : text.trim(); + return [leading + body + trailing]; + } + + const weights = proseSlots.map((c) => { + const len = studyXml.slice(c.absInnerStart, c.absInnerEnd).trim().length; + return len > 0 ? len : 1; + }); + const totalWeight = weights.reduce((a, b) => a + b, 0); + + const parts: string[] = []; + let pos = 0; + for (let i = 0; i < proseSlots.length; i++) { + const { leading, trailing } = slotWhitespace(studyXml, proseSlots[i]); + if (i === proseSlots.length - 1) { + const body = text.slice(pos).trim(); + parts.push(leading + body + trailing); + break; + } + const share = Math.max(1, Math.round((weights[i] / totalWeight) * text.length)); + let end = Math.min(text.length, pos + share); + if (end < text.length) { + const nextSpace = text.indexOf(" ", end); + if (nextSpace !== -1 && nextSpace - pos < share * 1.5) { + end = nextSpace + 1; + } + } + const body = text.slice(pos, end).trim(); + parts.push(leading + body + trailing); + pos = end; + } + return parts; +} + +/** + * Map Bible prose segments onto Study prose `` slots in order. + */ +function mapProseSegmentsToSlots( + studyXml: string, + proseSlots: ContentMatch[], + bibleSegments: string[], + bibleFullText: string +): string[] { + const n = proseSlots.length; + if (n === 0) return []; + + const prose = bibleSegments.filter((s) => s.trim().length > 0); + const source = prose.length > 0 ? 
prose : [bibleFullText]; + const m = source.length; + + if (m === n) { + return source.map((segment, i) => { + if (/^\s/.test(segment)) return segment; + const { leading, trailing } = slotWhitespace(studyXml, proseSlots[i]); + return leading + segment.trim() + trailing; + }); + } + if (m === 1) return splitTextByStudyWeights(studyXml, proseSlots, source[0]); + if (m > n) { + const out = source.slice(0, n - 1); + out.push(source.slice(n - 1).join(" ")); + return out; + } + // m > 1 && m < n — map available lines, leave extra Study slots empty only when + // we cannot use Bible paragraph layout (caller should prefer span layout). + const out = [...source]; + while (out.length < n) out.push(""); + return out; +} + +/** + * Replace the Study verse's full paragraph span with the Bible's paragraph XML + * (used when poetry layout differs, e.g. consolidated q1 CSR vs alternating q1/q2). + */ +function applyBibleVerseSpanLayout( + studyParagraphStart: number, + studyParagraphEnd: number, + bibleEntry: VerseEntry, + splices: Map +): void { + if (!bibleEntry.verseSpanXml || bibleEntry.verseSpanXml.length === 0) return; + upsertSplice(splices, { + absStart: studyParagraphStart, + absEnd: studyParagraphEnd, + replacement: bibleEntry.verseSpanXml, + }); +} + +/** + * Replace `` in Study prose slots, keeping every CharacterStyleRange + * wrapper and attribute from the Study file. + */ +function applyDistributedContentReplacement( + studyXml: string, + proseContents: ContentMatch[], + bibleEntry: VerseEntry, + splices: Map, + bibleSegmentSource?: string[] +): void { + const proseSlots: ContentMatch[] = []; + for (const c of proseContents) { + if (isProseContentSlot(studyXml, c)) proseSlots.push(c); + } + if (proseSlots.length === 0) return; + + const bibleSegments = + bibleSegmentSource ?? + (bibleEntry.proseSegments.length > 0 ? 
bibleEntry.proseSegments : bibleEntry.segments); + + const mapped = mapProseSegmentsToSlots( + studyXml, + proseSlots, + bibleSegments, + bibleEntry.text + ); + + for (let i = 0; i < proseSlots.length; i++) { + upsertSplice(splices, { + absStart: proseSlots[i].absInnerStart, + absEnd: proseSlots[i].absInnerEnd, + replacement: xmlEscape(mapped[i] ?? ""), + }); + } +} + +/** + * Multi-paragraph verses (poetry): map Bible text per paragraph when the + * paragraph-style sequence matches between Study and Bible. + */ +function applyParagraphAlignedReplacement( + studyXml: string, + studyChunks: ParagraphChunkState[], + bibleEntry: VerseEntry, + splices: Map +): boolean { + const bibleChunks = bibleEntry.paragraphChunks; + if (studyChunks.length !== bibleChunks.length || studyChunks.length < 2) { + return false; + } + + for (let i = 0; i < studyChunks.length; i++) { + if ( + normalizeParagraphStyle(studyChunks[i].paragraphStyle) !== + normalizeParagraphStyle(bibleChunks[i].paragraphStyle) + ) { + return false; + } + applyDistributedContentReplacement( + studyXml, + studyChunks[i].proseContents, + bibleEntry, + splices, + bibleChunks[i].proseSegments + ); + } + return true; +} + +/** + * When CSR skeletons match, map Bible prose onto Study prose slots 1:1 while + * preserving Study CharacterStyleRange tags (Tracking, nd, source serif, etc.). 
+ */ +function applyStructurePreservingReplacement( + studyXml: string, + studyProseContents: ContentMatch[], + bibleEntry: VerseEntry, + splices: Map +): void { + applyDistributedContentReplacement( + studyXml, + studyProseContents, + bibleEntry, + splices, + bibleEntry.proseSegments + ); +} + +export function applyBibleSwapToStudyXml( + studyStoryXml: string, + bibleIndex: BibleVerseIndex +): { xml: string; stats: SwapStats } { + const stats: SwapStats = { + replacedCount: 0, + blockSwapCount: 0, + contentOnlyCount: 0, + skippedPsa: 0, + missingFromBible: [], + extraInBibleAppended: [], + }; + + const splicesByStart = new Map(); + const studyVersesInChapter = new Map>(); + /** Last `[No character style]` Content in each chapter (for versification extras). */ + const lastNoStyleByChapter = new Map(); + + const chapterKey = (book: string, chapter: string) => `${book}|${chapter}`; + + walkStory(studyStoryXml, { + onBook: () => { + /* chapter tracking resets on chapter marker */ + }, + onChapter: (book, chapter) => { + if (!studyVersesInChapter.has(chapterKey(book, chapter))) { + studyVersesInChapter.set(chapterKey(book, chapter), new Set()); + } + }, + onEnterPsa: () => { + stats.skippedPsa++; + }, + onVerseClose: (studyVerse) => { + const { book, chapter, verse } = studyVerse; + if (!book || !chapter || !verse) return; + + if (book === PSA_BOOK_CODE) { + stats.skippedPsa++; + return; + } + + const ck = chapterKey(book, chapter); + if (!studyVersesInChapter.has(ck)) { + studyVersesInChapter.set(ck, new Set()); + } + studyVersesInChapter.get(ck)!.add(verse); + + if (studyVerse.noStyleContents.length > 0) { + const last = studyVerse.noStyleContents[studyVerse.noStyleContents.length - 1]; + lastNoStyleByChapter.set(ck, last); + } + + const bibleEntry = bibleIndex.get(verseKey(book, chapter, verse)); + if (!bibleEntry) { + stats.missingFromBible.push({ book, chapter, verse }); + return; + } + + const studyParaSig = buildParagraphSig(studyVerse.paragraphChunks); + const 
useBibleSpanLayout = shouldUseBibleVerseSpanLayout( + { + paragraphSig: studyParaSig, + singleParagraph: studyVerse.singleParagraph, + paragraphChunks: studyVerse.paragraphChunks, + }, + bibleEntry + ); + const canParagraphAlign = + !studyVerse.singleParagraph && + studyParaSig.length > 0 && + studyParaSig === bibleEntry.paragraphSig && + studyVerse.paragraphChunks.length === bibleEntry.paragraphChunks.length; + + const canStructurePreserve = + studyVerse.structureSig === bibleEntry.structureSig && + studyVerse.proseContents.length > 0; + + if (useBibleSpanLayout) { + applyBibleVerseSpanLayout( + studyVerse.paragraphStart, + studyVerse.paragraphEnd, + bibleEntry, + splicesByStart + ); + stats.blockSwapCount++; + } else if ( + canParagraphAlign && + applyParagraphAlignedReplacement( + studyStoryXml, + studyVerse.paragraphChunks, + bibleEntry, + splicesByStart + ) + ) { + stats.blockSwapCount++; + } else if (canStructurePreserve) { + applyStructurePreservingReplacement( + studyStoryXml, + studyVerse.proseContents, + bibleEntry, + splicesByStart + ); + stats.blockSwapCount++; + } else { + applyDistributedContentReplacement( + studyStoryXml, + studyVerse.proseContents.length > 0 + ? studyVerse.proseContents + : studyVerse.noStyleContents, + bibleEntry, + splicesByStart + ); + stats.contentOnlyCount++; + } + stats.replacedCount++; + }, + }); + + // Append Bible verses not present in Study (versification extras). + // The anchor may fall inside a block-swap splice; merge into that splice + // instead of creating a nested splice (which would be skipped on apply). 
+ const findSpliceCovering = (innerPos: number): Splice | undefined => { + for (const sp of splicesByStart.values()) { + if (sp.absStart <= innerPos && innerPos < sp.absEnd) return sp; + } + return undefined; + }; + + for (const [key, bibleEntry] of bibleIndex.entries()) { + const [book, chapter, verse] = key.split("|"); + if (book === PSA_BOOK_CODE) continue; + const ck = chapterKey(book, chapter); + const studySet = studyVersesInChapter.get(ck); + if (studySet?.has(verse)) continue; + + const anchor = lastNoStyleByChapter.get(ck); + if (!anchor) continue; + + const appendText = xmlEscape(bibleEntry.text); + const covering = findSpliceCovering(anchor.absInnerStart); + if (covering) { + // Anchor sits inside a block-swap region — append into the last + // node of the replacement XML, not after the block. + const lastOpen = covering.replacement.lastIndexOf(""); + const lastClose = covering.replacement.lastIndexOf(""); + if (lastOpen !== -1 && lastClose > lastOpen) { + const inner = covering.replacement.slice(lastOpen + "".length, lastClose); + covering.replacement = + covering.replacement.slice(0, lastOpen + "".length) + + (inner.trim() ? `${inner} ${appendText}` : appendText) + + covering.replacement.slice(lastClose); + } else { + covering.replacement = `${covering.replacement} ${appendText}`; + } + } else { + const existing = splicesByStart.get(anchor.absInnerStart); + if (existing) { + existing.replacement = `${existing.replacement} ${appendText}`; + } else { + const original = studyStoryXml.slice(anchor.absInnerStart, anchor.absInnerEnd); + upsertSplice(splicesByStart, { + absStart: anchor.absInnerStart, + absEnd: anchor.absInnerEnd, + replacement: original.trim() ? 
`${original} ${appendText}` : appendText, + }); + } + } + stats.extraInBibleAppended.push({ book, chapter, verse }); + } + + const splices = Array.from(splicesByStart.values()).sort((a, b) => a.absStart - b.absStart); + const parts: string[] = []; + let cursor = 0; + for (const sp of splices) { + if (sp.absStart < cursor) continue; + if (sp.absStart > cursor) { + parts.push(studyStoryXml.slice(cursor, sp.absStart)); + } + parts.push(sp.replacement); + cursor = sp.absEnd; + } + if (cursor < studyStoryXml.length) { + parts.push(studyStoryXml.slice(cursor)); + } + + return { xml: parts.join(""), stats }; +} From f09232e7fca905232792b77dd2a3e5c1a6c38901 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= Date: Fri, 29 May 2026 15:44:39 +0200 Subject: [PATCH 02/12] fix: restore audio export merge after milestone export work --- src/exportHandler/audioExporter.ts | 23 +++++++++++++++-------- src/exportHandler/exportHandler.ts | 2 +- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/exportHandler/audioExporter.ts b/src/exportHandler/audioExporter.ts index 388daae45..aa27334ce 100644 --- a/src/exportHandler/audioExporter.ts +++ b/src/exportHandler/audioExporter.ts @@ -703,12 +703,26 @@ export async function exportAudioAttachments( const dialogueMap = computeDialogueLineNumbers(notebook.cells); debug(`Processing notebook with ${notebook.cells.length} cells`); + // Build milestone folder mapping: cellId -> milestone folder name + const cellMilestoneFolder = buildCellMilestoneMap(notebook.cells); const milestoneFilter = options?.selectedMilestonesByFile?.[file.fsPath]; let currentMilestoneIndex = -1; + // Count audio cells for per-book progress. Paratext and + // milestone cells (e.g. chapter headers, intros) are not + // recording targets, so they're filtered out by + // `isExportableCell` — they would otherwise show up under + // "no audio recorded" purely as noise. 
+ const audioCells: Array<{ cell: any; cellId: string; pick: AudioPick; }> = []; for (const cell of notebook.cells) { currentMilestoneIndex = advanceMilestoneIndexForCell(cell, currentMilestoneIndex); - + if ( + milestoneFilter && + milestoneFilter.length > 0 && + !milestoneFilter.includes(effectiveMilestoneIndex(currentMilestoneIndex)) + ) { + continue; + } if (!isExportableCell(cell)) continue; const cellId: string | undefined = cell?.metadata?.id; if (!cellId) continue; @@ -723,13 +737,6 @@ export async function exportAudioAttachments( // entirely rather than reporting a row they can't act on. continue; } - if ( - milestoneFilter && - milestoneFilter.length > 0 && - !milestoneFilter.includes(effectiveMilestoneIndex(currentMilestoneIndex)) - ) { - continue; - } if (outcome.state === "selection-missing") { // The user explicitly chose a take but the attachment // is gone (deleted, missing, or unknown). We refuse to diff --git a/src/exportHandler/exportHandler.ts b/src/exportHandler/exportHandler.ts index 5596848ae..42db3ab21 100644 --- a/src/exportHandler/exportHandler.ts +++ b/src/exportHandler/exportHandler.ts @@ -1833,7 +1833,7 @@ export async function exportCodexContent( if (includeAudio) { const { exportAudioAttachments } = await import("./audioExporter"); exportPromises.push( - exportAudioAttachments(audioPath, filesToExport, { + exportAudioAttachments(audioPath, filesToExport, childReporter, { includeTimestamps: options?.includeTimestamps, selectedMilestonesByFile: options?.selectedMilestonesByFile, }) From 3224245f26005357450bbc96bbb1a8af71107789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= Date: Fri, 29 May 2026 17:15:42 +0200 Subject: [PATCH 03/12] export screen fix --- src/projectManager/projectExportView.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index b34fb0774..1d514ca21 100644 --- 
a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -1351,8 +1351,8 @@ function getWebviewContent( - -
+ +
@@ -2673,9 +2673,8 @@ function getWebviewContent( exportState.started = true; document.body.classList.add('exporting'); document.querySelectorAll('.step-panel').forEach(p => p.classList.remove('active')); - const step4 = document.getElementById('step4'); - if (step4) step4.classList.add('active'); - currentStep = 4; + const stepExporting = document.getElementById('stepExporting'); + if (stepExporting) stepExporting.classList.add('active'); setStageState('preparing', 'active'); exportState.stageIndex = 0; } From 1c57e4a3600bc097459772d45794160189ac846e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= Date: Fri, 29 May 2026 17:48:33 +0200 Subject: [PATCH 04/12] bible swap fix --- .tmp-gen3-swap-snippet.xml | 72 - .../utils/bibleSwapCompatibility.ts | 249 ---- .../importers/biblica/bibleSwap.test.ts | 495 ------- .../importers/biblica/bibleSwap.ts | 1160 ----------------- 4 files changed, 1976 deletions(-) delete mode 100644 .tmp-gen3-swap-snippet.xml delete mode 100644 src/projectManager/utils/bibleSwapCompatibility.ts delete mode 100644 webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts delete mode 100644 webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts diff --git a/.tmp-gen3-swap-snippet.xml b/.tmp-gen3-swap-snippet.xml deleted file mode 100644 index a06dd67c3..000000000 --- a/.tmp-gen3-swap-snippet.xml +++ /dev/null @@ -1,72 +0,0 @@ ->14 - - - - - - 14 - - - Então, Deus disse: ― Haja lumi­nares no firma­mento do céu para fazer sepa­ração entre o dia e a noite. Sejam eles sinais para mar­car tempos deter­mi­nados, dias e anos, - - - 14 - - - - - - 15 - - - - - - 15 - - - e sirvam de lumi­nares no firma­mento do céu para ilu­mi­nar a terra. - - - E assim foi. - - - - - - 15 - - - - - - 16 - - - - - - 16 - - - Deus fez os dois grandes lumi­nares: o maior para gover­nar o dia e o menor para gover­nar a noite; fez também as estrelas. 
- - - 16 - - - - - - 17 - - - - - - 17 - - - Deus os colo­cou no firma­mento do céu para ilu­mi­nar a terra, - - ; -} - -/** - * Read a Bible IDML file's bytes, unzip it, find the largest `Stories/*.xml`, - * and build a verse index. - */ -export async function buildBibleIndexFromUri( - uri: vscode.Uri -): Promise { - const data = await vscode.workspace.fs.readFile(uri); - return buildBibleIndexFromBytes(data); -} - -export async function buildBibleIndexFromBytes( - data: Uint8Array -): Promise { - if (data.length < 4 || data[0] !== 0x50 || data[1] !== 0x4b) { - throw new Error( - "Selected file is not a valid IDML (ZIP) archive. Expected a .idml file." - ); - } - const zip = await JSZip.loadAsync(data); - const storyXml = await readLargestStoryXml(zip); - if (!storyXml) { - throw new Error( - "No Stories/*.xml entries found inside the IDML. The file may be empty or corrupted." - ); - } - return buildBibleVerseIndex(storyXml); -} - -/** - * Pull the largest XML file under `Stories/` from a loaded IDML ZIP and - * return its contents as a UTF-8 string. Per the analysis doc, the main - * Story XML is always the largest one in the folder. - */ -async function readLargestStoryXml(zip: JSZip): Promise { - let bestKey: string | null = null; - let bestSize = -1; - for (const name of Object.keys(zip.files)) { - if (!name.startsWith("Stories/") || !name.endsWith(".xml")) continue; - const file = zip.files[name]; - if (file.dir) continue; - // `_data.uncompressedSize` isn't part of the public types but it's - // available on the internal JSZip object. Fall back to reading the - // file if not present. - const size = - (file as unknown as { _data?: { uncompressedSize?: number } })._data - ?.uncompressedSize ?? -1; - if (size > bestSize) { - bestSize = size; - bestKey = name; - } - } - if (!bestKey) { - // Slow fallback: read every Stories XML, take the longest text. 
- let bestText: string | null = null; - for (const name of Object.keys(zip.files)) { - if (!name.startsWith("Stories/") || !name.endsWith(".xml")) continue; - const file = zip.file(name); - if (!file) continue; - const text = await file.async("text"); - if (!bestText || text.length > bestText.length) { - bestText = text; - } - } - return bestText; - } - const file = zip.file(bestKey); - if (!file) return null; - return file.async("text"); -} - -/** - * Walk the selected `.codex` notebooks: load each one's original IDML and - * extract its verse set. Returns the aggregated set as a verse-index-style - * map (innerXml/shape are unused on the Study side here, just need the keys). - */ -async function buildStudyVerseSetFromCodexFiles( - filesToExport: string[] -): Promise>> { - // Result: book -> set of "chapter|verse" - const result = new Map>(); - const workspaceFolders = vscode.workspace.workspaceFolders; - if (!workspaceFolders || workspaceFolders.length === 0) return result; - const workspaceFolder = workspaceFolders[0]; - - for (const filePath of filesToExport) { - try { - const uri = vscode.Uri.file(filePath); - const notebook = await readCodexNotebookFromUri(uri); - const meta = notebook.metadata as unknown as - | { originalFileName?: string; originalName?: string } - | undefined; - const originalFileName = - meta?.originalFileName || - meta?.originalName || - `${basename(filePath).split(".")[0]}.idml`; - const originalUri = await resolveOriginalFileUri( - workspaceFolder, - originalFileName - ); - const data = await vscode.workspace.fs.readFile(originalUri); - if (data.length < 4 || data[0] !== 0x50 || data[1] !== 0x4b) continue; - const zip = await JSZip.loadAsync(data); - const storyXml = await readLargestStoryXml(zip); - if (!storyXml) continue; - const studyIndex = buildBibleVerseIndex(storyXml); - for (const key of listVerseKeys(studyIndex)) { - const [book, chapter, verse] = key.split("|"); - let set = result.get(book); - if (!set) { - set = new Set(); - 
result.set(book, set); - } - set.add(`${chapter}|${verse}`); - } - } catch (err) { - console.warn( - `[BibleSwapCompatibility] Could not read original IDML for ${filePath}:`, - err - ); - } - } - return result; -} - -/** - * Compute a compatibility report between a chosen Bible IDML and the set of - * `.codex` files the user has selected for export. - */ -export async function analyzeBibleSwapCompatibility( - bibleIdmlPath: string, - filesToExport: string[] -): Promise { - const bibleUri = vscode.Uri.file(bibleIdmlPath); - const bibleFileName = basename(bibleIdmlPath); - - const [bibleIndex, studyByBook] = await Promise.all([ - buildBibleIndexFromUri(bibleUri), - buildStudyVerseSetFromCodexFiles(filesToExport), - ]); - - // Pre-bucket the Bible index by book for cheap lookups. - const bibleByBook = new Map>(); // book -> "chapter|verse" - for (const key of listVerseKeys(bibleIndex)) { - const [book, chapter, verse] = key.split("|"); - let set = bibleByBook.get(book); - if (!set) { - set = new Set(); - bibleByBook.set(book, set); - } - set.add(`${chapter}|${verse}`); - } - - let booksExpected = 0; - let booksFound = 0; - const chapterSetExpected = new Set(); // "book|chapter" - const chapterSetFound = new Set(); - let versesExpected = 0; - let versesMatched = 0; - let psaSkipped = false; - const perBookMismatches: Array<{ book: string; missing: number; extra: number }> = []; - - for (const [book, studyVerses] of studyByBook.entries()) { - booksExpected++; - if (SKIPPED_BOOK_CODES.has(book)) { - psaSkipped = true; - // PSA is excluded from the matched/expected totals so the - // "% match" number isn't artificially dragged down by a book - // we deliberately don't swap. 
- booksExpected--; - continue; - } - - const bibleVerses = bibleByBook.get(book); - if (bibleVerses && bibleVerses.size > 0) booksFound++; - - let missing = 0; - for (const cv of studyVerses) { - versesExpected++; - const [chapter] = cv.split("|"); - chapterSetExpected.add(`${book}|${chapter}`); - if (bibleVerses && bibleVerses.has(cv)) { - versesMatched++; - chapterSetFound.add(`${book}|${chapter}`); - } else { - missing++; - } - } - let extra = 0; - if (bibleVerses) { - for (const cv of bibleVerses) { - if (!studyVerses.has(cv)) extra++; - } - } - if (missing > 0 || extra > 0) { - perBookMismatches.push({ book, missing, extra }); - } - } - - perBookMismatches.sort((a, b) => b.missing + b.extra - (a.missing + a.extra)); - - return { - bibleFileName, - booksFound, - booksExpected, - chaptersFound: chapterSetFound.size, - chaptersExpected: chapterSetExpected.size, - versesMatched, - versesExpected, - psaSkipped, - perBookMismatches, - }; -} diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts deleted file mode 100644 index e77a04882..000000000 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.test.ts +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Tests for Bible Swap (hybrid block-swap + content-only fallback). 
- */ - -import { describe, it, expect } from "vitest"; -import { - buildBibleVerseIndex, - applyBibleSwapToStudyXml, - verseKey, -} from "./bibleSwap"; - -const wrapStory = (paragraphsXml: string) => - `` + - `` + - `` + - paragraphsXml + - ``; - -const bookMarker = (code: string) => - `` + - `` + - `${code}`; - -const noStyleCsr = (inner: string) => - `` + - `${inner}`; - -const verseMarkerCsr = (n: string) => - `` + - `${n}`; - -const chapterMarkerCsr = (n: string) => - `` + - `${n}:`; - -const chapterParagraph = (n: string) => - `` + - chapterMarkerCsr(n) + - ``; - -const simpleVerseParagraph = ( - style: string, - chapter: string | null, - verse: string, - body: string -) => - `` + - (chapter ? chapterMarkerCsr(chapter) : "") + - verseMarkerCsr(verse) + - noStyleCsr(body) + - verseMarkerCsr(verse) + - ``; - -/** Study/Bible share identical CSR skeleton in one paragraph → block swap. */ -const identicalSkeletonVerse = (bodyStudy: string, bodyBible: string) => { - const versePara = (body: string) => - `` + - chapterMarkerCsr("1") + - verseMarkerCsr("1") + - noStyleCsr(body) + - verseMarkerCsr("1") + - ``; - return { - study: bookMarker("GEN") + versePara(bodyStudy), - bible: bookMarker("GEN") + versePara(bodyBible), - }; -}; - -describe("buildBibleVerseIndex", () => { - it("indexes verse text and structure signature", () => { - const xml = wrapStory( - bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "No princípio") - ); - const idx = buildBibleVerseIndex(xml); - const v = idx.get(verseKey("GEN", "1", "1")); - expect(v?.text).toContain("No princípio"); - expect(v?.structureSig).toContain("meta:v"); - expect(v?.blockXml).toContain("No princípio"); - }); - - it("extracts book code from polluted meta:bk content", () => { - const polluted = - bookMarker("[PT] GEN") + simpleVerseParagraph("text%3ap", "1", "1", "x"); - const idx = buildBibleVerseIndex(wrapStory(polluted)); - expect(idx.get(verseKey("GEN", "1", "1"))).toBeDefined(); - }); - - 
it("concatenates cross-paragraph verse text (GEN 1:3 pattern)", () => { - const paraOpen = - `` + - verseMarkerCsr("3") + - noStyleCsr("Deus disse:") + - ``; - const paraMid = - `` + - noStyleCsr("Haja luz.") + - ``; - const paraClose = - `` + - noStyleCsr("E houve luz.") + - verseMarkerCsr("3") + - ``; - const xml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + paraOpen + paraMid + paraClose); - const idx = buildBibleVerseIndex(xml); - const v = idx.get(verseKey("GEN", "1", "3"))!; - expect(v.text).toContain("Deus disse:"); - expect(v.text).toContain("Haja luz."); - expect(v.text).toContain("E houve luz."); - expect(v.singleParagraph).toBe(false); - }); -}); - -describe("applyBibleSwapToStudyXml — structure-preserving swap", () => { - it("replaces verse text when structure signatures match without replacing Study CSRs", () => { - const { study, bible } = identicalSkeletonVerse("ENGLISH", "PORTUGUÊS"); - const idx = buildBibleVerseIndex(wrapStory(bible)); - const entry = idx.get(verseKey("GEN", "1", "1")); - expect(entry?.structureSig).toBeTruthy(); - - const { xml, stats } = applyBibleSwapToStudyXml(wrapStory(study), idx); - - expect(stats.replacedCount).toBe(1); - expect(stats.blockSwapCount).toBe(1); - expect(stats.contentOnlyCount).toBe(0); - expect(xml).toContain("PORTUGUÊS"); - expect(xml).not.toContain("ENGLISH"); - }); - - it("preserves Study CharacterStyleRange attributes such as Tracking", () => { - const verseWithTracking = (body: string) => - bookMarker("GEN") + - `` + - chapterMarkerCsr("2") + - verseMarkerCsr("10") + - `` + - `${body}` + - verseMarkerCsr("10") + - ``; - - const studyXml = wrapStory(verseWithTracking("A river watered the garden.")); - const bibleXml = wrapStory(verseWithTracking("Um rio fluía do Éden.")); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.blockSwapCount).toBe(1); - expect(xml).toContain('Tracking="-15"'); - expect(xml).toContain("Um 
rio fluía"); - expect(xml).not.toContain("A river watered"); - }); - - it("preserves styled divine-name runs (nd) while swapping text", () => { - const verseWithNd = (lord: string, rest: string) => - bookMarker("GEN") + - `` + - chapterMarkerCsr("2") + - verseMarkerCsr("15") + - `` + - `The ` + - `` + - `${lord}` + - `` + - ` ${rest}` + - verseMarkerCsr("15") + - ``; - - const studyXml = wrapStory( - verseWithNd("Lord", "God put the man in the Garden of Eden.") - ); - const bibleXml = wrapStory( - verseWithNd("SENHOR", "Deus tomou o homem e o colocou no jardim do Éden.") - ); - const idx = buildBibleVerseIndex(bibleXml); - const { xml } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(xml).toContain('AppliedCharacterStyle="CharacterStyle/nd"'); - expect(xml).toContain("SENHOR"); - expect(xml).toContain("Deus tomou o homem"); - expect(xml).not.toContain("Lord"); - expect(xml).not.toContain("God put the man"); - }); -}); - -describe("applyBibleSwapToStudyXml — content-only fallback", () => { - it("uses content-only when Study and Bible verse structures differ", () => { - // Study: single paragraph, all of verse 3 in one block - const studyPara = - `` + - verseMarkerCsr("3") + - noStyleCsr("God said, 'Let there be light.' 
And there was light.") + - verseMarkerCsr("3") + - ``; - // Bible: three paragraphs (quoted speech split) — different structure - const bibleParas = - `` + - verseMarkerCsr("3") + - noStyleCsr("Deus disse:") + - `` + - `` + - noStyleCsr("Haja luz.") + - `` + - `` + - noStyleCsr("E houve luz.") + - verseMarkerCsr("3") + - ``; - - const studyXml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + studyPara); - const bibleXml = wrapStory(bookMarker("GEN") + chapterParagraph("1") + bibleParas); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.replacedCount).toBe(1); - expect(stats.blockSwapCount).toBe(0); - expect(stats.contentOnlyCount).toBe(1); - expect(xml).toContain("Deus disse:"); - expect(xml).toContain("Haja luz."); - expect(xml).not.toContain("God said"); - // Study still has a single text%3ap paragraph for verse 3 (Bible's extra paras are not copied) - const studyVerse3Paras = (xml.match(/ParagraphStyle\/text%3ap">/g) || []).length; - expect(studyVerse3Paras).toBeGreaterThanOrEqual(1); - }); - - it("distributes multi-line poetry across Study paragraph slots instead of clearing them", () => { - const poetryVerse = (lines: string[], closingVerse = "23") => { - const speech = lines[0]; - const poetryLines = lines.slice(1); - let xml = - bookMarker("GEN") + - chapterParagraph("2") + - `` + - verseMarkerCsr("23") + - noStyleCsr(speech) + - ``; - for (const line of poetryLines) { - xml += - `` + - noStyleCsr(line) + - ``; - } - xml += - `` + - verseMarkerCsr(closingVerse) + - ``; - return xml; - }; - - const studyLines = [ - "The man said,", - "\t\t'Her bones and flesh!'", - "\t\tShe shall be called 'woman',", - "\t\tfor she was taken out of man.'", - ]; - const bibleLines = [ - "Então, o homem disse:", - "\t\t\"Esta, por fim, é osso dos meus ossos", - "\t\te carne da minha carne! 
Ela será chamada 'mulher',", - "\t\tporque do homem foi tirada\".", - ]; - - const studyXml = wrapStory(poetryVerse(studyLines)); - const bibleXml = wrapStory(poetryVerse(bibleLines)); - const idx = buildBibleVerseIndex(bibleXml); - const entry = idx.get(verseKey("GEN", "2", "23"))!; - expect(entry.segments.filter((s) => s.trim()).length).toBe(4); - - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.replacedCount).toBe(1); - expect(xml).toContain("Então, o homem disse:"); - expect(xml).toContain("Esta, por fim"); - expect(xml).toContain("Ela será chamada"); - expect(xml).toContain("porque do homem foi tirada"); - expect(xml).not.toContain("The man said"); - expect(xml).not.toContain("Her bones"); - - // Each poetry paragraph should still have prose (no empty ¶ slots). - const q1Contents = [...xml.matchAll(/text%3aq1">[\s\S]*?([^<]*)<\/Content>/g)].map( - (m) => m[1] - ); - expect(q1Contents.length).toBe(3); - expect(q1Contents.every((c) => c.trim().length > 0)).toBe(true); - }); - - it("uses paragraph-aligned mapping for multi-paragraph poetry (GEN 3:14 pattern)", () => { - const poetryVerse14 = (lang: "en" | "pt") => { - const intro = - lang === "en" - ? "So the Lord God spoke to the snake. He said, 'Because you have done this," - : "Então, o Senhor Deus declarou à serpente:"; - const q1a = lang === "en" ? "\t\t'You are set apart from all livestock" : "\t\t\"Por ter feito isso,"; - const q2a = - lang === "en" - ? "\t\tand all wild animals." - : "\t\tmaldita é você entre todos os animais de rebanho"; - const q2b = lang === "en" ? "\t\tI am putting a curse on you." : "\t\te entre todos os animais do campo!"; - const q1b = lang === "en" ? "\t\tYou will crawl on your belly." : "\t\tVocê rastejará sobre o seu ventre"; - const q2c = - lang === "en" ? "\t\tall the days of your life." 
: "\t\te comerá pó todos os dias da sua vida."; - - return ( - bookMarker("GEN") + - `` + - chapterMarkerCsr("3") + - verseMarkerCsr("14") + - noStyleCsr(intro) + - `` + - `` + - `
` + - `
` + - `` + - noStyleCsr(q1a) + - `` + - `` + - noStyleCsr(q2a) + - noStyleCsr(q2b) + - `` + - `` + - noStyleCsr(q1b) + - `` + - `` + - noStyleCsr(q2c) + - verseMarkerCsr("14") + - `` - ); - }; - - const studyXml = wrapStory(poetryVerse14("en")); - const bibleXml = wrapStory(poetryVerse14("pt")); - const idx = buildBibleVerseIndex(bibleXml); - const entry = idx.get(verseKey("GEN", "3", "14"))!; - expect(entry.paragraphSig).toContain("text:p"); - expect(entry.paragraphSig).toContain("text:q1"); - expect(entry.paragraphChunks.length).toBeGreaterThan(2); - - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - expect(stats.blockSwapCount).toBe(1); - expect(xml).toContain("Então, o Senhor"); - expect(xml).toContain("maldita é você"); - expect(xml).toContain("Você rastejará"); - expect(xml).not.toContain("You are set apart"); - expect(xml).not.toContain("all wild animals"); - }); - - it("uses Bible paragraph layout when Study alternates q1/q2 but Bible consolidates lines (GEN 8:22 pattern)", () => { - const studyVerse22 = - bookMarker("GEN") + - `` + - chapterMarkerCsr("8") + - verseMarkerCsr("21") + - noStyleCsr("Verse twenty-one text.") + - verseMarkerCsr("21") + - `` + - `` + - `
` + - `
` + - `` + - verseMarkerCsr("22") + - noStyleCsr("'As long as the earth lasts,") + - `` + - `` + - noStyleCsr("\t\tthere will always be a time to plant") + - noStyleCsr("\t\tand a time to gather the crops.") + - `` + - `` + - noStyleCsr("\t\tAs long as the earth lasts,") + - `` + - `` + - noStyleCsr("\t\tthere will always be cold and heat.") + - `` + - `` + - noStyleCsr("\t\tThere will always be summer and winter,") + - `` + - `` + - noStyleCsr("\t\tday and night.'") + - verseMarkerCsr("22") + - ``; - - const bibleVerse22 = - bookMarker("GEN") + - `` + - chapterMarkerCsr("8") + - verseMarkerCsr("21") + - noStyleCsr("Versículo vinte e um.") + - verseMarkerCsr("21") + - `` + - `` + - `
` + - `
` + - `` + - verseMarkerCsr("22") + - `` + - `“Enquanto durar a terra,
` + - `\t\tjamais cessarão
` + - `\t\tplantio e colheita,
` + - `\t\tfrio e calor,
` + - `\t\tverão e inverno,
` + - `\t\tdia e noite”.` + - `
` + - verseMarkerCsr("22") + - `
`; - - const studyXml = wrapStory(studyVerse22); - const bibleXml = wrapStory(bibleVerse22); - const idx = buildBibleVerseIndex(bibleXml); - const entry = idx.get(verseKey("GEN", "8", "22"))!; - expect(entry.paragraphChunks.length).toBeGreaterThanOrEqual(1); - - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - expect(stats.replacedCount).toBeGreaterThanOrEqual(1); - expect(stats.blockSwapCount).toBeGreaterThanOrEqual(1); - expect(xml).toContain("Enquanto durar a terra"); - expect(xml).toContain("jamais cessarão"); - expect(xml).toContain("plantio e colheita"); - expect(xml).not.toContain("As long as the earth lasts"); - expect(xml).not.toContain("there will always be cold"); - - const q2Count = (xml.match(/text%3aq2/g) || []).length; - expect(q2Count).toBe(0); - }); - - it("handles polluted meta:bk and still swaps verses", () => { - const studyXml = wrapStory( - `` + - `` + - `[PT] GEN` + - simpleVerseParagraph("text%3ap", "1", "1", "ENGLISH") - ); - const bibleXml = wrapStory( - bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "PORTUGUÊS") - ); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.missingFromBible).toEqual([]); - expect(stats.replacedCount).toBe(1); - expect(xml).toContain("PORTUGUÊS"); - expect(xml).not.toContain("ENGLISH"); - }); -}); - -describe("applyBibleSwapToStudyXml — PSA & extras", () => { - it("does not modify PSA verses", () => { - const studyXml = wrapStory( - bookMarker("PSA") + simpleVerseParagraph("text%3ap", "1", "1", "Blessed") - ); - const bibleXml = wrapStory( - bookMarker("PSA") + simpleVerseParagraph("text%3ap", "1", "1", "Bem-aventurado") - ); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.replacedCount).toBe(0); - expect(stats.skippedPsa).toBeGreaterThan(0); - expect(xml).toContain("Blessed"); - expect(xml).not.toContain("Bem-aventurado"); 
- }); - - it("appends extra Bible verses at end of chapter", () => { - const studyXml = wrapStory( - bookMarker("GEN") + - simpleVerseParagraph("text%3ap", "1", "1", "verse one") - ); - const bibleXml = wrapStory( - bookMarker("GEN") + - simpleVerseParagraph("text%3ap", "1", "1", "TRANSLATED 1") + - simpleVerseParagraph("text%3ap", null, "2", "EXTRA 2") - ); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.extraInBibleAppended.length).toBe(1); - expect(xml).toContain("EXTRA 2"); - }); -}); - -describe("applyBibleSwapToStudyXml — intro preservation", () => { - it("does not touch intro paragraphs", () => { - const studyXml = wrapStory( - bookMarker("GEN") + - `` + - noStyleCsr("INTRO TEXT") + - `` + - simpleVerseParagraph("text%3ap", "1", "1", "REAL VERSE") - ); - const bibleXml = wrapStory( - bookMarker("GEN") + simpleVerseParagraph("text%3ap", "1", "1", "TRADUZIDO") - ); - const idx = buildBibleVerseIndex(bibleXml); - const { xml, stats } = applyBibleSwapToStudyXml(studyXml, idx); - - expect(stats.replacedCount).toBe(1); - expect(xml).toContain("INTRO TEXT"); - expect(xml).toContain("TRADUZIDO"); - expect(xml).not.toContain("REAL VERSE"); - }); -}); diff --git a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts b/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts deleted file mode 100644 index 1b622e41c..000000000 --- a/webviews/codex-webviews/src/NewSourceUploader/importers/biblica/bibleSwap.ts +++ /dev/null @@ -1,1160 +0,0 @@ -/** - * Bible Swap — replace Study Bible verse content with a translated Bible IDML. - * - * Hybrid strategy (per BIBLE_TEXT_REPLACEMENT_APPROACH.md): - * 1. **Bible paragraph-layout swap** — when poetry verses use different - * paragraph counts (e.g. Study alternates text:q1/q2 but Bible keeps all - * lines in one text:q1 CSR), replace the Study verse's full paragraph span - * with the Bible's so tabs and `
` line breaks stay intact. - * 2. **Paragraph-aligned swap** — when a multi-paragraph verse uses the same - * paragraph-style sequence in Study and Bible (e.g. text:p → b_poetry → - * text:q1 → text:q2), map Bible text per paragraph so poetry tabs/indents - * stay correct instead of flattening the whole verse. - * 3. **Structure-preserving swap** — when CSR skeletons match in a single block, - * replace only `` while keeping Study CharacterStyleRange tags. - * 4. **Content-only fallback** — otherwise distribute Bible text across Study - * prose slots (nd, no-style, source serif, …). - * - * Always skips: intro/meta/title paragraphs, Psalms (PSA), footnote content. - */ - -// --------------------------------------------------------------------------- -// Public types -// --------------------------------------------------------------------------- - -export type VerseKey = `${string}|${string}|${string}`; - -/** Prose text grouped by paragraph within a cross-paragraph verse. */ -export interface ParagraphChunkEntry { - paragraphStyle: string; - proseSegments: string[]; -} - -export interface VerseEntry { - /** Concatenated plain text from `[No character style]` content nodes. */ - text: string; - /** - * One entry per `` inside `[No character style]` for this verse, - * in document order. Preserves line breaks / poetry layout across paragraphs. - */ - segments: string[]; - /** - * Plain text from every prose CSR in the verse (nd, no-style, source serif, …), - * in document order — used when structure signatures match. - */ - proseSegments: string[]; - /** Structural fingerprint of the verse's CSR block (for block-swap matching). */ - structureSig: string; - /** Raw XML: all top-level CSRs from opening `meta:v` through closing `meta:v`. */ - blockXml: string; - /** True when the verse block lives entirely inside one ParagraphStyleRange. */ - singleParagraph: boolean; - /** Ordered paragraph styles from opening through closing `meta:v`. 
*/ - paragraphSig: string; - /** Per-paragraph prose segments (for paragraph-aligned swap). */ - paragraphChunks: ParagraphChunkEntry[]; - /** All `ParagraphStyleRange` XML from first through last paragraph of this verse. */ - verseSpanXml: string; -} - -export type BibleVerseIndex = Map; - -export interface SwapStats { - replacedCount: number; - blockSwapCount: number; - contentOnlyCount: number; - skippedPsa: number; - missingFromBible: Array<{ book: string; chapter: string; verse: string }>; - extraInBibleAppended: Array<{ book: string; chapter: string; verse: string }>; -} - -export const SKIPPED_BOOK_CODES: ReadonlySet = new Set(["PSA"]); - -const PSA_BOOK_CODE = "PSA"; - -const NO_STYLE_RE = - /CharacterStyle\/\$ID\/\[No character style\]|CharacterStyle\/\$ID\/%5BNo character style%5D/; - -// --------------------------------------------------------------------------- -// Keys & helpers -// --------------------------------------------------------------------------- - -export const verseKey = (book: string, chapter: string, verse: string): VerseKey => - `${book}|${chapter}|${verse}`; - -export const listVerseKeys = (index: BibleVerseIndex): VerseKey[] => - Array.from(index.keys()); - -const digitsOnly = (s: string): string => s.replace(/\D/g, ""); - -const xmlEscape = (s: string): string => - s - .replace(/&/g, "&") - .replace(//g, ">") - .replace(/"/g, """) - .replace(/'/g, "'"); - -// --------------------------------------------------------------------------- -// Depth-tracking XML iterators (no DOMParser — works in Node + webview) -// --------------------------------------------------------------------------- - -interface TopLevelElement { - fullStart: number; - fullEnd: number; - bodyStart: number; - bodyEnd: number; - appliedParagraphStyle?: string; - appliedCharacterStyle?: string; -} - -/** - * Iterate top-level `...` elements inside a region, - * correctly handling nesting (e.g. Footnote inside a CSR). 
- */ -function* iterateTopLevelElements( - xml: string, - regionStart: number, - regionEnd: number, - tagName: string -): IterableIterator { - const openRe = new RegExp(`<${tagName}\\b`, "g"); - openRe.lastIndex = regionStart; - - while (true) { - const openMatch = openRe.exec(xml); - if (!openMatch || openMatch.index >= regionEnd) break; - - const openStart = openMatch.index; - const openTagEnd = xml.indexOf(">", openStart); - if (openTagEnd === -1 || openTagEnd >= regionEnd) break; - - const closeTag = ``; - let depth = 1; - let pos = openTagEnd + 1; - - while (depth > 0 && pos < regionEnd) { - const nextOpen = xml.indexOf(`<${tagName}`, pos); - const nextClose = xml.indexOf(closeTag, pos); - if (nextClose === -1) break; - - if (nextOpen !== -1 && nextOpen < nextClose && nextOpen < regionEnd) { - depth++; - pos = nextOpen + tagName.length + 1; - } else { - depth--; - if (depth === 0) { - const fullEnd = nextClose + closeTag.length; - const openTag = xml.slice(openStart, openTagEnd + 1); - const styleMatch = - openTag.match(/AppliedParagraphStyle="([^"]+)"/) ?? - openTag.match(/AppliedCharacterStyle="([^"]+)"/); - yield { - fullStart: openStart, - fullEnd, - bodyStart: openTagEnd + 1, - bodyEnd: nextClose, - appliedParagraphStyle: openTag.includes("ParagraphStyleRange") - ? styleMatch?.[1] - : undefined, - appliedCharacterStyle: openTag.includes("CharacterStyleRange") - ? 
styleMatch?.[1] - : undefined, - }; - openRe.lastIndex = fullEnd; - break; - } - pos = nextClose + closeTag.length; - } - } - if (depth > 0) break; - } -} - -interface ParagraphInfo { - fullStart: number; - fullEnd: number; - bodyStart: number; - bodyEnd: number; - appliedParagraphStyle: string; -} - -function* iterateParagraphs(xml: string): IterableIterator { - for (const el of iterateTopLevelElements(xml, 0, xml.length, "ParagraphStyleRange")) { - if (el.appliedParagraphStyle) { - yield { - fullStart: el.fullStart, - fullEnd: el.fullEnd, - bodyStart: el.bodyStart, - bodyEnd: el.bodyEnd, - appliedParagraphStyle: el.appliedParagraphStyle, - }; - } - } -} - -interface CsrInfo { - fullStart: number; - fullEnd: number; - absBodyStart: number; - absBodyEnd: number; - appliedCharacterStyle: string; - xml: string; -} - -function* iterateCsrAbs( - xml: string, - regionStart: number, - regionEnd: number -): IterableIterator { - for (const el of iterateTopLevelElements(xml, regionStart, regionEnd, "CharacterStyleRange")) { - if (!el.appliedCharacterStyle) continue; - yield { - fullStart: el.fullStart, - fullEnd: el.fullEnd, - absBodyStart: el.bodyStart, - absBodyEnd: el.bodyEnd, - appliedCharacterStyle: el.appliedCharacterStyle, - xml: xml.slice(el.fullStart, el.fullEnd), - }; - } -} - -interface ContentMatch { - absStart: number; - absEnd: number; - absInnerStart: number; - absInnerEnd: number; -} - -function* iterateContentAbs( - xml: string, - regionStart: number, - regionEnd: number -): IterableIterator { - const fnProbe = xml.indexOf(" | null = null; - if (hasFootnote) { - footnoteRanges = []; - for (const fn of iterateTopLevelElements(xml, regionStart, regionEnd, "Footnote")) { - footnoteRanges.push([fn.fullStart, fn.fullEnd]); - } - } - - const re = /([\s\S]*?)<\/Content>/g; - re.lastIndex = regionStart; - let m: RegExpExecArray | null; - while ((m = re.exec(xml)) !== null) { - if (m.index >= regionEnd) break; - const absStart = m.index; - const absEnd = absStart + 
m[0].length; - if (absEnd > regionEnd) break; - if (footnoteRanges) { - let inFootnote = false; - for (const [s, e] of footnoteRanges) { - if (absStart >= s && absEnd <= e) { - inFootnote = true; - break; - } - } - if (inFootnote) continue; - } - yield { - absStart, - absEnd, - absInnerStart: absStart + "".length, - absInnerEnd: absEnd - "".length, - }; - } -} - -// --------------------------------------------------------------------------- -// Style classification -// --------------------------------------------------------------------------- - -function isBookMarkerParagraphStyle(style: string): boolean { - return /(?:^|\/)meta%3abk(?:_|$|\b)/.test(style) || /(?:^|\/)meta:bk/.test(style); -} - -/** Paragraph styles that may contain biblical verse text. */ -function isReplaceableParagraphStyle(style: string): boolean { - if (isBookMarkerParagraphStyle(style)) return false; - if (/(?:^|\/)intro%3a|(?:^|\/)intro:/.test(style)) return false; - if (/(?:^|\/)meta%3a|(?:^|\/)meta:/.test(style)) return false; - if (/(?:^|\/)title%3a|(?:^|\/)title:/.test(style)) return false; - if (/(?:^|\/)notes%3a|(?:^|\/)notes:/.test(style)) return false; - return ( - /(?:^|\/)text%3a|(?:^|\/)text:/.test(style) || - /(?:^|\/)b(?:_|$|\b)/.test(style) || - /(?:^|\/)b_/.test(style) - ); -} - -function isChapterMarkerStyle(style: string): boolean { - return /meta%3ac|meta:c/.test(style); -} - -function isVerseMarkerStyle(style: string): boolean { - return /meta%3av|meta:v/.test(style); -} - -/** Verse-number, spacing, drop-cap, notes, and hidden marker CSRs — never replace text. 
*/ -function isMarkerOrStructuralStyle(style: string): boolean { - if (isChapterMarkerStyle(style) || isVerseMarkerStyle(style)) return true; - if (/(?:^|\/)notes%3a|(?:^|\/)notes:/.test(style)) return true; - if (/(?:^|\/)meta%3a|(?:^|\/)meta:/.test(style)) return true; - return ( - /(?:^|\/)cv%3av(?:_|$|\b)/.test(style) || - /(?:^|\/)cv:v(?:_|$|\b)/.test(style) || - /(?:^|\/)cv%3av_sp/.test(style) || - /(?:^|\/)cv:v_sp/.test(style) || - /(?:^|\/)cv%3adc/.test(style) || - /(?:^|\/)cv:dc/.test(style) || - /(?:^|\/)#base\.hidden/.test(style) - ); -} - -function isNoCharacterStyle(style: string): boolean { - return NO_STYLE_RE.test(style); -} - -function extractProseSegmentsFromCsrList(csrXmlList: string[]): string[] { - const segments: string[] = []; - for (const csrXml of csrXmlList) { - const styleMatch = csrXml.match(/AppliedCharacterStyle="([^"]+)"/); - if (isMarkerOrStructuralStyle(styleMatch?.[1] ?? "")) continue; - const text = collectContentText(csrXml, 0, csrXml.length); - segments.push(text); - } - return segments; -} - -/** - * Extract canonical SBL book code from `meta:bk` content. The notes export - * pipeline may prefix translated text (e.g. "[PT] GEN"); we must still find GEN. - */ -function extractBookCode(rawText: string): string { - if (!rawText) return ""; - const trimmed = rawText.replace(/\s+/g, " ").trim(); - const m = trimmed.match(/\b(?:[1-3][A-Z]{2}|[A-Z]{3})\b/); - return m ? m[0] : trimmed; -} - -function collectContentText(xml: string, start: number, end: number): string { - let text = ""; - for (const c of iterateContentAbs(xml, start, end)) { - text += xml.slice(c.absInnerStart, c.absInnerEnd); - } - return text; -} - -// --------------------------------------------------------------------------- -// Verse block structure (for hybrid block swap) -// --------------------------------------------------------------------------- - -/** Normalize a character style path for comparison. 
*/ -function normalizeCharStyle(style: string): string { - return style - .replace(/^CharacterStyle\//, "") - .replace(/%3a/g, ":") - .replace(/%5B/g, "[") - .replace(/%5D/g, "]"); -} - -/** Token describing one CSR's role in a verse block (ignores actual prose). */ -function csrStructureToken(csrXml: string): string { - const styleMatch = csrXml.match(/AppliedCharacterStyle="([^"]+)"/); - const style = normalizeCharStyle(styleMatch?.[1] ?? "?"); - - if (isNoCharacterStyle(styleMatch?.[1] ?? "")) { - if (/<\?ACE\s/.test(csrXml) || /<\?ACE\s*\?>/.test(csrXml)) return `${style}:ace`; - const hasBr = //.test(csrXml); - const text = collectContentText(csrXml, 0, csrXml.length).replace(/\s+/g, " ").trim(); - if (hasBr && !text) return `${style}:br`; - if (!text) return `${style}:empty`; - return `${style}:text`; - } - const text = collectContentText(csrXml, 0, csrXml.length).replace(/\s+/g, " ").trim(); - return text ? `${style}:marker` : `${style}:empty`; -} - -function buildStructureSig(csrXmlList: string[]): string { - return csrXmlList.map(csrStructureToken).join("|"); -} - -function normalizeParagraphStyle(style: string): string { - return style - .replace(/^ParagraphStyle\//, "") - .replace(/%3a/g, ":") - .replace(/%5B/g, "[") - .replace(/%5D/g, "]"); -} - -interface ParagraphChunkState { - paragraphStyle: string; - paragraphStart: number; - proseContents: ContentMatch[]; - proseSegments: string[]; -} - -function buildParagraphSig(chunks: Array<{ paragraphStyle: string }>): string { - return chunks.map((c) => normalizeParagraphStyle(c.paragraphStyle)).join("|"); -} - -function isPoetryParagraphSig(sig: string): boolean { - return /text:q[12]/.test(sig); -} - -function countProseLinesInChunks(chunks: Array<{ proseSegments: string[] }>): number { - return chunks.reduce( - (sum, c) => sum + c.proseSegments.filter((s) => s.trim().length > 0).length, - 0 - ); -} - -function maxProseLinesInOneChunk(chunks: Array<{ proseSegments: string[] }>): number { - return 
chunks.reduce( - (max, c) => Math.max(max, c.proseSegments.filter((s) => s.trim().length > 0).length), - 0 - ); -} - -/** - * Study and Bible both have poetry (text:q1/q2) but different paragraph layouts — - * e.g. GEN 8:22 Study uses many alternating q1/q2 paras while Bible keeps every - * line in one q1 CSR with `
` between `` nodes. - */ -function shouldUseBibleVerseSpanLayout( - study: { - paragraphSig: string; - singleParagraph: boolean; - paragraphChunks: ParagraphChunkState[]; - }, - bible: VerseEntry -): boolean { - if (study.singleParagraph || bible.singleParagraph) return false; - if (study.paragraphSig === bible.paragraphSig) return false; - if (!isPoetryParagraphSig(study.paragraphSig) || !isPoetryParagraphSig(bible.paragraphSig)) { - return false; - } - - const studyParas = study.paragraphChunks.length; - const bibleParas = bible.paragraphChunks.length; - const bibleMaxLines = maxProseLinesInOneChunk(bible.paragraphChunks); - const studyLines = countProseLinesInChunks(study.paragraphChunks); - const bibleLines = countProseLinesInChunks(bible.paragraphChunks); - - if (bibleMaxLines < 2) return false; - - // Bible consolidated into fewer paragraphs than Study. - if (bibleParas < studyParas) return true; - - // Same paragraph count but Bible packs many lines into one CSR (Br-separated). - const bibleConsolidated = bible.paragraphChunks.some( - (c) => c.proseSegments.filter((s) => s.trim().length > 0).length >= 3 - ); - const studySpread = study.paragraphChunks.every( - (c) => c.proseSegments.filter((s) => s.trim().length > 0).length <= 2 - ); - if (bibleConsolidated && studySpread && bibleMaxLines >= 3) return true; - - // Line counts differ enough that weight-based splitting would break words. - if (bibleParas !== studyParas && Math.abs(bibleLines - studyLines) >= 2) return true; - - return false; -} - -/** - * Poetry verses are often preceded by a `b_poetry` spacer paragraph before the - * opening `meta:v`. Include it in the verse span so layout swap keeps the break. 
- */ -function findPoetryLeadInStart(storyXml: string, verseParagraphStart: number): number { - const before = storyXml.lastIndexOf("", before); - if (openEnd === -1 || openEnd >= verseParagraphStart) return verseParagraphStart; - const openTag = storyXml.slice(before, openEnd + 1); - const styleMatch = openTag.match(/AppliedParagraphStyle="([^"]+)"/); - if (!styleMatch) return verseParagraphStart; - const style = normalizeParagraphStyle(styleMatch[1]); - if (/(?:^|\/)b_poetry(?:_|$|\b)/.test(style) || /(?:^|\/)b(?:_|$|\b)/.test(style)) { - return before; - } - return verseParagraphStart; -} - -function ensureParagraphChunk( - openVerse: OpenVerseState, - para: ParagraphInfo -): ParagraphChunkState { - const last = openVerse.paragraphChunks[openVerse.paragraphChunks.length - 1]; - if (!last || last.paragraphStart !== para.fullStart) { - const chunk: ParagraphChunkState = { - paragraphStyle: para.appliedParagraphStyle, - paragraphStart: para.fullStart, - proseContents: [], - proseSegments: [], - }; - openVerse.paragraphChunks.push(chunk); - return chunk; - } - return last; -} - -interface OpenVerseState { - book: string; - chapter: string; - verse: string; - csrXmlList: string[]; - blockStart: number; - blockEnd: number; - paragraphStart: number; - paragraphEnd: number; - noStyleContents: ContentMatch[]; - proseContents: ContentMatch[]; - paragraphChunks: ParagraphChunkState[]; -} - -interface WalkCallbacks { - onBook?: (book: string) => void; - onChapter?: (book: string, chapter: string) => void; - onVerseOpen?: (v: { book: string; chapter: string; verse: string }) => void; - /** Fired when entering the PSA book (verses are not swapped). 
*/ - onEnterPsa?: () => void; - onVerseClose?: (entry: { - book: string; - chapter: string; - verse: string; - text: string; - segments: string[]; - structureSig: string; - blockXml: string; - singleParagraph: boolean; - noStyleContents: ContentMatch[]; - proseContents: ContentMatch[]; - paragraphChunks: ParagraphChunkState[]; - paragraphStart: number; - paragraphEnd: number; - verseSpanXml: string; - csrXmlList: string[]; - blockStart: number; - blockEnd: number; - }) => void; -} - -/** - * Stream through a Story XML, tracking book / chapter / verse state and - * firing callbacks at verse boundaries. - */ -function walkStory(storyXml: string, callbacks: WalkCallbacks): void { - let currentBook = ""; - let currentChapter = ""; - let openVerse: OpenVerseState | null = null; - let inPsa = false; - - const closeVerse = () => { - if (!openVerse) return; - const { - book, - chapter, - verse, - csrXmlList, - blockStart, - blockEnd, - paragraphStart, - paragraphEnd, - } = openVerse; - if (!inPsa && book && chapter && verse) { - const segments: string[] = []; - const textParts: string[] = []; - for (const c of openVerse.noStyleContents) { - const t = storyXml.slice(c.absInnerStart, c.absInnerEnd); - segments.push(t); - if (t.trim()) textParts.push(t); - } - callbacks.onVerseClose?.({ - book, - chapter, - verse, - text: textParts.join(" ").replace(/\s+/g, " ").trim(), - segments, - structureSig: buildStructureSig(csrXmlList), - blockXml: storyXml.slice(blockStart, blockEnd), - singleParagraph: paragraphStart === paragraphEnd, - noStyleContents: [...openVerse.noStyleContents], - proseContents: [...openVerse.proseContents], - paragraphChunks: openVerse.paragraphChunks.map((c) => ({ - paragraphStyle: c.paragraphStyle, - paragraphStart: c.paragraphStart, - proseContents: [...c.proseContents], - proseSegments: [...c.proseSegments], - })), - paragraphStart, - paragraphEnd, - verseSpanXml: storyXml.slice(paragraphStart, paragraphEnd), - csrXmlList: [...csrXmlList], - blockStart, - 
blockEnd, - }); - } - openVerse = null; - }; - - for (const para of iterateParagraphs(storyXml)) { - if (isBookMarkerParagraphStyle(para.appliedParagraphStyle)) { - closeVerse(); - let bookRaw = ""; - for (const c of iterateContentAbs(storyXml, para.bodyStart, para.bodyEnd)) { - bookRaw += storyXml.slice(c.absInnerStart, c.absInnerEnd); - } - const code = extractBookCode(bookRaw); - if (code) { - currentBook = code; - currentChapter = ""; - inPsa = code === PSA_BOOK_CODE; - if (inPsa) callbacks.onEnterPsa?.(); - callbacks.onBook?.(code); - } - continue; - } - - if (!isReplaceableParagraphStyle(para.appliedParagraphStyle)) { - continue; - } - - for (const csr of iterateCsrAbs(storyXml, para.bodyStart, para.bodyEnd)) { - if (isChapterMarkerStyle(csr.appliedCharacterStyle)) { - const cnum = digitsOnly(collectContentText(storyXml, csr.absBodyStart, csr.absBodyEnd)); - if (cnum && cnum !== currentChapter) { - closeVerse(); - currentChapter = cnum; - callbacks.onChapter?.(currentBook, currentChapter); - } - continue; - } - - if (isVerseMarkerStyle(csr.appliedCharacterStyle)) { - const vnum = collectContentText(storyXml, csr.absBodyStart, csr.absBodyEnd).trim(); - if (!/^\d+$/.test(vnum)) continue; - - if (!openVerse) { - // Opening marker - const paragraphStart = findPoetryLeadInStart(storyXml, para.fullStart); - openVerse = { - book: currentBook, - chapter: currentChapter, - verse: vnum, - csrXmlList: [csr.xml], - blockStart: csr.fullStart, - blockEnd: csr.fullEnd, - paragraphStart, - paragraphEnd: para.fullEnd, - noStyleContents: [], - proseContents: [], - paragraphChunks: [], - }; - callbacks.onVerseOpen?.({ - book: currentBook, - chapter: currentChapter, - verse: vnum, - }); - } else if (openVerse.verse === vnum) { - // Closing marker (same verse number) - openVerse.csrXmlList.push(csr.xml); - openVerse.blockEnd = csr.fullEnd; - closeVerse(); - } else { - // New verse opens before previous closed — force-close previous - closeVerse(); - openVerse = { - book: 
currentBook, - chapter: currentChapter, - verse: vnum, - csrXmlList: [csr.xml], - blockStart: csr.fullStart, - blockEnd: csr.fullEnd, - paragraphStart: para.fullStart, - paragraphEnd: para.fullEnd, - noStyleContents: [], - proseContents: [], - paragraphChunks: [], - }; - callbacks.onVerseOpen?.({ - book: currentBook, - chapter: currentChapter, - verse: vnum, - }); - } - continue; - } - - if (openVerse) { - const verseParagraphChunk = ensureParagraphChunk(openVerse, para); - openVerse.csrXmlList.push(csr.xml); - openVerse.blockEnd = csr.fullEnd; - openVerse.paragraphEnd = para.fullEnd; - - if (!isMarkerOrStructuralStyle(csr.appliedCharacterStyle)) { - for (const c of iterateContentAbs(storyXml, csr.absBodyStart, csr.absBodyEnd)) { - if (isProseContentSlot(storyXml, c)) { - const text = storyXml.slice(c.absInnerStart, c.absInnerEnd); - openVerse.proseContents.push(c); - verseParagraphChunk.proseContents.push(c); - verseParagraphChunk.proseSegments.push(text); - } - } - } - - if (isNoCharacterStyle(csr.appliedCharacterStyle)) { - for (const c of iterateContentAbs(storyXml, csr.absBodyStart, csr.absBodyEnd)) { - openVerse.noStyleContents.push(c); - } - } - } - } - } - closeVerse(); -} - -// --------------------------------------------------------------------------- -// Build Bible verse index -// --------------------------------------------------------------------------- - -export function buildBibleVerseIndex(bibleStoryXml: string): BibleVerseIndex { - const index: BibleVerseIndex = new Map(); - - walkStory(bibleStoryXml, { - onVerseClose: (entry) => { - if (!entry.book || !entry.chapter || !entry.verse || !entry.text) return; - const key = verseKey(entry.book, entry.chapter, entry.verse); - index.set(key, { - text: entry.text, - segments: entry.segments, - proseSegments: extractProseSegmentsFromCsrList(entry.csrXmlList), - structureSig: entry.structureSig, - blockXml: entry.blockXml, - singleParagraph: entry.singleParagraph, - paragraphSig: 
buildParagraphSig(entry.paragraphChunks), - paragraphChunks: entry.paragraphChunks.map((c) => ({ - paragraphStyle: c.paragraphStyle, - proseSegments: c.proseSegments, - })), - verseSpanXml: entry.verseSpanXml, - }); - }, - }); - - return index; -} - -// --------------------------------------------------------------------------- -// Apply swap to Study Bible -// --------------------------------------------------------------------------- - -interface Splice { - absStart: number; - absEnd: number; - replacement: string; -} - -function upsertSplice(map: Map, sp: Splice): void { - map.set(sp.absStart, sp); -} - -/** True when this `` carries verse prose (not ACE, not inter-verse spacing). */ -function isProseContentSlot(storyXml: string, c: ContentMatch): boolean { - const raw = storyXml.slice(c.absInnerStart, c.absInnerEnd); - if (!raw.trim()) return false; - if (/^<\?ACE/.test(raw.trim())) return false; - // Single space between verses in the same paragraph — keep original. - if (raw === " " || raw === "\u2009" || raw === "\u00A0") return false; - return true; -} - -function slotWhitespace(studyXml: string, slot: ContentMatch): { leading: string; trailing: string } { - const orig = studyXml.slice(slot.absInnerStart, slot.absInnerEnd); - return { - leading: orig.match(/^(\s*)/)?.[1] ?? "", - trailing: orig.match(/(\s*)$/)?.[1] ?? "", - }; -} - -/** - * Split one string across N slots using the Study's original line lengths as - * weights (used when the Bible has one segment but Study has several lines). - * Preserves each slot's leading/trailing whitespace (poetry tabs). - */ -function splitTextByStudyWeights( - studyXml: string, - proseSlots: ContentMatch[], - text: string -): string[] { - if (proseSlots.length === 0) return []; - if (proseSlots.length === 1) { - const { leading, trailing } = slotWhitespace(studyXml, proseSlots[0]); - const body = /^\s/.test(text) ? 
text : text.trim(); - return [leading + body + trailing]; - } - - const weights = proseSlots.map((c) => { - const len = studyXml.slice(c.absInnerStart, c.absInnerEnd).trim().length; - return len > 0 ? len : 1; - }); - const totalWeight = weights.reduce((a, b) => a + b, 0); - - const parts: string[] = []; - let pos = 0; - for (let i = 0; i < proseSlots.length; i++) { - const { leading, trailing } = slotWhitespace(studyXml, proseSlots[i]); - if (i === proseSlots.length - 1) { - const body = text.slice(pos).trim(); - parts.push(leading + body + trailing); - break; - } - const share = Math.max(1, Math.round((weights[i] / totalWeight) * text.length)); - let end = Math.min(text.length, pos + share); - if (end < text.length) { - const nextSpace = text.indexOf(" ", end); - if (nextSpace !== -1 && nextSpace - pos < share * 1.5) { - end = nextSpace + 1; - } - } - const body = text.slice(pos, end).trim(); - parts.push(leading + body + trailing); - pos = end; - } - return parts; -} - -/** - * Map Bible prose segments onto Study prose `` slots in order. - */ -function mapProseSegmentsToSlots( - studyXml: string, - proseSlots: ContentMatch[], - bibleSegments: string[], - bibleFullText: string -): string[] { - const n = proseSlots.length; - if (n === 0) return []; - - const prose = bibleSegments.filter((s) => s.trim().length > 0); - const source = prose.length > 0 ? 
prose : [bibleFullText]; - const m = source.length; - - if (m === n) { - return source.map((segment, i) => { - if (/^\s/.test(segment)) return segment; - const { leading, trailing } = slotWhitespace(studyXml, proseSlots[i]); - return leading + segment.trim() + trailing; - }); - } - if (m === 1) return splitTextByStudyWeights(studyXml, proseSlots, source[0]); - if (m > n) { - const out = source.slice(0, n - 1); - out.push(source.slice(n - 1).join(" ")); - return out; - } - // m > 1 && m < n — map available lines, leave extra Study slots empty only when - // we cannot use Bible paragraph layout (caller should prefer span layout). - const out = [...source]; - while (out.length < n) out.push(""); - return out; -} - -/** - * Replace the Study verse's full paragraph span with the Bible's paragraph XML - * (used when poetry layout differs, e.g. consolidated q1 CSR vs alternating q1/q2). - */ -function applyBibleVerseSpanLayout( - studyParagraphStart: number, - studyParagraphEnd: number, - bibleEntry: VerseEntry, - splices: Map -): void { - if (!bibleEntry.verseSpanXml || bibleEntry.verseSpanXml.length === 0) return; - upsertSplice(splices, { - absStart: studyParagraphStart, - absEnd: studyParagraphEnd, - replacement: bibleEntry.verseSpanXml, - }); -} - -/** - * Replace `` in Study prose slots, keeping every CharacterStyleRange - * wrapper and attribute from the Study file. - */ -function applyDistributedContentReplacement( - studyXml: string, - proseContents: ContentMatch[], - bibleEntry: VerseEntry, - splices: Map, - bibleSegmentSource?: string[] -): void { - const proseSlots: ContentMatch[] = []; - for (const c of proseContents) { - if (isProseContentSlot(studyXml, c)) proseSlots.push(c); - } - if (proseSlots.length === 0) return; - - const bibleSegments = - bibleSegmentSource ?? - (bibleEntry.proseSegments.length > 0 ? 
bibleEntry.proseSegments : bibleEntry.segments); - - const mapped = mapProseSegmentsToSlots( - studyXml, - proseSlots, - bibleSegments, - bibleEntry.text - ); - - for (let i = 0; i < proseSlots.length; i++) { - upsertSplice(splices, { - absStart: proseSlots[i].absInnerStart, - absEnd: proseSlots[i].absInnerEnd, - replacement: xmlEscape(mapped[i] ?? ""), - }); - } -} - -/** - * Multi-paragraph verses (poetry): map Bible text per paragraph when the - * paragraph-style sequence matches between Study and Bible. - */ -function applyParagraphAlignedReplacement( - studyXml: string, - studyChunks: ParagraphChunkState[], - bibleEntry: VerseEntry, - splices: Map -): boolean { - const bibleChunks = bibleEntry.paragraphChunks; - if (studyChunks.length !== bibleChunks.length || studyChunks.length < 2) { - return false; - } - - for (let i = 0; i < studyChunks.length; i++) { - if ( - normalizeParagraphStyle(studyChunks[i].paragraphStyle) !== - normalizeParagraphStyle(bibleChunks[i].paragraphStyle) - ) { - return false; - } - applyDistributedContentReplacement( - studyXml, - studyChunks[i].proseContents, - bibleEntry, - splices, - bibleChunks[i].proseSegments - ); - } - return true; -} - -/** - * When CSR skeletons match, map Bible prose onto Study prose slots 1:1 while - * preserving Study CharacterStyleRange tags (Tracking, nd, source serif, etc.). 
- */ -function applyStructurePreservingReplacement( - studyXml: string, - studyProseContents: ContentMatch[], - bibleEntry: VerseEntry, - splices: Map -): void { - applyDistributedContentReplacement( - studyXml, - studyProseContents, - bibleEntry, - splices, - bibleEntry.proseSegments - ); -} - -export function applyBibleSwapToStudyXml( - studyStoryXml: string, - bibleIndex: BibleVerseIndex -): { xml: string; stats: SwapStats } { - const stats: SwapStats = { - replacedCount: 0, - blockSwapCount: 0, - contentOnlyCount: 0, - skippedPsa: 0, - missingFromBible: [], - extraInBibleAppended: [], - }; - - const splicesByStart = new Map(); - const studyVersesInChapter = new Map>(); - /** Last `[No character style]` Content in each chapter (for versification extras). */ - const lastNoStyleByChapter = new Map(); - - const chapterKey = (book: string, chapter: string) => `${book}|${chapter}`; - - walkStory(studyStoryXml, { - onBook: () => { - /* chapter tracking resets on chapter marker */ - }, - onChapter: (book, chapter) => { - if (!studyVersesInChapter.has(chapterKey(book, chapter))) { - studyVersesInChapter.set(chapterKey(book, chapter), new Set()); - } - }, - onEnterPsa: () => { - stats.skippedPsa++; - }, - onVerseClose: (studyVerse) => { - const { book, chapter, verse } = studyVerse; - if (!book || !chapter || !verse) return; - - if (book === PSA_BOOK_CODE) { - stats.skippedPsa++; - return; - } - - const ck = chapterKey(book, chapter); - if (!studyVersesInChapter.has(ck)) { - studyVersesInChapter.set(ck, new Set()); - } - studyVersesInChapter.get(ck)!.add(verse); - - if (studyVerse.noStyleContents.length > 0) { - const last = studyVerse.noStyleContents[studyVerse.noStyleContents.length - 1]; - lastNoStyleByChapter.set(ck, last); - } - - const bibleEntry = bibleIndex.get(verseKey(book, chapter, verse)); - if (!bibleEntry) { - stats.missingFromBible.push({ book, chapter, verse }); - return; - } - - const studyParaSig = buildParagraphSig(studyVerse.paragraphChunks); - const 
useBibleSpanLayout = shouldUseBibleVerseSpanLayout( - { - paragraphSig: studyParaSig, - singleParagraph: studyVerse.singleParagraph, - paragraphChunks: studyVerse.paragraphChunks, - }, - bibleEntry - ); - const canParagraphAlign = - !studyVerse.singleParagraph && - studyParaSig.length > 0 && - studyParaSig === bibleEntry.paragraphSig && - studyVerse.paragraphChunks.length === bibleEntry.paragraphChunks.length; - - const canStructurePreserve = - studyVerse.structureSig === bibleEntry.structureSig && - studyVerse.proseContents.length > 0; - - if (useBibleSpanLayout) { - applyBibleVerseSpanLayout( - studyVerse.paragraphStart, - studyVerse.paragraphEnd, - bibleEntry, - splicesByStart - ); - stats.blockSwapCount++; - } else if ( - canParagraphAlign && - applyParagraphAlignedReplacement( - studyStoryXml, - studyVerse.paragraphChunks, - bibleEntry, - splicesByStart - ) - ) { - stats.blockSwapCount++; - } else if (canStructurePreserve) { - applyStructurePreservingReplacement( - studyStoryXml, - studyVerse.proseContents, - bibleEntry, - splicesByStart - ); - stats.blockSwapCount++; - } else { - applyDistributedContentReplacement( - studyStoryXml, - studyVerse.proseContents.length > 0 - ? studyVerse.proseContents - : studyVerse.noStyleContents, - bibleEntry, - splicesByStart - ); - stats.contentOnlyCount++; - } - stats.replacedCount++; - }, - }); - - // Append Bible verses not present in Study (versification extras). - // The anchor may fall inside a block-swap splice; merge into that splice - // instead of creating a nested splice (which would be skipped on apply). 
- const findSpliceCovering = (innerPos: number): Splice | undefined => { - for (const sp of splicesByStart.values()) { - if (sp.absStart <= innerPos && innerPos < sp.absEnd) return sp; - } - return undefined; - }; - - for (const [key, bibleEntry] of bibleIndex.entries()) { - const [book, chapter, verse] = key.split("|"); - if (book === PSA_BOOK_CODE) continue; - const ck = chapterKey(book, chapter); - const studySet = studyVersesInChapter.get(ck); - if (studySet?.has(verse)) continue; - - const anchor = lastNoStyleByChapter.get(ck); - if (!anchor) continue; - - const appendText = xmlEscape(bibleEntry.text); - const covering = findSpliceCovering(anchor.absInnerStart); - if (covering) { - // Anchor sits inside a block-swap region — append into the last - // node of the replacement XML, not after the block. - const lastOpen = covering.replacement.lastIndexOf(""); - const lastClose = covering.replacement.lastIndexOf(""); - if (lastOpen !== -1 && lastClose > lastOpen) { - const inner = covering.replacement.slice(lastOpen + "".length, lastClose); - covering.replacement = - covering.replacement.slice(0, lastOpen + "".length) + - (inner.trim() ? `${inner} ${appendText}` : appendText) + - covering.replacement.slice(lastClose); - } else { - covering.replacement = `${covering.replacement} ${appendText}`; - } - } else { - const existing = splicesByStart.get(anchor.absInnerStart); - if (existing) { - existing.replacement = `${existing.replacement} ${appendText}`; - } else { - const original = studyStoryXml.slice(anchor.absInnerStart, anchor.absInnerEnd); - upsertSplice(splicesByStart, { - absStart: anchor.absInnerStart, - absEnd: anchor.absInnerEnd, - replacement: original.trim() ? 
`${original} ${appendText}` : appendText, - }); - } - } - stats.extraInBibleAppended.push({ book, chapter, verse }); - } - - const splices = Array.from(splicesByStart.values()).sort((a, b) => a.absStart - b.absStart); - const parts: string[] = []; - let cursor = 0; - for (const sp of splices) { - if (sp.absStart < cursor) continue; - if (sp.absStart > cursor) { - parts.push(studyStoryXml.slice(cursor, sp.absStart)); - } - parts.push(sp.replacement); - cursor = sp.absEnd; - } - if (cursor < studyStoryXml.length) { - parts.push(studyStoryXml.slice(cursor)); - } - - return { xml: parts.join(""), stats }; -} From b40d19e63f322a0ea048fba3613bccdc913a0197 Mon Sep 17 00:00:00 2001 From: Sam van Vuuren Date: Mon, 1 Jun 2026 10:12:20 -0400 Subject: [PATCH 05/12] fix(export): scrollable list, aligned icons, and OK button in export mismatch modal (#998) Consolidates the three modal fixes for the "Files Without Audio/Text" export warning that were previously split across two PRs (#1002, #1003): - Scrollable file list: cap .popup-file-list at 40vh with overflow-y:auto so a long list scrolls instead of overflowing past the viewport. - Aligned icons: vertically center each file icon with its filename (display:flex; align-items:center on the list rows). - OK button: add a primary OK button at the bottom-right of the content-mismatch modal so users have an obvious dismiss action beyond the small X. CSS changes apply to both the content-mismatch and HTML-mismatch popups via the shared .popup-file-list class. Supersedes #1002 and #1003. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/projectManager/projectExportView.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index 1d514ca21..8ac6d196a 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -785,8 +785,11 @@ function getWebviewContent( border: 1px solid rgba(202, 138, 4, 0.25); border-radius: 4px; font-size: 0.9em; + max-height: 40vh; + overflow-y: auto; } - .popup-file-list div { padding: 2px 0; } + .popup-file-list div { padding: 2px 0; display: flex; align-items: center; } + .popup-footer { display: flex; justify-content: flex-end; margin-top: 16px; } /* Step 4: Exporting screen */ .export-progress-card { @@ -1461,6 +1464,9 @@ function getWebviewContent( The export will still proceed, but the listed files will produce empty output for the selected format.

+
From bd6b3b4553942b4959efc818e697861f44a190a2 Mon Sep 17 00:00:00 2001 From: Sam van Vuuren Date: Mon, 1 Jun 2026 10:45:25 -0400 Subject: [PATCH 06/12] fix(export): scroll the modal body so the file list reliably scrolls The first pass capped only the inner .popup-file-list, which left .popup-card free to grow past the viewport and could push the new OK button off-screen. Switch to the standard scrollable-modal pattern: cap the card at 85vh as a flex column, scroll the .popup-body (overflow-y:auto; min-height:0), and pin the header and footer (flex-shrink:0) so the OK button stays visible. Verified with a headless-Chromium render of the popup CSS: with 60 files the body scrolls (scrollHeight 1552 > clientHeight 520) and the OK button stays in view at both 700px and 420px window heights; with 4 files it correctly does not scroll. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/projectManager/projectExportView.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index 8ac6d196a..e69d8244f 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -753,9 +753,13 @@ function getWebviewContent( padding: 20px 24px; max-width: 480px; width: 90%; + max-height: 85vh; + display: flex; + flex-direction: column; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.35); } .popup-header { + flex-shrink: 0; display: flex; align-items: center; gap: 8px; @@ -777,6 +781,8 @@ function getWebviewContent( font-size: 0.9em; color: var(--vscode-editor-foreground); line-height: 1.5; + overflow-y: auto; + min-height: 0; } .popup-file-list { margin: 8px 0; @@ -785,11 +791,9 @@ function getWebviewContent( border: 1px solid rgba(202, 138, 4, 0.25); border-radius: 4px; font-size: 0.9em; - max-height: 40vh; - overflow-y: auto; } .popup-file-list div { padding: 2px 0; display: flex; align-items: center; } - .popup-footer { display: flex; justify-content: 
flex-end; margin-top: 16px; } + .popup-footer { display: flex; justify-content: flex-end; margin-top: 16px; flex-shrink: 0; } /* Step 4: Exporting screen */ .export-progress-card { From 9987a0c21e2ab3ca5fa76030a59ab2e9d8e66b02 Mon Sep 17 00:00:00 2001 From: Sam van Vuuren Date: Mon, 1 Jun 2026 10:51:11 -0400 Subject: [PATCH 07/12] fix(export): scroll only the file list, not the whole modal body Make .popup-body a non-scrolling flex column with the summary/note paragraphs pinned (flex-shrink:0) and only .popup-file-list absorbing overflow (flex:0 1 auto; min-height:0; overflow-y:auto). The list stays compact for short lists and scrolls internally for long ones, while the header, surrounding text, and OK button stay fixed. Verified via headless-Chromium render: with 60 files only the list scrolls (body not scrollable) and summary/note/OK button stay visible at 700px and 420px window heights; with 4 files nothing scrolls. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/projectManager/projectExportView.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index e69d8244f..2cf41f096 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -781,9 +781,11 @@ function getWebviewContent( font-size: 0.9em; color: var(--vscode-editor-foreground); line-height: 1.5; - overflow-y: auto; + display: flex; + flex-direction: column; min-height: 0; } + .popup-body > p { flex-shrink: 0; } .popup-file-list { margin: 8px 0; padding: 8px 12px; @@ -791,6 +793,9 @@ function getWebviewContent( border: 1px solid rgba(202, 138, 4, 0.25); border-radius: 4px; font-size: 0.9em; + flex: 0 1 auto; + min-height: 0; + overflow-y: auto; } .popup-file-list div { padding: 2px 0; display: flex; align-items: center; } .popup-footer { display: flex; justify-content: flex-end; margin-top: 16px; flex-shrink: 0; } From 
0d2384ee0b2646f11c36d3fa3a49dea9741f63ca Mon Sep 17 00:00:00 2001 From: Sam van Vuuren Date: Mon, 1 Jun 2026 11:00:15 -0400 Subject: [PATCH 08/12] fix(export): cap mismatch-modal file list at 26vh before scrolling Halve the height at which the Files Without Audio/Text list starts scrolling by adding max-height:26vh to .popup-file-list, so it tops out at roughly half its previous height (~373px -> ~198px at a 700px window) and scrolls from there. Surrounding text, header, and OK button are unaffected. Verified via headless-Chromium render. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/projectManager/projectExportView.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index 2cf41f096..23fb41ed1 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -795,6 +795,7 @@ function getWebviewContent( font-size: 0.9em; flex: 0 1 auto; min-height: 0; + max-height: 26vh; overflow-y: auto; } .popup-file-list div { padding: 2px 0; display: flex; align-items: center; } From 5eb8d0f1dfe7c6e7ae70d534386013468e10ff43 Mon Sep 17 00:00:00 2001 From: Luke-Bilhorn Date: Mon, 1 Jun 2026 15:27:56 -0500 Subject: [PATCH 09/12] fix(editor): preserve consecutive spaces in cell editor (#1010) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quill's built-in `matchText` clipboard matcher collapses runs of 2+ ASCII spaces into a single space when converting HTML → Delta on cell open. For scripture translation, every character is meaningful: double spaces in imported source text (e.g. the Portuguese project) were silently dropped in the open editor view while still present in the closed-cell display, making them appear "hidden" and uneditable. Fixes this at the root by: 1. Swapping Quill's built-in TEXT_NODE matcher for a non-destructive variant in `utils/preserveWhitespace.ts`. 
Mirrors Quill 2.0.3's matchText exactly minus the offending collapse line; all other semantics (Word `<o:p>` handling, `<pre>` passthrough, leading/
   trailing-space stripping at block boundaries, NBSP normalization)
   are preserved. The matcher is located by function reference so a
   future Quill reordering doesn't silently re-introduce the bug.

2. Restoring `.ql-editor`'s `white-space` to `pre-wrap` (Quill's actual
   default) so the now-preserved spaces render visibly while editing.

Adds a vitest regression suite that exercises the matcher against a
real Quill instance to catch any future Quill upgrade that breaks the
splice.

If upstream PR https://github.com/slab/quill/pull/4319 ever lands,
`utils/preserveWhitespace.ts` and the splice in `Editor.tsx` can be
removed entirely.
---
 .../src/CodexCellEditor/Editor.tsx            |  25 ++-
 .../__tests___/preserveWhitespace.test.ts     | 108 ++++++++++++
 .../utils/preserveWhitespace.ts               | 156 ++++++++++++++++++
 3 files changed, 288 insertions(+), 1 deletion(-)
 create mode 100644 webviews/codex-webviews/src/CodexCellEditor/__tests___/preserveWhitespace.test.ts
 create mode 100644 webviews/codex-webviews/src/CodexCellEditor/utils/preserveWhitespace.ts

diff --git a/webviews/codex-webviews/src/CodexCellEditor/Editor.tsx b/webviews/codex-webviews/src/CodexCellEditor/Editor.tsx
index 13de4a60f..e92c9de4f 100644
--- a/webviews/codex-webviews/src/CodexCellEditor/Editor.tsx
+++ b/webviews/codex-webviews/src/CodexCellEditor/Editor.tsx
@@ -9,6 +9,7 @@ import React, {
 } from "react";
 import Quill, { Delta, Op } from "quill";
 import "quill/dist/quill.snow.css";
+import { installPreserveWhitespaceMatcher } from "./utils/preserveWhitespace";
 import { getCleanedHtml } from "./utils";
 import {
     isSuperscriptibleDigit,
@@ -506,7 +507,20 @@ const Editor = forwardRef((props, ref) => {
             const clipboardModule = quill.getModule("clipboard") as {
                 addMatcher: (selector: number, matcher: typeof matchSuperscriptUnicodeDigits) => void;
                 convert: (args: { html?: string; text?: string }) => Delta;
+                matchers: Array<[number | string, (node: Node, delta: Delta, scroll: unknown) => Delta]>;
             };
+
+            // Replace Quill's built-in whitespace-collapsing text matcher with
+            // a non-destructive variant so runs of consecutive spaces survive
+            // the HTML → Delta conversion on cell open. See issue #1010 and
+            // utils/preserveWhitespace.ts.
+            const swapped = installPreserveWhitespaceMatcher(clipboardModule);
+            if (!swapped && DEBUG_ENABLED) {
+                console.warn(
+                    "[Editor] Could not locate Quill's built-in matchText to replace; double spaces may collapse on cell open (issue #1010)."
+                );
+            }
+
             clipboardModule.addMatcher(Node.TEXT_NODE, matchSuperscriptUnicodeDigits);
 
             // Apply minimal direct styles; rely on CSS file for look-and-feel
@@ -1518,7 +1532,16 @@ const Editor = forwardRef((props, ref) => {
                     display: block;
                 }
                 .ql-editor {
-                    white-space: normal !important;
+                    /*
+                     * Keep Quill's default rendering posture (pre-wrap) so that
+                     * runs of consecutive spaces stay visible and editable while
+                     * a cell is open. The previous "normal" override collapsed
+                     * them at paint time, which (combined with Quill's clipboard
+                     * matcher dropping them at convert time — fixed separately
+                     * via preserveWhitespaceMatchText) made double spaces in
+                     * source text impossible to correct in place. See #1010.
+                     */
+                    white-space: pre-wrap !important;
                     background-color: var(--vscode-editor-background) !important;
                     color: var(--vscode-editor-foreground) !important;
                 }
diff --git a/webviews/codex-webviews/src/CodexCellEditor/__tests___/preserveWhitespace.test.ts b/webviews/codex-webviews/src/CodexCellEditor/__tests___/preserveWhitespace.test.ts
new file mode 100644
index 000000000..6b6bb3224
--- /dev/null
+++ b/webviews/codex-webviews/src/CodexCellEditor/__tests___/preserveWhitespace.test.ts
@@ -0,0 +1,108 @@
+/**
+ * Regression test for issue #1010: double spaces in source content must
+ * survive Quill's HTML → Delta conversion when a cell is opened.
+ *
+ * The test uses a real Quill instance (no mocks) and exercises the same code
+ * path the editor uses on cell open. If a future Quill upgrade restructures
+ * its clipboard module such that we can no longer locate the built-in
+ * `matchText` by reference (or changes its semantics), this test will fail
+ * loudly — that's the regression signal to revisit utils/preserveWhitespace.ts.
+ */
+
+import { describe, it, expect, beforeEach } from "vitest";
+import Quill, { Delta } from "quill";
+import { installPreserveWhitespaceMatcher } from "../utils/preserveWhitespace";
+
+type ClipboardModule = {
+    convert: (args: { html?: string; text?: string }) => Delta;
+    matchers: Array<[number | string, unknown]>;
+};
+
+function makeQuillWithPreservedWhitespace(): { quill: Quill; clipboard: ClipboardModule } {
+    const container = document.createElement("div");
+    container.id = "quill-host";
+    document.body.appendChild(container);
+
+    const quill = new Quill(container, { theme: "snow" });
+    const clipboard = quill.getModule("clipboard") as unknown as ClipboardModule;
+
+    const swapped = installPreserveWhitespaceMatcher(clipboard as never);
+    if (!swapped) {
+        throw new Error(
+            "installPreserveWhitespaceMatcher failed: Quill's built-in matchText could not be located. " +
+                "If you just upgraded Quill, check that `matchText` is still exported from quill/modules/clipboard."
+        );
+    }
+
+    return { quill, clipboard };
+}
+
+function deltaText(delta: Delta): string {
+    return delta.ops
+        .map((op) => (typeof op.insert === "string" ? op.insert : ""))
+        .join("");
+}
+
+describe("preserveWhitespaceMatchText (issue #1010)", () => {
+    beforeEach(() => {
+        document.body.innerHTML = "";
+    });
+
+    it("preserves mid-line double ASCII spaces in a 

", () => { + const { clipboard } = makeQuillWithPreservedWhitespace(); + + const delta = clipboard.convert({ html: "

hello world

", text: "" }); + + expect(deltaText(delta)).toContain("hello world"); + }); + + it("preserves runs of 3+ ASCII spaces verbatim", () => { + const { clipboard } = makeQuillWithPreservedWhitespace(); + + const delta = clipboard.convert({ html: "

a b c

", text: "" }); + + expect(deltaText(delta)).toContain("a b c"); + }); + + it("normalizes   to a regular space (matches Quill's existing semantics)", () => { + // We don't want to silently let \u00A0 leak into the saved Delta — the + // user-visible text should be regular spaces. Only the *count* of + // consecutive spaces should be preserved. + const { clipboard } = makeQuillWithPreservedWhitespace(); + + const delta = clipboard.convert({ html: "

foo  bar

", text: "" }); + + const text = deltaText(delta); + expect(text).toContain("foo bar"); + expect(text).not.toContain("\u00a0"); + }); + + it("still strips a single leading space at the start of a block", () => { + // This is Quill's existing semantics for handling indented HTML — + // we want to preserve it. A leading single space at a block boundary + // should still be dropped. + const { clipboard } = makeQuillWithPreservedWhitespace(); + + const delta = clipboard.convert({ html: "

leading

", text: "" }); + + expect(deltaText(delta)).not.toMatch(/^ leading/); + }); + + it("still drops pure-whitespace text nodes between block elements", () => { + // Multi-paragraph HTML with newline indentation between

tags + // shouldn't produce phantom whitespace ops in the Delta. + const { clipboard } = makeQuillWithPreservedWhitespace(); + + const delta = clipboard.convert({ + html: "

one

\n

two

", + text: "", + }); + + const inserts = delta.ops + .map((op) => op.insert) + .filter((insert): insert is string => typeof insert === "string"); + + // No op should be purely indentation whitespace. + expect(inserts.every((s) => s.trim().length > 0 || s === "\n")).toBe(true); + }); +}); diff --git a/webviews/codex-webviews/src/CodexCellEditor/utils/preserveWhitespace.ts b/webviews/codex-webviews/src/CodexCellEditor/utils/preserveWhitespace.ts new file mode 100644 index 000000000..5d49ff837 --- /dev/null +++ b/webviews/codex-webviews/src/CodexCellEditor/utils/preserveWhitespace.ts @@ -0,0 +1,156 @@ +import type { Delta } from "quill"; +import { Parchment } from "quill"; +import { matchText as builtinMatchText } from "quill/modules/clipboard"; + +/** + * Whitespace-preserving replacement for Quill's built-in `matchText` matcher. + * + * ### Why this exists + * + * Quill's default text matcher (quill/modules/clipboard.js → `matchText`) + * actively destroys data by collapsing runs of 2+ ASCII spaces into a single + * space when converting HTML → Delta. For a general-purpose rich-text editor + * that matches HTML rendering semantics, but for a scripture translation + * editor it silently corrupts content imported from source texts — e.g. + * mid-line double spaces in Portuguese source materials become invisible and + * uneditable in the open cell while still present in the closed-cell view. + * See issue #1010. + * + * ### What this does + * + * Mirrors Quill 2.0.3's `matchText` exactly except for the single offending + * line that collapses `/ {2,}/g → ' '`. All other behavior is preserved: + * + * - Microsoft Word ` ` empty-line marker handling. + * - `
` ancestor pass-through (Quill preserves whitespace inside `
`).
+ *  - Dropping pure-whitespace text nodes that sit between block elements
+ *    (mimics how browsers ignore indentation between block tags).
+ *  - Normalizing non-NBSP whitespace (tabs, CR, LF) to a regular space.
+ *  - Stripping single leading/trailing spaces at block boundaries.
+ *  - Final NBSP → space normalization, so user-typed double spaces (which
+ *    browsers in contenteditable convert to ` \u00A0`) round-trip as two
+ *    regular spaces in the saved HTML.
+ *
+ * ### Future
+ *
+ * Upstream PR https://github.com/slab/quill/pull/4319 ("Fix white spaces not
+ * being preserved when pasted into editor") would make `Clipboard#convert()`
+ * respect inline `white-space` style and obsolete this whole module. As of
+ * 2026-06 it remains open and unmerged. When/if it lands, remove this file
+ * and the matcher splice in Editor.tsx.
+ *
+ * The `isLine`, `isPreNode`, and `isBetweenInlineElements` helpers mirror
+ * Quill's private versions (clipboard.js lines 236–258 in 2.0.3) which are
+ * not exported. They've been structurally stable across recent Quill versions.
+ */
+
+const preNodes = new WeakMap();
+function isPreNode(node: Node | null): boolean {
+    if (node == null) return false;
+    const cached = preNodes.get(node);
+    if (cached !== undefined) return cached;
+    const result =
+        node instanceof Element && node.tagName === "PRE" ? true : isPreNode(node.parentNode);
+    preNodes.set(node, result);
+    return result;
+}
+
+// `scroll` is Quill's ScrollBlot; we don't depend on its full shape, just the
+// `query` method that resolves a DOM node to a registered blot definition.
+type ScrollLike = { query?: (node: Node) => unknown };
+
+function isLine(node: Node | null, scroll: ScrollLike | undefined | null): boolean {
+    if (!(node instanceof Element)) return false;
+    const match = scroll?.query?.(node) as { prototype?: unknown } | null | undefined;
+    return match != null && match.prototype instanceof Parchment.BlockBlot;
+}
+
+function isBetweenInlineElements(node: Node, scroll: ScrollLike | undefined | null): boolean {
+    const prev = (node as ChildNode).previousElementSibling;
+    const next = (node as ChildNode).nextElementSibling;
+    return !!(prev && next && !isLine(prev, scroll) && !isLine(next, scroll));
+}
+
+export function preserveWhitespaceMatchText(
+    node: Text,
+    delta: Delta,
+    scroll: ScrollLike | undefined | null
+): Delta {
+    let text = node.data;
+
+    if (node.parentElement?.tagName === "O:P") {
+        return delta.insert(text.trim());
+    }
+
+    if (isPreNode(node)) {
+        return delta.insert(text);
+    }
+
+    if (
+        text.trim().length === 0 &&
+        text.includes("\n") &&
+        !isBetweenInlineElements(node, scroll)
+    ) {
+        return delta;
+    }
+
+    text = text.replace(/[^\S\u00a0]/g, " ");
+    // NOTE: Quill's matchText collapses runs here with `text.replace(/ {2,}/g, ' ')`.
+    // We intentionally do NOT collapse — that's the entire point of this file.
+
+    const prevSibling = node.previousSibling;
+    const nextSibling = node.nextSibling;
+    if (
+        (prevSibling == null && node.parentElement != null && isLine(node.parentElement, scroll)) ||
+        (prevSibling instanceof Element && isLine(prevSibling, scroll))
+    ) {
+        text = text.replace(/^ /, "");
+    }
+    if (
+        (nextSibling == null && node.parentElement != null && isLine(node.parentElement, scroll)) ||
+        (nextSibling instanceof Element && isLine(nextSibling, scroll))
+    ) {
+        text = text.replace(/ $/, "");
+    }
+
+    text = text.replaceAll("\u00a0", " ");
+    return delta.insert(text);
+}
+
+/**
+ * Shape of the parts of Quill's Clipboard module we touch. Mirrors what
+ * `quill.getModule("clipboard")` actually returns at runtime; declared inline
+ * here so consumers don't need to depend on Quill's internal types.
+ */
+type ClipboardModuleLike = {
+    matchers?: Array<[number | string, (node: Node, delta: Delta, scroll: unknown) => Delta]>;
+};
+
+/**
+ * Swaps Quill's built-in TEXT_NODE `matchText` matcher with the
+ * whitespace-preserving version above. Identifies the existing entry by
+ * function reference (using the imported `builtinMatchText`) rather than by
+ * position, so the replacement is robust to Quill reordering its default
+ * matchers in a future patch release.
+ *
+ * Returns `true` if the swap succeeded, `false` if Quill's built-in matcher
+ * couldn't be located (e.g. because Quill's internals changed shape in an
+ * upgrade). Callers should treat `false` as a regression signal.
+ */
+export function installPreserveWhitespaceMatcher(clipboardModule: ClipboardModuleLike): boolean {
+    // Defensive against test doubles that stub out `clipboard` without a
+    // `matchers` array — silently no-op there.
+    if (!Array.isArray(clipboardModule?.matchers)) return false;
+
+    const entry = clipboardModule.matchers.find(
+        ([selector, matcher]) =>
+            selector === Node.TEXT_NODE && matcher === (builtinMatchText as unknown)
+    );
+    if (!entry) return false;
+    entry[1] = preserveWhitespaceMatchText as (
+        node: Node,
+        delta: Delta,
+        scroll: unknown
+    ) => Delta;
+    return true;
+}

From ffccbd2a948f2c470d70eb2f960b3e764b9c6962 Mon Sep 17 00:00:00 2001
From: LeviXIII 
Date: Tue, 2 Jun 2026 10:23:24 -0400
Subject: [PATCH 10/12] - Create pull request template - Create issue templates

---
 .github/ISSUE_TEMPLATE/bug-report.md      | 21 +++++++++++++++++
 .github/ISSUE_TEMPLATE/feature-request.md | 21 +++++++++++++++++
 .github/pull-request-template.md          | 28 +++++++++++++++++++++++
 3 files changed, 70 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md
 create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md
 create mode 100644 .github/pull-request-template.md

diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md
new file mode 100644
index 000000000..80e94ffe9
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug-report.md
@@ -0,0 +1,21 @@
+---
+name: Bug Report
+about: Report a bug
+title: "[Bug]: "
+labels: bug
+assignees: ''
+---
+
+## Description
+
+Describe the bug.
+
+## Steps to Reproduce
+
+1.
+2.
+3.
+
+## Expected Behavior
+
+## Actual Behavior
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 000000000..cbe8444e1
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,21 @@
+---
+name: Feature Request
+title: "[Feature]: "
+labels: feature
+assignees: ''
+---
+
+## Summary
+
+Describe the feature.
+
+## Problem
+
+What problem are you trying to solve?
+
+Give some background: what is the current workflow and why is it difficult, inefficient, or not done right now?
+
+## Acceptance Criteria 
+
+- [ ] Does the thing
+- [ ] Looks good
\ No newline at end of file
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 000000000..576d1c990
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,28 @@
+## PR Title
+Format:
+[Issue Number]-[Issue Name]
+
+## Summary
+
+Closes #[insert issue number here]
+
+Describe the change.
+
+## Changes
+
+These should match with the Acceptance Criteria
+
+## Testing Checklist
+
+If this PR was generated with Claude, please work through the checklist yourself before submitting.
+You may use sub-headers to organize better as follows:
+
+### Header 1
+- [ ] It does the thing
+
+### Header 2
+- [ ] It doesn't do the other thing
+
+## Screenshots
+
+Only if necessary for clarity
\ No newline at end of file

From fd1bf04f41eaa30016639af3527d23d9bb0a6a6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= 
Date: Wed, 3 Jun 2026 08:03:57 +0200
Subject: [PATCH 11/12] Fix for older projects so they can see the milestone
 audio option during export

---
 sharedUtils/milestoneIndexUtils.ts          | 210 ++++++++++++++++++--
 src/exportHandler/audioExporter.ts          |  14 +-
 src/projectManager/projectExportView.ts     |  14 +-
 src/projectManager/utils/exportViewUtils.ts |  21 +-
 4 files changed, 225 insertions(+), 34 deletions(-)

diff --git a/sharedUtils/milestoneIndexUtils.ts b/sharedUtils/milestoneIndexUtils.ts
index 5f54aa75b..1657132aa 100644
--- a/sharedUtils/milestoneIndexUtils.ts
+++ b/sharedUtils/milestoneIndexUtils.ts
@@ -4,16 +4,101 @@ import { CodexCellTypes } from "../types/enums";
 type NotebookCell = {
     value?: string;
     metadata?: {
+        id?: string;
         type?: string;
-        data?: { deleted?: boolean };
+        chapter?: number | string;
+        chapterNumber?: number | string;
+        data?: {
+            deleted?: boolean;
+            chapter?: number | string;
+            globalReferences?: string[];
+        };
     };
 };
 
+export type MilestoneIndexModel = {
+    milestones: MilestoneInfo[];
+    /** 0-based milestone index for each notebook cell index */
+    cellMilestoneIndices: number[];
+};
+
+/** True when the notebook has at least one non-deleted milestone cell. */
+export function hasExplicitMilestonesInCells(cells: NotebookCell[]): boolean {
+    return cells.some(
+        (cell) =>
+            cell.metadata?.type === CodexCellTypes.MILESTONE &&
+            cell.metadata?.data?.deleted !== true
+    );
+}
+
+function isCountableContentCell(cell: NotebookCell): boolean {
+    const cellType = cell.metadata?.type;
+    if (cellType === CodexCellTypes.MILESTONE || cellType === "paratext") {
+        return false;
+    }
+    return cell.metadata?.data?.deleted !== true;
+}
+
+function extractChapterFromCellId(cellId: string): string | null {
+    if (!cellId) {
+        return null;
+    }
+    const match = cellId.match(/\s+(\d+):(\d+)(?::|$)/);
+    return match ? match[1] : null;
+}
+
 /**
- * Read-only milestone extraction from notebook cells (mirrors codexDocument.buildMilestoneIndex).
+ * Chapter key for detection: book-qualified (e.g. "MAT-1") when parsed from a global reference or cell ID, bare chapter value when read from chapter metadata.
  */
-export function extractMilestonesFromCells(cells: NotebookCell[]): MilestoneInfo[] {
+export function extractChapterKeyForDetection(cell: NotebookCell): string | null {
+    const meta = cell.metadata;
+    if (meta?.chapterNumber !== undefined && meta.chapterNumber !== null) {
+        return String(meta.chapterNumber);
+    }
+    if (meta?.chapter !== undefined && meta.chapter !== null) {
+        return String(meta.chapter);
+    }
+    if (meta?.data?.chapter !== undefined && meta.data.chapter !== null) {
+        return String(meta.data.chapter);
+    }
+
+    const globalRefs = meta?.data?.globalReferences;
+    if (globalRefs && Array.isArray(globalRefs) && globalRefs.length > 0) {
+        const firstRef = globalRefs[0];
+        const chapter = extractChapterFromCellId(firstRef);
+        if (chapter) {
+            const bookMatch = firstRef.match(/^([^\s]+)/);
+            return bookMatch ? `${bookMatch[1]}-${chapter}` : chapter;
+        }
+    }
+
+    const cellId = meta?.id;
+    if (cellId) {
+        const chapter = extractChapterFromCellId(cellId);
+        if (chapter) {
+            const bookMatch = cellId.match(/^([^\s]+)/);
+            return bookMatch ? `${bookMatch[1]}-${chapter}` : chapter;
+        }
+    }
+
+    return null;
+}
+
+function milestoneLabelFromChapterKey(chapterKey: string, milestoneIndex: number): string {
+    const dash = chapterKey.lastIndexOf("-");
+    if (dash > 0) {
+        return chapterKey.slice(dash + 1);
+    }
+    return chapterKey || String(milestoneIndex + 1);
+}
+
+function buildFromExplicitMilestoneCells(cells: NotebookCell[]): MilestoneIndexModel | null {
+    if (!hasExplicitMilestonesInCells(cells)) {
+        return null;
+    }
+
     const milestones: MilestoneInfo[] = [];
+    const cellMilestoneIndices = new Array(cells.length).fill(0);
     let totalContentCells = 0;
     let currentMilestoneIndex = -1;
     let currentMilestoneCellCount = 0;
@@ -35,17 +120,17 @@ export function extractMilestonesFromCells(cells: NotebookCell[]): MilestoneInfo
                     value: cell.value || String(currentMilestoneIndex + 1),
                     cellCount: 0,
                 });
+                cellMilestoneIndices[i] = currentMilestoneIndex;
             }
             continue;
         }
 
-        if (cellType !== CodexCellTypes.MILESTONE && cellType !== "paratext") {
-            const isDeleted = cell.metadata?.data?.deleted === true;
-            if (!isDeleted) {
-                totalContentCells++;
-                if (currentMilestoneIndex >= 0) {
-                    currentMilestoneCellCount++;
-                }
+        if (isCountableContentCell(cell)) {
+            totalContentCells++;
+            const idx = currentMilestoneIndex >= 0 ? currentMilestoneIndex : 0;
+            cellMilestoneIndices[i] = idx;
+            if (currentMilestoneIndex >= 0) {
+                currentMilestoneCellCount++;
             }
         }
     }
@@ -55,15 +140,114 @@ export function extractMilestonesFromCells(cells: NotebookCell[]): MilestoneInfo
     }
 
     if (milestones.length === 0) {
-        return [{
+        return null;
+    }
+
+    return { milestones, cellMilestoneIndices };
+}
+
+function buildFromChapterBoundaries(cells: NotebookCell[]): MilestoneIndexModel | null {
+    const milestones: MilestoneInfo[] = [];
+    const cellMilestoneIndices = new Array(cells.length).fill(0);
+    const seenChapters = new Set();
+    let currentMilestoneIndex = -1;
+    let currentMilestoneCellCount = 0;
+
+    for (let i = 0; i < cells.length; i++) {
+        const cell = cells[i];
+
+        if (!isCountableContentCell(cell)) {
+            cellMilestoneIndices[i] = currentMilestoneIndex >= 0 ? currentMilestoneIndex : 0;
+            continue;
+        }
+
+        const chapterKey = extractChapterKeyForDetection(cell);
+        if (chapterKey && !seenChapters.has(chapterKey)) {
+            if (currentMilestoneIndex >= 0) {
+                milestones[currentMilestoneIndex].cellCount = currentMilestoneCellCount;
+            }
+            currentMilestoneIndex++;
+            currentMilestoneCellCount = 0;
+            seenChapters.add(chapterKey);
+            milestones.push({
+                index: currentMilestoneIndex,
+                cellIndex: i,
+                value: milestoneLabelFromChapterKey(chapterKey, currentMilestoneIndex),
+                cellCount: 0,
+            });
+        }
+
+        const idx = currentMilestoneIndex >= 0 ? currentMilestoneIndex : 0;
+        cellMilestoneIndices[i] = idx;
+        if (currentMilestoneIndex >= 0) {
+            currentMilestoneCellCount++;
+        }
+    }
+
+    if (currentMilestoneIndex >= 0) {
+        milestones[currentMilestoneIndex].cellCount = currentMilestoneCellCount;
+    }
+
+    if (milestones.length <= 1) {
+        return null;
+    }
+
+    return { milestones, cellMilestoneIndices };
+}
+
+function buildSyntheticMilestoneModel(cells: NotebookCell[]): MilestoneIndexModel {
+    let totalContentCells = 0;
+    const cellMilestoneIndices = new Array(cells.length).fill(0);
+
+    for (let i = 0; i < cells.length; i++) {
+        if (isCountableContentCell(cells[i])) {
+            totalContentCells++;
+        }
+    }
+
+    return {
+        milestones: [{
             index: 0,
             cellIndex: 0,
             value: "1",
             cellCount: totalContentCells,
-        }];
+        }],
+        cellMilestoneIndices,
+    };
+}
+
+/**
+ * Builds milestone list and per-cell indices using explicit milestone cells, then chapter
+ * boundaries in cell IDs (legacy NT/OT projects), then a single synthetic fallback.
+ */
+export function buildMilestoneIndexModel(cells: NotebookCell[]): MilestoneIndexModel {
+    const explicit = buildFromExplicitMilestoneCells(cells);
+    if (explicit && explicit.milestones.length > 1) {
+        return explicit;
+    }
+
+    const inferred = buildFromChapterBoundaries(cells);
+    if (inferred) {
+        return inferred;
     }
 
-    return milestones;
+    if (explicit) {
+        return explicit;
+    }
+
+    return buildSyntheticMilestoneModel(cells);
+}
+
+/** True when the export UI should offer per-chapter milestone selection (more than one chapter). */
+export function hasSelectableMilestonesInCells(cells: NotebookCell[]): boolean {
+    return buildMilestoneIndexModel(cells).milestones.length > 1;
+}
+
+/**
+ * Read-only milestone extraction from notebook cells (mirrors codexDocument.buildMilestoneIndex).
+ */
+export function extractMilestonesFromCells(cells: NotebookCell[]): MilestoneInfo[] {
+    return buildMilestoneIndexModel(cells).milestones;
 }
 
 /**
diff --git a/src/exportHandler/audioExporter.ts b/src/exportHandler/audioExporter.ts
index aa27334ce..a9d0e69b9 100644
--- a/src/exportHandler/audioExporter.ts
+++ b/src/exportHandler/audioExporter.ts
@@ -13,10 +13,7 @@ import type { ExportProgressReporter, ExportMissingReason } from "./exportProgre
 import { pickAudioAttachment, isExportableCell, type AudioPick, type AudioPickOutcome } from "./audioAttachmentUtils";
 import { formatCellDisplayLabel } from "./cellLabelUtils";
 import { CodexCellTypes } from "../../types/enums";
-import {
-    advanceMilestoneIndexForCell,
-    effectiveMilestoneIndex,
-} from "../../sharedUtils/milestoneIndexUtils";
+import { buildMilestoneIndexModel } from "../../sharedUtils/milestoneIndexUtils";
 
 const execAsync = promisify(exec);
 
@@ -706,7 +703,7 @@ export async function exportAudioAttachments(
         // Build milestone folder mapping: cellId -> milestone folder name
         const cellMilestoneFolder = buildCellMilestoneMap(notebook.cells);
         const milestoneFilter = options?.selectedMilestonesByFile?.[file.fsPath];
-        let currentMilestoneIndex = -1;
+        const milestoneModel = buildMilestoneIndexModel(notebook.cells);
 
         // Count audio cells for per-book progress. Paratext and
         // milestone cells (e.g. chapter headers, intros) are not
@@ -714,12 +711,13 @@ export async function exportAudioAttachments(
         // `isExportableCell` — they would otherwise show up under
         // "no audio recorded" purely as noise.
         const audioCells: Array<{ cell: any; cellId: string; pick: AudioPick; }> = [];
-        for (const cell of notebook.cells) {
-            currentMilestoneIndex = advanceMilestoneIndexForCell(cell, currentMilestoneIndex);
+        for (let cellIndex = 0; cellIndex < notebook.cells.length; cellIndex++) {
+            const cell = notebook.cells[cellIndex];
+            const milestoneIndex = milestoneModel.cellMilestoneIndices[cellIndex] ?? 0;
             if (
                 milestoneFilter &&
                 milestoneFilter.length > 0 &&
-                !milestoneFilter.includes(effectiveMilestoneIndex(currentMilestoneIndex))
+                !milestoneFilter.includes(milestoneIndex)
             ) {
                 continue;
             }
diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts
index 23fb41ed1..a320b3415 100644
--- a/src/projectManager/projectExportView.ts
+++ b/src/projectManager/projectExportView.ts
@@ -4,7 +4,7 @@ import * as fs from "fs";
 import * as vscode from "vscode";
 import { safePostMessageToPanel } from "../utils/webviewUtils";
 import { EXPORT_OPTIONS_BY_FILE_TYPE } from "../../sharedUtils/exportOptionsEligibility";
-import { groupCodexFilesByImporterType, type FileGroup, BIBLE_MILESTONE_EXPORT_GROUP_KEYS } from "./utils/exportViewUtils";
+import { groupCodexFilesByImporterType, type FileGroup } from "./utils/exportViewUtils";
 import { readCodexNotebookFromUri } from "../exportHandler/exportHandlerUtils";
 import { compareHtmlStructure } from "../../sharedUtils/htmlStructureUtils";
 import { getMediaFilesStrategy } from "../utils/localProjectSettings";
@@ -365,8 +365,6 @@ function getWebviewContent(
     const groupsJson = JSON.stringify(fileGroups);
     const exportOptionsConfigJson = JSON.stringify(EXPORT_OPTIONS_BY_FILE_TYPE);
     const initialExportFolderJson = JSON.stringify(initialExportFolder);
-    const bibleMilestoneGroupKeysJson = JSON.stringify([...BIBLE_MILESTONE_EXPORT_GROUP_KEYS]);
-
     return `
     
         
@@ -1340,7 +1338,7 @@ function getWebviewContent(
                     

Select Milestones

- Choose which chapters (milestones) to include in the audio export. All milestones are selected by default. + Choose which chapters (milestones) to include in the audio export.

@@ -1528,7 +1526,6 @@ function getWebviewContent( const vscode = acquireVsCodeApi(); const fileGroups = ${groupsJson}; const exportOptionsConfig = ${exportOptionsConfigJson}; - const bibleMilestoneGroupKeys = new Set(${bibleMilestoneGroupKeysJson}); const isStreamOnly = ${JSON.stringify(isStreamOnly)}; let currentStep = 1; let selectedFormat = null; @@ -1541,10 +1538,11 @@ function getWebviewContent( function shouldShowMilestoneStep() { if (!selectedAudioMode) return false; - if (!selectedGroupKey || !bibleMilestoneGroupKeys.has(selectedGroupKey)) return false; for (const path of selectedFiles) { const f = fileLookup[path]; - if (f && f.milestones && f.milestones.length > 0) return true; + if (f && f.hasSelectableMilestones && f.milestones && f.milestones.length > 0) { + return true; + } } return false; } @@ -1566,7 +1564,7 @@ function getWebviewContent( milestoneFilePaths = []; for (const path of selectedFiles) { const f = fileLookup[path]; - if (f && f.milestones && f.milestones.length > 0) { + if (f && f.hasSelectableMilestones && f.milestones && f.milestones.length > 0) { selectedMilestonesByFile[path] = new Set(f.milestones.map(m => m.index)); milestoneFilePaths.push(path); } diff --git a/src/projectManager/utils/exportViewUtils.ts b/src/projectManager/utils/exportViewUtils.ts index 54ebe6f27..080708284 100644 --- a/src/projectManager/utils/exportViewUtils.ts +++ b/src/projectManager/utils/exportViewUtils.ts @@ -6,7 +6,10 @@ import { isLabelableCell, } from "../../exportHandler/audioAttachmentUtils"; import { formatCellDisplayLabel } from "../../exportHandler/cellLabelUtils"; -import { extractMilestonesFromCells } from "../../../sharedUtils/milestoneIndexUtils"; +import { + buildMilestoneIndexModel, + hasSelectableMilestonesInCells, +} from "../../../sharedUtils/milestoneIndexUtils"; export { EXPORT_OPTIONS_BY_FILE_TYPE, @@ -84,11 +87,10 @@ export interface FileGroupEntry { hasAudio: boolean; audioStats?: NotebookAudioStats; milestones: MilestoneInfo[]; + /** 
True when the notebook has real milestone cells (chapter boundaries), not only a synthetic fallback. */ + hasSelectableMilestones: boolean; } -/** File types that support milestone selection during bible audio export */ -export const BIBLE_MILESTONE_EXPORT_GROUP_KEYS = new Set(["usfm", "ebible", "paratext"]); - export interface FileGroup { groupKey: string; displayName: string; @@ -345,6 +347,11 @@ function getGroupKeyFromMetadata(metadata: Record): string { return "usfm"; } + // Legacy scripture projects: NT/OT corpus without importerType (common in older projects) + if (corpusMarker === "NT" || corpusMarker === "OT") { + return "usfm"; + } + // Bible Stories (OBS) if (corpusMarker === "obs" || importerType === "obs") { return "obs"; @@ -394,7 +401,9 @@ export async function groupCodexFilesByImporterType( const audioStats = hasAudio ? analyzeNotebookAudioStats(notebook, bookCode) : undefined; - const milestones = extractMilestonesFromCells(notebook.cells); + const milestoneModel = buildMilestoneIndexModel(notebook.cells); + const milestones = milestoneModel.milestones; + const hasSelectableMilestones = hasSelectableMilestonesInCells(notebook.cells); if (!groupsMap.has(groupKey)) { groupsMap.set(groupKey, []); @@ -407,6 +416,7 @@ export async function groupCodexFilesByImporterType( hasAudio, audioStats, milestones, + hasSelectableMilestones, }); } catch { const name = uri.fsPath.split(/[/\\]/).pop() || ""; @@ -420,6 +430,7 @@ export async function groupCodexFilesByImporterType( hasTranslations: false, hasAudio: false, milestones: [{ index: 0, cellIndex: 0, value: "1", cellCount: 0 }], + hasSelectableMilestones: false, }); } } From 37c79ed4e975c8a57eec38495dbdd55332395438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Pacanovsk=C3=BD?= Date: Thu, 4 Jun 2026 13:47:58 +0200 Subject: [PATCH 12/12] Fix for single milestone files and unselecting everything in one file --- sharedUtils/milestoneIndexUtils.ts | 12 +++- src/exportHandler/audioExporter.ts | 19 ++++-- 
src/exportHandler/exportHandler.ts | 2 +- src/projectManager/projectExportView.ts | 6 +- .../src/milestoneIndexUtils.test.ts | 63 +++++++++++++++++++ 5 files changed, 91 insertions(+), 11 deletions(-) create mode 100644 webviews/codex-webviews/src/milestoneIndexUtils.test.ts diff --git a/sharedUtils/milestoneIndexUtils.ts b/sharedUtils/milestoneIndexUtils.ts index 1657132aa..8add45daf 100644 --- a/sharedUtils/milestoneIndexUtils.ts +++ b/sharedUtils/milestoneIndexUtils.ts @@ -238,9 +238,17 @@ export function buildMilestoneIndexModel(cells: NotebookCell[]): MilestoneIndexM return buildSyntheticMilestoneModel(cells); } -/** True when the export UI should offer per-chapter milestone selection (more than one chapter). */ +/** + * True when the export UI should offer per-chapter milestone selection: explicit + * milestone cells (including a single chapter) or multiple inferred chapter + * boundaries. False for the synthetic single-chapter fallback only. + */ export function hasSelectableMilestonesInCells(cells: NotebookCell[]): boolean { - return buildMilestoneIndexModel(cells).milestones.length > 1; + if (hasExplicitMilestonesInCells(cells)) { + return true; + } + const inferred = buildFromChapterBoundaries(cells); + return inferred !== null && inferred.milestones.length > 0; } /** diff --git a/src/exportHandler/audioExporter.ts b/src/exportHandler/audioExporter.ts index a9d0e69b9..183e3302d 100644 --- a/src/exportHandler/audioExporter.ts +++ b/src/exportHandler/audioExporter.ts @@ -684,8 +684,17 @@ export async function exportAudioAttachments( }); const bookCode = basename(file.fsPath).split(".")[0] || "BOOK"; - const bookFolder = vscode.Uri.joinPath(exportDir, sanitizeFileComponent(bookCode)); - await vscode.workspace.fs.createDirectory(bookFolder); + const milestoneSelection = options?.selectedMilestonesByFile; + const milestoneFilter = milestoneSelection?.[file.fsPath]; + // Empty array means the user cleared every milestone for this file on step 3. 
+ if ( + milestoneSelection && + Object.prototype.hasOwnProperty.call(milestoneSelection, file.fsPath) && + milestoneFilter && + milestoneFilter.length === 0 + ) { + continue; + } let notebook: CodexNotebookAsJSONData; try { @@ -702,9 +711,10 @@ export async function exportAudioAttachments( // Build milestone folder mapping: cellId -> milestone folder name const cellMilestoneFolder = buildCellMilestoneMap(notebook.cells); - const milestoneFilter = options?.selectedMilestonesByFile?.[file.fsPath]; const milestoneModel = buildMilestoneIndexModel(notebook.cells); + const bookFolder = vscode.Uri.joinPath(exportDir, sanitizeFileComponent(bookCode)); + // Count audio cells for per-book progress. Paratext and // milestone cells (e.g. chapter headers, intros) are not // recording targets, so they're filtered out by @@ -715,8 +725,9 @@ export async function exportAudioAttachments( const cell = notebook.cells[cellIndex]; const milestoneIndex = milestoneModel.cellMilestoneIndices[cellIndex] ?? 0; if ( + milestoneSelection && + Object.prototype.hasOwnProperty.call(milestoneSelection, file.fsPath) && milestoneFilter && - milestoneFilter.length > 0 && !milestoneFilter.includes(milestoneIndex) ) { continue; diff --git a/src/exportHandler/exportHandler.ts b/src/exportHandler/exportHandler.ts index 42db3ab21..30884e550 100644 --- a/src/exportHandler/exportHandler.ts +++ b/src/exportHandler/exportHandler.ts @@ -255,7 +255,7 @@ export interface ExportOptions { removeIds?: boolean; includeAudio?: boolean; includeTimestamps?: boolean; - /** Per-file list of 0-based milestone indices to include when exporting audio */ + /** Per-file 0-based milestone indices to include when exporting audio. An empty array skips that file entirely. Files omitted from this map are exported in full (no milestone step). 
*/ selectedMilestonesByFile?: Record; } diff --git a/src/projectManager/projectExportView.ts b/src/projectManager/projectExportView.ts index a320b3415..0e65c5d71 100644 --- a/src/projectManager/projectExportView.ts +++ b/src/projectManager/projectExportView.ts @@ -1673,11 +1673,9 @@ function getWebviewContent( function buildSelectedMilestonesPayload() { if (!shouldShowMilestoneStep()) return undefined; const payload = {}; - for (const path of selectedFiles) { + for (const path of milestoneFilePaths) { const set = selectedMilestonesByFile[path]; - if (set && set.size > 0) { - payload[path] = Array.from(set).sort((a, b) => a - b); - } + payload[path] = set ? Array.from(set).sort((a, b) => a - b) : []; } return Object.keys(payload).length > 0 ? payload : undefined; } diff --git a/webviews/codex-webviews/src/milestoneIndexUtils.test.ts b/webviews/codex-webviews/src/milestoneIndexUtils.test.ts new file mode 100644 index 000000000..af8ce960f --- /dev/null +++ b/webviews/codex-webviews/src/milestoneIndexUtils.test.ts @@ -0,0 +1,63 @@ +import { describe, it, expect } from "vitest"; +import { + buildMilestoneIndexModel, + hasSelectableMilestonesInCells, +} from "../../../sharedUtils/milestoneIndexUtils"; +import { CodexCellTypes } from "../../../types/enums"; + +function milestoneCell(value: string) { + return { + kind: 2, + value, + metadata: { + type: CodexCellTypes.MILESTONE, + data: {}, + }, + }; +} + +function contentCell(id: string) { + return { + kind: 2, + value: "verse", + metadata: { + type: "text", + id, + data: { globalReferences: [id] }, + }, + }; +} + +describe("hasSelectableMilestonesInCells", () => { + it("returns true for a notebook with one explicit milestone chapter", () => { + const cells = [ + milestoneCell("Chapter 1"), + contentCell("MAT 1:1"), + contentCell("MAT 1:2"), + ]; + expect(buildMilestoneIndexModel(cells).milestones).toHaveLength(1); + expect(hasSelectableMilestonesInCells(cells)).toBe(true); + }); + + it("returns true for multiple explicit 
milestones", () => { + const cells = [ + milestoneCell("Chapter 1"), + contentCell("MAT 1:1"), + milestoneCell("Chapter 2"), + contentCell("MAT 2:1"), + ]; + expect(hasSelectableMilestonesInCells(cells)).toBe(true); + }); + + it("returns false when only the synthetic single-chapter fallback applies", () => { + const cells = [ + { + kind: 2, + value: "plain", + metadata: { type: "text", data: {} }, + }, + ]; + expect(buildMilestoneIndexModel(cells).milestones).toHaveLength(1); + expect(hasSelectableMilestonesInCells(cells)).toBe(false); + }); +});