From 99dc341f2bf836227d1e73b4be1c49043c0f0c0a Mon Sep 17 00:00:00 2001 From: Steven Obiajulu Date: Fri, 29 May 2026 02:54:35 -0400 Subject: [PATCH 1/2] test(docx-core): characterize collision/salt-loop in insertParagraphBookmarks `buildParagraphSeed` combines paragraph text with the immediate prev/next neighbor text and ancestor signature. When two paragraphs sit at positions where both the text and the prev/next neighbors are identical, the seed collides; `deriveDeterministicJrParaName` resolves this with a 10,000-step salt-loop (the first paragraph to produce a seed keeps the unsalted hash; each later paragraph with the same seed gets `|salt:N` appended to the seed before hashing, starting at `|salt:1`). This load-bearing behavior had no characterization test. The two added scenarios: 1. Build a fixture with two paragraphs at positions 1 and 4 that share text and identical prev/next neighbors (`Anchor context.`/`Tail context.`), call `insertParagraphBookmarks`, assert both _bk_* IDs match the canonical regex and are distinct from each other. 2. Open the same XML body twice independently, apply `insertParagraphBookmarks` to each, assert the two collision-resolved ID lists are byte-identical across opens. Both scenarios use `test.openspec(...)` to land on the existing `document-paragraph-id-stability-and-fingerprint` traceability lane. No source files changed; this characterizes existing behavior only. Peer review (Gemini + Codex) pending. 
Ref: #282 --- ...aragraph_id_stability.traceability.test.ts | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts b/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts index db217bee..0e1ff3f6 100644 --- a/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts +++ b/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts @@ -98,4 +98,97 @@ describe('Traceability: document-paragraph-id-stability-and-fingerprint — Para }); }, ); + + test.openspec('insertParagraphBookmarks resolves seed collisions with a deterministic salt')( + 'insertParagraphBookmarks resolves seed collisions with a deterministic salt', + async ({ given, when, then, attachPrettyJson }: AllureBddContext) => { + const xmlBody = + 'Anchor context.' + + 'Duplicate clause.' + + 'Tail context.' + + 'Anchor context.' + + 'Duplicate clause.' + + 'Tail context.'; + const doc = makeDoc(xmlBody); + let duplicateIds: (string | null)[] = []; + + await given('two paragraphs have identical text and identical neighbor context', async () => { + await attachPrettyJson('Collision fixture', { + duplicateParagraphIndexes: [1, 4], + duplicateText: 'Duplicate clause.', + previousText: 'Anchor context.', + nextText: 'Tail context.', + }); + }); + + await when('insertParagraphBookmarks is called', async () => { + insertParagraphBookmarks(doc, 'test-attachment'); + const paragraphs = Array.from(doc.getElementsByTagNameNS(OOXML.W_NS, 'p')); + duplicateIds = [paragraphs[1], paragraphs[4]].map((p) => getParagraphBookmarkId(p as Element)); + await attachPrettyJson('Duplicate paragraph identifiers', duplicateIds); + }); + + await then('the colliding paragraphs receive distinct canonical identifiers', () => { + expect(duplicateIds.length).toBe(2); + expect(duplicateIds[0]).toMatch(/^_bk_[0-9a-f]{12}$/); + expect(duplicateIds[1]).toMatch(/^_bk_[0-9a-f]{12}$/); + 
expect(duplicateIds[1]).not.toEqual(duplicateIds[0]); + }); + }, + ); + + test.openspec('Collision resolution is stable across independent reopens')( + 'Collision resolution is stable across independent reopens', + async ({ given, when, then, attachPrettyJson }: AllureBddContext) => { + const xmlBody = + 'Anchor context.' + + 'Duplicate clause.' + + 'Tail context.' + + 'Anchor context.' + + 'Duplicate clause.' + + 'Tail context.'; + + let firstOpenIds: (string | null)[] = []; + let secondOpenIds: (string | null)[] = []; + + await given('the same colliding paragraph content is opened twice independently', async () => { + await attachPrettyJson('Collision fixture', { + duplicateParagraphIndexes: [1, 4], + duplicateText: 'Duplicate clause.', + previousText: 'Anchor context.', + nextText: 'Tail context.', + }); + }); + + await when('insertParagraphBookmarks is applied to each open', async () => { + const doc1 = makeDoc(xmlBody); + insertParagraphBookmarks(doc1, 'test-attachment'); + const firstParagraphs = Array.from(doc1.getElementsByTagNameNS(OOXML.W_NS, 'p')); + firstOpenIds = [firstParagraphs[1], firstParagraphs[4]].map((p) => + getParagraphBookmarkId(p as Element), + ); + + const doc2 = makeDoc(xmlBody); + insertParagraphBookmarks(doc2, 'test-attachment'); + const secondParagraphs = Array.from(doc2.getElementsByTagNameNS(OOXML.W_NS, 'p')); + secondOpenIds = [secondParagraphs[1], secondParagraphs[4]].map((p) => + getParagraphBookmarkId(p as Element), + ); + + await attachPrettyJson('Duplicate paragraph identifiers by open', { + firstOpenIds, + secondOpenIds, + }); + }); + + await then('collision salts are assigned byte-identically by document order', () => { + expect(firstOpenIds.length).toBe(2); + expect(secondOpenIds).toEqual(firstOpenIds); + for (const id of firstOpenIds) { + expect(id).toMatch(/^_bk_[0-9a-f]{12}$/); + } + expect(firstOpenIds[1]).not.toEqual(firstOpenIds[0]); + }); + }, + ); }); From 4dfb74b96e876e995c33a2e8968499999daf5b98 Mon Sep 17 00:00:00 
2001 From: Steven Obiajulu Date: Fri, 29 May 2026 03:03:26 -0400 Subject: [PATCH 2/2] test(docx-core): pin salt-loop output IDs to characterize derivation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Peer review (Codex dynamic) noted the prior "two distinct IDs" assertion would still pass if `buildParagraphSeed` later added sibling position or wider context — the salt-loop would never run and the test would lose its characterization value. Pin the exact unsalted hash (_bk_04c5b72c79f7) and the |salt:1 hash (_bk_a2abd088979b) so the test fails loudly if the derivation changes shape. Comment notes the rationale. Ref: #282 --- .../paragraph_id_stability.traceability.test.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts b/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts index 0e1ff3f6..3b9ba62d 100644 --- a/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts +++ b/packages/docx-core/test-primitives/paragraph_id_stability.traceability.test.ts @@ -128,11 +128,17 @@ describe('Traceability: document-paragraph-id-stability-and-fingerprint — Para await attachPrettyJson('Duplicate paragraph identifiers', duplicateIds); }); - await then('the colliding paragraphs receive distinct canonical identifiers', () => { + await then('the colliding paragraphs receive the unsalted hash then the |salt:1 hash', () => { expect(duplicateIds.length).toBe(2); expect(duplicateIds[0]).toMatch(/^_bk_[0-9a-f]{12}$/); expect(duplicateIds[1]).toMatch(/^_bk_[0-9a-f]{12}$/); expect(duplicateIds[1]).not.toEqual(duplicateIds[0]); + // Pin the exact derivation so the test characterizes the salt-loop, not + // just "two distinct IDs". 
If buildParagraphSeed later includes sibling + // position or wider context, both IDs would still be distinct without + // the salt loop ever running — this assertion fails loudly in that case. + expect(duplicateIds[0]).toBe('_bk_04c5b72c79f7'); // sha12(seed) + expect(duplicateIds[1]).toBe('_bk_a2abd088979b'); // sha12(seed|salt:1) }); }, ); @@ -188,6 +194,10 @@ describe('Traceability: document-paragraph-id-stability-and-fingerprint — Para expect(id).toMatch(/^_bk_[0-9a-f]{12}$/); } expect(firstOpenIds[1]).not.toEqual(firstOpenIds[0]); + // Pin the cross-open salt assignment so any drift in salt-loop iteration + // order (e.g., if derivation later considered prior-document state) + // would fail the test loudly. See companion scenario for the seed math. + expect(firstOpenIds).toEqual(['_bk_04c5b72c79f7', '_bk_a2abd088979b']); }); }, );