diff --git a/packages/docx-core/src/primitives/dom-helpers.ts b/packages/docx-core/src/primitives/dom-helpers.ts
index ff3e4ed..8a25695 100644
--- a/packages/docx-core/src/primitives/dom-helpers.ts
+++ b/packages/docx-core/src/primitives/dom-helpers.ts
@@ -42,12 +42,21 @@ export function getLeafText(el: Element): string | undefined {
/**
* Set the direct text content of a leaf element. Replaces or creates
* the first text child node.
+ *
+ * NOTE: We must update BOTH `data` and `nodeValue` on the text node.
+ * In @xmldom/xmldom, `CharacterData` stores text in two separate plain
+ * properties: `data` (read by XMLSerializer) and `nodeValue` (read by
+ * `textContent` getter). All CharacterData mutation methods keep them
+ * in sync, but a direct `child.nodeValue = text` assignment only updates
+ * `nodeValue`, leaving the stale original value in `data`. This causes
+ * XMLSerializer to output the old text. Using `replaceData` (which sets
+ * both atomically) is the correct W3C-compliant approach.
*/
export function setLeafText(el: Element, text: string): void {
for (let i = 0; i < el.childNodes.length; i++) {
const child = el.childNodes[i]!;
if (child.nodeType === NODE_TYPE.TEXT) {
- child.nodeValue = text;
+ (child as CharacterData).replaceData(0, (child as CharacterData).length, text);
return;
}
}
diff --git a/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts
new file mode 100644
index 0000000..60e14c3
--- /dev/null
+++ b/packages/docx-core/src/xmldom-characterdata-nodevalue.test.ts
@@ -0,0 +1,88 @@
+/**
+ * Upstream bug report: @xmldom/xmldom CharacterData nodeValue/data desync
+ *
+ * In @xmldom/xmldom, `CharacterData` stores text in two separate plain
+ * properties: `data` (read by XMLSerializer) and `nodeValue` (read by the
+ * `textContent` getter). All built-in mutation methods (appendData,
+ * replaceData, splitText, textContent setter) keep them in sync via:
+ * `this.nodeValue = this.data = text`
+ *
+ * However, a direct `node.nodeValue = text` assignment is NOT intercepted —
+ * it only updates the instance property, leaving `data` stale. Since
+ * XMLSerializer reads `node.data`, mutations via direct nodeValue assignment
+ * are silently lost in serialized output.
+ *
+ * WHATWG DOM Living Standard §4.10: for CharacterData nodes, `nodeValue`
+ * getter/setter must be equivalent to `data`.
+ *
+ * This caused a silent data-loss bug in our DOCX comparison engine (Issue #35).
+ * The fix was to use `replaceData()` instead of direct `nodeValue` assignment
+ * in `setLeafText()` (packages/docx-core/src/primitives/dom-helpers.ts).
+ *
+ * These tests document the bug for filing upstream at:
+ * https://github.com/xmldom/xmldom/issues
+ *
+ * Filed as companion to our merged PR #960 (ParentNode.children getter).
+ */
+
+import { describe, expect } from 'vitest';
+import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
+import { testAllure } from './testing/allure-test.js';
+
+const test = testAllure
+ .epic('Document Comparison')
+ .withLabels({ feature: 'xmldom CharacterData Sync' });
+
+describe('xmldom CharacterData nodeValue/data sync', () => {
+ test('replaceData keeps nodeValue and data in sync', () => {
+ const doc = new DOMParser().parseFromString('', 'text/xml');
+ const text = doc.createTextNode('original');
+ doc.documentElement!.appendChild(text);
+
+ text.replaceData(0, text.length, 'updated');
+
+ expect(text.nodeValue).toBe('updated');
+ expect(text.data).toBe('updated');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
+ });
+
+ // This test is marked .fails() to document the upstream bug, still present in
+ // @xmldom/xmldom 0.9.x. Direct nodeValue assignment only updates the instance
+ // property — `data` stays stale, so XMLSerializer silently outputs the old text.
+ //
+ // Once the library implements nodeValue as a getter/setter on CharacterData.prototype
+ // (or via Node.prototype dispatch), change this to a normal test() with the same assertions.
+ //
+ // Fix direction:
+ // Object.defineProperty(CharacterData.prototype, 'nodeValue', {
+ // get() { return this.data; },
+ // set(v) { const s = v == null ? '' : String(v); this.data = s; this.length = s.length; }
+ // });
+ test.fails('direct nodeValue assignment updates nodeValue but NOT data or XMLSerializer output', () => {
+ const doc = new DOMParser().parseFromString('', 'text/xml');
+ const text = doc.createTextNode('original');
+ doc.documentElement!.appendChild(text);
+
+ text.nodeValue = 'updated';
+
+ // nodeValue appears correct — the instance property was shadowed
+ expect(text.nodeValue).toBe('updated');
+ // data is stale — these fail in xmldom 0.9.x
+ expect(text.data).toBe('updated');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('updated');
+ });
+
+ // Real-world impact: simulates the setLeafText path in our DOCX comparison engine
+ // before the fix (Issue #35). The merged atom appeared correct in DOM traversal
+ // (nodeValue read back 'hello world') but XMLSerializer silently wrote stale text.
+ test.fails('merging atom text via nodeValue loses data on serialization', () => {
+ const doc = new DOMParser().parseFromString('hello ', 'text/xml');
+ const textNode = doc.documentElement!.firstChild as unknown as CharacterData;
+
+ textNode.nodeValue = 'hello world';
+
+ // These fail — data and serialized output remain 'hello '
+ expect(textNode.data).toBe('hello world');
+ expect(new XMLSerializer().serializeToString(doc)).toContain('hello world');
+ });
+});