From 731c1c8d1f46819b92ee99d965ea606941f69762 Mon Sep 17 00:00:00 2001 From: Matt Toohey Date: Tue, 31 Mar 2026 14:33:03 +1100 Subject: [PATCH 1/2] fix(diff-viewer): split tokens on symbol boundaries for finer inline diff highlights Previously splitWords only split on whitespace, so a trailing comma or prefix change caused the entire whitespace-delimited token to highlight. Now word characters (\w+), whitespace (\s+), and individual symbols are separate tokens, so only the actually changed parts get highlighted. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/lib/utils/inlineDiff.test.ts | 23 ++++++++++++++++++- .../diff-viewer/src/lib/utils/inlineDiff.ts | 2 +- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts b/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts index 8c5b57fd..faedacd7 100644 --- a/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts +++ b/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts @@ -157,7 +157,7 @@ describe('computeLineDiff', () => { expect(result.modifiedPairs).toHaveLength(1); const pair = result.modifiedPairs[0]; - // "1;" -> "2;" are the changed tokens + // "1" -> "2" are the changed tokens expect(pair.beforeHighlights.length).toBeGreaterThan(0); expect(pair.afterHighlights.length).toBeGreaterThan(0); }); @@ -412,6 +412,27 @@ describe('computeLineDiff', () => { expect(result.modifiedPairs).toHaveLength(0); }); + it('highlights only the trailing comma when value is otherwise identical', () => { + const before = [' "dmg:publish": "node scripts/publish-dmg-to-github-release.mjs"']; + const after = [' "release:dmg:publish": "node scripts/publish-dmg-to-github-release.mjs",']; + const result = computeLineDiff(before, after); + + expect(result.modifiedPairs).toHaveLength(1); + const pair = result.modifiedPairs[0]; + + // Only "release:" prefix and trailing "," should be highlighted on the after side, + // NOT the entire value string. 
+ const afterHighlightedText = pair.afterHighlights.map(h => + after[0].slice(h.start, h.end), + ); + expect(afterHighlightedText).toContain(','); + expect(afterHighlightedText.join('')).toContain('release'); + + // The shared value portion should NOT be highlighted + const totalHighlighted = pair.afterHighlights.reduce((sum, h) => sum + (h.end - h.start), 0); + expect(totalHighlighted).toBeLessThan(after[0].length / 2); + }); + it('handles interleaved unchanged and changed lines', () => { const before = ['A', 'B', 'C', 'D', 'E']; const after = ['A', 'B2', 'C', 'D2', 'E']; diff --git a/packages/diff-viewer/src/lib/utils/inlineDiff.ts b/packages/diff-viewer/src/lib/utils/inlineDiff.ts index 8d76b6ee..a7cec44e 100644 --- a/packages/diff-viewer/src/lib/utils/inlineDiff.ts +++ b/packages/diff-viewer/src/lib/utils/inlineDiff.ts @@ -93,7 +93,7 @@ function similarity(a: string, b: string): number { const SIMILARITY_THRESHOLD = 0.55; function splitWords(text: string): string[] { - return text.split(/(\s+)/); + return text.match(/\w+|\s+|[^\s\w]/g) ?? []; } function computeCharHighlights( From 65aefeac41def92c8c1aa4cfa83b3411f2e4a549 Mon Sep 17 00:00:00 2001 From: Matt Toohey Date: Tue, 31 Mar 2026 14:44:24 +1100 Subject: [PATCH 2/2] fix(diff-viewer): use Unicode property escapes for proper accented/emoji tokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The splitWords regex now uses the character class [\p{L}\p{N}_] with the `gu` flags instead of \w (the underscore is kept, so it is still treated as a word character), so accented characters (e.g. café) are kept as whole word tokens and emoji (e.g. 🎉) are matched as single code points rather than being split into separate surrogate halves. Adds test cases for both scenarios. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .../src/lib/utils/inlineDiff.test.ts | 38 +++++++++++++++++++ .../diff-viewer/src/lib/utils/inlineDiff.ts | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts b/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts index faedacd7..2e63ae14 100644 --- a/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts +++ b/packages/diff-viewer/src/lib/utils/inlineDiff.test.ts @@ -433,6 +433,44 @@ describe('computeLineDiff', () => { expect(totalHighlighted).toBeLessThan(after[0].length / 2); }); + it('highlights only the changed accented word', () => { + const before = ['le café est bon']; + const after = ['le thé est bon']; + const result = computeLineDiff(before, after); + + expect(result.modifiedPairs).toHaveLength(1); + const pair = result.modifiedPairs[0]; + + const beforeHighlightedText = pair.beforeHighlights.map(h => + before[0].slice(h.start, h.end), + ); + expect(beforeHighlightedText).toEqual(['café']); + + const afterHighlightedText = pair.afterHighlights.map(h => + after[0].slice(h.start, h.end), + ); + expect(afterHighlightedText).toEqual(['thé']); + }); + + it('highlights only the changed emoji', () => { + const before = ['status: 🎉 done']; + const after = ['status: 🚀 done']; + const result = computeLineDiff(before, after); + + expect(result.modifiedPairs).toHaveLength(1); + const pair = result.modifiedPairs[0]; + + const beforeHighlightedText = pair.beforeHighlights.map(h => + before[0].slice(h.start, h.end), + ); + expect(beforeHighlightedText).toEqual(['🎉']); + + const afterHighlightedText = pair.afterHighlights.map(h => + after[0].slice(h.start, h.end), + ); + expect(afterHighlightedText).toEqual(['🚀']); + }); + it('handles interleaved unchanged and changed lines', () => { const before = ['A', 'B', 'C', 'D', 'E']; const after = ['A', 'B2', 'C', 'D2', 'E']; diff --git a/packages/diff-viewer/src/lib/utils/inlineDiff.ts 
b/packages/diff-viewer/src/lib/utils/inlineDiff.ts index a7cec44e..e22e0bac 100644 --- a/packages/diff-viewer/src/lib/utils/inlineDiff.ts +++ b/packages/diff-viewer/src/lib/utils/inlineDiff.ts @@ -93,7 +93,7 @@ function similarity(a: string, b: string): number { const SIMILARITY_THRESHOLD = 0.55; function splitWords(text: string): string[] { - return text.match(/\w+|\s+|[^\s\w]/g) ?? []; + return text.match(/[\p{L}\p{N}_]+|\s+|[^\s\p{L}\p{N}_]/gu) ?? []; } function computeCharHighlights(