From e2e1fd0674cfc3ef445949904f8029cda7c915c9 Mon Sep 17 00:00:00 2001 From: Kai Prince <34746763+KaiPrince@users.noreply.github.com> Date: Tue, 30 Dec 2025 20:24:23 -0800 Subject: [PATCH 1/5] Add test --- test/utils.test.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/utils.test.js b/test/utils.test.js index 9ff32c7..4e3aefe 100644 --- a/test/utils.test.js +++ b/test/utils.test.js @@ -55,4 +55,13 @@ describe('stripWrappingParagraphs', () => { assert.equal(result, expected); }); + + it('removes nested wrapping p tags', async () => { + const input = '

'; + const expected = '
'; + + const result = stripWrappingParagraphs(input); + + assert.equal(result, expected); + }); }); From b3be64bb452a0e9affb8592a318f390f8a415e34 Mon Sep 17 00:00:00 2001 From: Kai Prince <34746763+KaiPrince@users.noreply.github.com> Date: Tue, 30 Dec 2025 21:48:30 -0800 Subject: [PATCH 2/5] fix: traverse child nodes when stripping p tags --- src/utils.js | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/utils.js b/src/utils.js index 63f3f18..155f7bc 100644 --- a/src/utils.js +++ b/src/utils.js @@ -6,25 +6,37 @@ function isWhitespaceTextNode(node) { export const stripWrappingParagraphs = (html) => { const parsedHtml = parse5.parseFragment(html); - parsedHtml.childNodes = parsedHtml.childNodes.flatMap((node) => { - if (node.nodeName !== 'p') { - return node; - } - - // Ignore whitespace-only text nodes - const meaningfulChildren = node.childNodes.filter( - (child) => !isWhitespaceTextNode(child) - ); - - if ( - meaningfulChildren.length === 1 && - meaningfulChildren[0].nodeName.includes('-') - ) { - return meaningfulChildren[0]; - } - - return node; - }); + parsedHtml.childNodes = parsedHtml.childNodes.map(traverseNodes); return parse5.serialize(parsedHtml); }; + +function traverseNodes(node) { + node = stripWrappingParagraph(node); + + // Don't traverse children of custom elements + if (node.childNodes && !node.nodeName.includes('-')) { + node.childNodes = node.childNodes.map(traverseNodes); + } + + return node; +} + +function stripWrappingParagraph(node) { + if (node.nodeName !== 'p') { + return node; + } + + // Ignore whitespace-only text nodes + const meaningfulChildren = node.childNodes.filter( + (child) => !isWhitespaceTextNode(child) + ); + + if (meaningfulChildren.length === 1 && + meaningfulChildren[0].nodeName.includes('-')) { + return meaningfulChildren[0]; + } + + return node; +} + From 9dbf4d3710395bbd04b14a30f2d1c8e33f6bb2cb Mon Sep 17 00:00:00 2001 From: Kai Prince 
<34746763+KaiPrince@users.noreply.github.com> Date: Tue, 30 Dec 2025 21:48:49 -0800 Subject: [PATCH 3/5] Add additional test --- test/utils.test.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/utils.test.js b/test/utils.test.js index 4e3aefe..3971675 100644 --- a/test/utils.test.js +++ b/test/utils.test.js @@ -64,4 +64,13 @@ describe('stripWrappingParagraphs', () => { assert.equal(result, expected); }); + + it('removes double nested wrapping p tags', async () => { + const input = '

'; + const expected = '
'; + + const result = stripWrappingParagraphs(input); + + assert.equal(result, expected); + }); }); From 7ffd916f1f6b6384596ba3b50af385b701da5351 Mon Sep 17 00:00:00 2001 From: Kai Prince <34746763+KaiPrince@users.noreply.github.com> Date: Tue, 30 Dec 2025 23:20:55 -0800 Subject: [PATCH 4/5] fix: support full html doc --- src/utils.js | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/utils.js b/src/utils.js index 155f7bc..b1154f2 100644 --- a/src/utils.js +++ b/src/utils.js @@ -5,12 +5,31 @@ function isWhitespaceTextNode(node) { } export const stripWrappingParagraphs = (html) => { - const parsedHtml = parse5.parseFragment(html); - parsedHtml.childNodes = parsedHtml.childNodes.map(traverseNodes); + const isFullHtmlDoc = (/^<(!DOCTYPE )?html>/i).test(html); + const parsedHtml = isFullHtmlDoc ? parse5.parse(html) : parse5.parseFragment(html); + + const rootNode = chooseRootNode(parsedHtml, isFullHtmlDoc); + rootNode.childNodes = rootNode.childNodes.map(traverseNodes); return parse5.serialize(parsedHtml); }; +function chooseRootNode(parsedHtml, isFullHtmlDoc) { + if (isFullHtmlDoc) { + const rootNode = parsedHtml.childNodes + .find((x) => x.nodeName === 'html') + ?.childNodes?.find((x) => x.nodeName === 'body'); + + if (!rootNode) { + throw new Error('html output is missing the body tag'); + } + + return rootNode; + } else { + return parsedHtml; + } +} + function traverseNodes(node) { node = stripWrappingParagraph(node); From 75c1c3f05ad2d63a1f791a656af26ea7c50acb9b Mon Sep 17 00:00:00 2001 From: Kai Prince <34746763+KaiPrince@users.noreply.github.com> Date: Tue, 30 Dec 2025 23:21:11 -0800 Subject: [PATCH 5/5] Add html doc tests --- test/utils.test.js | 112 +++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/test/utils.test.js b/test/utils.test.js index 3971675..e5da916 100644 --- a/test/utils.test.js +++ b/test/utils.test.js @@ -3,74 +3,96 @@ import assert from 'node:assert/strict'; 
import { describe, it } from 'node:test'; describe('stripWrappingParagraphs', () => { - it('removes wrapping p tags', async () => { - const input = '

'; - const expected = ''; + describe('Html fragment', () => { + it('removes wrapping p tags', async () => { + const input = '

'; + const expected = ''; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); - }); + assert.equal(result, expected); + }); - it('removes wrapping p tags (inner content)', async () => { - const input = '

inner content

'; - const expected = 'inner content'; + it('removes wrapping p tags (inner content)', async () => { + const input = '

inner content

'; + const expected = 'inner content'; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); - }); + assert.equal(result, expected); + }); - it('removes wrapping p tags (inner content, newlines)', async () => { - const input = '

inner\ncontent

'; - const expected = 'inner\ncontent'; + it('removes wrapping p tags (inner content, newlines)', async () => { + const input = '

inner\ncontent

'; + const expected = 'inner\ncontent'; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); - }); + assert.equal(result, expected); + }); - it('removes wrapping p tags (whitespace)', async () => { - const input = '

\n\t \n

'; - const expected = ''; + it('removes wrapping p tags (whitespace)', async () => { + const input = '

\n\t \n

'; + const expected = ''; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); - }); + assert.equal(result, expected); + }); - it('does not remove wrapping p tags if it includes other content', async () => { - const input = '

Hello

'; + it('does not remove wrapping p tags if it includes other content', async () => { + const input = '

Hello

'; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, input); - }); + assert.equal(result, input); + }); - it('removes wrapping p tags (multiple)', async () => { - const input = '

\n

'; - const expected = '\n'; + it('removes wrapping p tags (multiple)', async () => { + const input = + '

\n

'; + const expected = '\n'; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); - }); + assert.equal(result, expected); + }); - it('removes nested wrapping p tags', async () => { - const input = '

'; - const expected = '
'; + it('removes nested wrapping p tags', async () => { + const input = '

'; + const expected = '
'; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); + assert.equal(result, expected); + }); + + it('removes double nested wrapping p tags', async () => { + const input = '

'; + const expected = '
'; + + const result = stripWrappingParagraphs(input); + + assert.equal(result, expected); + }); }); - it('removes double nested wrapping p tags', async () => { - const input = '

'; - const expected = '
'; + describe('Html document', () => { + it('preserves html, head, and body tags', () => { + const input = '

Hello

'; + + const result = stripWrappingParagraphs(input); + + assert.equal(result, input); + }); + + it('strips p tags in body', () => { + const input = '

'; + const expected = ''; - const result = stripWrappingParagraphs(input); + const result = stripWrappingParagraphs(input); - assert.equal(result, expected); + assert.equal(result, expected); + }); }); });