From e2e1fd0674cfc3ef445949904f8029cda7c915c9 Mon Sep 17 00:00:00 2001
From: Kai Prince <34746763+KaiPrince@users.noreply.github.com>
Date: Tue, 30 Dec 2025 20:24:23 -0800
Subject: [PATCH 1/5] Add test
---
test/utils.test.js | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/test/utils.test.js b/test/utils.test.js
index 9ff32c7..4e3aefe 100644
--- a/test/utils.test.js
+++ b/test/utils.test.js
@@ -55,4 +55,13 @@ describe('stripWrappingParagraphs', () => {
assert.equal(result, expected);
});
+
+ it('removes nested wrapping p tags', async () => {
+ const input = '
';
+ const expected = '';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, expected);
+ });
});
From b3be64bb452a0e9affb8592a318f390f8a415e34 Mon Sep 17 00:00:00 2001
From: Kai Prince <34746763+KaiPrince@users.noreply.github.com>
Date: Tue, 30 Dec 2025 21:48:30 -0800
Subject: [PATCH 2/5] fix: traverse child nodes when stripping p tags
---
src/utils.js | 50 +++++++++++++++++++++++++++++++-------------------
1 file changed, 31 insertions(+), 19 deletions(-)
diff --git a/src/utils.js b/src/utils.js
index 63f3f18..155f7bc 100644
--- a/src/utils.js
+++ b/src/utils.js
@@ -6,25 +6,37 @@ function isWhitespaceTextNode(node) {
export const stripWrappingParagraphs = (html) => {
const parsedHtml = parse5.parseFragment(html);
- parsedHtml.childNodes = parsedHtml.childNodes.flatMap((node) => {
- if (node.nodeName !== 'p') {
- return node;
- }
-
- // Ignore whitespace-only text nodes
- const meaningfulChildren = node.childNodes.filter(
- (child) => !isWhitespaceTextNode(child)
- );
-
- if (
- meaningfulChildren.length === 1 &&
- meaningfulChildren[0].nodeName.includes('-')
- ) {
- return meaningfulChildren[0];
- }
-
- return node;
- });
+ parsedHtml.childNodes = parsedHtml.childNodes.map(traverseNodes);
return parse5.serialize(parsedHtml);
};
+
+function traverseNodes(node) {
+ node = stripWrappingParagraph(node);
+
+ // Don't traverse children of custom elements
+ if (node.childNodes && !node.nodeName.includes('-')) {
+ node.childNodes = node.childNodes.map(traverseNodes);
+ }
+
+ return node;
+}
+
+function stripWrappingParagraph(node) {
+ if (node.nodeName !== 'p') {
+ return node;
+ }
+
+ // Ignore whitespace-only text nodes
+ const meaningfulChildren = node.childNodes.filter(
+ (child) => !isWhitespaceTextNode(child)
+ );
+
+ if (meaningfulChildren.length === 1 &&
+ meaningfulChildren[0].nodeName.includes('-')) {
+ return meaningfulChildren[0];
+ }
+
+ return node;
+}
+
From 9dbf4d3710395bbd04b14a30f2d1c8e33f6bb2cb Mon Sep 17 00:00:00 2001
From: Kai Prince <34746763+KaiPrince@users.noreply.github.com>
Date: Tue, 30 Dec 2025 21:48:49 -0800
Subject: [PATCH 3/5] Add additional test
---
test/utils.test.js | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/test/utils.test.js b/test/utils.test.js
index 4e3aefe..3971675 100644
--- a/test/utils.test.js
+++ b/test/utils.test.js
@@ -64,4 +64,13 @@ describe('stripWrappingParagraphs', () => {
assert.equal(result, expected);
});
+
+ it('removes double nested wrapping p tags', async () => {
+ const input = '';
+ const expected = '
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, expected);
+ });
});
From 7ffd916f1f6b6384596ba3b50af385b701da5351 Mon Sep 17 00:00:00 2001
From: Kai Prince <34746763+KaiPrince@users.noreply.github.com>
Date: Tue, 30 Dec 2025 23:20:55 -0800
Subject: [PATCH 4/5] fix: support full html doc
---
src/utils.js | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/src/utils.js b/src/utils.js
index 155f7bc..b1154f2 100644
--- a/src/utils.js
+++ b/src/utils.js
@@ -5,12 +5,31 @@ function isWhitespaceTextNode(node) {
}
export const stripWrappingParagraphs = (html) => {
- const parsedHtml = parse5.parseFragment(html);
- parsedHtml.childNodes = parsedHtml.childNodes.map(traverseNodes);
+ const isFullHtmlDoc = (/^\s*<(?:!DOCTYPE\s+html|html)[\s>]/i).test(html); // also accepts leading whitespace, <html lang="..."> and legacy doctypes
+ const parsedHtml = isFullHtmlDoc ? parse5.parse(html) : parse5.parseFragment(html); // fragment parsing does not keep <html>/<head>/<body>
+
+ const rootNode = chooseRootNode(parsedHtml, isFullHtmlDoc); // pass the flag; without it chooseRootNode always returns parsedHtml
+ rootNode.childNodes = rootNode.childNodes.map(traverseNodes);
return parse5.serialize(parsedHtml);
};
+function chooseRootNode(parsedHtml, isFullHtmlDoc = parsedHtml.nodeName === '#document') { // flag defaults from the parsed tree so one-argument calls still select <body>
+ if (isFullHtmlDoc) {
+ const rootNode = parsedHtml.childNodes
+ .find((x) => x.nodeName === 'html')
+ ?.childNodes?.find((x) => x.nodeName === 'body');
+
+ if (!rootNode) {
+ throw new Error('html output is missing the body tag');
+ }
+
+ return rootNode; // full document: the caller rewrites only the children of <body>
+ } else {
+ return parsedHtml; // fragment: the parsed root itself is the traversal root
+ }
+}
+
function traverseNodes(node) {
node = stripWrappingParagraph(node);
From 75c1c3f05ad2d63a1f791a656af26ea7c50acb9b Mon Sep 17 00:00:00 2001
From: Kai Prince <34746763+KaiPrince@users.noreply.github.com>
Date: Tue, 30 Dec 2025 23:21:11 -0800
Subject: [PATCH 5/5] Add html doc tests
---
test/utils.test.js | 112 +++++++++++++++++++++++++++------------------
1 file changed, 67 insertions(+), 45 deletions(-)
diff --git a/test/utils.test.js b/test/utils.test.js
index 3971675..e5da916 100644
--- a/test/utils.test.js
+++ b/test/utils.test.js
@@ -3,74 +3,96 @@ import assert from 'node:assert/strict';
import { describe, it } from 'node:test';
describe('stripWrappingParagraphs', () => {
- it('removes wrapping p tags', async () => {
- const input = '
';
- const expected = '';
+ describe('Html fragment', () => {
+ it('removes wrapping p tags', async () => {
+ const input = '
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (inner content)', async () => {
- const input = 'inner content
';
- const expected = 'inner content';
+ it('removes wrapping p tags (inner content)', async () => {
+ const input = 'inner content
';
+ const expected = 'inner content';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (inner content, newlines)', async () => {
- const input = 'inner\ncontent
';
- const expected = 'inner\ncontent';
+ it('removes wrapping p tags (inner content, newlines)', async () => {
+ const input = 'inner\ncontent
';
+ const expected = 'inner\ncontent';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (whitespace)', async () => {
- const input = ' \n\t \n
';
- const expected = '';
+ it('removes wrapping p tags (whitespace)', async () => {
+ const input = ' \n\t \n
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('does not remove wrapping p tags if it includes other content', async () => {
- const input = 'Hello
';
+ it('does not remove wrapping p tags if it includes other content', async () => {
+ const input = 'Hello
';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, input);
- });
+ assert.equal(result, input);
+ });
- it('removes wrapping p tags (multiple)', async () => {
- const input = '
\n
';
- const expected = '\n';
+ it('removes wrapping p tags (multiple)', async () => {
+ const input =
+ '
\n
';
+ const expected = '\n';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes nested wrapping p tags', async () => {
- const input = '
';
- const expected = '';
+ it('removes nested wrapping p tags', async () => {
+ const input = '
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
+ assert.equal(result, expected);
+ });
+
+ it('removes double nested wrapping p tags', async () => {
+ const input = '';
+ const expected = '
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, expected);
+ });
});
- it('removes double nested wrapping p tags', async () => {
- const input = '';
- const expected = '
';
+ describe('Html document', () => {
+ it('preserves html, head, and body tags', () => {
+ const input = 'Hello
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, input);
+ });
+
+ it('strips p tags in body', () => {
+ const input = '
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
+ assert.equal(result, expected);
+ });
});
});