diff --git a/src/utils.js b/src/utils.js
index 63f3f18..b1154f2 100644
--- a/src/utils.js
+++ b/src/utils.js
@@ -5,26 +5,57 @@ function isWhitespaceTextNode(node) {
}
export const stripWrappingParagraphs = (html) => {
- const parsedHtml = parse5.parseFragment(html);
- parsedHtml.childNodes = parsedHtml.childNodes.flatMap((node) => {
- if (node.nodeName !== 'p') {
- return node;
- }
+ const isFullHtmlDoc = (/^<(!DOCTYPE )?html>/i).test(html);
+ const parsedHtml = isFullHtmlDoc ? parse5.parse(html) : parse5.parseFragment(html);
+ // Tell chooseRootNode which parser ran so a full document resolves to its body element.
+ const rootNode = chooseRootNode(parsedHtml, isFullHtmlDoc);
+ rootNode.childNodes = rootNode.childNodes.map(traverseNodes);
+
+ return parse5.serialize(parsedHtml);
+};
- // Ignore whitespace-only text nodes
- const meaningfulChildren = node.childNodes.filter(
- (child) => !isWhitespaceTextNode(child)
- );
+// Picks the node whose children get traversed; an omitted flag is inferred from the parse root.
+function chooseRootNode(parsedHtml, isFullHtmlDoc = parsedHtml.nodeName === '#document') {
+ if (isFullHtmlDoc) {
+ const htmlNode = parsedHtml.childNodes.find((x) => x.nodeName === 'html');
+ const rootNode = htmlNode?.childNodes?.find((x) => x.nodeName === 'body');
- if (
- meaningfulChildren.length === 1 &&
- meaningfulChildren[0].nodeName.includes('-')
- ) {
- return meaningfulChildren[0];
+ if (!rootNode) {
+ throw new Error('html output is missing the body tag');
}
+ return rootNode;
+ } else {
+ return parsedHtml;
+ }
+}
+
+function traverseNodes(node) {
+ const current = stripWrappingParagraph(node);
+ const children = current.childNodes;
+ // Nodes without children and custom elements (hyphenated names) are returned as-is
+ if (!children || current.nodeName.includes('-')) {
+ return current;
+ }
+ current.childNodes = children.map(traverseNodes);
+ return current;
+}
+
+function stripWrappingParagraph(node) {
+ if (node.nodeName !== 'p') {
return node;
- });
+ }
+
+ // Whitespace-only text nodes do not count as paragraph content
+ const significant = node.childNodes.filter(
+ (child) => !isWhitespaceTextNode(child)
+ );
+
+ const [onlyChild] = significant;
+ const wrapsSingleCustomElement =
+ significant.length === 1 && onlyChild.nodeName.includes('-');
+
+ // Hand back the lone custom element in place of its paragraph, else keep the node
+ return wrapsSingleCustomElement ? onlyChild : node;
+}
- return parse5.serialize(parsedHtml);
-};
diff --git a/test/utils.test.js b/test/utils.test.js
index 9ff32c7..e5da916 100644
--- a/test/utils.test.js
+++ b/test/utils.test.js
@@ -3,56 +3,96 @@ import assert from 'node:assert/strict';
import { describe, it } from 'node:test';
describe('stripWrappingParagraphs', () => {
- it('removes wrapping p tags', async () => {
- const input = '
';
- const expected = '';
+ describe('Html fragment', () => {
+ it('removes wrapping p tags', async () => {
+ const input = '
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (inner content)', async () => {
- const input = 'inner content
';
- const expected = 'inner content';
+ it('removes wrapping p tags (inner content)', async () => {
+ const input = 'inner content
';
+ const expected = 'inner content';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (inner content, newlines)', async () => {
- const input = 'inner\ncontent
';
- const expected = 'inner\ncontent';
+ it('removes wrapping p tags (inner content, newlines)', async () => {
+ const input = 'inner\ncontent
';
+ const expected = 'inner\ncontent';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
- it('removes wrapping p tags (whitespace)', async () => {
- const input = ' \n\t \n
';
- const expected = '';
+ it('removes wrapping p tags (whitespace)', async () => {
+ const input = ' \n\t \n
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
- });
+ assert.equal(result, expected);
+ });
+
+ it('does not remove wrapping p tags if it includes other content', async () => {
+ const input = 'Hello
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, input);
+ });
+
+ it('removes wrapping p tags (multiple)', async () => {
+ const input =
+ '
\n
';
+ const expected = '\n';
- it('does not remove wrapping p tags if it includes other content', async () => {
- const input = 'Hello
';
+ const result = stripWrappingParagraphs(input);
- const result = stripWrappingParagraphs(input);
+ assert.equal(result, expected);
+ });
- assert.equal(result, input);
+ it('removes nested wrapping p tags', async () => {
+ const input = '
';
+ const expected = '';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, expected);
+ });
+
+ it('removes double nested wrapping p tags', async () => {
+ const input = '';
+ const expected = '
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, expected);
+ });
});
- it('removes wrapping p tags (multiple)', async () => {
- const input = '
\n
';
- const expected = '\n';
+ describe('Html document', () => {
+ it('preserves html, head, and body tags', () => {
+ const input = 'Hello
';
+
+ const result = stripWrappingParagraphs(input);
+
+ assert.equal(result, input);
+ });
+
+ it('strips p tags in body', () => {
+ const input = '
';
+ const expected = '';
- const result = stripWrappingParagraphs(input);
+ const result = stripWrappingParagraphs(input);
- assert.equal(result, expected);
+ assert.equal(result, expected);
+ });
});
});