From 2d652306a6b84f87ae55f03d855a38ff9f7bafe6 Mon Sep 17 00:00:00 2001 From: Ndevu12 Date: Sun, 29 Mar 2026 14:18:25 +0000 Subject: [PATCH] fix(core): make isContentEmpty robust to malformed and unclosed tags Replace the regex-based strip (<[^>]*>) with a linear scan over angle brackets. The previous approach could misclassify strings with an unclosed '<' (e.g. literal text or partial tags) because the pattern left odd fragments or dropped visible characters in edge cases. - Treat text after '<' with no following '>' as visible content, so partial tags are not treated as empty. - Preserve existing behavior for normal HTML: empty/whitespace-only paragraphs and plain whitespace still count as empty. Tests cover unclosed brackets, bare tags, multi-line ASCII whitespace inside tags, and tagless whitespace-only input. --- src/core/model.ts | 19 +++++++++++++++++-- tests/unit/core/model.test.ts | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/core/model.ts b/src/core/model.ts index f6ed6b0..2e3065a 100644 --- a/src/core/model.ts +++ b/src/core/model.ts @@ -28,6 +28,21 @@ export function createEmptyDoc(): string { * Strips all tags and checks whether any visible text remains. 
*/ export function isContentEmpty(html: string): boolean { - const stripped = html.replace(/<[^>]*>/g, '').trim(); - return stripped.length === 0; + let i = 0; + let stripped = ''; + while (i < html.length) { + const lt = html.indexOf('<', i); + if (lt === -1) { + stripped += html.slice(i); + break; + } + stripped += html.slice(i, lt); + const gt = html.indexOf('>', lt + 1); + if (gt === -1) { + stripped += html.slice(lt); + break; + } + i = gt + 1; + } + return stripped.trim().length === 0; } diff --git a/tests/unit/core/model.test.ts b/tests/unit/core/model.test.ts index 911e211..15b431c 100644 --- a/tests/unit/core/model.test.ts +++ b/tests/unit/core/model.test.ts @@ -47,5 +47,21 @@ describe('model', () => { it('returns true for empty string', () => { expect(isContentEmpty('')).toBe(true); }); + + it('returns false when < is unclosed and there is visible text after it', () => { + expect(isContentEmpty('< world')).toBe(false); + }); + + it('returns false for a bare unclosed tag (no closing >)', () => { + expect(isContentEmpty('<p')).toBe(false); + }); + + it('returns true for multi-line ASCII whitespace inside tags', () => { + expect(isContentEmpty('<p>\n\t\r\f</p>')).toBe(true); + }); + + it('returns true for a string with no tags that is only whitespace', () => { + expect(isContentEmpty(' \n ')).toBe(true); + }); }); });