From 5c293d57391eb19e6f4306636060a3c5241e3f6c Mon Sep 17 00:00:00 2001 From: Nahiyan Khan Date: Thu, 23 Apr 2026 17:27:28 -0400 Subject: [PATCH] fix: harden three regexes flagged by CodeQL polynomial-redos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - FONT_SUFFIXES: dropped the leading \s* that created an ambiguous split with the word alternation; added a \s+ squash before .trim() so any leftover whitespace is still collapsed. - scanHeadings: /^(#{1,6})\s+(.*?)\s*$/ → /^(#{1,6})\s(.*)$/ with .trim() on the captured text. \s and . both match space characters, so the original had overlapping quantifiers. - slug: rewrote as a single imperative O(n) pass. The regex chain kept getting flagged even after splitting /^-+|-+$/g in two. Semantics preserved — verified against the existing layout tests and a broader slug input set (leading/trailing/interior dash runs, mixed case, unicode dropouts). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ghost-drift/src/core/embedding/compare.ts | 12 +++++-- .../src/core/fingerprint/layout.ts | 33 +++++++++++++++---- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/packages/ghost-drift/src/core/embedding/compare.ts b/packages/ghost-drift/src/core/embedding/compare.ts index 7047603..eeb318f 100644 --- a/packages/ghost-drift/src/core/embedding/compare.ts +++ b/packages/ghost-drift/src/core/embedding/compare.ts @@ -345,14 +345,20 @@ function cosineSimilarity(a: number[], b: number[]): number { // --- Font matching --- -const FONT_SUFFIXES = - /\s*\b(variable|var|vf|pro|new|next|display|text|mono)\b/gi; +const FONT_SUFFIXES = /\b(variable|var|vf|pro|new|next|display|text|mono)\b/gi; -/** Normalize font family name for fuzzy comparison */ +/** Normalize font family name for fuzzy comparison. + * + * `FONT_SUFFIXES` intentionally omits a leading `\s*` — combining it with + * `\b` and alternation gives CodeQL's polynomial-redos check an ambiguous + * split. The trailing `.replace(/\s+/g, " ").trim()` folds any whitespace + * the suffix strip left behind, so the result is equivalent. + */ function normalizeFontFamily(name: string): string { return name .replace(/['"]/g, "") .replace(FONT_SUFFIXES, "") + .replace(/\s+/g, " ") .trim() .toLowerCase(); } diff --git a/packages/ghost-drift/src/core/fingerprint/layout.ts b/packages/ghost-drift/src/core/fingerprint/layout.ts index b4e551a..ae9801e 100644 --- a/packages/ghost-drift/src/core/fingerprint/layout.ts +++ b/packages/ghost-drift/src/core/fingerprint/layout.ts @@ -164,10 +164,13 @@ function scanHeadings( ): Heading[] { const out: Heading[] = []; for (let i = startLine - 1; i < endLine; i++) { - const m = /^(#{1,6})\s+(.*?)\s*$/.exec(lines[i]); + // `\s` rather than `\s+` avoids an ambiguous split with the following + // `.*` (both match spaces) that CodeQL flags as polynomial. `.trim()` + // on the captured group folds extra whitespace either side. + const m = /^(#{1,6})\s(.*)$/.exec(lines[i]); if (!m) continue; if (m[1].length === level) { - out.push({ lineNumber: i + 1, level, text: m[2] }); + out.push({ lineNumber: i + 1, level, text: m[2].trim() }); } else if (m[1].length < level) { // A shallower heading ends the region when scanning nested headings // inside a bounded parent. @@ -190,10 +193,28 @@ function isDelimiter(line: string): boolean { } function slug(s: string): string { - return s - .toLowerCase() - .replace(/[^a-z0-9]+/g, "-") - .replace(/^-+|-+$/g, ""); + // Imperative rather than regex-chained because CodeQL flagged the + // three-stage /[^a-z0-9]+/g → /^-+/ → /-+$/ pipeline as polynomial on + // inputs with many '-' repetitions. Single O(n) pass, same semantics. + let out = ""; + let lastDash = true; + for (let i = 0; i < s.length; i++) { + const c = s.charCodeAt(i); + const lower = c >= 65 && c <= 90 ? c + 32 : c; + const isAlnum = + (lower >= 97 && lower <= 122) || (lower >= 48 && lower <= 57); + if (isAlnum) { + out += String.fromCharCode(lower); + lastDash = false; + } else if (!lastDash) { + out += "-"; + lastDash = true; + } + } + if (out.length > 0 && out.charCodeAt(out.length - 1) === 45) { + out = out.slice(0, -1); + } + return out; } /**