From 4f66d23eee8be40485e66f156a56b79abbcd527d Mon Sep 17 00:00:00 2001
From: andreinknv <andrei.nknv@outlook.com>
Date: Sun, 26 Apr 2026 01:43:23 -0400
Subject: [PATCH] fix: defense-in-depth hardening
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bundles seven smaller hardening fixes from the audit pass:

1. Symlink-resistant path validation in sync paths
   src/extraction/index.ts:1148, 1206, 1293, 1347
   src/utils.ts (new validatePathWithinRootReal)

   getChangedFiles() and sync() now resolve symlinks via realpath before
   reading, so a regular file swapped for a symlink to outside the
   project (between scan and read) is rejected rather than followed.
   The lexical-only validatePathWithinRoot stays for paths that don't
   yet exist on disk.

2. storeExtractionResult is now atomic
   src/extraction/index.ts:1018-1098, src/db/queries.ts (new
   QueryBuilder.transaction)

   delete + insert + upsert run in a single SQLite transaction, so a
   process kill mid-write can no longer leave the file's old data
   wiped while the new data is missing. Nested transactions inside
   insertNodes/insertEdges/insertUnresolvedRefsBatch/deleteFile work
   correctly because better-sqlite3 collapses them via SAVEPOINT.

3. recycleWorker is now actually async
   src/extraction/index.ts:573

   Previously declared sync but called via `await`. Now properly
   awaits worker.terminate() with a 1s timeout (so a wedged WASM
   worker can't block the caller indefinitely) and force-fires
   terminate after the timeout to avoid zombie threads.

4. Parse-worker handles unknown message types
   src/extraction/parse-worker.ts:60-78

   When a malformed message arrives with an `id`, the worker sends
   back a structured error instead of silently dropping it (which
   previously caused the main thread to block until the per-file
   timeout). Messages without an id are still ignored — no pending
   promise to unblock.

5. ReDoS-safe glob matching
   src/utils.ts (new globToSafeRegex), src/bin/codegraph.ts:1163,
   src/graph/queries.ts:196

   Coalesces runs of `*` so hostile inputs like `*****` map to a
   single `.*` rather than five chained quantifiers (which would
   catastrophically backtrack on long inputs). Caps input at 1024
   chars. Implementation walks character-by-character with no
   sentinels — avoids any chance of marker-string collisions with
   user input.

6. fileUriToPath fallback hardening
   src/mcp/index.ts:36-40

   Catch-block fallback for malformed file:// URIs now runs through
   path.resolve() so a `file:///../etc/passwd` style input is
   normalized rather than passed raw to downstream filesystem code.

7. Reference dedup at extraction time + dead-code removal
   src/extraction/tree-sitter.ts (new dedupeReferences),
   src/db/queries.ts (deleteUnresolvedByNode removed)

   A function that calls `foo()` 100 times now produces 1 unresolved
   reference instead of 100; the resolver collapses duplicates anyway
   but extraction was wasteful. Removed the never-called
   deleteUnresolvedByNode helper (FK cascade handles cleanup).

3 new tests for globToSafeRegex; full suite 383/383 passes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 __tests__/security.test.ts     |  33 +++++++
 src/bin/codegraph.ts           |  16 ++--
 src/db/queries.ts              |  22 +++--
 src/extraction/index.ts        | 153 +++++++++++++++++++++------------
 src/extraction/parse-worker.ts |  24 ++++++
 src/extraction/tree-sitter.ts  |  23 ++++-
 src/graph/queries.ts           |  13 ++-
 src/mcp/index.ts               |   6 +-
 src/utils.ts                   |  67 +++++++++++++++
 9 files changed, 272 insertions(+), 85 deletions(-)

diff --git a/__tests__/security.test.ts b/__tests__/security.test.ts
index 53441d58..1c62e648 100644
--- a/__tests__/security.test.ts
+++ b/__tests__/security.test.ts
@@ -533,3 +533,36 @@ describe('Symlink Cycle Detection', () => {
     expect(files).toContain('src/valid.ts');
   });
 });
+
+describe('ReDoS-safe glob matching', () => {
+  it('coalesces runs of `*` so hostile inputs do not produce nested quantifiers', async () => {
+    const { globToSafeRegex } = await import('../src/utils');
+    // Two or more stars collapse to a single recursive wildcard. This is the
+    // ReDoS protection: `*****` doesn't expand to `[^/]*[^/]*[^/]*[^/]*[^/]*`,
+    // which on a long input could catastrophically backtrack.
+    expect(globToSafeRegex('*****')).toBe('.*');
+    expect(globToSafeRegex('**')).toBe('.*');
+
+    // Even a constructed-from-hostile-input regex matches in linear time.
+    const regex = new RegExp(`^${globToSafeRegex('*****')}foo$`);
+    const start = Date.now();
+    // 100k 'a's followed by something that doesn't end in 'foo'.
+    expect(regex.test('a'.repeat(100000) + 'bar')).toBe(false);
+    expect(Date.now() - start).toBeLessThan(500);
+  });
+
+  it('rejects pathologically long glob inputs', async () => {
+    const { globToSafeRegex } = await import('../src/utils');
+    expect(globToSafeRegex('*'.repeat(2000))).toBeNull();
+  });
+
+  it('preserves the standard glob semantics for common patterns', async () => {
+    const { globToSafeRegex } = await import('../src/utils');
+    const body = globToSafeRegex('src/**/*.test.ts');
+    expect(body).toBeDefined();
+    const regex = new RegExp(`^${body}$`);
+    expect(regex.test('src/lib/foo.test.ts')).toBe(true);
+    expect(regex.test('src/lib/foo.ts')).toBe(false);
+    expect(regex.test('other/src/foo.test.ts')).toBe(false);
+  });
+});
diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts
index d118a1fd..44ccc873 100644
--- a/src/bin/codegraph.ts
+++ b/src/bin/codegraph.ts
@@ -23,6 +23,7 @@ import * as path from 'path';
 import * as fs from 'fs';
 import { getCodeGraphDir, isInitialized } from '../directory';
 import { createShimmerProgress } from '../ui/shimmer-progress';
+import { globToSafeRegex } from '../utils';
 
 // Lazy-load heavy modules (CodeGraph, runInstaller) to keep CLI startup fast.
 async function loadCodeGraph(): Promise<typeof import('../index')> {
@@ -1158,16 +1159,15 @@ program
         /\/spec\//,
       ];
 
-      // Custom filter pattern
+      // Custom filter pattern (ReDoS-safe — globToSafeRegex coalesces
+      // consecutive wildcards so hostile inputs can't produce nested
+      // quantifiers like `.+.+.+`).
       let customFilter: RegExp | null = null;
       if (options.filter) {
-        // Convert glob to regex: ** → .+, * → [^/]*, . → \.
-        const regex = options.filter
-          .replace(/[+[\]{}()^$|\\]/g, '\\$&')
-          .replace(/\./g, '\\.')
-          .replace(/\*\*/g, '.+')
-          .replace(/\*/g, '[^/]*');
-        customFilter = new RegExp(regex);
+        const regexBody = globToSafeRegex(options.filter);
+        if (regexBody !== null) {
+          customFilter = new RegExp(regexBody);
+        }
       }
 
       function isTestFile(filePath: string): boolean {
diff --git a/src/db/queries.ts b/src/db/queries.ts
index 51f1a1ad..41a27c6b 100644
--- a/src/db/queries.ts
+++ b/src/db/queries.ts
@@ -170,7 +170,6 @@ export class QueryBuilder {
     getFileByPath?: SqliteStatement;
     getAllFiles?: SqliteStatement;
     insertUnresolved?: SqliteStatement;
-    deleteUnresolvedByNode?: SqliteStatement;
     getUnresolvedByName?: SqliteStatement;
     getNodesByName?: SqliteStatement;
     getNodesByQualifiedNameExact?: SqliteStatement;
@@ -185,6 +184,14 @@ export class QueryBuilder {
     this.db = db;
   }
 
+  /**
+   * Execute a callback inside a single SQLite transaction. Useful when a
+   * caller needs several `QueryBuilder` operations to commit atomically.
+   */
+  transaction<T>(fn: () => T): T {
+    return this.db.transaction(fn)();
+  }
+
   // ===========================================================================
   // Node Operations
   // ===========================================================================
@@ -1032,17 +1039,8 @@ export class QueryBuilder {
     insert();
   }
 
-  /**
-   * Delete unresolved references from a node
-   */
-  deleteUnresolvedByNode(nodeId: string): void {
-    if (!this.stmts.deleteUnresolvedByNode) {
-      this.stmts.deleteUnresolvedByNode = this.db.prepare(
-        'DELETE FROM unresolved_refs WHERE from_node_id = ?'
-      );
-    }
-    this.stmts.deleteUnresolvedByNode.run(nodeId);
-  }
+  // (deleteUnresolvedByNode removed — never called; FK cascade on
+  // nodes(id) → unresolved_refs.from_node_id handles cleanup automatically.)
 
   /**
    * Get unresolved references by name (for resolution)
diff --git a/src/extraction/index.ts b/src/extraction/index.ts
index 4ad056fb..822c5cc5 100644
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -20,7 +20,7 @@ import { QueryBuilder } from '../db/queries';
 import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
 import { logDebug, logWarn } from '../errors';
-import { validatePathWithinRoot, normalizePath } from '../utils';
+import { validatePathWithinRoot, validatePathWithinRootReal, normalizePath } from '../utils';
 import picomatch from 'picomatch';
 
 /**
@@ -571,14 +571,29 @@ export class ExtractionOrchestrator {
      * Terminates the current worker and clears the reference so
      * ensureWorker() will spawn a fresh one on the next call.
      */
-    function recycleWorker(): void {
+    async function recycleWorker(): Promise<void> {
       if (!parseWorker) return;
       log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
       const w = parseWorker;
       parseWorker = null;
       workerParseCount = 0;
-      // Fire-and-forget: worker.terminate() can hang if WASM is stuck
-      w.terminate().catch(() => {});
+      // worker.terminate() can hang if WASM is stuck — bound the wait so we
+      // never block the caller's `await` on a wedged worker. The terminate
+      // promise keeps running in the background so the worker eventually gets
+      // reaped even if the timeout wins.
+      let timedOut = false;
+      try {
+        await Promise.race([
+          w.terminate(),
+          new Promise<void>((resolve) => setTimeout(() => { timedOut = true; resolve(); }, 1000)),
+        ]);
+      } catch {
+        // ignore — terminate() failing means the worker is already gone
+      }
+      if (timedOut) {
+        // Fire-and-forget: don't leak a zombie if terminate is still pending.
+        w.terminate().catch(() => {});
+      }
     }
 
     async function requestParse(filePath: string, content: string): Promise<ExtractionResult> {
@@ -1016,7 +1031,13 @@ export class ExtractionOrchestrator {
   }
 
   /**
-   * Store extraction result in database
+   * Store extraction result in database.
+   *
+   * The whole sequence (delete existing rows → insert nodes → insert edges →
+   * insert unresolved refs → upsert file record) runs in a single transaction
+   * so a process kill mid-write cannot leave the file's old data wiped while
+   * the new data is missing — either everything from this call commits or
+   * nothing does.
    */
   private storeExtractionResult(
     filePath: string,
@@ -1033,59 +1054,61 @@ export class ExtractionOrchestrator {
       return; // No changes
     }
 
-    // Delete existing data for this file
-    if (existingFile) {
-      this.queries.deleteFile(filePath);
-    }
-
     // Filter out nodes with missing required fields before insertion.
     // This prevents FK violations when edges reference nodes that would
     // be silently skipped by insertNode() (see issue #42).
     const validNodes = result.nodes.filter((n) => n.id && n.kind && n.name && n.filePath && n.language);
 
-    // Insert nodes
-    if (validNodes.length > 0) {
-      this.queries.insertNodes(validNodes);
-    }
+    this.queries.transaction(() => {
+      // Delete existing data for this file
+      if (existingFile) {
+        this.queries.deleteFile(filePath);
+      }
 
-    // Filter edges to only reference nodes that were actually inserted
-    if (result.edges.length > 0) {
-      const insertedIds = new Set(validNodes.map((n) => n.id));
-      const validEdges = result.edges.filter(
-        (e) => insertedIds.has(e.source) && insertedIds.has(e.target)
-      );
-      if (validEdges.length > 0) {
-        this.queries.insertEdges(validEdges);
+      // Insert nodes
+      if (validNodes.length > 0) {
+        this.queries.insertNodes(validNodes);
       }
-    }
 
-    // Insert unresolved references in batch with denormalized filePath/language
-    if (result.unresolvedReferences.length > 0) {
-      const insertedIds = new Set(validNodes.map((n) => n.id));
-      const refsWithContext = result.unresolvedReferences
-        .filter((ref) => insertedIds.has(ref.fromNodeId))
-        .map((ref) => ({
-          ...ref,
-          filePath: ref.filePath ?? filePath,
-          language: ref.language ?? language,
-        }));
-      if (refsWithContext.length > 0) {
-        this.queries.insertUnresolvedRefsBatch(refsWithContext);
+      // Filter edges to only reference nodes that were actually inserted
+      if (result.edges.length > 0) {
+        const insertedIds = new Set(validNodes.map((n) => n.id));
+        const validEdges = result.edges.filter(
+          (e) => insertedIds.has(e.source) && insertedIds.has(e.target)
+        );
+        if (validEdges.length > 0) {
+          this.queries.insertEdges(validEdges);
+        }
       }
-    }
 
-    // Insert file record
-    const fileRecord: FileRecord = {
-      path: filePath,
-      contentHash,
-      language,
-      size: stats.size,
-      modifiedAt: stats.mtimeMs,
-      indexedAt: Date.now(),
-      nodeCount: result.nodes.length,
-      errors: result.errors.length > 0 ? result.errors : undefined,
-    };
-    this.queries.upsertFile(fileRecord);
+      // Insert unresolved references in batch with denormalized filePath/language
+      if (result.unresolvedReferences.length > 0) {
+        const insertedIds = new Set(validNodes.map((n) => n.id));
+        const refsWithContext = result.unresolvedReferences
+          .filter((ref) => insertedIds.has(ref.fromNodeId))
+          .map((ref) => ({
+            ...ref,
+            filePath: ref.filePath ?? filePath,
+            language: ref.language ?? language,
+          }));
+        if (refsWithContext.length > 0) {
+          this.queries.insertUnresolvedRefsBatch(refsWithContext);
+        }
+      }
+
+      // Insert file record
+      const fileRecord: FileRecord = {
+        path: filePath,
+        contentHash,
+        language,
+        size: stats.size,
+        modifiedAt: stats.mtimeMs,
+        indexedAt: Date.now(),
+        nodeCount: result.nodes.length,
+        errors: result.errors.length > 0 ? result.errors : undefined,
+      };
+      this.queries.upsertFile(fileRecord);
+    });
   }
 
   /**
@@ -1125,9 +1148,16 @@ export class ExtractionOrchestrator {
         }
       }
 
-      // Handle modified files — read + hash only these files
+      // Handle modified files — read + hash only these files. Resolve
+      // symlinks (validatePathWithinRootReal) so a regular file swapped
+      // for a symlink to outside the project between scan and read is
+      // rejected, not followed.
       for (const filePath of gitChanges.modified) {
-        const fullPath = path.join(this.rootDir, filePath);
+        const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
+        if (!fullPath) {
+          logWarn('Path traversal blocked during sync', { filePath });
+          continue;
+        }
         let content: string;
         try {
           content = fs.readFileSync(fullPath, 'utf-8');
@@ -1176,9 +1206,13 @@ export class ExtractionOrchestrator {
         }
       }
 
-      // Find files to add or update
+      // Find files to add or update (symlink-resistant validation)
       for (const filePath of currentFiles) {
-        const fullPath = path.join(this.rootDir, filePath);
+        const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
+        if (!fullPath) {
+          logWarn('Path traversal blocked during sync', { filePath });
+          continue;
+        }
         let content: string;
         try {
           content = fs.readFileSync(fullPath, 'utf-8');
@@ -1260,8 +1294,13 @@ export class ExtractionOrchestrator {
       }
 
       // Modified files — read + hash only these, compare with DB
+      // (symlink-resistant validation)
       for (const filePath of gitChanges.modified) {
-        const fullPath = path.join(this.rootDir, filePath);
+        const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
+        if (!fullPath) {
+          logWarn('Path traversal blocked while detecting changes', { filePath });
+          continue;
+        }
         let content: string;
         try {
           content = fs.readFileSync(fullPath, 'utf-8');
@@ -1309,9 +1348,13 @@ export class ExtractionOrchestrator {
       }
     }
 
-    // Find added and modified files
+    // Find added and modified files (symlink-resistant validation)
     for (const filePath of currentFiles) {
-      const fullPath = path.join(this.rootDir, filePath);
+      const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
+      if (!fullPath) {
+        logWarn('Path traversal blocked while detecting changes', { filePath });
+        continue;
+      }
       let content: string;
       try {
         content = fs.readFileSync(fullPath, 'utf-8');
diff --git a/src/extraction/parse-worker.ts b/src/extraction/parse-worker.ts
index 21b239ca..211cfbf7 100644
--- a/src/extraction/parse-worker.ts
+++ b/src/extraction/parse-worker.ts
@@ -55,5 +55,29 @@ parentPort!.on('message', async (msg: { type: string; id?: number; filePath?: st
     }
   } else if (msg.type === 'shutdown') {
     parentPort!.postMessage({ type: 'shutdown-ack' });
+  } else {
+    // Unknown message types: when an `id` is present, surface a structured
+    // error so the in-flight Promise on the main thread fails fast rather
+    // than blocking until the per-file timeout. Messages without an `id`
+    // have no pending promise to unblock and are silently ignored — no
+    // harm done.
+    const id = msg.id;
+    if (typeof id === 'number') {
+      parentPort!.postMessage({
+        type: 'parse-result',
+        id,
+        result: {
+          nodes: [],
+          edges: [],
+          unresolvedReferences: [],
+          errors: [{
+            message: `Parse worker received unknown message type: ${msg.type}`,
+            severity: 'error',
+            code: 'worker_protocol_error',
+          }],
+          durationMs: 0,
+        } satisfies ExtractionResult,
+      });
+    }
   }
 });
diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts
index 7345d91f..2fbda545 100644
--- a/src/extraction/tree-sitter.ts
+++ b/src/extraction/tree-sitter.ts
@@ -26,6 +26,27 @@ import { DfmExtractor } from './dfm-extractor';
 // Re-export for backward compatibility
 export { generateNodeId } from './tree-sitter-helpers';
 
+/**
+ * Deduplicate unresolved references by (fromNodeId, referenceName,
+ * referenceKind). A function calling `foo()` 100 times pushes 100 refs
+ * during extraction; the resolver collapses them to one edge eventually
+ * (edges are unique on `(source, target, kind, line)` and most resolvers
+ * skip duplicate work), but indexing time and DB churn scale with the
+ * raw count. Collapsing here keeps the first occurrence's line/column
+ * (which is typically what users want when "go to call site" surfaces).
+ */
+function dedupeReferences(refs: UnresolvedReference[]): UnresolvedReference[] {
+  const seen = new Set<string>();
+  const out: UnresolvedReference[] = [];
+  for (const ref of refs) {
+    const key = `${ref.fromNodeId}\0${ref.referenceKind}\0${ref.referenceName}`;
+    if (seen.has(key)) continue;
+    seen.add(key);
+    out.push(ref);
+  }
+  return out;
+}
+
 /**
  * Extract the name from a node based on language
  */
@@ -216,7 +237,7 @@ export class TreeSitterExtractor {
     return {
       nodes: this.nodes,
       edges: this.edges,
-      unresolvedReferences: this.unresolvedReferences,
+      unresolvedReferences: dedupeReferences(this.unresolvedReferences),
       errors: this.errors,
       durationMs: Date.now() - startTime,
     };
diff --git a/src/graph/queries.ts b/src/graph/queries.ts
index c39e2e32..e6d79c51 100644
--- a/src/graph/queries.ts
+++ b/src/graph/queries.ts
@@ -7,6 +7,7 @@
 import { Node, Edge, Context, Subgraph, EdgeKind } from '../types';
 import { QueryBuilder } from '../db/queries';
 import { GraphTraverser } from './traversal';
+import { globToSafeRegex } from '../utils';
 
 /**
  * Graph query manager for complex queries
@@ -194,13 +195,11 @@ export class GraphQueryManager {
    * @returns Array of matching nodes
    */
   findByQualifiedName(pattern: string): Node[] {
-    // Convert glob pattern to regex
-    const regexPattern = pattern
-      .replace(/[.+^${}()|[\]\\]/g, '\\$&')
-      .replace(/\*/g, '.*')
-      .replace(/\?/g, '.');
-
-    const regex = new RegExp(`^${regexPattern}$`);
+    // Convert glob pattern to regex (ReDoS-safe — consecutive wildcards are
+    // coalesced so hostile inputs can't produce nested quantifiers).
+    const regexBody = globToSafeRegex(pattern);
+    if (regexBody === null) return [];
+    const regex = new RegExp(`^${regexBody}$`);
 
     // This is inefficient for large graphs - would need FTS index on qualified_name
     // For now, use kind-based filtering if possible
diff --git a/src/mcp/index.ts b/src/mcp/index.ts
index bc3552ae..90f680fe 100644
--- a/src/mcp/index.ts
+++ b/src/mcp/index.ts
@@ -34,8 +34,10 @@ function fileUriToPath(uri: string): string {
     }
     return path.resolve(filePath);
   } catch {
-    // Fallback for non-standard URIs
-    return uri.replace(/^file:\/\/\/?/, '');
+    // Fallback for non-standard URIs — still resolve through path.resolve
+    // so a malformed `file:///../etc/passwd` is normalized rather than
+    // returned raw to downstream filesystem code.
+    return path.resolve(uri.replace(/^file:\/\/\/?/, ''));
   }
 }
 
diff --git a/src/utils.ts b/src/utils.ts
index e75e58e0..22f31232 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -122,6 +122,36 @@ export function isPathWithinRoot(filePath: string, rootDir: string): boolean {
   return resolvedPath.startsWith(resolvedRoot + path.sep) || resolvedPath === resolvedRoot;
 }
 
+/**
+ * Like validatePathWithinRoot but also resolves symlinks via fs.realpathSync,
+ * so a regular-looking path that is actually a symlink to outside the root
+ * is rejected. Returns the resolved real path, or null if the file escapes
+ * the root or can't be reached.
+ *
+ * Costs an extra realpath syscall vs. the lexical-only check, so prefer
+ * validatePathWithinRoot for hot paths where symlink TOCTOU isn't relevant.
+ */
+export function validatePathWithinRootReal(projectRoot: string, filePath: string): string | null {
+  const resolved = path.resolve(projectRoot, filePath);
+  const normalizedRoot = path.resolve(projectRoot);
+  if (!resolved.startsWith(normalizedRoot + path.sep) && resolved !== normalizedRoot) {
+    return null;
+  }
+  try {
+    const realPath = fs.realpathSync(resolved);
+    const realRoot = fs.realpathSync(normalizedRoot);
+    if (!realPath.startsWith(realRoot + path.sep) && realPath !== realRoot) {
+      return null;
+    }
+    return realPath;
+  } catch {
+    // realpath failures (broken symlink, permissions) — return the lexically-
+    // resolved path. The downstream readFileSync will fail naturally and the
+    // caller already handles read errors.
+    return resolved;
+  }
+}
+
 /**
  * Like isPathWithinRoot but also resolves symlinks via fs.realpathSync.
  *
@@ -174,6 +204,43 @@ export function normalizePath(filePath: string): string {
   return filePath.replace(/\\/g, '/');
 }
 
+/**
+ * Convert a simple `*` / `?` / `**` glob to a safe regex source string.
+ *
+ * Hardens against catastrophic backtracking: consecutive `*` are coalesced
+ * to a single wildcard so a hostile input like `*****` doesn't become
+ * `.*.*.*.*.*` (nested quantifiers blow up on long inputs). Returns null
+ * if the input would produce a pathologically long pattern.
+ *
+ * The output is the *body* of the regex (no anchors); callers add `^` / `$`
+ * as appropriate for their use case.
+ */
+export function globToSafeRegex(glob: string): string | null {
+  if (glob.length > 1024) return null;
+  // Single pass: walk character-by-character. When we hit a `*` we look
+  // ahead to coalesce a run, so `**` (any-depth) maps to `.*` and `*` to
+  // `[^/]*` — and a hostile `*****` collapses to a single `.*` rather
+  // than five chained quantifiers (which would catastrophically
+  // backtrack on long inputs).
+  let out = '';
+  for (let i = 0; i < glob.length; i++) {
+    const ch = glob[i];
+    if (ch === '*') {
+      let runLen = 1;
+      while (glob[i + runLen] === '*') runLen++;
+      out += runLen >= 2 ? '.*' : '[^/]*';
+      i += runLen - 1;
+    } else if (ch === '?') {
+      out += '[^/]';
+    } else if (ch && /[.+^${}()|[\]\\]/.test(ch)) {
+      out += '\\' + ch;
+    } else if (ch) {
+      out += ch;
+    }
+  }
+  return out;
+}
+
 /**
  * Cross-process file lock using a lock file with PID tracking.
  *