diff --git a/__tests__/extraction-resolution-accuracy.test.ts b/__tests__/extraction-resolution-accuracy.test.ts
new file mode 100644
index 00000000..f78f3d76
--- /dev/null
+++ b/__tests__/extraction-resolution-accuracy.test.ts
@@ -0,0 +1,266 @@
+/**
+ * Extraction & Resolution Accuracy Tests
+ *
+ * Regression tests for three accuracy bugs fixed in one PR:
+ *   1. Parse-retry comment strip was hardcoded to `//`, no-op on Python/Ruby/etc.
+ *   2. Framework route extractors ran regex over raw file content, matching
+ *      examples in docstrings/comments as real routes.
+ *   3. UTF-8 BOM caused spurious "modified" hash mismatches between editors.
+ */
+
+import { describe, it, expect } from 'vitest';
+import { stripBom, stripCommentLinesForRetry, stripCommentsForRegex } from '../src/utils';
+import { hashContent } from '../src/extraction';
+import { flaskResolver, fastapiResolver, djangoResolver } from '../src/resolution/frameworks/python';
+import { expressResolver } from '../src/resolution/frameworks/express';
+import { aspnetResolver } from '../src/resolution/frameworks/csharp';
+import { rustResolver } from '../src/resolution/frameworks/rust';
+import { laravelResolver } from '../src/resolution/frameworks/laravel';
+
+describe('UTF-8 BOM normalization (bug #5)', () => {
+  // BOMs use the `\uFEFF` escape: a raw U+FEFF is invisible and easily lost on save.
+  it('stripBom removes leading U+FEFF', () => {
+    expect(stripBom('\uFEFFhello')).toBe('hello');
+    expect(stripBom('hello')).toBe('hello');
+    expect(stripBom('')).toBe('');
+  });
+
+  it('stripBom only removes leading BOM, not embedded ones', () => {
+    expect(stripBom('a\uFEFFb')).toBe('a\uFEFFb');
+  });
+
+  it('hashContent treats BOM and no-BOM as identical', () => {
+    const withoutBom = 'export function hello() { return 42; }';
+    expect(hashContent(`\uFEFF${withoutBom}`)).toBe(hashContent(withoutBom));
+  });
+});
+
+describe('Per-language comment-line stripping (bug #1)', () => {
+  it('strips `#` lines for Python', () => {
+    const input = ['# CHECK: foo', 'def x():', '    pass'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'python');
+    expect(out.split('\n')).toEqual(['', 'def x():', '    pass']);
+  });
+
+  it('strips `#` lines for Ruby', () => {
+    const input = ['# top comment', 'def x; end'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'ruby');
+    expect(out.split('\n')).toEqual(['', 'def x; end']);
+  });
+
+  it('strips `//` lines for TypeScript', () => {
+    const input = ['// header', 'function x() {}'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'typescript');
+    expect(out.split('\n')).toEqual(['', 'function x() {}']);
+  });
+
+  it('strips both `//` and `#` lines for PHP', () => {
+    const input = ['// js-style', '# perl-style', '<?php $x = 1;'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'php');
+    expect(out.split('\n')).toEqual(['', '', '<?php $x = 1;']);
+  });
+
+  it('returns content unchanged for unknown languages', () => {
+    const input = '// looks like a comment\ncode';
+    expect(stripCommentLinesForRetry(input, 'unknown-lang')).toBe(input);
+  });
+
+  it('preserves line count so node positions stay correct', () => {
+    const input = ['# c1', 'a', '# c2', 'b'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'python');
+    expect(out.split('\n').length).toBe(input.split('\n').length);
+  });
+
+  it('strips indented `#` comment lines but keeps trailing `#` comments (Python)', () => {
+    // The marker matches optional leading whitespace + `#`, so an indented
+    // pure comment line is stripped. A `#` that follows code on the same
+    // line (a trailing comment) is intentionally left in place.
+    const input = ['    # indented comment', '    pass  # trailing'].join('\n');
+    const out = stripCommentLinesForRetry(input, 'python');
+    expect(out.split('\n')).toEqual(['', '    pass  # trailing']);
+  });
+});
+
+describe('Framework regex no longer matches docstrings/comments (bug #4)', () => {
+  describe('Flask', () => {
+    it('skips routes inside `#` comments', () => {
+      const source = [
+        'from flask import Flask',
+        'app = Flask(__name__)',
+        '# Example: @app.route("/fake")',
+        '@app.route("/real")',
+        'def real(): pass',
+      ].join('\n');
+      // Only the live decorator may become a route node.
+      const routeNames = flaskResolver.extractNodes!('app.py', source).map((node) => node.name);
+      expect(routeNames).toContain('/real');
+      expect(routeNames).not.toContain('/fake');
+    });
+
+    it('skips routes inside triple-quoted docstrings', () => {
+      const source = [
+        'def example():',
+        '    """',
+        '    Usage: @app.route("/fake")',
+        '    """',
+        '    pass',
+        '@app.route("/real")',
+        'def real(): pass',
+      ].join('\n');
+      // The decorator quoted in the docstring must not be reported.
+      const routeNames = flaskResolver.extractNodes!('app.py', source).map((node) => node.name);
+      expect(routeNames).toContain('/real');
+      expect(routeNames).not.toContain('/fake');
+    });
+  });
+
+  describe('FastAPI', () => {
+    it('skips routes inside `#` comments and triple-quoted docstrings', () => {
+      const source = [
+        '"""',
+        'Module docs — example: @app.get("/docfake")',
+        '"""',
+        '# @app.post("/commentfake")',
+        '@app.get("/real")',
+        'def real(): pass',
+      ].join('\n');
+      const routes = fastapiResolver.extractNodes!('app.py', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/docfake')).toBe(false);
+      expect(hasRoute('/commentfake')).toBe(false);
+    });
+
+    it('preserves correct line numbers for real routes after stripping', () => {
+      const source = [
+        '"""',                    // 1: docstring opens
+        '@app.get("/fake")',      // 2: quoted example, must be ignored
+        '"""',                    // 3: docstring closes
+        '',                       // 4: blank
+        '@app.get("/real")',      // 5: the only live route
+      ].join('\n');
+      const routes = fastapiResolver.extractNodes!('app.py', source);
+      const liveRoute = routes.find((route) => route.name.includes('/real'));
+      expect(liveRoute).toBeDefined();
+      expect(liveRoute!.startLine).toBe(5);
+    });
+  });
+
+  describe('Django URL patterns', () => {
+    it('skips path() inside `#` comments', () => {
+      const source = [
+        'from django.urls import path',
+        '# example: path("fake/", fake_view)',
+        'urlpatterns = [path("real/", real_view)]',
+      ].join('\n');
+      // Expect a node named after the live pattern and none for the comment.
+      const routeNames = djangoResolver.extractNodes!('urls.py', source).map((node) => node.name);
+      expect(routeNames).toContain('real/');
+      expect(routeNames).not.toContain('fake/');
+    });
+  });
+
+  describe('Express', () => {
+    it('skips routes inside `//` comments', () => {
+      const source = [
+        'const app = express();',
+        '// app.get("/fake", fakeHandler);',
+        'app.get("/real", realHandler);',
+      ].join('\n');
+      const routes = expressResolver.extractNodes!('server.js', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/fake')).toBe(false);
+    });
+
+    it('skips routes inside `/* ... */` block comments', () => {
+      const source = [
+        '/*',
+        ' * app.post("/blockfake", h);',
+        ' */',
+        'app.get("/real", h);',
+      ].join('\n');
+      const routes = expressResolver.extractNodes!('server.js', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/blockfake')).toBe(false);
+    });
+  });
+
+  describe('Laravel', () => {
+    it('skips routes inside PHP `//` and `#` comments', () => {
+      const source = [
+        '<?php',
+        '// Route::get("/jsfake", $h);',
+        '# Route::get("/perlfake", $h);',
+        'Route::get("/real", $h);',
+      ].join('\n');
+      const routes = laravelResolver.extractNodes!('routes.php', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/jsfake')).toBe(false);
+      expect(hasRoute('/perlfake')).toBe(false);
+    });
+  });
+
+  describe('Rust', () => {
+    it('skips actix/rocket routes inside `///` doc comments', () => {
+      const source = [
+        '/// Example route: #[get("/docfake")]',
+        '#[get("/real")]',
+        'fn real() {}',
+      ].join('\n');
+      const routes = rustResolver.extractNodes!('main.rs', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/docfake')).toBe(false);
+    });
+  });
+
+  describe('ASP.NET (C#)', () => {
+    it('skips route attributes inside `///` XML doc comments', () => {
+      const source = [
+        '/// <summary>',
+        '/// Example: [HttpGet("/docfake")]',
+        '/// </summary>',
+        '[HttpGet("/real")]',
+        'public class C {}',
+      ].join('\n');
+      const routes = aspnetResolver.extractNodes!('Controller.cs', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/docfake')).toBe(false);
+    });
+
+    it('skips minimal-API MapGet/MapPost calls inside comments', () => {
+      // Regression guard: csharp.ts runs a separate minimalApiPattern loop
+      // after the routePatterns loop. It initially kept scanning the raw
+      // content, so commented-out `app.MapGet("/x")` calls became routes.
+      const source = [
+        '// app.MapGet("/linefake", h);',
+        '/*',
+        ' * app.MapPost("/blockfake", h);',
+        ' */',
+        'app.MapGet("/real", h);',
+      ].join('\n');
+      const routes = aspnetResolver.extractNodes!('Program.cs', source);
+      const hasRoute = (path: string) => routes.some((route) => route.name.includes(path));
+      expect(hasRoute('/real')).toBe(true);
+      expect(hasRoute('/linefake')).toBe(false);
+      expect(hasRoute('/blockfake')).toBe(false);
+    });
+  });
+});
+
+describe('stripCommentsForRegex preserves line offsets', () => {
+  it('keeps newlines so match.index → original line number', () => {
+    const original = '"""\n@app.get("/x")\n"""\n@app.get("/y")';
+    const stripped = stripCommentsForRegex(original, 'python');
+    const lineCount = (text: string) => text.split('\n').length;
+    expect(lineCount(stripped)).toBe(lineCount(original));
+    // The decorator outside the docstring is still visible to the regex...
+    expect(stripped).toContain('/y');
+    // ...while the one inside the docstring has been blanked out.
+    expect(stripped).not.toContain('/x');
+  });
+});
diff --git a/src/extraction/index.ts b/src/extraction/index.ts
index 4ad056fb..f4acda24 100644
--- a/src/extraction/index.ts
+++ b/src/extraction/index.ts
@@ -20,7 +20,7 @@ import { QueryBuilder } from '../db/queries';
 import { extractFromSource } from './tree-sitter';
 import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
 import { logDebug, logWarn } from '../errors';
-import { validatePathWithinRoot, normalizePath } from '../utils';
+import { validatePathWithinRoot, normalizePath, stripBom, stripCommentLinesForRetry } from '../utils';
 import picomatch from 'picomatch';
 
 /**
@@ -85,10 +85,15 @@ export interface SyncResult {
 }
 
 /**
- * Calculate SHA256 hash of file contents
+ * Calculate SHA256 hash of file contents.
+ *
+ * A leading UTF-8 BOM is stripped before hashing so files round-tripped
+ * through editors that disagree about BOM handling (VSCode strips by
+ * default; some Windows editors preserve it) hash identically and don't
+ * appear "modified" on every sync.
  */
 export function hashContent(content: string): string {
-  return crypto.createHash('sha256').update(content).digest('hex');
+  return crypto.createHash('sha256').update(stripBom(content)).digest('hex');
 }
 
 /**
@@ -820,11 +825,12 @@ export class ExtractionOrchestrator {
           }
 
           // Strip lines that are entirely comments (preserving line numbers
-          // by replacing with empty lines so node positions stay correct)
-          const stripped = fullContent
-            .split('\n')
-            .map(line => /^\s*\/\//.test(line) ? '' : line)
-            .join('\n');
+          // by replacing with empty lines so node positions stay correct).
+          // The marker is language-specific — the previous hardcoded `//`
+          // was a no-op for Python (`#`), Ruby (`#`), etc., so those files
+          // would silently keep failing on the retry.
+          const language = detectLanguage(filePath, fullContent);
+          const stripped = stripCommentLinesForRetry(fullContent, language);
 
           let result: ExtractionResult;
           try {
@@ -834,7 +840,6 @@ export class ExtractionOrchestrator {
           }
 
           if (result.nodes.length > 0 || result.errors.length === 0) {
-            const language = detectLanguage(filePath, fullContent);
             const stats = await fsp.stat(path.join(this.rootDir, filePath));
             this.storeExtractionResult(filePath, fullContent, language, stats, result);
 
diff --git a/src/resolution/frameworks/csharp.ts b/src/resolution/frameworks/csharp.ts
index 1e170be4..9effb53f 100644
--- a/src/resolution/frameworks/csharp.ts
+++ b/src/resolution/frameworks/csharp.ts
@@ -6,6 +6,7 @@
 
 import { Node } from '../../types';
 import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+import { stripCommentsForRegex } from '../../utils';
 
 export const aspnetResolver: FrameworkResolver = {
   name: 'aspnet',
@@ -117,6 +118,9 @@ export const aspnetResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    // Strip `//` and `/* */` comments so XML-doc examples like
+    // `/// [HttpGet("/x")]` aren't treated as real route attributes.
+    const safe = stripCommentsForRegex(content, 'csharp');
 
     // Extract route attributes
     // [HttpGet("path")], [HttpPost("path")], [Route("path")]
@@ -128,8 +132,8 @@ export const aspnetResolver: FrameworkResolver = {
 
     for (const pattern of routePatterns) {
       let match;
-      while ((match = pattern.exec(content)) !== null) {
-        const line = content.slice(0, match.index).split('\n').length;
+      while ((match = pattern.exec(safe)) !== null) {
+        const line = safe.slice(0, match.index).split('\n').length;
 
         if (pattern.source.includes('Http')) {
           if (match[3]) {
@@ -190,9 +194,9 @@ export const aspnetResolver: FrameworkResolver = {
     const minimalApiPattern = /\.Map(Get|Post|Put|Patch|Delete)\s*\(\s*["']([^"']+)["']/g;
 
     let match;
-    while ((match = minimalApiPattern.exec(content)) !== null) {
+    while ((match = minimalApiPattern.exec(safe)) !== null) {
       const [, method, path] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       nodes.push({
         id: `route:${filePath}:${method!.toUpperCase()}:${path}:${line}`,
diff --git a/src/resolution/frameworks/express.ts b/src/resolution/frameworks/express.ts
index 0afa7e03..07851769 100644
--- a/src/resolution/frameworks/express.ts
+++ b/src/resolution/frameworks/express.ts
@@ -6,6 +6,7 @@
 
 import { Node } from '../../types';
 import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+import { stripCommentsForRegex } from '../../utils';
 
 export const expressResolver: FrameworkResolver = {
   name: 'express',
@@ -93,6 +94,9 @@ export const expressResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    // Neutralize comments and JSDoc blocks so a `app.get('/x')` example in
+    // a comment isn't extracted as a real route.
+    const safe = stripCommentsForRegex(content, 'javascript');
 
     // Extract route definitions
     // app.get('/path', handler) or router.get('/path', handler)
@@ -102,9 +106,9 @@ export const expressResolver: FrameworkResolver = {
 
     for (const pattern of routePatterns) {
       let match;
-      while ((match = pattern.exec(content)) !== null) {
+      while ((match = pattern.exec(safe)) !== null) {
         const [, _obj, method, path] = match;
-        const line = content.slice(0, match.index).split('\n').length;
+        const line = safe.slice(0, match.index).split('\n').length;
 
         // Skip middleware use() without paths
         if (method === 'use' && !path?.startsWith('/')) {
diff --git a/src/resolution/frameworks/laravel.ts b/src/resolution/frameworks/laravel.ts
index d6a79885..4b3b5e00 100644
--- a/src/resolution/frameworks/laravel.ts
+++ b/src/resolution/frameworks/laravel.ts
@@ -6,6 +6,7 @@
 
 import { Node } from '../../types';
 import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+import { stripCommentsForRegex } from '../../utils';
 
 /**
  * Laravel facade mappings to underlying classes
@@ -93,6 +94,7 @@ export const laravelResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    const safe = stripCommentsForRegex(content, 'php');
 
     // Extract route definitions
     const routePatterns = [
@@ -106,10 +108,10 @@ export const laravelResolver: FrameworkResolver = {
 
     for (const pattern of routePatterns) {
       let match;
-      while ((match = pattern.exec(content)) !== null) {
+      while ((match = pattern.exec(safe)) !== null) {
         if (pattern.source.includes('resource')) {
           const [, resourceName] = match;
-          const line = content.slice(0, match.index).split('\n').length;
+          const line = safe.slice(0, match.index).split('\n').length;
           nodes.push({
             id: `route:${filePath}:resource:${resourceName}:${line}`,
             kind: 'route',
@@ -125,7 +127,7 @@ export const laravelResolver: FrameworkResolver = {
           });
         } else {
           const [, method, path] = match;
-          const line = content.slice(0, match.index).split('\n').length;
+          const line = safe.slice(0, match.index).split('\n').length;
           nodes.push({
             id: `route:${filePath}:${method!.toUpperCase()}:${path}:${line}`,
             kind: 'route',
diff --git a/src/resolution/frameworks/python.ts b/src/resolution/frameworks/python.ts
index 88f5034a..021fbd1d 100644
--- a/src/resolution/frameworks/python.ts
+++ b/src/resolution/frameworks/python.ts
@@ -6,6 +6,7 @@
 
 import { Node } from '../../types';
 import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+import { stripCommentsForRegex } from '../../utils';
 
 export const djangoResolver: FrameworkResolver = {
   name: 'django',
@@ -77,6 +78,10 @@ export const djangoResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    // Neutralize comments and docstrings so a `path('/x', view)` example in
+    // a docstring isn't extracted as a real route. Newlines preserved so
+    // line numbers stay correct.
+    const safe = stripCommentsForRegex(content, 'python');
 
     // Extract URL patterns
     // path('route/', view, name='name')
@@ -87,9 +92,9 @@ export const djangoResolver: FrameworkResolver = {
 
     for (const pattern of urlPatterns) {
       let match;
-      while ((match = pattern.exec(content)) !== null) {
+      while ((match = pattern.exec(safe)) !== null) {
         const [, urlPath] = match;
-        const line = content.slice(0, match.index).split('\n').length;
+        const line = safe.slice(0, match.index).split('\n').length;
 
         nodes.push({
           id: `route:${filePath}:${urlPath}:${line}`,
@@ -157,15 +162,16 @@ export const flaskResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    const safe = stripCommentsForRegex(content, 'python');
 
     // Extract Flask route decorators
     // @app.route('/path') or @blueprint.route('/path')
     const routePattern = /@(\w+)\.route\s*\(\s*['"]([^'"]+)['"]/g;
 
     let match;
-    while ((match = routePattern.exec(content)) !== null) {
+    while ((match = routePattern.exec(safe)) !== null) {
       const [, _appOrBp, routePath] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       nodes.push({
         id: `route:${filePath}:${routePath}:${line}`,
@@ -245,15 +251,16 @@ export const fastapiResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    const safe = stripCommentsForRegex(content, 'python');
 
     // Extract FastAPI route decorators
     // @app.get('/path') or @router.post('/path')
     const routePattern = /@(\w+)\.(get|post|put|patch|delete|options|head)\s*\(\s*['"]([^'"]+)['"]/g;
 
     let match;
-    while ((match = routePattern.exec(content)) !== null) {
+    while ((match = routePattern.exec(safe)) !== null) {
       const [, _appOrRouter, method, routePath] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       nodes.push({
         id: `route:${filePath}:${method!.toUpperCase()}:${routePath}:${line}`,
diff --git a/src/resolution/frameworks/rust.ts b/src/resolution/frameworks/rust.ts
index 5ab10bc3..92d92060 100644
--- a/src/resolution/frameworks/rust.ts
+++ b/src/resolution/frameworks/rust.ts
@@ -6,6 +6,7 @@
 
 import { Node } from '../../types';
 import { FrameworkResolver, UnresolvedRef, ResolvedRef, ResolutionContext } from '../types';
+import { stripCommentsForRegex } from '../../utils';
 
 export const rustResolver: FrameworkResolver = {
   name: 'rust',
@@ -74,15 +75,18 @@ export const rustResolver: FrameworkResolver = {
   extractNodes(filePath: string, content: string): Node[] {
     const nodes: Node[] = [];
     const now = Date.now();
+    // Strip `//` and `/* */` comments so doc-comment examples like
+    // `/// #[get("/x")]` aren't treated as real route attributes.
+    const safe = stripCommentsForRegex(content, 'rust');
 
     // Extract Actix-web routes
     // #[get("/path")], #[post("/path")], etc.
     const actixRoutePattern = /#\[(get|post|put|patch|delete)\s*\(\s*["']([^"']+)["']/g;
 
     let match;
-    while ((match = actixRoutePattern.exec(content)) !== null) {
+    while ((match = actixRoutePattern.exec(safe)) !== null) {
       const [, method, path] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       nodes.push({
         id: `route:${filePath}:${method!.toUpperCase()}:${path}:${line}`,
@@ -103,9 +107,9 @@ export const rustResolver: FrameworkResolver = {
     // #[get("/path")], #[post("/path", ...)]
     const rocketRoutePattern = /#\[(get|post|put|patch|delete|head|options)\s*\(\s*["']([^"']+)["']/g;
 
-    while ((match = rocketRoutePattern.exec(content)) !== null) {
+    while ((match = rocketRoutePattern.exec(safe)) !== null) {
       const [, method, path] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       // Avoid duplicates from actix pattern
       const routeId = `route:${filePath}:${method!.toUpperCase()}:${path}:${line}`;
@@ -130,9 +134,9 @@ export const rustResolver: FrameworkResolver = {
     // .route("/path", get(handler))
     const axumRoutePattern = /\.route\s*\(\s*["']([^"']+)["']\s*,\s*(get|post|put|patch|delete)/g;
 
-    while ((match = axumRoutePattern.exec(content)) !== null) {
+    while ((match = axumRoutePattern.exec(safe)) !== null) {
       const [, path, method] = match;
-      const line = content.slice(0, match.index).split('\n').length;
+      const line = safe.slice(0, match.index).split('\n').length;
 
       nodes.push({
         id: `route:${filePath}:${method!.toUpperCase()}:${path}:${line}`,
diff --git a/src/utils.ts b/src/utils.ts
index e75e58e0..64741ab6 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -174,6 +174,135 @@ export function normalizePath(filePath: string): string {
   return filePath.replace(/\\/g, '/');
 }
 
+/**
+ * Drop one leading byte-order mark (U+FEFF) from `content`, if present.
+ *
+ * Some editors write the BOM and others remove it, so identical logical
+ * content could otherwise hash to two different values and be reported
+ * as "modified" on every sync. Only the first character is examined; a
+ * U+FEFF anywhere else in the string is left in place.
+ */
+export function stripBom(content: string): string {
+  return content.startsWith('\uFEFF') ? content.slice(1) : content;
+}
+
+/**
+ * Overwrite every character of `text` except `\n` with a single space.
+ * Line count and per-line length are unchanged, so an index or line number
+ * computed on the result points at the same place in the original text.
+ */
+function blankPreservingNewlines(text: string): string {
+  return text.split('\n').map((row) => ' '.repeat(row.length)).join('\n');
+}
+
+/**
+ * Languages with C-style block comments that get neutralized before the
+ * coarse-grained regex extraction (e.g. framework route decorators) runs.
+ * Neutralizing keeps commented-out examples and docstring snippets from
+ * being picked up as real constructs without writing a full lexer.
+ *
+ * What the stripping pass removes, depending on the language:
+ *   - Block comments, keeping their newlines so line numbers still match.
+ *   - Single-line comments, but only when the marker is the first thing on
+ *     the line after optional whitespace, so code and string literals
+ *     sharing a line with a trailing comment are never touched.
+ *   - Python triple-quoted strings, the usual carrier of docstrings.
+ *   - Ruby `=begin` ... `=end` blocks.
+ *
+ * Ordinary string literals are deliberately left alone: blanking them
+ * could erase the very route paths the regexes are looking for.
+ */
+const BLOCK_COMMENT_LANGUAGES = new Set<string>([
+  'c', 'cpp', 'csharp', 'dart', 'go',
+  'java', 'javascript', 'jsx', 'kotlin', 'php',
+  'rust', 'scala', 'swift', 'tsx', 'typescript',
+]);
+
+// Per-language test for "this line is nothing but a line comment": optional
+// spaces/tabs, then the marker. The regexes carry no `/g` or `/m` flag, so
+// `.test` keeps no `lastIndex` state and one instance can serve many languages.
+const SLASH_COMMENT_LINE = /^[ \t]*\/\//;
+const HASH_COMMENT_LINE = /^[ \t]*#/;
+const LINE_COMMENT_MARKER: Record<string, RegExp> = {
+  javascript: SLASH_COMMENT_LINE,
+  typescript: SLASH_COMMENT_LINE,
+  tsx: SLASH_COMMENT_LINE,
+  jsx: SLASH_COMMENT_LINE,
+  java: SLASH_COMMENT_LINE,
+  csharp: SLASH_COMMENT_LINE,
+  cpp: SLASH_COMMENT_LINE,
+  c: SLASH_COMMENT_LINE,
+  go: SLASH_COMMENT_LINE,
+  rust: SLASH_COMMENT_LINE,
+  swift: SLASH_COMMENT_LINE,
+  kotlin: SLASH_COMMENT_LINE,
+  dart: SLASH_COMMENT_LINE,
+  scala: SLASH_COMMENT_LINE,
+  pascal: SLASH_COMMENT_LINE,
+  python: HASH_COMMENT_LINE,
+  ruby: HASH_COMMENT_LINE,
+  php: /^[ \t]*(?:\/\/|#)/,
+};
+
+/**
+ * Best-effort comment stripper for use before coarse-grained regex
+ * extraction. Comments and (for Python) triple-quoted strings become
+ * spaces with newlines kept, so line/column offsets derived from the
+ * result still map onto the original file. Languages without an entry are
+ * returned unchanged.
+ *
+ * Single-line string literals and trailing line comments are matched but
+ * left as-is: that keeps a comment opener inside one (the wildcard route
+ * `app.get('/api/*', h)`, say) from blanking real code up to the next closer.
+ */
+export function stripCommentsForRegex(content: string, language: string): string {
+  // Blank a match only if it begins with a comment/docstring opener.
+  const blankIf = (...openers: string[]) => (hit: string): string =>
+    openers.some((o) => hit.startsWith(o)) ? blankPreservingNewlines(hit) : hit;
+  let out = content;
+  if (BLOCK_COMMENT_LANGUAGES.has(language)) {
+    const scan = /"(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*'|\/\/[^\n]*|\/\*[\s\S]*?\*\//g;
+    out = out.replace(scan, blankIf('/*'));
+  }
+  if (language === 'python') {
+    const scan = /"""[\s\S]*?"""|'''[\s\S]*?'''|"(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*'|#[^\n]*/g;
+    out = out.replace(scan, blankIf('"""', "'''"));
+  }
+  if (language === 'ruby') {
+    out = out.replace(/^=begin\b[\s\S]*?^=end\b[^\n]*/gm, blankPreservingNewlines);
+  }
+  // Whole-line comments only, so code sharing a line with one survives.
+  const lineMarker = LINE_COMMENT_MARKER[language];
+  if (!lineMarker) return out;
+  return out
+    .split('\n')
+    .map((row) => (lineMarker.test(row) ? blankPreservingNewlines(row) : row))
+    .join('\n');
+}
+
+/**
+ * Blank out every line that consists solely of a single-line comment in
+ * the given language, leaving an empty line behind so the total line count
+ * (and therefore tree-sitter node positions) is unchanged.
+ *
+ * This backs the parser-retry "shrink the file" fallback. In contrast to
+ * {@link stripCommentsForRegex}, block comments and docstrings are kept:
+ * the aim is only to shed the cheapest dead weight (for example compiler
+ * test files dominated by `# CHECK:` / `// CHECK:` lines) without risking
+ * a change in meaning.
+ *
+ * A language with no known line-comment marker gets `content` back as-is.
+ */
+export function stripCommentLinesForRetry(content: string, language: string): string {
+  const marker = LINE_COMMENT_MARKER[language];
+  if (marker === undefined) return content;
+  const rows = content.split('\n');
+  rows.forEach((row, at) => {
+    if (marker.test(row)) rows[at] = '';
+  });
+  return rows.join('\n');
+}
+
 /**
  * Cross-process file lock using a lock file with PID tracking.
  *