Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions __tests__/security.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -533,3 +533,36 @@ describe('Symlink Cycle Detection', () => {
expect(files).toContain('src/valid.ts');
});
});

describe('ReDoS-safe glob matching', () => {
  it('coalesces runs of `*` so hostile inputs do not produce nested quantifiers', async () => {
    const { globToSafeRegex } = await import('../src/utils');
    // Two or more stars collapse to a single recursive wildcard. This is the
    // ReDoS protection: `*****` doesn't expand to `[^/]*[^/]*[^/]*[^/]*[^/]*`,
    // which on a long input could catastrophically backtrack.
    expect(globToSafeRegex('*****')).toBe('.*');
    expect(globToSafeRegex('**')).toBe('.*');

    // Even a regex constructed from hostile input matches in linear time.
    const regex = new RegExp(`^${globToSafeRegex('*****')}foo$`);
    const start = Date.now();
    // 100k 'a's followed by something that doesn't end in 'foo'.
    expect(regex.test('a'.repeat(100000) + 'bar')).toBe(false);
    expect(Date.now() - start).toBeLessThan(500);
  });

  it('rejects pathologically long glob inputs', async () => {
    const { globToSafeRegex } = await import('../src/utils');
    expect(globToSafeRegex('*'.repeat(2000))).toBeNull();
  });

  it('preserves the standard glob semantics for common patterns', async () => {
    const { globToSafeRegex } = await import('../src/utils');
    const body = globToSafeRegex('src/**/*.test.ts');
    // `toBeDefined()` would pass for `null` — globToSafeRegex's rejection
    // value — and the template literal below would then silently build
    // /^null$/. Assert non-null explicitly so a rejected pattern fails
    // here, at the guard, not as a confusing match failure later.
    expect(body).not.toBeNull();
    const regex = new RegExp(`^${body}$`);
    expect(regex.test('src/lib/foo.test.ts')).toBe(true);
    expect(regex.test('src/lib/foo.ts')).toBe(false);
    expect(regex.test('other/src/foo.test.ts')).toBe(false);
  });
});
16 changes: 8 additions & 8 deletions src/bin/codegraph.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import * as path from 'path';
import * as fs from 'fs';
import { getCodeGraphDir, isInitialized } from '../directory';
import { createShimmerProgress } from '../ui/shimmer-progress';
import { globToSafeRegex } from '../utils';

// Lazy-load heavy modules (CodeGraph, runInstaller) to keep CLI startup fast.
async function loadCodeGraph(): Promise<typeof import('../index')> {
Expand Down Expand Up @@ -1158,16 +1159,15 @@ program
/\/spec\//,
];

// Custom filter pattern
// Custom filter pattern (ReDoS-safe — globToSafeRegex coalesces
// consecutive wildcards so hostile inputs can't produce nested
// quantifiers like `.+.+.+`).
let customFilter: RegExp | null = null;
if (options.filter) {
// Convert glob to regex: ** → .+, * → [^/]*, . → \.
const regex = options.filter
.replace(/[+[\]{}()^$|\\]/g, '\\$&')
.replace(/\./g, '\\.')
.replace(/\*\*/g, '.+')
.replace(/\*/g, '[^/]*');
customFilter = new RegExp(regex);
const regexBody = globToSafeRegex(options.filter);
if (regexBody !== null) {
customFilter = new RegExp(regexBody);
}
}

function isTestFile(filePath: string): boolean {
Expand Down
22 changes: 10 additions & 12 deletions src/db/queries.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,6 @@ export class QueryBuilder {
getFileByPath?: SqliteStatement;
getAllFiles?: SqliteStatement;
insertUnresolved?: SqliteStatement;
deleteUnresolvedByNode?: SqliteStatement;
getUnresolvedByName?: SqliteStatement;
getNodesByName?: SqliteStatement;
getNodesByQualifiedNameExact?: SqliteStatement;
Expand All @@ -185,6 +184,14 @@ export class QueryBuilder {
this.db = db;
}

/**
 * Run `fn` inside one SQLite transaction and return its result. Useful
 * when a caller needs several `QueryBuilder` operations to commit
 * atomically: either every statement issued by `fn` commits, or none do.
 */
transaction<T>(fn: () => T): T {
  // better-sqlite3's `db.transaction(fn)` returns a wrapped function;
  // invoking the wrapper runs `fn` inside BEGIN/COMMIT (ROLLBACK on throw).
  const runAtomically = this.db.transaction(fn);
  return runAtomically();
}

// ===========================================================================
// Node Operations
// ===========================================================================
Expand Down Expand Up @@ -1032,17 +1039,8 @@ export class QueryBuilder {
insert();
}

/**
* Delete unresolved references from a node
*/
deleteUnresolvedByNode(nodeId: string): void {
if (!this.stmts.deleteUnresolvedByNode) {
this.stmts.deleteUnresolvedByNode = this.db.prepare(
'DELETE FROM unresolved_refs WHERE from_node_id = ?'
);
}
this.stmts.deleteUnresolvedByNode.run(nodeId);
}
// (deleteUnresolvedByNode removed — never called; FK cascade on
// nodes(id) → unresolved_refs.from_node_id handles cleanup automatically.)

/**
* Get unresolved references by name (for resolution)
Expand Down
153 changes: 98 additions & 55 deletions src/extraction/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import { QueryBuilder } from '../db/queries';
import { extractFromSource } from './tree-sitter';
import { detectLanguage, isLanguageSupported, initGrammars, loadGrammarsForLanguages } from './grammars';
import { logDebug, logWarn } from '../errors';
import { validatePathWithinRoot, normalizePath } from '../utils';
import { validatePathWithinRoot, validatePathWithinRootReal, normalizePath } from '../utils';
import picomatch from 'picomatch';

/**
Expand Down Expand Up @@ -571,14 +571,29 @@ export class ExtractionOrchestrator {
* Terminates the current worker and clears the reference so
* ensureWorker() will spawn a fresh one on the next call.
*/
function recycleWorker(): void {
async function recycleWorker(): Promise<void> {
if (!parseWorker) return;
log(`Recycling worker after ${workerParseCount} parses (heap: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB RSS)`);
const w = parseWorker;
parseWorker = null;
workerParseCount = 0;
// Fire-and-forget: worker.terminate() can hang if WASM is stuck
w.terminate().catch(() => {});
// worker.terminate() can hang if WASM is stuck — bound the wait so we
// never block the caller's `await` on a wedged worker. The terminate
// promise keeps running in the background so the worker eventually gets
// reaped even if the timeout wins.
let timedOut = false;
try {
await Promise.race([
w.terminate(),
new Promise<void>((resolve) => setTimeout(() => { timedOut = true; resolve(); }, 1000)),
]);
} catch {
// ignore — terminate() failing means the worker is already gone
}
if (timedOut) {
// Fire-and-forget: don't leak a zombie if terminate is still pending.
w.terminate().catch(() => {});
}
}

async function requestParse(filePath: string, content: string): Promise<ExtractionResult> {
Expand Down Expand Up @@ -1016,7 +1031,13 @@ export class ExtractionOrchestrator {
}

/**
* Store extraction result in database
* Store extraction result in database.
*
* The whole sequence (delete existing rows → insert nodes → insert edges →
* insert unresolved refs → upsert file record) runs in a single transaction
* so a process kill mid-write cannot leave the file's old data wiped while
* the new data is missing — either everything from this call commits or
* nothing does.
*/
private storeExtractionResult(
filePath: string,
Expand All @@ -1033,59 +1054,61 @@ export class ExtractionOrchestrator {
return; // No changes
}

// Delete existing data for this file
if (existingFile) {
this.queries.deleteFile(filePath);
}

// Filter out nodes with missing required fields before insertion.
// This prevents FK violations when edges reference nodes that would
// be silently skipped by insertNode() (see issue #42).
const validNodes = result.nodes.filter((n) => n.id && n.kind && n.name && n.filePath && n.language);

// Insert nodes
if (validNodes.length > 0) {
this.queries.insertNodes(validNodes);
}
this.queries.transaction(() => {
// Delete existing data for this file
if (existingFile) {
this.queries.deleteFile(filePath);
}

// Filter edges to only reference nodes that were actually inserted
if (result.edges.length > 0) {
const insertedIds = new Set(validNodes.map((n) => n.id));
const validEdges = result.edges.filter(
(e) => insertedIds.has(e.source) && insertedIds.has(e.target)
);
if (validEdges.length > 0) {
this.queries.insertEdges(validEdges);
// Insert nodes
if (validNodes.length > 0) {
this.queries.insertNodes(validNodes);
}
}

// Insert unresolved references in batch with denormalized filePath/language
if (result.unresolvedReferences.length > 0) {
const insertedIds = new Set(validNodes.map((n) => n.id));
const refsWithContext = result.unresolvedReferences
.filter((ref) => insertedIds.has(ref.fromNodeId))
.map((ref) => ({
...ref,
filePath: ref.filePath ?? filePath,
language: ref.language ?? language,
}));
if (refsWithContext.length > 0) {
this.queries.insertUnresolvedRefsBatch(refsWithContext);
// Filter edges to only reference nodes that were actually inserted
if (result.edges.length > 0) {
const insertedIds = new Set(validNodes.map((n) => n.id));
const validEdges = result.edges.filter(
(e) => insertedIds.has(e.source) && insertedIds.has(e.target)
);
if (validEdges.length > 0) {
this.queries.insertEdges(validEdges);
}
}
}

// Insert file record
const fileRecord: FileRecord = {
path: filePath,
contentHash,
language,
size: stats.size,
modifiedAt: stats.mtimeMs,
indexedAt: Date.now(),
nodeCount: result.nodes.length,
errors: result.errors.length > 0 ? result.errors : undefined,
};
this.queries.upsertFile(fileRecord);
// Insert unresolved references in batch with denormalized filePath/language
if (result.unresolvedReferences.length > 0) {
const insertedIds = new Set(validNodes.map((n) => n.id));
const refsWithContext = result.unresolvedReferences
.filter((ref) => insertedIds.has(ref.fromNodeId))
.map((ref) => ({
...ref,
filePath: ref.filePath ?? filePath,
language: ref.language ?? language,
}));
if (refsWithContext.length > 0) {
this.queries.insertUnresolvedRefsBatch(refsWithContext);
}
}

// Insert file record
const fileRecord: FileRecord = {
path: filePath,
contentHash,
language,
size: stats.size,
modifiedAt: stats.mtimeMs,
indexedAt: Date.now(),
nodeCount: result.nodes.length,
errors: result.errors.length > 0 ? result.errors : undefined,
};
this.queries.upsertFile(fileRecord);
});
}

/**
Expand Down Expand Up @@ -1125,9 +1148,16 @@ export class ExtractionOrchestrator {
}
}

// Handle modified files — read + hash only these files
// Handle modified files — read + hash only these files. Resolve
// symlinks (validatePathWithinRootReal) so a regular file swapped
// for a symlink to outside the project between scan and read is
// rejected, not followed.
for (const filePath of gitChanges.modified) {
const fullPath = path.join(this.rootDir, filePath);
const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
if (!fullPath) {
logWarn('Path traversal blocked during sync', { filePath });
continue;
}
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
Expand Down Expand Up @@ -1176,9 +1206,13 @@ export class ExtractionOrchestrator {
}
}

// Find files to add or update
// Find files to add or update (symlink-resistant validation)
for (const filePath of currentFiles) {
const fullPath = path.join(this.rootDir, filePath);
const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
if (!fullPath) {
logWarn('Path traversal blocked during sync', { filePath });
continue;
}
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
Expand Down Expand Up @@ -1260,8 +1294,13 @@ export class ExtractionOrchestrator {
}

// Modified files — read + hash only these, compare with DB
// (symlink-resistant validation)
for (const filePath of gitChanges.modified) {
const fullPath = path.join(this.rootDir, filePath);
const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
if (!fullPath) {
logWarn('Path traversal blocked while detecting changes', { filePath });
continue;
}
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
Expand Down Expand Up @@ -1309,9 +1348,13 @@ export class ExtractionOrchestrator {
}
}

// Find added and modified files
// Find added and modified files (symlink-resistant validation)
for (const filePath of currentFiles) {
const fullPath = path.join(this.rootDir, filePath);
const fullPath = validatePathWithinRootReal(this.rootDir, filePath);
if (!fullPath) {
logWarn('Path traversal blocked while detecting changes', { filePath });
continue;
}
let content: string;
try {
content = fs.readFileSync(fullPath, 'utf-8');
Expand Down
24 changes: 24 additions & 0 deletions src/extraction/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,5 +55,29 @@ parentPort!.on('message', async (msg: { type: string; id?: number; filePath?: st
}
} else if (msg.type === 'shutdown') {
parentPort!.postMessage({ type: 'shutdown-ack' });
} else {
// Unknown message types: when an `id` is present, surface a structured
// error so the in-flight Promise on the main thread fails fast rather
// than blocking until the per-file timeout. Messages without an `id`
// have no pending promise to unblock and are silently ignored — no
// harm done.
const id = msg.id;
if (typeof id === 'number') {
parentPort!.postMessage({
type: 'parse-result',
id,
result: {
nodes: [],
edges: [],
unresolvedReferences: [],
errors: [{
message: `Parse worker received unknown message type: ${msg.type}`,
severity: 'error',
code: 'worker_protocol_error',
}],
durationMs: 0,
} satisfies ExtractionResult,
});
}
}
});
Loading