diff --git a/ROADMAP.md b/ROADMAP.md index 845822e..beb4bea 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -27,7 +27,7 @@ - [x] Python ecosystem support (`requirements.txt`, `pyproject.toml`) - [x] Severity filtering (`--min-severity`) - [x] File/path exclusion patterns -- [ ] Performance optimization for large repositories +- [x] Performance optimization for large repositories - [ ] More comprehensive test fixtures ## Future (not committed) diff --git a/src/rules/opk-001-ai-credentials.ts b/src/rules/opk-001-ai-credentials.ts index e449d4a..8961610 100644 --- a/src/rules/opk-001-ai-credentials.ts +++ b/src/rules/opk-001-ai-credentials.ts @@ -89,29 +89,37 @@ const rule: Rule = { const absolutePath = path.join(context.rootDir, relativePath); - let stat: fs.Stats; - try { - stat = fs.statSync(absolutePath); - } catch { - continue; - } + let content: string | undefined; + if (context.getFileContent) { + content = await context.getFileContent(relativePath); + if (content === undefined) { + continue; + } + } else { + let stat: fs.Stats; + try { + stat = fs.statSync(absolutePath); + } catch { + continue; + } - if (stat.size > MAX_FILE_SIZE || stat.size === 0) { - continue; - } + if (stat.size > MAX_FILE_SIZE || stat.size === 0) { + continue; + } - let buffer: Buffer; - try { - buffer = fs.readFileSync(absolutePath); - } catch { - continue; - } + let buffer: Buffer; + try { + buffer = fs.readFileSync(absolutePath); + } catch { + continue; + } - if (isBinaryContent(buffer)) { - continue; + if (isBinaryContent(buffer)) { + continue; + } + content = buffer.toString('utf-8'); } - const content = buffer.toString('utf-8'); const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { diff --git a/src/rules/opk-002-prompt-artifacts.ts b/src/rules/opk-002-prompt-artifacts.ts index 4bc16bb..fd365eb 100644 --- a/src/rules/opk-002-prompt-artifacts.ts +++ b/src/rules/opk-002-prompt-artifacts.ts @@ -89,29 +89,36 @@ const rule: Rule = { const absolutePath = 
path.join(context.rootDir, relativePath); - let stat: fs.Stats; - try { - stat = fs.statSync(absolutePath); - } catch { - continue; - } + let content: string | undefined; + if (context.getFileContent) { + content = await context.getFileContent(relativePath); + if (content === undefined) continue; + } else { + let stat: fs.Stats; + try { + stat = fs.statSync(absolutePath); + } catch { + continue; + } - if (stat.size > MAX_FILE_SIZE || stat.size === 0) { - continue; - } + if (stat.size > MAX_FILE_SIZE || stat.size === 0) { + continue; + } - let buffer: Buffer; - try { - buffer = fs.readFileSync(absolutePath); - } catch { - continue; - } + let buffer: Buffer; + try { + buffer = fs.readFileSync(absolutePath); + } catch { + continue; + } - if (isBinaryContent(buffer)) { - continue; + if (isBinaryContent(buffer)) { + continue; + } + + content = buffer.toString('utf-8'); } - const content = buffer.toString('utf-8'); const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { diff --git a/src/rules/opk-003-placeholder-code.ts b/src/rules/opk-003-placeholder-code.ts index 66479e1..8756020 100644 --- a/src/rules/opk-003-placeholder-code.ts +++ b/src/rules/opk-003-placeholder-code.ts @@ -87,29 +87,36 @@ const rule: Rule = { const absolutePath = path.join(context.rootDir, relativePath); - let stat: fs.Stats; - try { - stat = fs.statSync(absolutePath); - } catch { - continue; - } + let content: string | undefined; + if (context.getFileContent) { + content = await context.getFileContent(relativePath); + if (content === undefined) continue; + } else { + let stat: fs.Stats; + try { + stat = fs.statSync(absolutePath); + } catch { + continue; + } - if (stat.size > MAX_FILE_SIZE || stat.size === 0) { - continue; - } + if (stat.size > MAX_FILE_SIZE || stat.size === 0) { + continue; + } - let buffer: Buffer; - try { - buffer = fs.readFileSync(absolutePath); - } catch { - continue; - } + let buffer: Buffer; + try { + buffer = fs.readFileSync(absolutePath); + } catch { + 
continue; + } - if (isBinaryContent(buffer)) { - continue; + if (isBinaryContent(buffer)) { + continue; + } + + content = buffer.toString('utf-8'); } - const content = buffer.toString('utf-8'); const lines = content.split('\n'); for (let i = 0; i < lines.length; i++) { diff --git a/src/scanner/index.ts b/src/scanner/index.ts index 6a1e19c..5e71427 100644 --- a/src/scanner/index.ts +++ b/src/scanner/index.ts @@ -88,11 +88,6 @@ export async function scan( const excludePatterns = config?.exclude || []; const files = await discoverFiles(absoluteRoot, excludePatterns); - - const context: ScanContext = { - rootDir: absoluteRoot, - files, - }; const disabledRules = new Set(); if (config?.rules) { @@ -106,19 +101,66 @@ export async function scan( const allFindings: ScanResult['findings'] = []; let rulesRun = 0; - for (const rule of rules) { - if (disabledRules.has(rule.id)) { - continue; - } + const activeRules = rules.filter(r => !disabledRules.has(r.id)); + rulesRun = activeRules.length; - try { - const findings = await rule.check(context); - allFindings.push(...findings); - rulesRun++; - } catch (err: unknown) { - const message = err instanceof Error ? err.message : String(err); - process.stderr.write(`Warning: Rule ${rule.id} failed: ${message}\n`); - rulesRun++; + const CHUNK_SIZE = 100; + for (let i = 0; i < files.length; i += CHUNK_SIZE) { + const chunk = files.slice(i, i + CHUNK_SIZE); + const fileCache = new Map<string, string | null>(); + + const getFileContent = async (relativePath: string): Promise<string | undefined> => { + if (fileCache.has(relativePath)) { + const cached = fileCache.get(relativePath); + return cached === null ?
undefined : cached; + } + try { + const absolutePath = path.join(absoluteRoot, relativePath); + const stat = await fs.promises.stat(absolutePath); + if (stat.size === 0 || stat.size > 1_000_000) { + fileCache.set(relativePath, null); + return undefined; + } + const buffer = await fs.promises.readFile(absolutePath); + + // binary check + let isBinary = false; + const checkLength = Math.min(buffer.length, 8000); + for (let j = 0; j < checkLength; j++) { + if (buffer[j] === 0) { + isBinary = true; + break; + } + } + + if (isBinary) { + fileCache.set(relativePath, null); + return undefined; + } + + const content = buffer.toString('utf8'); + fileCache.set(relativePath, content); + return content; + } catch { + fileCache.set(relativePath, null); + return undefined; + } + }; + + const chunkContext: ScanContext = { + rootDir: absoluteRoot, + files: chunk, + getFileContent, + }; + + for (const rule of activeRules) { + try { + const findings = await rule.check(chunkContext); + allFindings.push(...findings); + } catch (err: unknown) { + const message = err instanceof Error ? err.message : String(err); + process.stderr.write(`Warning: Rule ${rule.id} failed: ${message}\n`); + } } } diff --git a/src/types.ts b/src/types.ts index 7def8aa..1f7d576 100644 --- a/src/types.ts +++ b/src/types.ts @@ -16,6 +16,8 @@ export interface ScanContext { rootDir: string; /** List of file paths relative to rootDir */ files: string[]; + /** Optional file cache accessor to improve I/O performance */ + getFileContent?: (relativePath: string) => Promise<string | undefined>; } export interface Rule {