From 05eb7a53cb84d921e37e425958dfb3affd463406 Mon Sep 17 00:00:00 2001 From: andreinknv Date: Sun, 26 Apr 2026 01:27:34 -0400 Subject: [PATCH 01/22] fix: correctness bugs found in audit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four user-visible bugs caught by an independent audit pass: 1. Svelte symbols reported on the wrong line src/extraction/svelte-extractor.ts:144 The script-block regex captures content starting with the leading newline that follows `>`, so the inner extractor sees that newline as line 1 of its 1-indexed input and the first real code on line 2. The previous `contentStartLine = scriptTagLine + openingTagLines + 1` was added to that 1-indexed line number, shifting every Svelte symbol's startLine / endLine off by 1. Drops the `+1`. Five regression tests added covering single-line, multi-line opening tag, template-offset, single-line no-newline, and dual module/instance script blocks. 2. Watcher silently dropped pending changes on sync failure src/sync/watcher.ts:177 `hasChanges = false` ran before the sync attempt, so a thrown sync (DB locked, transient FS error) left the pending batch forgotten until a NEW file event arrived. Re-set `hasChanges = true` in the catch path so a transient failure schedules a retry on its own. Regression test added (mocks fail-then-succeed, asserts the second call happens without a new file event). 3. Graph traversal default maxDepth was Infinity src/graph/traversal.ts:14, src/types.ts:301 `limit: 1000` capped returned nodes, but during traversal the visited set and BFS/DFS frontier can grow far beyond `limit` on highly connected graphs before the cap kicks in. Default is now 10. Callers who really need exhaustive traversal can still pass `maxDepth: Infinity` explicitly — the JSDoc documents this. This is a public-API behavior change; existing tests pass. Also caps `findPath`'s BFS queue at 100,000 entries (FIND_PATH_MAX_QUEUE) and returns null if exceeded — each entry holds a cloned path array, so on dense graphs the queue could otherwise consume gigabytes before either finding a path or exhausting the search. 4. `findRelevantContext` did not bound caller-supplied limits src/context/index.ts:284 `searchLimit` is multiplied by 5 in `findNodesByExactName` and feeds several other unbounded operations; a caller passing `searchLimit: 1_000_000` would pull millions of rows. Now clamped: searchLimit ∈ [1, 100], maxNodes ∈ [1, 1000], traversalDepth ∈ [0, 10]. Regression test asserts a 1e9 input is bounded. All 387 tests pass serialized; tsc clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- __tests__/context.test.ts | 13 ++++++ __tests__/extraction.test.ts | 69 ++++++++++++++++++++++++++++++ __tests__/watcher.test.ts | 30 +++++++++++++ src/context/index.ts | 8 ++++ src/extraction/svelte-extractor.ts | 10 +++-- src/graph/traversal.ts | 23 +++++++++- src/sync/watcher.ts | 12 +++++- src/types.ts | 7 ++- 8 files changed, 165 insertions(+), 7 deletions(-) diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts index 52dae1fe..9a0614aa 100644 --- a/__tests__/context.test.ts +++ b/__tests__/context.test.ts @@ -210,6 +210,19 @@ export function validateEmail(email: string): boolean { expect(result.nodes.size).toBeLessThanOrEqual(5); }); + + it('should clamp absurd searchLimit/maxNodes values to safe upper bounds', async () => { + // Without clamping, the internal `findNodesByExactName` query would + // request `searchLimit * 5` rows — passing 1e9 here would blow out + // memory. The call should complete in normal time and not return more + // than the hard cap on maxNodes (1000). + const result = await cg.findRelevantContext('function', { + searchLimit: 1_000_000_000, + maxNodes: 1_000_000_000, + traversalDepth: 1_000, + }); + expect(result.nodes.size).toBeLessThanOrEqual(1000); + }); }); describe('buildContext()', () => { diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts index 8a70ffed..a6fd7687 100644 --- a/__tests__/extraction.test.ts +++ b/__tests__/extraction.test.ts @@ -3079,3 +3079,72 @@ describe('Directory Exclusion', () => { expect(files.every((f) => !f.includes('vendor'))).toBe(true); }); }); + +// ============================================================================= +// Svelte line-number regressions (audit fix) +// ============================================================================= + +describe('Svelte line numbering', () => { + it('reports symbol line numbers relative to the .svelte file, not the script content', () => { + // Line 1: + const code = `\n`; + const result = extractFromSource('Comp.svelte', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'add'); + expect(fn).toBeDefined(); + expect(fn?.startLine).toBe(2); + }); + + it('handles multi-line opening tags (script with attributes wrapped)', () => { + // Line 1: + const code = `\nfunction greet() { return "hi"; }\n\n`; + const result = extractFromSource('Comp.svelte', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'greet'); + expect(fn).toBeDefined(); + expect(fn?.startLine).toBe(3); + }); + + it('preserves correct line numbers when the script block is offset by template lines', () => { + // Line 1:

Hello

+ // Line 2: + // Line 3: + const code = `

Hello

\n\n\n`; + const result = extractFromSource('Comp.svelte', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'bottom'); + expect(fn).toBeDefined(); + expect(fn?.startLine).toBe(4); + }); + + it('handles a single-line script block with no internal newline', () => { + // Line 1: + const code = `\n`; + const result = extractFromSource('Comp.svelte', code); + const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'inline'); + expect(fn).toBeDefined(); + expect(fn?.startLine).toBe(1); + }); + + it('attributes each block correctly when a file has both module and instance scripts', () => { + // Line 1: + // Line 4: + // Line 5: + const code = + `\n` + + `\n\n`; + const result = extractFromSource('Comp.svelte', code); + const moduleFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'moduleHelper'); + const instanceFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'instanceHelper'); + expect(moduleFn?.startLine).toBe(2); + expect(instanceFn?.startLine).toBe(6); + }); +}); diff --git a/__tests__/watcher.test.ts b/__tests__/watcher.test.ts index f3638e6d..ee732df6 100644 --- a/__tests__/watcher.test.ts +++ b/__tests__/watcher.test.ts @@ -218,6 +218,36 @@ describe('FileWatcher', () => { watcher.stop(); }); + + it('should retry pending changes after a sync failure (no events lost)', async () => { + // First call rejects, subsequent calls resolve. After the initial + // failure, the watcher should retry the same batch on its own — without + // this, transient sync failures (DB locked etc.) would silently drop the + // changes until a new file event happened. + let calls = 0; + const syncFn = vi.fn().mockImplementation(() => { + calls++; + if (calls === 1) return Promise.reject(new Error('transient')); + return Promise.resolve({ filesChanged: 1, durationMs: 5 }); + }); + const onSyncError = vi.fn(); + const onSyncComplete = vi.fn(); + const watcher = new FileWatcher(testDir, baseConfig, syncFn, { + debounceMs: 100, + onSyncError, + onSyncComplete, + }); + + watcher.start(); + fs.writeFileSync(path.join(testDir, 'src', 'test.ts'), 'export const z = 3;'); + + await waitFor(() => onSyncComplete.mock.calls.length > 0, 5000); + expect(onSyncError).toHaveBeenCalledTimes(1); + expect(syncFn).toHaveBeenCalledTimes(2); + expect(onSyncComplete).toHaveBeenCalledWith({ filesChanged: 1, durationMs: 5 }); + + watcher.stop(); + }); }); describe('CodeGraph integration', () => { diff --git a/src/context/index.ts b/src/context/index.ts index 94192377..08f25657 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -286,6 +286,14 @@ export class ContextBuilder { options: FindRelevantContextOptions = {} ): Promise { const opts = { ...DEFAULT_FIND_OPTIONS, ...options }; + // Bound user-supplied limits — `searchLimit` is multiplied by 5 in + // findNodesByExactName (line 312) and feeds several other unbounded + // operations below, so a request with `searchLimit: 1_000_000` would + // pull millions of rows before any filtering. 100 is well above the + // largest legitimate use we've seen. + opts.searchLimit = Math.min(Math.max(1, opts.searchLimit), 100); + opts.maxNodes = Math.min(Math.max(1, opts.maxNodes), 1000); + opts.traversalDepth = Math.min(Math.max(0, opts.traversalDepth), 10); // Start with empty subgraph const nodes = new Map(); diff --git a/src/extraction/svelte-extractor.ts b/src/extraction/svelte-extractor.ts index 5586ee34..323cbe80 100644 --- a/src/extraction/svelte-extractor.ts +++ b/src/extraction/svelte-extractor.ts @@ -135,13 +135,17 @@ export class SvelteExtractor { // Detect module script const isModule = /context\s*=\s*["']module["']/.test(attrs); - // Calculate start line of the script content (line after - const code = `\n`; - const result = extractFromSource('Comp.svelte', code); - const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'add'); - expect(fn).toBeDefined(); - expect(fn?.startLine).toBe(2); - }); - - it('handles multi-line opening tags (script with attributes wrapped)', () => { - // Line 1: - const code = `\nfunction greet() { return "hi"; }\n\n`; - const result = extractFromSource('Comp.svelte', code); - const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'greet'); - expect(fn).toBeDefined(); - expect(fn?.startLine).toBe(3); - }); - - it('preserves correct line numbers when the script block is offset by template lines', () => { - // Line 1:

Hello

- // Line 2: - // Line 3: - const code = `

Hello

\n\n\n`; - const result = extractFromSource('Comp.svelte', code); - const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'bottom'); - expect(fn).toBeDefined(); - expect(fn?.startLine).toBe(4); - }); - - it('handles a single-line script block with no internal newline', () => { - // Line 1: - const code = `\n`; - const result = extractFromSource('Comp.svelte', code); - const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'inline'); - expect(fn).toBeDefined(); - expect(fn?.startLine).toBe(1); - }); - - it('attributes each block correctly when a file has both module and instance scripts', () => { - // Line 1: - // Line 4: - // Line 5: - const code = - `\n` + - `\n\n`; - const result = extractFromSource('Comp.svelte', code); - const moduleFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'moduleHelper'); - const instanceFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'instanceHelper'); - expect(moduleFn?.startLine).toBe(2); - expect(instanceFn?.startLine).toBe(6); +describe('HCL / Terraform Extraction', () => { + describe('Language detection', () => { + it('should detect HCL/Terraform files', () => { + expect(detectLanguage('main.tf')).toBe('hcl'); + expect(detectLanguage('terraform.tfvars')).toBe('hcl'); + expect(detectLanguage('config.hcl')).toBe('hcl'); + }); + + it('should report HCL as supported', () => { + expect(isLanguageSupported('hcl')).toBe(true); + expect(getSupportedLanguages()).toContain('hcl'); + }); + }); + + describe('Block extraction', () => { + it('should extract a resource block as a class node', () => { + const code = `resource "aws_s3_bucket" "logs" { bucket = "my-logs" }`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'aws_s3_bucket.logs'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('class'); + expect(node?.name).toBe('aws_s3_bucket.logs'); + expect(node?.language).toBe('hcl'); + expect(node?.signature).toBe('resource "aws_s3_bucket" "logs"'); + }); + + it('should extract a data block with `data.` prefix', () => { + const code = `data "aws_caller_identity" "current" {}`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'data.aws_caller_identity.current'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('class'); + expect(node?.name).toBe('aws_caller_identity.current'); + }); + + it('should extract a variable block', () => { + const code = `variable "environment" { type = string }`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'var.environment'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('variable'); + expect(node?.name).toBe('environment'); + }); + + it('should extract an output block as an export', () => { + const code = `output "vpc_id" { value = "abc" }`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'output.vpc_id'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('export'); + expect(node?.name).toBe('vpc_id'); + }); + + it('should extract a module block', () => { + const code = `module "vpc" { source = "terraform-aws-modules/vpc/aws" }`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'module.vpc'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('module'); + expect(node?.name).toBe('vpc'); + }); + + it('should extract a provider block as namespace', () => { + const code = `provider "aws" { region = "us-east-1" }`; + const result = extractFromSource('main.tf', code); + + const node = result.nodes.find((n) => n.qualifiedName === 'provider.aws'); + expect(node).toBeDefined(); + expect(node?.kind).toBe('namespace'); + }); + + it('should split a locals block into one constant per attribute', () => { + const code = `locals { + bucket_name = "my-bucket" + retention = 30 +}`; + const result = extractFromSource('main.tf', code); + + const bucketName = result.nodes.find((n) => n.qualifiedName === 'local.bucket_name'); + const retention = result.nodes.find((n) => n.qualifiedName === 'local.retention'); + expect(bucketName?.kind).toBe('constant'); + expect(retention?.kind).toBe('constant'); + }); + + it('should connect blocks to the file via contains edges', () => { + const code = `resource "aws_s3_bucket" "logs" {}`; + const result = extractFromSource('main.tf', code); + + const fileNode = result.nodes.find((n) => n.kind === 'file'); + const resourceNode = result.nodes.find((n) => n.qualifiedName === 'aws_s3_bucket.logs'); + expect(fileNode).toBeDefined(); + expect(resourceNode).toBeDefined(); + const containsEdge = result.edges.find( + (e) => e.source === fileNode!.id && e.target === resourceNode!.id && e.kind === 'contains' + ); + expect(containsEdge).toBeDefined(); + }); + }); + + describe('Reference extraction', () => { + it('should extract var.X references', () => { + const code = `resource "aws_s3_bucket" "logs" { bucket = var.bucket_name }`; + const result = extractFromSource('main.tf', code); + + const ref = result.unresolvedReferences.find((r) => r.referenceName === 'var.bucket_name'); + expect(ref).toBeDefined(); + expect(ref?.referenceKind).toBe('references'); + }); + + it('should extract local.X references', () => { + const code = `resource "aws_s3_bucket" "logs" { tags = local.common_tags }`; + const result = extractFromSource('main.tf', code); + + const ref = result.unresolvedReferences.find((r) => r.referenceName === 'local.common_tags'); + expect(ref).toBeDefined(); + }); + + it('should extract module.X references and stop at the module name', () => { + const code = `output "vpc_id" { value = module.vpc.vpc_id }`; + const result = extractFromSource('main.tf', code); + + const ref = result.unresolvedReferences.find((r) => r.referenceName === 'module.vpc'); + expect(ref).toBeDefined(); + // Should NOT emit a reference for the trailing attribute + expect(result.unresolvedReferences.find((r) => r.referenceName === 'module.vpc.vpc_id')).toBeUndefined(); + }); + + it('should extract data.T.N references with both labels', () => { + const code = `output "x" { value = data.aws_caller_identity.current.account_id }`; + const result = extractFromSource('main.tf', code); + + const ref = result.unresolvedReferences.find( + (r) => r.referenceName === 'data.aws_caller_identity.current' + ); + expect(ref).toBeDefined(); + }); + + it('should extract resource references as TYPE.NAME', () => { + const code = `resource "aws_s3_bucket_versioning" "v" { bucket = aws_s3_bucket.logs.id }`; + const result = extractFromSource('main.tf', code); + + const ref = result.unresolvedReferences.find((r) => r.referenceName === 'aws_s3_bucket.logs'); + expect(ref).toBeDefined(); + }); + + it('should extract references inside string interpolations', () => { + const code = 'locals { name = "${var.environment}-${random_id.suffix.hex}" }'; + const result = extractFromSource('main.tf', code); + + const names = result.unresolvedReferences.map((r) => r.referenceName); + expect(names).toContain('var.environment'); + expect(names).toContain('random_id.suffix'); + }); + + it('should ignore references to count, each, self, and path', () => { + const code = `resource "aws_instance" "web" { + count = 3 + tags = { Name = "web-\${count.index}", For = each.value, Self = self.id, P = path.module } +}`; + const result = extractFromSource('main.tf', code); + + const names = result.unresolvedReferences.map((r) => r.referenceName); + expect(names.find((n) => n.startsWith('count.'))).toBeUndefined(); + expect(names.find((n) => n.startsWith('each.'))).toBeUndefined(); + expect(names.find((n) => n.startsWith('self.'))).toBeUndefined(); + expect(names.find((n) => n.startsWith('path.'))).toBeUndefined(); + }); + + it('should ignore for-loop iteration variables', () => { + const code = `output "ids" { value = [for s in var.subnets : s.id] }`; + const result = extractFromSource('main.tf', code); + + const names = result.unresolvedReferences.map((r) => r.referenceName); + // var.subnets reference comes through, but `s.id` does NOT + expect(names).toContain('var.subnets'); + expect(names.find((n) => n.startsWith('s.'))).toBeUndefined(); + }); + + it('should ignore key/value bindings in for-object expressions', () => { + const code = `locals { tags = { for k, v in var.input : k => "\${v}-suffix" } }`; + const result = extractFromSource('main.tf', code); + + const names = result.unresolvedReferences.map((r) => r.referenceName); + expect(names).toContain('var.input'); + expect(names.find((n) => n === 'k' || n.startsWith('k.'))).toBeUndefined(); + expect(names.find((n) => n === 'v' || n.startsWith('v.'))).toBeUndefined(); + }); + + it('should emit an imports edge for module source', () => { + const code = `module "vpc" { source = "terraform-aws-modules/vpc/aws" }`; + const result = extractFromSource('main.tf', code); + + const importRef = result.unresolvedReferences.find( + (r) => r.referenceKind === 'imports' && r.referenceName === 'terraform-aws-modules/vpc/aws' + ); + expect(importRef).toBeDefined(); + }); + }); + + describe('Robustness', () => { + it('should handle empty files', () => { + const result = extractFromSource('main.tf', ''); + const fileNode = result.nodes.find((n) => n.kind === 'file'); + expect(fileNode).toBeDefined(); + }); + + it('should handle blocks with no body', () => { + const code = `data "aws_caller_identity" "current" {}`; + const result = extractFromSource('main.tf', code); + expect(result.nodes.find((n) => n.qualifiedName === 'data.aws_caller_identity.current')).toBeDefined(); + }); + + it('should walk nested blocks for references without emitting child nodes', () => { + const code = `resource "aws_s3_bucket_versioning" "v" { + bucket = aws_s3_bucket.logs.id + versioning_configuration { + status = var.versioning_status + } +}`; + const result = extractFromSource('main.tf', code); + + // Only one block-level node, plus the file + const blockNodes = result.nodes.filter((n) => n.kind === 'class'); + expect(blockNodes.length).toBe(1); + + // References from the nested block should still be captured + const names = result.unresolvedReferences.map((r) => r.referenceName); + expect(names).toContain('aws_s3_bucket.logs'); + expect(names).toContain('var.versioning_status'); + }); }); }); diff --git a/src/extraction/hcl-extractor.ts b/src/extraction/hcl-extractor.ts new file mode 100644 index 00000000..3d810c88 --- /dev/null +++ b/src/extraction/hcl-extractor.ts @@ -0,0 +1,587 @@ +import type { Node as SyntaxNode } from 'web-tree-sitter'; +import { Node, Edge, ExtractionResult, ExtractionError, UnresolvedReference, NodeKind } from '../types'; +import { generateNodeId, getNodeText } from './tree-sitter-helpers'; +import { getParser } from './grammars'; + +/** + * HclExtractor — extracts a Terraform/HCL file into the graph. + * + * HCL is a declarative configuration language: there are no functions, + * classes, or methods. The unit of structure is the **block**: + * + * [