wiseiodev · dubscode · Mar 4, 2026 · Copilot · Mar 4, 2026
diff --git a/...r-context-graph-enrichment/.openspec.yaml → ...r-context-graph-enrichment/.openspec.yaml b/...r-context-graph-enrichment/.openspec.yaml → ...r-context-graph-enrichment/.openspec.yaml
diff --git a/...deeper-context-graph-enrichment/design.md → ...deeper-context-graph-enrichment/design.md b/...deeper-context-graph-enrichment/design.md → ...deeper-context-graph-enrichment/design.md
diff --git a/...eper-context-graph-enrichment/proposal.md → ...eper-context-graph-enrichment/proposal.md b/...eper-context-graph-enrichment/proposal.md → ...eper-context-graph-enrichment/proposal.md
diff --git a/...nt/specs/context-graph-enrichment/spec.md → ...nt/specs/context-graph-enrichment/spec.md b/...nt/specs/context-graph-enrichment/spec.md → ...nt/specs/context-graph-enrichment/spec.md
diff --git a/openspec/changes/archive/2026-03-04-deeper-context-graph-enrichment/tasks.md b/openspec/changes/archive/2026-03-04-deeper-context-graph-enrichment/tasks.md
@@ -0,0 +1,30 @@
+## 1. Schema and data model updates
+
+- [x] 1.1 Add symbol node schema/types with canonical symbol identifier fields and source location metadata
+- [x] 1.2 Add normalized edge type enum support for `defines`, `references`, `imports`, and `calls`
+- [x] 1.3 Add feature-flag or configuration gate for enabling symbol enrichment rollout
+
+## 2. Symbol extraction pipeline
+
+- [x] 2.1 Implement symbol extraction for initial supported languages (TypeScript/JavaScript) in the indexing pipeline
+- [x] 2.2 Generate deterministic canonical symbol keys (`<repo>::<path>::<kind>::<name>::<range-hash>`) during extraction
+- [x] 2.3 Add extraction diagnostics and partial-failure handling so unsupported constructs do not halt full indexing
+
+## 3. Relationship edge generation
+
+- [x] 3.1 Implement `defines` edge generation from file entities to declared symbols
+- [x] 3.2 Implement `references` and `calls` edge generation from analyzed source contexts to target symbols when resolvable
+- [x] 3.3 Implement `imports` edge generation between importing context and imported symbol/module entities
+
+## 4. Graph persistence and query integration
+
+- [x] 4.1 Persist symbol nodes and semantic edges in graph storage with batch write path support
+- [x] 4.2 Update graph query/retrieval surfaces to return symbol nodes and semantic edge traversals
+- [x] 4.3 Ensure existing file-level graph query contracts remain unchanged when enrichment is enabled
+
+## 5. Validation, testing, and rollout checks
+
+- [x] 5.1 Add golden fixture tests for symbol extraction counts and canonical identifier stability
+- [x] 5.2 Add tests for required edge presence and directionality (`defines`, `references`, `imports`, `calls`)
+- [x] 5.3 Add regression tests verifying file-level query compatibility and non-breaking behavior
+- [x] 5.4 Add indexing performance/volume checks and acceptance thresholds for enriched graph data
diff --git a/openspec/changes/deeper-context-graph-enrichment/tasks.md b/openspec/changes/deeper-context-graph-enrichment/tasks.md
diff --git a/openspec/specs/context-graph-enrichment/spec.md b/openspec/specs/context-graph-enrichment/spec.md
@@ -0,0 +1,48 @@
+# context-graph-enrichment Specification
+
+## Purpose
+This specification defines how the context graph is enriched with symbol-level nodes and normalized semantic relationship edges derived from source code. The goal is to enable precise retrieval, impact analysis, and other graph-based queries while preserving compatibility with existing file-level graph consumers.
+
+## Requirements
+### Requirement: Extract Symbol Inventory During Indexing
+The system SHALL extract a symbol inventory for each indexed source file, including supported symbol kinds, canonical symbol identifiers, names, and source locations.
+
+#### Scenario: Symbols extracted from a supported file
+- **WHEN** the indexer processes a supported language file
+- **THEN** the graph pipeline records one symbol entry per discovered symbol with deterministic identifier and location metadata
+
+#### Scenario: Unsupported syntax does not halt indexing
+- **WHEN** symbol extraction encounters an unsupported construct in a file
+- **THEN** the system continues indexing remaining files and records extraction diagnostics for the affected file
+
+### Requirement: Persist Semantic Relationship Edges
+The system SHALL persist normalized directed relationship edges among graph entities using the enum: `defines`, `references`, `imports`, and `calls`.
+
+#### Scenario: Definition edge creation
+- **WHEN** a file contains a symbol definition
+- **THEN** the graph contains a `defines` edge linking the file entity to the symbol entity
+
+#### Scenario: Reference and call edge creation
+- **WHEN** analysis identifies a symbol reference or call site
+- **THEN** the graph contains `references` or `calls` edges from the source symbol or file context to the target symbol when resolvable
+
+### Requirement: Preserve Existing File-Level Graph Behavior
+The system SHALL preserve compatibility for existing file-level graph traversal and consumers while symbol enrichment is enabled.
+
+#### Scenario: Existing consumer query remains valid
+- **WHEN** a consumer executes a pre-existing file-level graph query
+- **THEN** the query returns results with unchanged contract and does not require symbol-level filters
+
+### Requirement: Expose Enriched Graph Data to Query Surfaces
+The system SHALL expose symbol nodes and semantic edges to graph query surfaces used by retrieval and impact analysis.
+
+#### Scenario: Query requests symbol relationships
+- **WHEN** a graph query requests relationships for a symbol identifier
+- **THEN** the query surface returns connected nodes and edges for `defines`, `references`, `imports`, and `calls` relationship types
+
+### Requirement: Validate Enrichment Quality and Stability
+The system SHALL provide automated validation coverage for symbol extraction and relationship edge generation across representative repositories.
+
+#### Scenario: Regression suite for enrichment
+- **WHEN** CI executes graph enrichment tests
+- **THEN** the suite verifies expected symbol counts and required edge presence for golden fixtures without regressing file-level behavior
+
diff --git a/src/context/graph/config.ts b/src/context/graph/config.ts
@@ -0,0 +1,3 @@
+/**
+ * Reports whether symbol-level graph enrichment is switched on.
+ *
+ * The gate is the `DUBSBOT_ENABLE_SYMBOL_ENRICHMENT` environment variable. Only the exact
+ * string `'1'` enables the feature; any other value, or an unset variable, leaves it off.
+ *
+ * @returns `true` when the variable equals `'1'`, otherwise `false`.
+ */
+export function isSymbolEnrichmentEnabled(): boolean {
+  const { DUBSBOT_ENABLE_SYMBOL_ENRICHMENT: flag } = process.env;
+  return flag === '1';
+}
diff --git a/src/context/graph/extract.ts b/src/context/graph/extract.ts
@@ -0,0 +1,255 @@
+import { posix } from 'node:path';
+import { buildCanonicalSymbolId, type ExtractedSymbol, type GraphFileExtraction } from './types';
+
+/** File-name suffixes (lower-case, including the dot) that symbol extraction accepts. */
+const SUPPORTED_EXTENSIONS = new Set(['.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs']);
+
+/**
+ * Tells whether a file path points at a language the symbol extractor handles.
+ *
+ * The comparison is case-insensitive and purely suffix-based: the lower-cased path must end
+ * with one of the entries in `SUPPORTED_EXTENSIONS`.
+ *
+ * @param path - File path to test; only its ending is inspected.
+ * @returns `true` when the path ends with a supported extension, otherwise `false`.
+ */
+export function canExtractSymbols(path: string): boolean {
+  const lowered = path.toLowerCase();
+  return [...SUPPORTED_EXTENSIONS].some((suffix) => lowered.endsWith(suffix));
+}
+
+/** Declaration matchers, tried in this order; the first one that matches a line decides its kind. */
+const DECLARATION_PATTERNS: ReadonlyArray<{ kind: ExtractedSymbol['kind']; pattern: RegExp }> = [
+  { kind: 'function', pattern: /\bfunction\s+([A-Za-z_$][\w$]*)\s*\(/ },
+  { kind: 'class', pattern: /\bclass\s+([A-Za-z_$][\w$]*)\b/ },
+  { kind: 'type', pattern: /\b(?:interface|type)\s+([A-Za-z_$][\w$]*)\b/ },
+  { kind: 'constant', pattern: /\b(?:const|let|var)\s+([A-Za-z_$][\w$]*)\b/ },
+];
+
+/** A static `import <clause> from '<module>'` statement that fits on one line. */
+const IMPORT_PATTERN = /\bimport\s+(.+)\s+from\s+['"]([^'"]+)['"]/;
+
+/** Whole identifiers; capture group 2 is set when the identifier is followed by `(`. */
+const IDENTIFIER_USE_PATTERN = /(?<![\w$])([A-Za-z_$][\w$]*)(\s*\()?/g;
+
+/**
+ * Turns the clause between `import` and `from` into the list of local binding names.
+ *
+ * Braces are ignored, entries are split on commas, a leading type-only `type` modifier is
+ * dropped, and for `x as y` (including `* as ns`) the name after `as` is kept.
+ *
+ * @param importClause - Text captured between `import` and `from`.
+ * @returns The local names introduced by the clause, in source order.
+ */
+function parseImportedNames(importClause: string): string[] {
+  return importClause
+    .replace(/[{}]/g, ' ')
+    .split(',')
+    // `type Foo` is a type-only import of `Foo`; `type as t` imports an export named `type`.
+    .map((entry) => entry.trim().replace(/^type\s+(?!as\s)/, ''))
+    .map((entry) => (entry.split(/\s+as\s+/i).at(-1) ?? entry).trim())
+    .filter(Boolean);
+}
+
+/**
+ * Extracts symbols and relationship edges from one source file using line-based heuristics.
+ *
+ * The file is scanned line by line with regular expressions; no parser is involved, so text
+ * inside comments or string literals is treated like code, and only imports written on a
+ * single line are recognised. All edges originate from the file node.
+ *
+ * Processing steps:
+ * 1. Paths without a supported extension return empty results plus an `unsupported-language`
+ *    diagnostic.
+ * 2. Each non-blank line is checked for an import first, then for a declaration. Imports are
+ *    checked first so that `import { type Foo } from '…'` is not mistaken for a local type.
+ *    Symbols are de-duplicated by name (first occurrence wins).
+ * 3. Every recorded symbol receives a `defines` edge.
+ * 4. Whole-identifier occurrences of known symbol names produce `references` edges, and
+ *    `calls` edges when followed by `(`. A symbol's own declaration line is skipped.
+ * 5. Edges are de-duplicated before being returned.
+ *
+ * @param input.repoRoot - Repository identifier used as the prefix of node keys.
+ * @param input.path - File path; normalised with POSIX rules before use.
+ * @param input.content - Full text of the file.
+ * @returns The extracted symbols, de-duplicated edges and any diagnostics for this file.
+ */
+export function extractGraphDataForFile(input: {
+  repoRoot: string;
+  path: string;
+  content: string;
+}): GraphFileExtraction {
+  const normalizedPath = posix.normalize(input.path);
+  if (!canExtractSymbols(normalizedPath)) {
+    return {
+      symbols: [],
+      edges: [],
+      diagnostics: [`unsupported-language:${normalizedPath}`],
+    };
+  }
+
+  const fileKey = fileNodeKey(input.repoRoot, normalizedPath);
+  const symbols: ExtractedSymbol[] = [];
+  const edges: GraphFileExtraction['edges'] = [];
+  const diagnostics: string[] = [];
+  const symbolByName = new Map<string, ExtractedSymbol>();
+  // Accept CRLF as well as LF so line text carries no trailing `\r`.
+  const lines = input.content.split(/\r?\n/);
+
+  // Records a symbol and returns the one actually stored under that name, so edges never
+  // point at a duplicate that `addSymbol` discarded.
+  const register = (
+    name: string,
+    kind: ExtractedSymbol['kind'],
+    line: string,
+    lineNumber: number
+  ): ExtractedSymbol => {
+    const candidate = makeSymbol({
+      repoRoot: input.repoRoot,
+      path: normalizedPath,
+      name,
+      kind,
+      line,
+      lineNumber,
+    });
+    addSymbol(symbols, symbolByName, candidate);
+    return symbolByName.get(name) ?? candidate;
+  };
+
+  // Pass 1: collect imports and declarations.
+  for (const [lineIndex, line] of lines.entries()) {
+    if (!line.trim()) {
+      continue;
+    }
+    const lineNumber = lineIndex + 1;
+
+    const importMatch = IMPORT_PATTERN.exec(line);
+    if (importMatch) {
+      const [, importClause = '', moduleName = ''] = importMatch;
+      register(`module:${moduleName}`, 'module', line, lineNumber);
+      for (const name of parseImportedNames(importClause)) {
+        const imported = register(name, 'import', line, lineNumber);
+        edges.push({
+          type: 'imports',
+          sourceKey: fileKey,
+          targetKey: imported.id,
+          confidence: 1,
+          metadata: { module: moduleName },
+        });
+      }
+      continue;
+    }
+
+    for (const { kind, pattern } of DECLARATION_PATTERNS) {
+      const name = pattern.exec(line)?.[1];
+      if (name !== undefined) {
+        register(name, kind, line, lineNumber);
+        break;
+      }
+    }
+  }
+
+  // Pass 2: the file defines every symbol that was recorded.
+  for (const symbol of symbols) {
+    edges.push({
+      type: 'defines',
+      sourceKey: fileKey,
+      targetKey: symbol.id,
+      confidence: 1,
+    });
+  }
+
+  // Pass 3: usages of known names. Matching whole identifiers avoids substring hits such as
+  // the symbol `id` inside `width`.
+  for (const [lineIndex, line] of lines.entries()) {
+    const lineNumber = lineIndex + 1;
+    const uses: Array<{ target: ExtractedSymbol; isCall: boolean }> = [];
+    for (const match of line.matchAll(IDENTIFIER_USE_PATTERN)) {
+      const target = symbolByName.get(match[1] ?? '');
+      // The declaration line names the symbol without using it, so it is not a reference.
+      if (target && target.location.startLine !== lineNumber) {
+        uses.push({ target, isCall: match[2] !== undefined });
+      }
+    }
+
+    for (const { target, isCall } of uses) {
+      if (!isCall) {
+        continue;
+      }
+      edges.push({
+        type: 'calls',
+        sourceKey: fileKey,
+        targetKey: target.id,
+        confidence: 0.7,
+        metadata: { line: lineNumber },
+      });
+    }
+
+    for (const { target } of uses) {
+      edges.push({
+        type: 'references',
+        sourceKey: fileKey,
+        targetKey: target.id,
+        confidence: 0.5,
+        metadata: { line: lineNumber },
+      });
+    }
+  }
+
+  if (symbols.length === 0) {
+    diagnostics.push(`no-symbols-detected:${normalizedPath}`);
+  }
+
+  return {
+    symbols,
+    edges: dedupeEdges(edges),
+    diagnostics,
+  };
+}
+
+/**
+ * Registers a symbol unless another symbol with the same name was registered earlier.
+ *
+ * On first sight of a name the symbol is appended to `symbols` and indexed in `symbolByName`;
+ * later symbols with that name are ignored, whatever their kind or location.
+ *
+ * @param symbols - Ordered list of accepted symbols; mutated in place.
+ * @param symbolByName - Name-to-symbol index kept in step with `symbols`; mutated in place.
+ * @param symbol - Candidate symbol to register.
+ */
+function addSymbol(
+  symbols: ExtractedSymbol[],
+  symbolByName: Map<string, ExtractedSymbol>,
+  symbol: ExtractedSymbol
+): void {
+  const alreadyKnown = symbolByName.has(symbol.name);
+  if (!alreadyKnown) {
+    symbols.push(symbol);
+    symbolByName.set(symbol.name, symbol);
+  }
+}
+
+/**
+ * Builds the symbol record for a name found on a single source line.
+ *
+ * The location covers only the identifier: both line fields equal `lineNumber`, columns are
+ * 1-based, and `endColumn` is `startColumn + name.length`. The name is located as a whole
+ * identifier, so `fun` in `function fun(` resolves to the declared name rather than to the
+ * `fun` inside the `function` keyword. When the name does not occur in the line (for example
+ * the synthetic `module:<specifier>` names), the start column falls back to 1.
+ *
+ * @param input.repoRoot - Repository identifier forwarded to `buildCanonicalSymbolId`.
+ * @param input.path - Normalised file path the symbol belongs to.
+ * @param input.name - Symbol name.
+ * @param input.kind - Symbol kind.
+ * @param input.line - Full text of the line, used to find the column of the name.
+ * @param input.lineNumber - 1-based line number of `line` within the file.
+ * @returns The symbol, with an id built by `buildCanonicalSymbolId` from the repo root, path,
+ *   kind, name and location.
+ */
+function makeSymbol(input: {
+  repoRoot: string;
+  path: string;
+  name: string;
+  kind: ExtractedSymbol['kind'];
+  line: string;
+  lineNumber: number;
+}): ExtractedSymbol {
+  // Escape regex metacharacters (`$` is legal in identifiers) before building the matcher.
+  const escapedName = input.name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const wholeIdentifier = new RegExp(`(?<![\\w$])${escapedName}(?![\\w$])`);
+  const startColumn = (wholeIdentifier.exec(input.line)?.index ?? 0) + 1;
+  const endColumn = startColumn + input.name.length;
+  const location = {
+    startLine: input.lineNumber,
+    endLine: input.lineNumber,
+    startColumn,
+    endColumn,
+  };
+  return {
+    id: buildCanonicalSymbolId({
+      repoRoot: input.repoRoot,
+      path: input.path,
+      kind: input.kind,
+      name: input.name,
+      location,
+    }),
+    name: input.name,
+    kind: input.kind,
+    path: input.path,
+    location,
+  };
+}
+
+/**
+ * Builds the graph key of a file node.
+ *
+ * @param repoRoot - Repository identifier.
+ * @param path - File path within the repository.
+ * @returns `<repoRoot>::<path>::file`.
+ */
+function fileNodeKey(repoRoot: string, path: string): string {
+  return [repoRoot, path, 'file'].join('::');
+}
+
+/**
+ * Drops repeated edges, keeping the first occurrence of each.
+ *
+ * Two edges count as the same when their `type`, `sourceKey` and `targetKey` all match;
+ * confidence and metadata are not compared, so the earliest edge's values are the ones kept.
+ *
+ * @param edges - Edges in the order they were produced.
+ * @returns A new array with the surviving edges in their original relative order.
+ */
+function dedupeEdges(edges: GraphFileExtraction['edges']): GraphFileExtraction['edges'] {
+  const seen = new Set<string>();
+  return edges.filter((edge) => {
+    const signature = `${edge.type}|${edge.sourceKey}|${edge.targetKey}`;
+    if (seen.has(signature)) {
+      return false;
+    }
+    seen.add(signature);
+    return true;
+  });
+}