Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to the TypeScript package will be documented in this file.

## [Unreleased]

### Changed

- **Default graph discovery is stricter on duplicate/generated paths**: legacy detect and `--spi` now share hard ignores for nested worktrees, VCS metadata, `graphify-out`, dependency stores, and common build/cache outputs, while keeping tests, benchmarks, fixtures, and mocks indexable unless the user excludes them.
- **Retrieval exclusions are intent-aware**: prompts like "exclude tests" or "do not include benchmarks" no longer classify as `test` intent, and the parsed excluded domains/terms now suppress matching retrieval candidates instead of only affecting wording.
- **Slice-v1 anchors and traversal are more truthful for production prompts**: literal file-path mentions are distinguished from lexical source-path overlap, explicit `Class.method` prompts anchor the method instead of the class, and pipeline-shaped NestJS prompts now walk backward/forward through controller, service, orchestrator, and persistence paths without exploding into sibling controller methods.
- **Context-pack diagnostics catch semantically wrong packs**: diagnostics now flag excluded-domain selections, polluted source paths, controller-only pipeline packs, missing method anchors/runtime pipeline evidence, and test-dominated production packs.

## [0.22.0] - 2026-05-11

### Added
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,16 @@ graphify-ts --help # full surface

---

## Default discovery rules

`graphify-ts generate` now hard-ignores nested VCS/worktree copies and generated/build output by default: `.worktrees/`, `worktrees/`, `.git/`, `graphify-out/`, `node_modules/`, `dist/`, `build/`, `coverage/`, cache folders, source maps, lock/build artifacts, and temp/log files.

Tests, benchmarks, fixtures, mocks, and config files are **not** hard-ignored anymore. They still get indexed so retrieval can use them when you ask for them, but production/runtime prompts now soft-penalize them and honor prompt exclusions like "exclude tests, benchmarks, fixtures".

`.graphifyignore` still adds extra ignore rules, and negated entries such as `!vendor/**` or `!lib/**` can re-include a path that is hard-ignored by default when you intentionally want it indexed.

---

## Trust + limitations

Everything stays local by default. No telemetry, no cloud upload, no API key required.
Expand Down
15 changes: 15 additions & 0 deletions src/contracts/context-pack-diagnostics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// rather than from a model judgement. That keeps the surface fully
// deterministic and CI-assertable.

import type { SourceDomain } from '../shared/source-discovery.js'

/** The categories of structural problems a context-pack can exhibit. Each
* enum value maps to one rule in computeContextPackDiagnostics. */
export type ContextPackDiagnosticKind =
Expand All @@ -22,6 +24,13 @@ export type ContextPackDiagnosticKind =
| 'low_avg_match_score'
| 'orphan_nodes'
| 'no_graph_signals'
| 'excluded_domain_selected'
| 'test_dominated_pack'
| 'controller_only_pipeline_pack'
| 'missing_method_anchor'
| 'missing_runtime_pipeline'
| 'polluted_source_path_selected'
| 'missing_structural_evidence'

export type ContextPackDiagnosticSeverity = 'info' | 'warn' | 'error'

Expand All @@ -48,6 +57,12 @@ export interface ContextPackQualitySignals {
/** token_count from the pack as a fraction of task_contract.budget.
* Capped at 1.0 for over-budget packs. */
budget_utilization: number
/** Source-domain distribution across selected nodes. */
domain_distribution: Partial<Record<SourceDomain, number>>
/** Domains the prompt explicitly excluded. */
excluded_domains: string[]
/** Number of selected nodes from polluted/generated paths. */
polluted_source_path_count: number
}

export interface ContextPackDiagnostics {
Expand Down
2 changes: 2 additions & 0 deletions src/contracts/context-pack.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { RetrievalGateDecision } from './retrieval-gate.js'
import type { TaskIntentKind } from './task-intent.js'
import type { SourceDomain } from '../shared/source-discovery.js'

export type ContextPackTaskKind = 'explain' | 'review' | 'impact'

Expand Down Expand Up @@ -93,6 +94,7 @@ export interface ContextPackNode {
framework?: string | undefined
framework_role?: string | undefined
framework_boost?: number | undefined
source_domain?: SourceDomain | undefined
evidence_class?: ContextPackEvidenceClass | undefined
representation_type?: ContextRepresentationType | undefined
representation_reason?: string | undefined
Expand Down
12 changes: 12 additions & 0 deletions src/contracts/retrieval-gate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,23 @@ export type RetrievalIntent =
| 'chitchat'
| 'unknown'

/** Domain labels that can appear in RetrievalGateSignals.excluded_domains.
 * NOTE(review): presumably parsed from prompt phrasing such as
 * "exclude tests" — confirm against the prompt parser. */
export type RetrievalExcludedDomain =
| 'test'
| 'benchmark'
| 'fixture'
| 'generated'
| 'docs'
| 'config'
| 'build_artifact'

/** Signals consumed by the retrieval gate. The three `excluded_*` fields are
 * optional, so a value that carries no exclusion data remains valid.
 * NOTE(review): the producer of these signals is not visible here — field
 * notes below that go beyond the declared types are assumptions to confirm. */
export interface RetrievalGateSignals {
has_pr_diff: boolean
has_stack_trace: boolean
mentioned_paths: ReadonlyArray<string>
mentioned_symbols: ReadonlyArray<string>
/** Domains to exclude, restricted to the RetrievalExcludedDomain labels. */
excluded_domains?: ReadonlyArray<RetrievalExcludedDomain>
/** Free-form excluded terms. NOTE(review): presumably the raw words parsed
 * from the prompt's exclusion phrasing — confirm normalisation. */
excluded_terms?: ReadonlyArray<string>
/** NOTE(review): presumably path fragments derived from the exclusions;
 * verify against the code that populates this field. */
excluded_path_hints?: ReadonlyArray<string>
}

export interface RetrievalGateDecision {
Expand Down
99 changes: 9 additions & 90 deletions src/pipeline/detect.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import { Dirent, existsSync, lstatSync, mkdirSync, readFileSync, readdirSync, realpathSync, statSync, writeFileSync } from 'node:fs'
import { Dirent, lstatSync, mkdirSync, readFileSync, readdirSync, realpathSync, statSync, writeFileSync } from 'node:fs'
import { basename, dirname, extname, relative, resolve, sep } from 'node:path'

import { sidecarAwareFileFingerprint } from '../shared/binary-ingest-sidecar.js'
import {
isDiscoveryPathIgnored,
isIgnoredByPatterns,
loadGraphifyignorePatterns,
} from '../shared/source-discovery.js'

export const FileType = {
CODE: 'code',
Expand Down Expand Up @@ -109,44 +114,20 @@ const SKIP_DIRS = new Set([
'.venv',
'env',
'.env',
'graphify-out',
'node_modules',
'__pycache__',
'.git',
'dist',
'build',
'target',
'out',
'site-packages',
'lib64',
'.pytest_cache',
'.mypy_cache',
'.ruff_cache',
'.tox',
'.eggs',
'test',
'tests',
'__tests__',
'spec',
'specs',
'e2e',
'cypress',
'playwright',
'coverage',
'storybook-static',
'fixtures',
'__fixtures__',
'__mocks__',
'mocks',
])

const NOISE_FILE_PATTERNS: RegExp[] = [
/\.(test|spec)\.(ts|tsx|js|jsx|mjs|cjs)$/i,
/\.stories\.(ts|tsx|js|jsx)$/i,
/\.mock\.(ts|tsx|js|jsx)$/i,
/^(vitest|jest|webpack|rollup|vite|babel)\.config\./i,
/^setupTests\.(ts|tsx|js|jsx)$/i,
/^jest\.setup\.(ts|tsx|js|jsx)$/i,
]

function isNoiseFile(name: string): boolean {
Expand All @@ -157,21 +138,6 @@ function toPosixPath(path: string): string {
return path.split(sep).join('/')
}

/**
 * Compiles a simple glob into an anchored regular expression.
 *
 * Supported wildcards: `*` matches any run of characters (including `/`) and
 * `?` matches exactly one character. Every other regex metacharacter in the
 * pattern is escaped and matched literally.
 *
 * Patterns longer than 512 characters or containing more than 32 `*`
 * characters are rejected and compile to `/^$/`, which only matches the empty
 * string.
 *
 * @param pattern Glob pattern to compile.
 * @returns A RegExp anchored at both ends of the input.
 */
function globToRegExp(pattern: string): RegExp {
  let wildcardCount = 0
  for (const character of pattern) {
    if (character === '*') {
      wildcardCount += 1
    }
  }
  if (pattern.length > 512 || wildcardCount > 32) {
    return /^$/
  }

  const escaped = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&')
  // Collapse each run of `*` (e.g. `**`) into a single `.*`. Adjacent `.*.*`
  // accepts exactly the same strings as one `.*` but makes the regex engine
  // try every split point between them on a failed match, which grows
  // polynomially with the number of stacked wildcards.
  const wildcarded = escaped.replace(/\*+/g, '.*').replace(/\?/g, '.')
  return new RegExp(`^${wildcarded}$`)
}

/**
 * Tests `value` against the glob `pattern`.
 *
 * @param value String to test (a path, path prefix, or single segment).
 * @param pattern Glob pattern handed to globToRegExp.
 * @returns true when the compiled pattern matches the whole of `value`.
 */
function matchesPattern(value: string, pattern: string): boolean {
  const matcher = globToRegExp(pattern)
  return matcher.test(value)
}

/**
 * Reports whether a single path segment names a directory to skip.
 *
 * A segment qualifies when it is listed in SKIP_DIRS or ends with `_venv`,
 * `_env`, or `.egg-info`.
 *
 * @param part One path segment (a directory name, not a full path).
 * @returns true when the segment should be treated as noise.
 */
function isNoiseDir(part: string): boolean {
  if (SKIP_DIRS.has(part)) {
    return true
  }
  const noiseSuffixes = ['_venv', '_env', '.egg-info']
  return noiseSuffixes.some((suffix) => part.endsWith(suffix))
}
Expand Down Expand Up @@ -234,53 +200,11 @@ export function countWords(path: string): number {
}

export function _loadGraphifyignore(root: string): string[] {
try {
const content = readFileSync(resolve(root, '.graphifyignore'), 'utf8')
return content
.split(/\r?\n/)
.map((line) => line.trim())
.filter((line) => line.length > 0 && !line.startsWith('#'))
} catch {
return []
}
return loadGraphifyignorePatterns(root)
}

export function _isIgnored(path: string, root: string, patterns: string[]): boolean {
if (patterns.length === 0) {
return false
}

const relativePath = toPosixPath(relative(resolve(root), resolve(path)))
if (relativePath.startsWith('..')) {
return false
}

const pathParts = relativePath.split('/')
const fileName = basename(path)

for (const rawPattern of patterns) {
const pattern = rawPattern.replace(/^\/+|\/+$/g, '')
if (!pattern) {
continue
}

if (matchesPattern(relativePath, pattern) || matchesPattern(fileName, pattern)) {
return true
}

for (let index = 0; index < pathParts.length; index += 1) {
const part = pathParts[index]
if (!part) {
continue
}
const prefix = pathParts.slice(0, index + 1).join('/')
if (matchesPattern(part, pattern) || matchesPattern(prefix, pattern)) {
return true
}
}
}

return false
return isIgnoredByPatterns(path, root, patterns)
}

function isWithinRoot(rootRealPath: string, candidateRealPath: string): boolean {
Expand Down Expand Up @@ -312,7 +236,7 @@ function visitDirectory(
continue
}

if (_isIgnored(entryPath, root, ignorePatterns)) {
if (isDiscoveryPathIgnored(entryPath, root, ignorePatterns)) {
continue
}

Expand Down Expand Up @@ -390,11 +314,6 @@ function collectFiles(root: string, followSymlinks: boolean, ignorePatterns: str

visitDirectory(resolvedRoot, resolvedRoot, followSymlinks, ignorePatterns, [rootRealPath], rootRealPath, files)

const memoryDir = resolve(resolvedRoot, 'graphify-out', 'memory')
if (existsSync(memoryDir)) {
visitDirectory(memoryDir, resolvedRoot, followSymlinks, ignorePatterns, [rootRealPath], rootRealPath, files)
}

return [...new Set(files)].sort()
}

Expand Down
26 changes: 9 additions & 17 deletions src/pipeline/spi/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ import {
import { dirname, extname, join, relative, resolve } from 'node:path'
import ts from 'typescript'

import {
isDiscoveryPathIgnored,
loadGraphifyignorePatterns,
} from '../../shared/source-discovery.js'
import type {
SemanticProgramIndex,
SpiDiagnostic,
Expand Down Expand Up @@ -85,19 +89,6 @@ export type BuildSpiOptions = {
// BuildSpiOptions / buildSpi directly.
export type BuildSpiFileLayerOptions = BuildSpiOptions

const SKIP_DIRS = new Set<string>([
'node_modules',
'dist',
'build',
'.next',
'coverage',
'.git',
'graphify-out',
'.test-artifacts',
'.turbo',
'.vercel',
])

const EXT_TO_LANG: Record<string, SpiLanguage> = {
'.ts': 'typescript',
'.tsx': 'tsx',
Expand Down Expand Up @@ -127,7 +118,8 @@ export function buildSpi(opts: BuildSpiOptions): SemanticProgramIndex {
const diagnostics: SpiDiagnostic[] = []

const absPaths: string[] = []
collectFiles(root, absPaths)
const ignorePatterns = loadGraphifyignorePatterns(root)
collectFiles(root, root, ignorePatterns, absPaths)

const pathToFileId = new Map<string, string>()
for (const abs of absPaths) {
Expand Down Expand Up @@ -199,19 +191,19 @@ export function buildSpi(opts: BuildSpiOptions): SemanticProgramIndex {
// buildSpi directly.
export const buildSpiFileLayer = buildSpi

function collectFiles(dir: string, out: string[]): void {
function collectFiles(root: string, dir: string, ignorePatterns: readonly string[], out: string[]): void {
let entries: import('node:fs').Dirent<string>[]
try {
entries = readdirSync(dir, { withFileTypes: true, encoding: 'utf8' })
} catch {
return
}
for (const entry of entries) {
if (SKIP_DIRS.has(entry.name)) continue
if (entry.isSymbolicLink()) continue
const full = join(dir, entry.name)
if (isDiscoveryPathIgnored(full, root, ignorePatterns)) continue
if (entry.isDirectory()) {
collectFiles(full, out)
collectFiles(root, full, ignorePatterns, out)
} else if (entry.isFile()) {
out.push(full)
}
Expand Down
Loading
Loading