diff --git a/__tests__/language-registry.test.ts b/__tests__/language-registry.test.ts new file mode 100644 index 00000000..9afdd59a --- /dev/null +++ b/__tests__/language-registry.test.ts @@ -0,0 +1,157 @@ +/** + * Language registry: structural invariants. + * + * These tests guard against the "parallel list" failure mode that + * the registry refactor exists to prevent. If a future PR adds a + * grammar-backed language but forgets to wire it through one of + * the derived consumers, one of these tests should catch it. + */ +import { describe, it, expect } from 'vitest'; +import { + getLanguageDefs, + getLanguageDefByExtension, + getLanguageDefByName, +} from '../src/extraction/languages/registry'; +import { EXTRACTORS } from '../src/extraction/languages'; +import { + detectLanguage, + isLanguageSupported, + getSupportedLanguages, + getLanguageDisplayName, + EXTENSION_MAP, +} from '../src/extraction/grammars'; + +describe('language registry — single source of truth', () => { + it('has at least the original 19 languages', () => { + const defs = getLanguageDefs(); + expect(defs.length).toBeGreaterThanOrEqual(19); + }); + + it('every def has unique non-empty name', () => { + const names = new Set(); + for (const def of getLanguageDefs()) { + expect(def.name).toBeTruthy(); + expect(names.has(def.name)).toBe(false); + names.add(def.name); + } + }); + + it('extensions are unique across registry (one ext maps to one language)', () => { + const seen = new Map(); + for (const def of getLanguageDefs()) { + for (const ext of def.extensions) { + const lower = ext.toLowerCase(); + if (seen.has(lower)) { + // The .h ambiguity (C vs C++) is intentionally pinned to C + // by the registry; tree-sitter.ts has a content-sniff + // override. Anything else duplicating extensions is a bug. 
+ throw new Error( + `Extension ${lower} mapped twice: ${seen.get(lower)} and ${def.name}` + ); + } + seen.set(lower, def.name); + } + } + }); + + it('grammar-backed defs have wasmFile + extractor', () => { + for (const def of getLanguageDefs()) { + if (!def.grammar) continue; + expect(def.grammar.wasmFile).toMatch(/^tree-sitter-.+\.wasm$/); + expect(def.grammar.extractor).toBeDefined(); + } + }); + + it('custom-extractor defs have a customExtractor function', () => { + for (const def of getLanguageDefs()) { + if (def.grammar) continue; // grammar-backed + expect(def.customExtractor).toBeInstanceOf(Function); + } + }); +}); + +describe('derived consumers stay in sync with the registry', () => { + // Catch the "parallel list drift" bug that motivated this refactor. + // If a new language gets added to registry but a derived consumer + // still hard-codes the old set, one of these will fail. + + it('EXTRACTORS contains exactly the grammar-backed languages', () => { + const grammarBacked = getLanguageDefs() + .filter((d) => d.grammar) + .map((d) => d.name) + .sort(); + const extractorKeys = Object.keys(EXTRACTORS).sort(); + expect(extractorKeys).toEqual(grammarBacked); + }); + + it('every grammar-backed extractor matches def.grammar.extractor exactly', () => { + for (const def of getLanguageDefs()) { + if (!def.grammar) continue; + expect(EXTRACTORS[def.name as keyof typeof EXTRACTORS]).toBe(def.grammar.extractor); + } + }); + + it('EXTENSION_MAP entries exactly mirror registry extensions', () => { + const expected = new Map(); + for (const def of getLanguageDefs()) { + for (const ext of def.extensions) { + expected.set(ext.toLowerCase(), def.name); + } + } + for (const [ext, lang] of expected) { + expect(EXTENSION_MAP[ext]).toBe(lang); + } + // Reverse: no extra keys in EXTENSION_MAP. 
+ expect(Object.keys(EXTENSION_MAP).sort()).toEqual([...expected.keys()].sort()); + }); + + it('detectLanguage returns the expected name for every registered extension', () => { + for (const def of getLanguageDefs()) { + for (const ext of def.extensions) { + // .h is pinned to C by the registry; the C++ heuristic only + // applies when source is provided AND looks like C++. + expect(detectLanguage(`x${ext}`)).toBe(def.name); + } + } + }); + + it('isLanguageSupported returns true for every registered language and false for unknown', () => { + for (const def of getLanguageDefs()) { + expect(isLanguageSupported(def.name as never)).toBe(true); + } + expect(isLanguageSupported('unknown' as never)).toBe(false); + }); + + it('getSupportedLanguages returns exactly the registry names', () => { + const fromRegistry = getLanguageDefs().map((d) => d.name).sort(); + const supported = (getSupportedLanguages() as string[]).sort(); + expect(supported).toEqual(fromRegistry); + }); + + it('getLanguageDisplayName uses each defs displayName', () => { + for (const def of getLanguageDefs()) { + expect(getLanguageDisplayName(def.name as never)).toBe(def.displayName); + } + }); +}); + +describe('lookup helpers', () => { + it('getLanguageDefByName returns the def for a registered name', () => { + expect(getLanguageDefByName('typescript')?.displayName).toBe('TypeScript'); + }); + + it('getLanguageDefByName returns undefined for unknown names', () => { + expect(getLanguageDefByName('nonexistent-language-name')).toBeUndefined(); + }); + + it('getLanguageDefByExtension is case-insensitive', () => { + expect(getLanguageDefByExtension('.TS')?.name).toBe('typescript'); + expect(getLanguageDefByExtension('.ts')?.name).toBe('typescript'); + }); + + it('Pascal extensionOverrides routes .dfm and .fmx to a customExtractor', () => { + const def = getLanguageDefByName('pascal'); + expect(def?.extensionOverrides?.['.dfm']?.customExtractor).toBeInstanceOf(Function); + 
expect(def?.extensionOverrides?.['.fmx']?.customExtractor).toBeInstanceOf(Function); + }); +}); diff --git a/src/default-config.ts b/src/default-config.ts new file mode 100644 index 00000000..5c59179c --- /dev/null +++ b/src/default-config.ts @@ -0,0 +1,194 @@ +/** + * Default project configuration. + * + * Lives in its own file (separate from `types.ts`) because the + * `include` glob list is derived from the language registry — and + * the registry transitively imports `types.ts` via per-language + * files, which would create an evaluation cycle if `default-config` + * were itself imported by `types.ts` eagerly. + * + * **Lazy include resolution.** The `include` array is built on + * first access via a property getter, not at module load. By the + * time anything reads `DEFAULT_CONFIG.include`, the registry has + * fully evaluated, so all language definitions are available. + */ + +import type { CodeGraphConfig } from './types'; +import { getLanguageDefs } from './extraction/languages/registry'; + +let _includeCache: string[] | null = null; +function buildIncludeGlobs(): string[] { + if (_includeCache) return _includeCache; + const seen = new Set(); + const out: string[] = []; + for (const def of getLanguageDefs()) { + for (const glob of def.includeGlobs) { + if (seen.has(glob)) continue; + seen.add(glob); + out.push(glob); + } + } + _includeCache = out; + return out; +} + +const baseConfig: CodeGraphConfig = { + version: 1, + rootDir: '.', + include: [], // populated lazily via the getter below + exclude: [ + // Version control + '**/.git/**', + + // Dependencies + '**/node_modules/**', + '**/vendor/**', + '**/Pods/**', + + // Generic build outputs + '**/dist/**', + '**/build/**', + '**/out/**', + '**/bin/**', + '**/obj/**', + '**/target/**', + + // JavaScript/TypeScript + '**/*.min.js', + '**/*.bundle.js', + '**/.next/**', + '**/.nuxt/**', + '**/.svelte-kit/**', + '**/.output/**', + '**/.turbo/**', + '**/.cache/**', + '**/.parcel-cache/**', + '**/.vite/**', + 
'**/.astro/**', + '**/.docusaurus/**', + '**/.gatsby/**', + '**/.webpack/**', + '**/.nx/**', + '**/.yarn/cache/**', + '**/.pnpm-store/**', + '**/storybook-static/**', + + // React Native / Expo + '**/.expo/**', + '**/web-build/**', + '**/ios/Pods/**', + '**/ios/build/**', + '**/android/build/**', + '**/android/.gradle/**', + + // Python + '**/__pycache__/**', + '**/.venv/**', + '**/venv/**', + '**/site-packages/**', + '**/dist-packages/**', + '**/.pytest_cache/**', + '**/.mypy_cache/**', + '**/.ruff_cache/**', + '**/.tox/**', + '**/.nox/**', + '**/*.egg-info/**', + '**/.eggs/**', + + // Go + '**/go/pkg/mod/**', + + // Rust + '**/target/debug/**', + '**/target/release/**', + + // Java/Kotlin/Gradle + '**/.gradle/**', + '**/.m2/**', + '**/generated-sources/**', + '**/.kotlin/**', + + // Dart/Flutter + '**/.dart_tool/**', + + // C#/.NET + '**/.vs/**', + '**/.nuget/**', + '**/artifacts/**', + '**/publish/**', + + // C/C++ + '**/cmake-build-*/**', + '**/CMakeFiles/**', + '**/bazel-*/**', + '**/vcpkg_installed/**', + '**/.conan/**', + '**/Debug/**', + '**/Release/**', + '**/x64/**', + '**/.pio/**', // Platform.io (IoT/embedded build artifacts and library deps) + + // Electron + '**/release/**', + '**/*.app/**', + '**/*.asar', + + // Swift/iOS/Xcode + '**/DerivedData/**', + '**/.build/**', + '**/.swiftpm/**', + '**/xcuserdata/**', + '**/Carthage/Build/**', + '**/SourcePackages/**', + + // Delphi/Pascal + '**/__history/**', + '**/__recovery/**', + '**/*.dcu', + + // PHP + '**/.composer/**', + '**/storage/framework/**', + '**/bootstrap/cache/**', + + // Ruby + '**/.bundle/**', + '**/tmp/cache/**', + '**/public/assets/**', + '**/public/packs/**', + '**/.yardoc/**', + + // Testing/Coverage + '**/coverage/**', + '**/htmlcov/**', + '**/.nyc_output/**', + '**/test-results/**', + '**/.coverage/**', + + // IDE/Editor + '**/.idea/**', + + // Logs and temp + '**/logs/**', + '**/tmp/**', + '**/temp/**', + + // Documentation build output + '**/_build/**', + '**/docs/_build/**', + 
'**/site/**', + ], + languages: [], + frameworks: [], + maxFileSize: 1024 * 1024, // 1MB + extractDocstrings: true, + trackCallSites: true, +}; + +Object.defineProperty(baseConfig, 'include', { + get: () => buildIncludeGlobs(), + enumerable: true, + configurable: true, +}); + +export const DEFAULT_CONFIG: CodeGraphConfig = baseConfig; diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts index df264fb3..5c2aec09 100644 --- a/src/extraction/grammars.ts +++ b/src/extraction/grammars.ts @@ -4,77 +4,63 @@ * Uses web-tree-sitter (WASM) for universal cross-platform support. * Grammars are loaded lazily — only languages actually present in the project * are compiled, keeping V8 WASM memory pressure low on large codebases. + * + * As of the language-registry refactor, all per-language metadata + * (WASM filenames, file extensions, display names, vendored flag) + * lives in `./languages/.ts` and is auto-collected by + * `./languages/registry.ts`. The constants exported here + * (`EXTENSION_MAP`, `getSupportedLanguages`, `getLanguageDisplayName`) + * remain for backward compat but are derived from the registry. */ import * as path from 'path'; import { Parser, Language as WasmLanguage } from 'web-tree-sitter'; import { Language } from '../types'; +import { getLanguageDefs, getLanguageDefByExtension, getLanguageDefByName } from './languages/registry'; export type GrammarLanguage = Exclude; /** - * WASM filename map — maps each language to its .wasm grammar file - * in the tree-sitter-wasms package. + * File extension → Language mapping, computed lazily on first read. + * + * Cannot be a top-level IIFE: the registry transitively pulls in + * `tree-sitter.ts` (via custom-extractor language defs), which + * imports this file — building the map at module load would TDZ + * against `ALL_DEFS` in the registry. 
Use the `getExtensionMap()` + * function for an explicit lazy entry point, or read + * `EXTENSION_MAP` (a Proxy that materialises on first property + * access). */ -const WASM_GRAMMAR_FILES: Record = { - typescript: 'tree-sitter-typescript.wasm', - tsx: 'tree-sitter-tsx.wasm', - javascript: 'tree-sitter-javascript.wasm', - jsx: 'tree-sitter-javascript.wasm', - python: 'tree-sitter-python.wasm', - go: 'tree-sitter-go.wasm', - rust: 'tree-sitter-rust.wasm', - java: 'tree-sitter-java.wasm', - c: 'tree-sitter-c.wasm', - cpp: 'tree-sitter-cpp.wasm', - csharp: 'tree-sitter-c_sharp.wasm', - php: 'tree-sitter-php.wasm', - ruby: 'tree-sitter-ruby.wasm', - swift: 'tree-sitter-swift.wasm', - kotlin: 'tree-sitter-kotlin.wasm', - dart: 'tree-sitter-dart.wasm', - pascal: 'tree-sitter-pascal.wasm', -}; +let _extensionMapCache: Record | null = null; +export function getExtensionMap(): Record { + if (_extensionMapCache) return _extensionMapCache; + const out: Record = {}; + for (const def of getLanguageDefs()) { + for (const ext of def.extensions) { + out[ext.toLowerCase()] = def.name as Language; + } + } + _extensionMapCache = out; + return out; +} /** - * File extension to Language mapping + * Backward-compat: a Proxy that lazy-builds the extension map on + * first property access. Existing callers can keep doing + * `EXTENSION_MAP['.ts']` without changes. 
*/ -export const EXTENSION_MAP: Record = { - '.ts': 'typescript', - '.tsx': 'tsx', - '.js': 'javascript', - '.mjs': 'javascript', - '.cjs': 'javascript', - '.jsx': 'jsx', - '.py': 'python', - '.pyw': 'python', - '.go': 'go', - '.rs': 'rust', - '.java': 'java', - '.c': 'c', - '.h': 'c', // Could also be C++, defaulting to C - '.cpp': 'cpp', - '.cc': 'cpp', - '.cxx': 'cpp', - '.hpp': 'cpp', - '.hxx': 'cpp', - '.cs': 'csharp', - '.php': 'php', - '.rb': 'ruby', - '.rake': 'ruby', - '.swift': 'swift', - '.kt': 'kotlin', - '.kts': 'kotlin', - '.dart': 'dart', - '.liquid': 'liquid', - '.svelte': 'svelte', - '.pas': 'pascal', - '.dpr': 'pascal', - '.dpk': 'pascal', - '.lpr': 'pascal', - '.dfm': 'pascal', - '.fmx': 'pascal', -}; +export const EXTENSION_MAP: Record = new Proxy({} as Record, { + get(_t, key: string) { return getExtensionMap()[key]; }, + has(_t, key: string) { return key in getExtensionMap(); }, + ownKeys() { return Object.keys(getExtensionMap()); }, + getOwnPropertyDescriptor(_t, key: string) { + const map = getExtensionMap(); + if (key in map) { + return { configurable: true, enumerable: true, writable: false, value: map[key] }; + } + return undefined; + }, +}); /** * Caches for loaded grammars and parsers @@ -108,21 +94,28 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise - lang in WASM_GRAMMAR_FILES && - !languageCache.has(lang) && - !unavailableGrammarErrors.has(lang) - ); + // Deduplicate; filter to languages that have a tree-sitter grammar + // (registry's `def.grammar` field) and aren't already loaded. 
+ const seen = new Set(); + const toLoad: Array<{ lang: Language; wasmFile: string; vendored: boolean }> = []; + for (const lang of languages) { + if (seen.has(lang)) continue; + seen.add(lang); + if (languageCache.has(lang) || unavailableGrammarErrors.has(lang)) continue; + const def = getLanguageDefByName(lang); + if (!def?.grammar) continue; + toLoad.push({ + lang, + wasmFile: def.grammar.wasmFile, + vendored: def.grammar.vendored === true, + }); + } // Load grammars sequentially to avoid web-tree-sitter WASM race condition on Node 20+ // See: https://github.com/tree-sitter/tree-sitter/issues/2338 - for (const lang of toLoad) { - const wasmFile = WASM_GRAMMAR_FILES[lang]; + for (const { lang, wasmFile, vendored } of toLoad) { try { - // Pascal ships its own WASM (not in tree-sitter-wasms) - const wasmPath = lang === 'pascal' + const wasmPath = vendored ? path.join(__dirname, 'wasm', wasmFile) : require.resolve(`tree-sitter-wasms/out/${wasmFile}`); const language = await WasmLanguage.load(wasmPath); @@ -140,7 +133,9 @@ export async function loadGrammarsForLanguages(languages: Language[]): Promise { - const allLanguages = Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]; + const allLanguages = getLanguageDefs() + .filter((d) => d.grammar) + .map((d) => d.name as Language); await loadGrammarsForLanguages(allLanguages); } @@ -176,7 +171,8 @@ export function getParser(language: Language): Parser | null { */ export function detectLanguage(filePath: string, source?: string): Language { const ext = filePath.substring(filePath.lastIndexOf('.')).toLowerCase(); - const lang = EXTENSION_MAP[ext] || 'unknown'; + const def = getLanguageDefByExtension(ext); + const lang = (def?.name as Language) ?? 'unknown'; // .h files could be C or C++ — check source content for C++ features if (lang === 'c' && ext === '.h' && source) { @@ -196,29 +192,30 @@ function looksLikeCpp(source: string): boolean { } /** - * Check if a language is supported (has a grammar defined). 
- * Returns true if the grammar exists, even if not yet loaded. + * Check if a language is supported (has a grammar or custom extractor). + * Returns true if a registry entry exists, even if its grammar isn't loaded. */ export function isLanguageSupported(language: Language): boolean { - if (language === 'svelte') return true; // custom extractor (script block delegation) - if (language === 'liquid') return true; // custom regex extractor if (language === 'unknown') return false; - return language in WASM_GRAMMAR_FILES; + return getLanguageDefByName(language) !== undefined; } /** * Check if a grammar has been loaded and is ready for parsing. + * Custom-extractor languages (no `grammar` field) are always "ready". */ export function isGrammarLoaded(language: Language): boolean { - if (language === 'svelte' || language === 'liquid') return true; + const def = getLanguageDefByName(language); + if (!def) return false; + if (!def.grammar) return true; // custom extractor — always available return languageCache.has(language); } /** - * Get all supported languages (those with grammar definitions). + * Get all supported languages from the registry. */ export function getSupportedLanguages(): Language[] { - return [...(Object.keys(WASM_GRAMMAR_FILES) as GrammarLanguage[]), 'svelte', 'liquid']; + return getLanguageDefs().map((d) => d.name as Language); } /** @@ -237,54 +234,33 @@ export function resetParser(language: Language): void { } /** - * Clear parser/grammar caches (useful for testing) + * Clear parser cache (useful for testing). + * + * Note: `languageCache` is intentionally NOT cleared — the WASM + * `Language` modules are expensive to load and stay cached so a + * subsequent `getParser` call can rebuild a fresh `Parser` instance + * without re-reading the .wasm file. To fully re-init, set + * `parserInitialized = false` and call `initGrammars()` again. 
*/ export function clearParserCache(): void { for (const parser of parserCache.values()) { - parser.delete(); + try { parser.delete(); } catch { /* ignore */ } } parserCache.clear(); - // Note: languageCache is NOT cleared — WASM languages persist. - // To fully re-init, set parserInitialized = false and call initGrammars() again. unavailableGrammarErrors.clear(); } /** - * Report grammars that failed to load. + * Get unavailable grammar errors (for diagnostics) */ -export function getUnavailableGrammarErrors(): Partial> { - const out: Partial> = {}; - for (const [language, message] of unavailableGrammarErrors.entries()) { - out[language] = message; - } - return out; +export function getUnavailableGrammarErrors(): Record { + return Object.fromEntries(unavailableGrammarErrors); } /** - * Get language display name + * Human-readable display name (e.g. "TypeScript", "Pascal / Delphi"). + * Returns the canonical name unchanged if no display name is registered. */ export function getLanguageDisplayName(language: Language): string { - const names: Record = { - typescript: 'TypeScript', - javascript: 'JavaScript', - tsx: 'TypeScript (TSX)', - jsx: 'JavaScript (JSX)', - python: 'Python', - go: 'Go', - rust: 'Rust', - java: 'Java', - c: 'C', - cpp: 'C++', - csharp: 'C#', - php: 'PHP', - ruby: 'Ruby', - swift: 'Swift', - kotlin: 'Kotlin', - dart: 'Dart', - svelte: 'Svelte', - liquid: 'Liquid', - pascal: 'Pascal / Delphi', - unknown: 'Unknown', - }; - return names[language] || language; + return getLanguageDefByName(language)?.displayName ?? 
language; } diff --git a/src/extraction/languages/c-cpp.ts b/src/extraction/languages/c-cpp.ts index 66219d4f..8ed3a9de 100644 --- a/src/extraction/languages/c-cpp.ts +++ b/src/extraction/languages/c-cpp.ts @@ -114,3 +114,21 @@ export const cppExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const C_DEF: LanguageDef = { + name: 'c', + displayName: 'C', + // .h is also listed for C; tree-sitter.ts contains a `.h might be C++` + // heuristic that overrides this on a content-sniff basis. + extensions: ['.c', '.h'], + includeGlobs: ['**/*.c', '**/*.h'], + grammar: { wasmFile: 'tree-sitter-c.wasm', extractor: cExtractor }, +}; +export const CPP_DEF: LanguageDef = { + name: 'cpp', + displayName: 'C++', + extensions: ['.cpp', '.cc', '.cxx', '.hpp', '.hxx'], + includeGlobs: ['**/*.cpp', '**/*.cc', '**/*.cxx', '**/*.hpp', '**/*.hxx'], + grammar: { wasmFile: 'tree-sitter-cpp.wasm', extractor: cppExtractor }, +}; diff --git a/src/extraction/languages/csharp.ts b/src/extraction/languages/csharp.ts index 9de53734..c66aea69 100644 --- a/src/extraction/languages/csharp.ts +++ b/src/extraction/languages/csharp.ts @@ -65,3 +65,12 @@ export const csharpExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const CSHARP_DEF: LanguageDef = { + name: 'csharp', + displayName: 'C#', + extensions: ['.cs'], + includeGlobs: ['**/*.cs'], + grammar: { wasmFile: 'tree-sitter-c_sharp.wasm', extractor: csharpExtractor }, +}; diff --git a/src/extraction/languages/dart.ts b/src/extraction/languages/dart.ts index 5b545d04..d704d826 100644 --- a/src/extraction/languages/dart.ts +++ b/src/extraction/languages/dart.ts @@ -193,3 +193,12 @@ export const dartExtractor: LanguageExtractor = { return undefined; }, }; + +import type { LanguageDef } from './types'; +export const DART_DEF: LanguageDef = { + name: 'dart', + displayName: 'Dart', + extensions: ['.dart'], + includeGlobs: ['**/*.dart'], + 
grammar: { wasmFile: 'tree-sitter-dart.wasm', extractor: dartExtractor }, +}; diff --git a/src/extraction/languages/go.ts b/src/extraction/languages/go.ts index 898e6165..5de68ffa 100644 --- a/src/extraction/languages/go.ts +++ b/src/extraction/languages/go.ts @@ -49,3 +49,12 @@ export const goExtractor: LanguageExtractor = { return match?.[1]; }, }; + +import type { LanguageDef } from './types'; +export const GO_DEF: LanguageDef = { + name: 'go', + displayName: 'Go', + extensions: ['.go'], + includeGlobs: ['**/*.go'], + grammar: { wasmFile: 'tree-sitter-go.wasm', extractor: goExtractor }, +}; diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts index e5d12ac6..0e35b826 100644 --- a/src/extraction/languages/index.ts +++ b/src/extraction/languages/index.ts @@ -1,44 +1,71 @@ /** - * Per-language extraction configurations. + * Per-language barrel. * - * Each file exports a LanguageExtractor config object. - * This barrel builds the EXTRACTORS map consumed by TreeSitterExtractor. + * Adding a new language is a single-file addition: drop a + * `.ts` next to this barrel exporting an `_DEF: + * LanguageDef`, then add one import + one array entry to + * `./registry.ts`. Nothing in this file needs to change for new + * languages. + * + * `EXTRACTORS` is preserved as a backward-compat export but is now + * derived from the registry. Direct readers of `EXTRACTORS` get the + * same shape they always did; the canonical source is each + * language def's `grammar.extractor` field. 
*/ -import { Language } from '../../types'; +import type { Language } from '../../types'; import type { LanguageExtractor } from '../tree-sitter-types'; +import { getLanguageDefs } from './registry'; + +export * from './registry'; -import { typescriptExtractor } from './typescript'; -import { javascriptExtractor } from './javascript'; -import { pythonExtractor } from './python'; -import { goExtractor } from './go'; -import { rustExtractor } from './rust'; -import { javaExtractor } from './java'; -import { cExtractor, cppExtractor } from './c-cpp'; -import { csharpExtractor } from './csharp'; -import { phpExtractor } from './php'; -import { rubyExtractor } from './ruby'; -import { swiftExtractor } from './swift'; -import { kotlinExtractor } from './kotlin'; -import { dartExtractor } from './dart'; -import { pascalExtractor } from './pascal'; +/** + * Backward-compat: `Language → LanguageExtractor` map. Built lazily + * on first read (the registry transitively imports modules that + * import this barrel, so building eagerly would TDZ). 
+ */ +let _extractorsCache: Partial> | null = null; +function buildExtractors(): Partial> { + if (_extractorsCache) return _extractorsCache; + const out: Partial> = {}; + for (const def of getLanguageDefs()) { + if (def.grammar) { + out[def.name as Language] = def.grammar.extractor; + } + } + _extractorsCache = out; + return out; +} -export const EXTRACTORS: Partial> = { - typescript: typescriptExtractor, - tsx: typescriptExtractor, - javascript: javascriptExtractor, - jsx: javascriptExtractor, - python: pythonExtractor, - go: goExtractor, - rust: rustExtractor, - java: javaExtractor, - c: cExtractor, - cpp: cppExtractor, - csharp: csharpExtractor, - php: phpExtractor, - ruby: rubyExtractor, - swift: swiftExtractor, - kotlin: kotlinExtractor, - dart: dartExtractor, - pascal: pascalExtractor, -}; +/** + * Lazy Proxy keeps the existing `EXTRACTORS[lang]` access pattern + * working without forcing the registry to evaluate at module load + * (which would deadlock on the cyclic import chain through + * tree-sitter.ts). 
+ */ +export const EXTRACTORS: Partial> = new Proxy( + {} as Partial>, + { + get(_t, key: string) { + return buildExtractors()[key as Language]; + }, + has(_t, key: string) { + return key in buildExtractors(); + }, + ownKeys() { + return Object.keys(buildExtractors()); + }, + getOwnPropertyDescriptor(_t, key: string) { + const m = buildExtractors(); + if ((key as Language) in m) { + return { + configurable: true, + enumerable: true, + writable: false, + value: m[key as Language], + }; + } + return undefined; + }, + } +); diff --git a/src/extraction/languages/java.ts b/src/extraction/languages/java.ts index 638533f0..9613217c 100644 --- a/src/extraction/languages/java.ts +++ b/src/extraction/languages/java.ts @@ -57,3 +57,12 @@ export const javaExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const JAVA_DEF: LanguageDef = { + name: 'java', + displayName: 'Java', + extensions: ['.java'], + includeGlobs: ['**/*.java'], + grammar: { wasmFile: 'tree-sitter-java.wasm', extractor: javaExtractor }, +}; diff --git a/src/extraction/languages/javascript.ts b/src/extraction/languages/javascript.ts index 0a0d6780..946e1c5c 100644 --- a/src/extraction/languages/javascript.ts +++ b/src/extraction/languages/javascript.ts @@ -82,3 +82,12 @@ export const javascriptExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const JAVASCRIPT_DEF: LanguageDef = { + name: 'javascript', + displayName: 'JavaScript', + extensions: ['.js', '.mjs', '.cjs'], + includeGlobs: ['**/*.js'], + grammar: { wasmFile: 'tree-sitter-javascript.wasm', extractor: javascriptExtractor }, +}; diff --git a/src/extraction/languages/jsx.ts b/src/extraction/languages/jsx.ts new file mode 100644 index 00000000..5091ee64 --- /dev/null +++ b/src/extraction/languages/jsx.ts @@ -0,0 +1,14 @@ +/** + * JSX — reuses the JavaScript extractor (the JS grammar handles JSX + * via the same `tree-sitter-javascript.wasm` 
file). + */ +import { javascriptExtractor } from './javascript'; +import type { LanguageDef } from './types'; + +export const JSX_DEF: LanguageDef = { + name: 'jsx', + displayName: 'JSX', + extensions: ['.jsx'], + includeGlobs: ['**/*.jsx'], + grammar: { wasmFile: 'tree-sitter-javascript.wasm', extractor: javascriptExtractor }, +}; diff --git a/src/extraction/languages/kotlin.ts b/src/extraction/languages/kotlin.ts index 19c38624..77d15609 100644 --- a/src/extraction/languages/kotlin.ts +++ b/src/extraction/languages/kotlin.ts @@ -236,3 +236,12 @@ export const kotlinExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const KOTLIN_DEF: LanguageDef = { + name: 'kotlin', + displayName: 'Kotlin', + extensions: ['.kt', '.kts'], + includeGlobs: ['**/*.kt'], + grammar: { wasmFile: 'tree-sitter-kotlin.wasm', extractor: kotlinExtractor }, +}; diff --git a/src/extraction/languages/liquid.ts b/src/extraction/languages/liquid.ts new file mode 100644 index 00000000..ead2f978 --- /dev/null +++ b/src/extraction/languages/liquid.ts @@ -0,0 +1,16 @@ +/** + * Liquid — custom regex-based extractor for Shopify Liquid templates. + * Tree-sitter has no production-quality Liquid grammar; the + * `LiquidExtractor` does targeted pattern matching for snippet + * includes and Drop variable references. 
+ */ +import { LiquidExtractor } from '../liquid-extractor'; +import type { LanguageDef } from './types'; + +export const LIQUID_DEF: LanguageDef = { + name: 'liquid', + displayName: 'Liquid', + extensions: ['.liquid'], + includeGlobs: ['**/*.liquid'], + customExtractor: (filePath, source) => new LiquidExtractor(filePath, source).extract(), +}; diff --git a/src/extraction/languages/pascal.ts b/src/extraction/languages/pascal.ts index aed6a59f..a196c7b0 100644 --- a/src/extraction/languages/pascal.ts +++ b/src/extraction/languages/pascal.ts @@ -60,3 +60,30 @@ export const pascalExtractor: LanguageExtractor = { return node.type === 'declConst'; }, }; + +import type { LanguageDef } from './types'; +import { DfmExtractor } from '../dfm-extractor'; + +const dfmCustomExtractor = (filePath: string, source: string) => + new DfmExtractor(filePath, source).extract(); + +export const PASCAL_DEF: LanguageDef = { + name: 'pascal', + displayName: 'Pascal / Delphi', + extensions: ['.pas', '.dpr', '.dpk', '.lpr', '.dfm', '.fmx'], + includeGlobs: [ + '**/*.pas', '**/*.dpr', '**/*.dpk', '**/*.lpr', + '**/*.dfm', '**/*.fmx', + ], + grammar: { + wasmFile: 'tree-sitter-pascal.wasm', + vendored: true, + extractor: pascalExtractor, + }, + // .dfm/.fmx are Delphi/FireMonkey form files — declarative property + // definitions, not Pascal source. Route them to the dedicated DfmExtractor. 
+ extensionOverrides: { + '.dfm': { customExtractor: dfmCustomExtractor }, + '.fmx': { customExtractor: dfmCustomExtractor }, + }, +}; diff --git a/src/extraction/languages/php.ts b/src/extraction/languages/php.ts index 1133f979..30271286 100644 --- a/src/extraction/languages/php.ts +++ b/src/extraction/languages/php.ts @@ -103,3 +103,12 @@ export const phpExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const PHP_DEF: LanguageDef = { + name: 'php', + displayName: 'PHP', + extensions: ['.php'], + includeGlobs: ['**/*.php'], + grammar: { wasmFile: 'tree-sitter-php.wasm', extractor: phpExtractor }, +}; diff --git a/src/extraction/languages/python.ts b/src/extraction/languages/python.ts index 77807d66..2cddcf40 100644 --- a/src/extraction/languages/python.ts +++ b/src/extraction/languages/python.ts @@ -51,3 +51,12 @@ export const pythonExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const PYTHON_DEF: LanguageDef = { + name: 'python', + displayName: 'Python', + extensions: ['.py', '.pyw'], + includeGlobs: ['**/*.py'], + grammar: { wasmFile: 'tree-sitter-python.wasm', extractor: pythonExtractor }, +}; diff --git a/src/extraction/languages/registry.ts b/src/extraction/languages/registry.ts new file mode 100644 index 00000000..7e334b72 --- /dev/null +++ b/src/extraction/languages/registry.ts @@ -0,0 +1,108 @@ +/** + * Language registry — central import + collection of every per-language + * `LanguageDef`. Adding a new language is: + * + * 1. Create `src/extraction/languages/.ts` exporting an + * `_DEF: LanguageDef` constant. + * 2. Add **one** import line and **one** array entry to this file. 
+ * + * **That is the complete change list.** All consumers + * (`grammars.ts`, `tree-sitter.ts`'s extractor lookup, + * `default-config.ts`'s include globs, the legacy `EXTRACTORS` + * barrel in `./index.ts`) all read from this registry — there is + * no parallel list to keep in sync. + * + * This file is the only place a "central list" of languages lives, + * so adjacent-line conflicts between PRs adding different languages + * are limited to whichever alphabetical neighborhood they target. + * + * Note: an earlier draft used `fs.readdirSync` auto-discovery which + * eliminated even this file, but `require()` of extensionless paths + * doesn't work under vitest's vite-node loader for `.ts` source. A + * generated-barrel build step would restore zero-list-edits and is + * tracked as a follow-up. + */ + +import type { LanguageDef } from './types'; + +// ===================================================================== +// Imports — one per language, alphabetical by name +// ===================================================================== +import { C_DEF, CPP_DEF } from './c-cpp'; +import { CSHARP_DEF } from './csharp'; +import { DART_DEF } from './dart'; +import { GO_DEF } from './go'; +import { JAVA_DEF } from './java'; +import { JAVASCRIPT_DEF } from './javascript'; +import { JSX_DEF } from './jsx'; +import { KOTLIN_DEF } from './kotlin'; +import { LIQUID_DEF } from './liquid'; +import { PASCAL_DEF } from './pascal'; +import { PHP_DEF } from './php'; +import { PYTHON_DEF } from './python'; +import { RUBY_DEF } from './ruby'; +import { RUST_DEF } from './rust'; +import { SVELTE_DEF } from './svelte'; +import { SWIFT_DEF } from './swift'; +import { TSX_DEF } from './tsx'; +import { TYPESCRIPT_DEF } from './typescript'; + +// ===================================================================== +// Registry — alphabetical by name +// ===================================================================== +const ALL_DEFS: readonly LanguageDef[] = [ + C_DEF, + 
CPP_DEF, + CSHARP_DEF, + DART_DEF, + GO_DEF, + JAVA_DEF, + JAVASCRIPT_DEF, + JSX_DEF, + KOTLIN_DEF, + LIQUID_DEF, + PASCAL_DEF, + PHP_DEF, + PYTHON_DEF, + RUBY_DEF, + RUST_DEF, + SVELTE_DEF, + SWIFT_DEF, + TSX_DEF, + TYPESCRIPT_DEF, +]; + +let byName: Map<string, LanguageDef> | null = null; +let byExtension: Map<string, LanguageDef> | null = null; + +function ensureIndexes(): void { + if (byName && byExtension) return; + byName = new Map(); + byExtension = new Map(); + for (const def of ALL_DEFS) { + byName.set(def.name, def); + for (const ext of def.extensions) { + byExtension.set(ext.toLowerCase(), def); + } + } +} + +export function getLanguageDefs(): readonly LanguageDef[] { + return ALL_DEFS; +} + +export function getLanguageDefByName(name: string): LanguageDef | undefined { + ensureIndexes(); + return byName!.get(name); +} + +export function getLanguageDefByExtension(ext: string): LanguageDef | undefined { + ensureIndexes(); + return byExtension!.get(ext.toLowerCase()); +} + +/** Reset cached indexes; the next lookup rebuilds them. Intended for tests. 
*/ +export function _resetRegistryCacheForTests(): void { + byName = null; + byExtension = null; +} diff --git a/src/extraction/languages/ruby.ts b/src/extraction/languages/ruby.ts index b5426165..810ac26a 100644 --- a/src/extraction/languages/ruby.ts +++ b/src/extraction/languages/ruby.ts @@ -109,3 +109,12 @@ export const rubyExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const RUBY_DEF: LanguageDef = { + name: 'ruby', + displayName: 'Ruby', + extensions: ['.rb', '.rake'], + includeGlobs: ['**/*.rb'], + grammar: { wasmFile: 'tree-sitter-ruby.wasm', extractor: rubyExtractor }, +}; diff --git a/src/extraction/languages/rust.ts b/src/extraction/languages/rust.ts index 0266a2fd..35c957c0 100644 --- a/src/extraction/languages/rust.ts +++ b/src/extraction/languages/rust.ts @@ -114,3 +114,12 @@ export const rustExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const RUST_DEF: LanguageDef = { + name: 'rust', + displayName: 'Rust', + extensions: ['.rs'], + includeGlobs: ['**/*.rs'], + grammar: { wasmFile: 'tree-sitter-rust.wasm', extractor: rustExtractor }, +}; diff --git a/src/extraction/languages/svelte.ts b/src/extraction/languages/svelte.ts new file mode 100644 index 00000000..7f7ab889 --- /dev/null +++ b/src/extraction/languages/svelte.ts @@ -0,0 +1,15 @@ +/** + * Svelte — custom extractor that delegates the script block back + * through the universal extraction pipeline as TypeScript/JavaScript, + * then merges in template-level call references. 
+ */ +import { SvelteExtractor } from '../svelte-extractor'; +import type { LanguageDef } from './types'; + +export const SVELTE_DEF: LanguageDef = { + name: 'svelte', + displayName: 'Svelte', + extensions: ['.svelte'], + includeGlobs: ['**/*.svelte'], + customExtractor: (filePath, source) => new SvelteExtractor(filePath, source).extract(), +}; diff --git a/src/extraction/languages/swift.ts b/src/extraction/languages/swift.ts index 373fa8a9..fe1ac5ce 100644 --- a/src/extraction/languages/swift.ts +++ b/src/extraction/languages/swift.ts @@ -81,3 +81,12 @@ export const swiftExtractor: LanguageExtractor = { return null; }, }; + +import type { LanguageDef } from './types'; +export const SWIFT_DEF: LanguageDef = { + name: 'swift', + displayName: 'Swift', + extensions: ['.swift'], + includeGlobs: ['**/*.swift'], + grammar: { wasmFile: 'tree-sitter-swift.wasm', extractor: swiftExtractor }, +}; diff --git a/src/extraction/languages/tsx.ts b/src/extraction/languages/tsx.ts new file mode 100644 index 00000000..f4cbe536 --- /dev/null +++ b/src/extraction/languages/tsx.ts @@ -0,0 +1,14 @@ +/** + * TSX (TypeScript + JSX) — reuses the TypeScript extractor with a + * dedicated grammar so JSX-specific node types parse correctly. + */ +import { typescriptExtractor } from './typescript'; +import type { LanguageDef } from './types'; + +export const TSX_DEF: LanguageDef = { + name: 'tsx', + displayName: 'TSX', + extensions: ['.tsx'], + includeGlobs: ['**/*.tsx'], + grammar: { wasmFile: 'tree-sitter-tsx.wasm', extractor: typescriptExtractor }, +}; diff --git a/src/extraction/languages/types.ts b/src/extraction/languages/types.ts new file mode 100644 index 00000000..a93e1930 --- /dev/null +++ b/src/extraction/languages/types.ts @@ -0,0 +1,83 @@ +/** + * Per-language registry types. + * + * Each language ships its own self-contained `LanguageDef` (file + * extensions, default-config globs, grammar config, etc.) 
so that + * adding a new language is a single-file addition rather than 6 + * coordinated edits across `types.ts`, `grammars.ts`, and the + * `extraction/languages/index.ts` barrel. The registry + * (`./registry`) imports and lists each definition explicitly. + */ + +import type { LanguageExtractor } from '../tree-sitter-types'; +import type { ExtractionResult } from '../../types'; + +/** + * Custom extraction function for languages that don't fit the + * universal tree-sitter AST shape (Liquid, Svelte, HCL, SQL, + * Pascal DFM/FMX form files). + */ +export type CustomExtractorFn = (filePath: string, source: string) => ExtractionResult; + +export interface GrammarBackedConfig { + /** + * WASM grammar filename. Resolved either against the + * `tree-sitter-wasms` npm package or, if `vendored` is true, + * against `src/extraction/wasm/`. + */ + wasmFile: string; + /** + * True when the WASM is shipped under `src/extraction/wasm/` + * because no pre-built grammar exists in `tree-sitter-wasms`. + */ + vendored?: boolean; + /** + * Per-language tree-sitter extraction config consumed by + * `TreeSitterExtractor`. The existing per-language objects + * (e.g. `typescriptExtractor`) are passed in here unchanged. + */ + extractor: LanguageExtractor; +} + +export interface LanguageDef { + /** + * Canonical language name. Stored as the `language` value on + * `Node`, `Edge`, and `FileRecord` rows. Should match an entry + * in the `Language` union in `src/types.ts` for known + * languages; new registry-only languages are accepted as + * strings at runtime. + */ + name: string; + /** Human-readable display label (e.g. "HCL / Terraform"). */ + displayName: string; + /** + * File extensions, lower-cased, with leading dot. Each + * extension uniquely maps to one language (caller should not + * register the same extension twice). + */ + extensions: readonly string[]; + /** + * Default-config include glob patterns. Combined into + * `DEFAULT_CONFIG.include` at registry load. 
+ */ + includeGlobs: readonly string[]; + /** + * Tree-sitter grammar config. Absent for purely-custom + * languages like Liquid (regex-based) and Svelte (script + * delegation). + */ + grammar?: GrammarBackedConfig; + /** + * Whole-language custom extractor. Used when `grammar` is + * absent. If both are present, `extensionOverrides` and + * `customExtractor` win over `grammar`. + */ + customExtractor?: CustomExtractorFn; + /** + * Per-extension override. Used by Pascal where `.dfm`/`.fmx` + * (form files) are extracted by `DfmExtractor` rather than the + * tree-sitter Pascal grammar. Keys are lower-cased extensions + * with the leading dot. + */ + extensionOverrides?: Readonly<Record<string, { customExtractor: CustomExtractorFn }>>; +} diff --git a/src/extraction/languages/typescript.ts b/src/extraction/languages/typescript.ts index 9540dd94..9f82e675 100644 --- a/src/extraction/languages/typescript.ts +++ b/src/extraction/languages/typescript.ts @@ -1,5 +1,6 @@ import { getNodeText, getChildByField } from '../tree-sitter-helpers'; import type { LanguageExtractor } from '../tree-sitter-types'; +import type { LanguageDef } from './types'; export const typescriptExtractor: LanguageExtractor = { functionTypes: ['function_declaration', 'arrow_function', 'function_expression'], @@ -116,3 +117,11 @@ export const typescriptExtractor: LanguageExtractor = { return null; }, }; + +export const TYPESCRIPT_DEF: LanguageDef = { + name: 'typescript', + displayName: 'TypeScript', + extensions: ['.ts'], + includeGlobs: ['**/*.ts'], + grammar: { wasmFile: 'tree-sitter-typescript.wasm', extractor: typescriptExtractor }, +}; diff --git a/src/extraction/tree-sitter.ts b/src/extraction/tree-sitter.ts index 7345d91f..29159e2a 100644 --- a/src/extraction/tree-sitter.ts +++ b/src/extraction/tree-sitter.ts @@ -18,10 +18,7 @@ import { import { getParser, detectLanguage, isLanguageSupported } from './grammars'; import { generateNodeId, getNodeText, getChildByField, getPrecedingDocstring } from './tree-sitter-helpers'; import type { 
LanguageExtractor, ExtractorContext } from './tree-sitter-types'; -import { EXTRACTORS } from './languages'; -import { LiquidExtractor } from './liquid-extractor'; -import { SvelteExtractor } from './svelte-extractor'; -import { DfmExtractor } from './dfm-extractor'; +import { getLanguageDefByName } from './languages/registry'; // Re-export for backward compatibility export { generateNodeId } from './tree-sitter-helpers'; @@ -115,7 +112,10 @@ export class TreeSitterExtractor { this.filePath = filePath; this.source = source; this.language = language || detectLanguage(filePath, source); - this.extractor = EXTRACTORS[this.language] || null; + // Single source of truth: read the extractor straight off the + // language def so adding a new grammar-backed language is a + // one-file change (no parallel EXTRACTORS map to keep in sync). + this.extractor = getLanguageDefByName(this.language)?.grammar?.extractor ?? null; } /** @@ -2319,28 +2319,21 @@ export function extractFromSource( ): ExtractionResult { const detectedLanguage = language || detectLanguage(filePath, source); const fileExtension = path.extname(filePath).toLowerCase(); + const def = getLanguageDefByName(detectedLanguage); - // Use custom extractor for Svelte - if (detectedLanguage === 'svelte') { - const extractor = new SvelteExtractor(filePath, source); - return extractor.extract(); + // Per-extension override wins (e.g. Pascal `.dfm`/`.fmx` route to + // DfmExtractor rather than the tree-sitter Pascal grammar). 
+ const override = def?.extensionOverrides?.[fileExtension]; + if (override) { + return override.customExtractor(filePath, source); } - // Use custom extractor for Liquid - if (detectedLanguage === 'liquid') { - const extractor = new LiquidExtractor(filePath, source); - return extractor.extract(); - } - - // Use custom extractor for DFM/FMX form files - if ( - detectedLanguage === 'pascal' && - (fileExtension === '.dfm' || fileExtension === '.fmx') - ) { - const extractor = new DfmExtractor(filePath, source); - return extractor.extract(); + // Whole-language custom extractor (Liquid, Svelte, etc.). + if (def?.customExtractor) { + return def.customExtractor(filePath, source); } + // Tree-sitter path. const extractor = new TreeSitterExtractor(filePath, source, detectedLanguage); return extractor.extract(); } diff --git a/src/types.ts b/src/types.ts index 6834483d..e9b3cbcc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -476,206 +476,11 @@ export interface CodeGraphConfig { }[]; } -/** - * Default configuration values - */ -export const DEFAULT_CONFIG: CodeGraphConfig = { - version: 1, - rootDir: '.', - include: [ - // TypeScript/JavaScript - '**/*.ts', - '**/*.tsx', - '**/*.js', - '**/*.jsx', - // Python - '**/*.py', - // Go - '**/*.go', - // Rust - '**/*.rs', - // Java - '**/*.java', - // C/C++ - '**/*.c', - '**/*.h', - '**/*.cpp', - '**/*.hpp', - '**/*.cc', - '**/*.cxx', - // C# - '**/*.cs', - // PHP - '**/*.php', - // Ruby - '**/*.rb', - // Swift - '**/*.swift', - // Kotlin - '**/*.kt', - '**/*.kts', - // Dart - '**/*.dart', - // Svelte - '**/*.svelte', - // Liquid (Shopify themes) - '**/*.liquid', - // Pascal / Delphi - '**/*.pas', - '**/*.dpr', - '**/*.dpk', - '**/*.lpr', - '**/*.dfm', - '**/*.fmx', - ], - exclude: [ - // Version control - '**/.git/**', - - // Dependencies - '**/node_modules/**', - '**/vendor/**', - '**/Pods/**', - - // Generic build outputs - '**/dist/**', - '**/build/**', - '**/out/**', - '**/bin/**', - '**/obj/**', - '**/target/**', - - // 
JavaScript/TypeScript - '**/*.min.js', - '**/*.bundle.js', - '**/.next/**', - '**/.nuxt/**', - '**/.svelte-kit/**', - '**/.output/**', - '**/.turbo/**', - '**/.cache/**', - '**/.parcel-cache/**', - '**/.vite/**', - '**/.astro/**', - '**/.docusaurus/**', - '**/.gatsby/**', - '**/.webpack/**', - '**/.nx/**', - '**/.yarn/cache/**', - '**/.pnpm-store/**', - '**/storybook-static/**', - - // React Native / Expo - '**/.expo/**', - '**/web-build/**', - '**/ios/Pods/**', - '**/ios/build/**', - '**/android/build/**', - '**/android/.gradle/**', - - // Python - '**/__pycache__/**', - '**/.venv/**', - '**/venv/**', - '**/site-packages/**', - '**/dist-packages/**', - '**/.pytest_cache/**', - '**/.mypy_cache/**', - '**/.ruff_cache/**', - '**/.tox/**', - '**/.nox/**', - '**/*.egg-info/**', - '**/.eggs/**', - - // Go - '**/go/pkg/mod/**', - - // Rust - '**/target/debug/**', - '**/target/release/**', - - // Java/Kotlin/Gradle - '**/.gradle/**', - '**/.m2/**', - '**/generated-sources/**', - '**/.kotlin/**', - - // Dart/Flutter - '**/.dart_tool/**', - - // C#/.NET - '**/.vs/**', - '**/.nuget/**', - '**/artifacts/**', - '**/publish/**', - - // C/C++ - '**/cmake-build-*/**', - '**/CMakeFiles/**', - '**/bazel-*/**', - '**/vcpkg_installed/**', - '**/.conan/**', - '**/Debug/**', - '**/Release/**', - '**/x64/**', - '**/.pio/**', // Platform.io (IoT/embedded build artifacts and library deps) - - // Electron - '**/release/**', - '**/*.app/**', - '**/*.asar', - - // Swift/iOS/Xcode - '**/DerivedData/**', - '**/.build/**', - '**/.swiftpm/**', - '**/xcuserdata/**', - '**/Carthage/Build/**', - '**/SourcePackages/**', - - // Delphi/Pascal - '**/__history/**', - '**/__recovery/**', - '**/*.dcu', - - // PHP - '**/.composer/**', - '**/storage/framework/**', - '**/bootstrap/cache/**', - - // Ruby - '**/.bundle/**', - '**/tmp/cache/**', - '**/public/assets/**', - '**/public/packs/**', - '**/.yardoc/**', - - // Testing/Coverage - '**/coverage/**', - '**/htmlcov/**', - '**/.nyc_output/**', - 
'**/test-results/**', - '**/.coverage/**', - - // IDE/Editor - '**/.idea/**', - - // Logs and temp - '**/logs/**', - '**/tmp/**', - '**/temp/**', - - // Documentation build output - '**/_build/**', - '**/docs/_build/**', - '**/site/**', - ], - languages: [], - frameworks: [], - maxFileSize: 1024 * 1024, // 1MB - extractDocstrings: true, - trackCallSites: true, -}; +// `DEFAULT_CONFIG` lives in `./default-config.ts` so its `include` +// list can be derived from the language registry without import +// cycles. Re-exported here for backward compat with consumers that +// already import it from `'./types'`. +export { DEFAULT_CONFIG } from './default-config'; // ============================================================================= // Database Types