Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions __tests__/language-registry.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/**
* Language registry: structural invariants.
*
* These tests guard against the "parallel list" failure mode that
* the registry refactor exists to prevent. If a future PR adds a
* grammar-backed language but forgets to wire it through one of
* the derived consumers, one of these tests should catch it.
*/
import { describe, it, expect } from 'vitest';
import {
getLanguageDefs,
getLanguageDefByExtension,
getLanguageDefByName,
} from '../src/extraction/languages/registry';
import { EXTRACTORS } from '../src/extraction/languages';
import {
detectLanguage,
isLanguageSupported,
getSupportedLanguages,
getLanguageDisplayName,
EXTENSION_MAP,
} from '../src/extraction/grammars';

describe('language registry — single source of truth', () => {
it('has at least the original 19 languages', () => {
const defs = getLanguageDefs();
expect(defs.length).toBeGreaterThanOrEqual(19);
});

it('every def has unique non-empty name', () => {
const names = new Set<string>();
for (const def of getLanguageDefs()) {
expect(def.name).toBeTruthy();
expect(names.has(def.name)).toBe(false);
names.add(def.name);
}
});

it('extensions are unique across registry (one ext maps to one language)', () => {
const seen = new Map<string, string>();
for (const def of getLanguageDefs()) {
for (const ext of def.extensions) {
const lower = ext.toLowerCase();
if (seen.has(lower)) {
// The .h ambiguity (C vs C++) is intentionally pinned to C
// by the registry; tree-sitter.ts has a content-sniff
// override. Anything else duplicating extensions is a bug.
throw new Error(
`Extension ${lower} mapped twice: ${seen.get(lower)} and ${def.name}`
);
}
seen.set(lower, def.name);
}
}
});

it('grammar-backed defs have wasmFile + extractor', () => {
for (const def of getLanguageDefs()) {
if (!def.grammar) continue;
expect(def.grammar.wasmFile).toMatch(/^tree-sitter-.+\.wasm$/);
expect(def.grammar.extractor).toBeDefined();
}
});

it('custom-extractor defs have a customExtractor function', () => {
for (const def of getLanguageDefs()) {
if (def.grammar) continue; // grammar-backed
expect(def.customExtractor).toBeInstanceOf(Function);
}
});
});

describe('derived consumers stay in sync with the registry', () => {
// Catch the "parallel list drift" bug that motivated this refactor.
// If a new language gets added to registry but a derived consumer
// still hard-codes the old set, one of these will fail.

it('EXTRACTORS contains exactly the grammar-backed languages', () => {
const grammarBacked = getLanguageDefs()
.filter((d) => d.grammar)
.map((d) => d.name)
.sort();
const extractorKeys = Object.keys(EXTRACTORS).sort();
expect(extractorKeys).toEqual(grammarBacked);
});

it('every grammar-backed extractor matches def.grammar.extractor exactly', () => {
for (const def of getLanguageDefs()) {
if (!def.grammar) continue;
expect(EXTRACTORS[def.name as keyof typeof EXTRACTORS]).toBe(def.grammar.extractor);
}
});

it('EXTENSION_MAP entries exactly mirror registry extensions', () => {
const expected = new Map<string, string>();
for (const def of getLanguageDefs()) {
for (const ext of def.extensions) {
expected.set(ext.toLowerCase(), def.name);
}
}
for (const [ext, lang] of expected) {
expect(EXTENSION_MAP[ext]).toBe(lang);
}
// Reverse: no extra keys in EXTENSION_MAP.
expect(Object.keys(EXTENSION_MAP).sort()).toEqual([...expected.keys()].sort());
});

it('detectLanguage returns the expected name for every registered extension', () => {
for (const def of getLanguageDefs()) {
for (const ext of def.extensions) {
// .h is pinned to C by the registry; the C++ heuristic only
// applies when source is provided AND looks like C++.
expect(detectLanguage(`x${ext}`)).toBe(def.name);
}
}
});

it('isLanguageSupported returns true for every registered language and false for unknown', () => {
for (const def of getLanguageDefs()) {
expect(isLanguageSupported(def.name as never)).toBe(true);
}
expect(isLanguageSupported('unknown' as never)).toBe(false);
});

it('getSupportedLanguages returns exactly the registry names', () => {
const fromRegistry = getLanguageDefs().map((d) => d.name).sort();
const supported = (getSupportedLanguages() as string[]).sort();
expect(supported).toEqual(fromRegistry);
});

it('getLanguageDisplayName uses each defs displayName', () => {
for (const def of getLanguageDefs()) {
expect(getLanguageDisplayName(def.name as never)).toBe(def.displayName);
}
});
});

describe('lookup helpers', () => {
it('getLanguageDefByName returns the def for a registered name', () => {
expect(getLanguageDefByName('typescript')?.displayName).toBe('TypeScript');
});

it('getLanguageDefByName returns undefined for unknown names', () => {
expect(getLanguageDefByName('nonexistent-language-name')).toBeUndefined();
});

it('getLanguageDefByExtension is case-insensitive', () => {
expect(getLanguageDefByExtension('.TS')?.name).toBe('typescript');
expect(getLanguageDefByExtension('.ts')?.name).toBe('typescript');
});

it('Pascal extensionOverrides routes .dfm and .fmx to a customExtractor', () => {
const def = getLanguageDefByName('pascal');
expect(def?.extensionOverrides?.['.dfm']?.customExtractor).toBeInstanceOf(Function);
expect(def?.extensionOverrides?.['.fmx']?.customExtractor).toBeInstanceOf(Function);
});
});
194 changes: 194 additions & 0 deletions src/default-config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/**
* Default project configuration.
*
* Lives in its own file (separate from `types.ts`) because the
* `include` glob list is derived from the language registry — and
* the registry transitively imports `types.ts` via per-language
* files, which would create an evaluation cycle if `default-config`
* were itself imported by `types.ts` eagerly.
*
* **Lazy include resolution.** The `include` array is built on
* first access via a property getter, not at module load. By the
* time anything reads `DEFAULT_CONFIG.include`, the registry has
* fully evaluated, so all language definitions are available.
*/

import type { CodeGraphConfig } from './types';
import { getLanguageDefs } from './extraction/languages/registry';

let _includeCache: string[] | null = null;
function buildIncludeGlobs(): string[] {
if (_includeCache) return _includeCache;
const seen = new Set<string>();
const out: string[] = [];
for (const def of getLanguageDefs()) {
for (const glob of def.includeGlobs) {
if (seen.has(glob)) continue;
seen.add(glob);
out.push(glob);
}
}
_includeCache = out;
return out;
}

const baseConfig: CodeGraphConfig = {
version: 1,
rootDir: '.',
include: [], // populated lazily via the getter below
exclude: [
// Version control
'**/.git/**',

// Dependencies
'**/node_modules/**',
'**/vendor/**',
'**/Pods/**',

// Generic build outputs
'**/dist/**',
'**/build/**',
'**/out/**',
'**/bin/**',
'**/obj/**',
'**/target/**',

// JavaScript/TypeScript
'**/*.min.js',
'**/*.bundle.js',
'**/.next/**',
'**/.nuxt/**',
'**/.svelte-kit/**',
'**/.output/**',
'**/.turbo/**',
'**/.cache/**',
'**/.parcel-cache/**',
'**/.vite/**',
'**/.astro/**',
'**/.docusaurus/**',
'**/.gatsby/**',
'**/.webpack/**',
'**/.nx/**',
'**/.yarn/cache/**',
'**/.pnpm-store/**',
'**/storybook-static/**',

// React Native / Expo
'**/.expo/**',
'**/web-build/**',
'**/ios/Pods/**',
'**/ios/build/**',
'**/android/build/**',
'**/android/.gradle/**',

// Python
'**/__pycache__/**',
'**/.venv/**',
'**/venv/**',
'**/site-packages/**',
'**/dist-packages/**',
'**/.pytest_cache/**',
'**/.mypy_cache/**',
'**/.ruff_cache/**',
'**/.tox/**',
'**/.nox/**',
'**/*.egg-info/**',
'**/.eggs/**',

// Go
'**/go/pkg/mod/**',

// Rust
'**/target/debug/**',
'**/target/release/**',

// Java/Kotlin/Gradle
'**/.gradle/**',
'**/.m2/**',
'**/generated-sources/**',
'**/.kotlin/**',

// Dart/Flutter
'**/.dart_tool/**',

// C#/.NET
'**/.vs/**',
'**/.nuget/**',
'**/artifacts/**',
'**/publish/**',

// C/C++
'**/cmake-build-*/**',
'**/CMakeFiles/**',
'**/bazel-*/**',
'**/vcpkg_installed/**',
'**/.conan/**',
'**/Debug/**',
'**/Release/**',
'**/x64/**',
'**/.pio/**', // Platform.io (IoT/embedded build artifacts and library deps)

// Electron
'**/release/**',
'**/*.app/**',
'**/*.asar',

// Swift/iOS/Xcode
'**/DerivedData/**',
'**/.build/**',
'**/.swiftpm/**',
'**/xcuserdata/**',
'**/Carthage/Build/**',
'**/SourcePackages/**',

// Delphi/Pascal
'**/__history/**',
'**/__recovery/**',
'**/*.dcu',

// PHP
'**/.composer/**',
'**/storage/framework/**',
'**/bootstrap/cache/**',

// Ruby
'**/.bundle/**',
'**/tmp/cache/**',
'**/public/assets/**',
'**/public/packs/**',
'**/.yardoc/**',

// Testing/Coverage
'**/coverage/**',
'**/htmlcov/**',
'**/.nyc_output/**',
'**/test-results/**',
'**/.coverage/**',

// IDE/Editor
'**/.idea/**',

// Logs and temp
'**/logs/**',
'**/tmp/**',
'**/temp/**',

// Documentation build output
'**/_build/**',
'**/docs/_build/**',
'**/site/**',
],
languages: [],
frameworks: [],
maxFileSize: 1024 * 1024, // 1MB
extractDocstrings: true,
trackCallSites: true,
};

Object.defineProperty(baseConfig, 'include', {
get: () => buildIncludeGlobs(),
enumerable: true,
configurable: true,
});

export const DEFAULT_CONFIG: CodeGraphConfig = baseConfig;
Loading