diff --git a/Cargo.lock b/Cargo.lock index d1cbc7ba..40361559 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -838,6 +838,7 @@ dependencies = [ "tree-sitter-javascript", "tree-sitter-kotlin-codanna", "tree-sitter-lua", + "tree-sitter-nix", "tree-sitter-php", "tree-sitter-python", "tree-sitter-rust", @@ -5383,6 +5384,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-php" version = "0.24.2" diff --git a/Cargo.toml b/Cargo.toml index 62cb5cf9..2f77d7eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,7 @@ serde_json5 = "0.2.1" tree-sitter-swift = "0.7.2" tree-sitter-lua = "0.5.0" tree-sitter-clojure-orchard = "0.2.5" +tree-sitter-nix = "0.3.0" glob = "0.3.3" async-trait = "0.1.89" reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } diff --git a/contributing/parsers/grammar-versions.lock b/contributing/parsers/grammar-versions.lock index 34557889..85da42b5 100644 --- a/contributing/parsers/grammar-versions.lock +++ b/contributing/parsers/grammar-versions.lock @@ -74,6 +74,12 @@ "updated": "2025-07-27T12:20:37-07:00", "abi_version": "15" }, + "nix": { + "repo": "https://github.com/nix-community/tree-sitter-nix", + "version": "0.3.0", + "updated": "2026-05-21", + "abi_version": "15" + }, "typescript": { "repo": "https://github.com/tree-sitter/tree-sitter-typescript", "commit": "75b3874edb2dc714fb1fd77a32013d0f8699989f", diff --git a/contributing/parsers/nix/AUDIT_REPORT.md b/contributing/parsers/nix/AUDIT_REPORT.md new file mode 100644 index 00000000..b7f2c12f --- /dev/null +++ b/contributing/parsers/nix/AUDIT_REPORT.md @@ -0,0 +1,27 @@ +# Nix Parser Symbol Extraction Coverage Report + +## Summary +- Key nodes: 18/18 (100%) +- Total grammar 
nodes: 63 +- Total implemented: 56 +- Symbol kinds extracted: {"Variable", "Parameter", "Function"} + +## Key Nodes Coverage +- [✓] source_code +- [✓] binding +- [✓] attrset_expression +- [✓] rec_attrset_expression +- [✓] let_expression +- [✓] function_expression +- [✓] formals +- [✓] formal +- [✓] inherit +- [✓] inherit_from +- [✓] apply_expression +- [✓] select_expression +- [✓] attrpath +- [✓] identifier +- [✓] if_expression +- [✓] assert_expression +- [✓] with_expression +- [✓] comment diff --git a/contributing/parsers/nix/GRAMMAR_ANALYSIS.md b/contributing/parsers/nix/GRAMMAR_ANALYSIS.md new file mode 100644 index 00000000..1511b9c4 --- /dev/null +++ b/contributing/parsers/nix/GRAMMAR_ANALYSIS.md @@ -0,0 +1,90 @@ +# Nix Grammar Analysis + +*Generated: 2026-05-21 12:23:48 UTC* + +## Statistics +- Total nodes in grammar JSON: 36 +- Nodes found in comprehensive.nix: 63 +- Nodes handled by parser: 56 +- Symbol kinds extracted: 3 + +## Successfully Handled Nodes +These nodes are in examples and handled by parser: +- != +- " +- ${ +- '' +- ( +- ) +- * +- + +- . +- / +- ; +- < +- == +- [ +- ] +- apply_expression +- assert +- assert_expression +- attrpath +- attrset_expression +- binary_expression +- binding +- binding_set +- comment +- else +- formal +- formals +- function_expression +- identifier +- if +- if_expression +- in +- indented_string_expression +- inherit +- inherit_from +- integer_expression +- interpolation +- let +- let_expression +- list_expression +- parenthesized_expression +- path_expression +- path_fragment +- rec +- rec_attrset_expression +- select_expression +- source_code +- spath_expression +- string_expression +- string_fragment +- then +- variable_expression +- with +- with_expression +- { +- } + +## Implementation Gaps +These nodes appear in comprehensive.nix but aren't handled: +- , +- : +- = +- ? 
+- @ +- ellipses +- inherited_attrs + +## Missing from Examples +These grammar nodes aren't in comprehensive.nix: +- float_expression +- has_attr_expression +- unary_expression + +## Symbol Kinds Extracted +- Function +- Parameter +- Variable + diff --git a/contributing/parsers/nix/IMPLEMENTATION_PLAN.md b/contributing/parsers/nix/IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..508d480d --- /dev/null +++ b/contributing/parsers/nix/IMPLEMENTATION_PLAN.md @@ -0,0 +1,313 @@ +# Nix Language Parser — Implementation Plan + +> Status: **planning** · Branch: `feature/parser-for-nix-lang` · Target grammar: `tree-sitter-nix 0.3.0` +> +> This document is the execution plan for adding Nix expression-language support to +> codanna. It follows the conventions in +> [`contributing/development/language-support.md`](../../development/language-support.md) +> and uses **Lua** as the closest reference parser (dynamic, no traits, attrset/scope-centric). + +--- + +## 1. Compatibility verdict + +| Item | Value | +|---|---| +| Grammar crate | `tree-sitter-nix = "0.3.0"` (nix-community, Jul 2025) | +| Binding style | modern `LANGUAGE: tree_sitter_language::LanguageFn` (ABI-14/15) | +| Core dep | `tree-sitter-language = "0.1.0"` (no direct `tree-sitter` dep) | +| codanna core | `tree-sitter 0.26.9` — **compatible** | +| Wiring | identical to Lua: `tree_sitter_nix::LANGUAGE.into()` → `parser.set_language(&lang)` | + +No version conflict: the grammar exposes the same `LanguageFn` constant codanna already +consumes for Lua/Clojure/etc. + +--- + +## 2. Module + wiring map + +Six **new** files in `src/parsing/nix/` plus a small set of **existing** files to edit. +`config.rs` is intentionally NOT edited — `generate_language_defaults()` auto-populates from +the registry. + +```mermaid +flowchart LR + subgraph NEW["NEW — src/parsing/nix/ (6 files)"] + direction TB + DEF["definition.rs
LanguageDefinition"] + PAR["parser.rs
LanguageParser + NodeTracker"] + BEH["behavior.rs
LanguageBehavior"] + RES["resolution.rs
ResolutionScope"] + AUD["audit.rs
ABI-15 coverage"] + MOD["mod.rs
re-exports + register"] + end + + subgraph EDIT["EDIT — existing files"] + direction TB + CARGO["Cargo.toml
+ tree-sitter-nix = 0.3.0"] + PMOD["parsing/mod.rs
pub mod nix; pub use ..."] + REG["parsing/registry.rs
initialize_registry()
+ Deserialize match arm"] + LANG["parsing/language.rs
enum Language::Nix
+ 6 match arms"] + TESTS["tests/parsers_tests.rs
gateway #path entries"] + LOCK["parsers/grammar-versions.lock
nix entry (optional)"] + end + + subgraph SKIP["NO EDIT NEEDED"] + CFG["config.rs
auto from registry.iter_all()"] + end + + MOD --> PMOD + DEF --> REG + PAR --> LANG + DEF -. "tree_sitter_nix::LANGUAGE" .-> CARGO + AUD --> TESTS + REG -. registers .-> MOD + CFG -. reads .-> REG +``` + +> `language.rs` is **required**, not optional: `Language::from_extension` calls +> `from_language_id("nix")`; without the `Nix` arm it returns `None` and `.nix` files are +> never detected. + +--- + +## 3. Trait architecture + +The four traits each new file implements, and the shared types they touch. + +```mermaid +classDiagram + class LanguageDefinition { + <> + +id() LanguageId + +name() str + +extensions() slice + +create_parser(settings) LanguageParser + +create_behavior() LanguageBehavior + +default_enabled() bool + } + class LanguageParser { + <> + +parse(code, file_id, counter) Vec~Symbol~ + +find_calls(code) Vec + +find_imports(code, file_id) Vec~Import~ + +extract_doc_comment(node, code) Option + +language() Language + +as_any() Any + } + class LanguageBehavior { + <> + +module_separator() str + +parse_visibility(sig) Visibility + +supports_traits() bool + +get_language() TsLanguage + +create_resolution_context(file_id) ResolutionScope + } + class ResolutionScope { + <> + +resolve(name) Option~SymbolId~ + +add_symbol(name, id, level) + +enter_scope(kind) + +exit_scope() + } + + class NixLanguage + class NixParser + class NixBehavior + class NixResolutionContext + class GenericInheritanceResolver + + NixLanguage ..|> LanguageDefinition + NixParser ..|> LanguageParser + NixBehavior ..|> LanguageBehavior + NixResolutionContext ..|> ResolutionScope + + NixLanguage --> NixParser : creates + NixLanguage --> NixBehavior : creates + NixBehavior --> NixResolutionContext : creates + NixBehavior --> GenericInheritanceResolver : no traits, reuse no-op +``` + +--- + +## 4. Nix → codanna symbol mapping + +Node names use the `_expression` suffix convention of tree-sitter-nix and **must be confirmed +in Phase 0** (AST discovery). 
+ +| Nix construct | tree-sitter-nix node (confirm) | SymbolKind | Notes | +|---|---|---|---| +| `.nix` file | root (`source_code`) | Module | file-based path, separator `.` | +| binding whose value is a lambda | `binding` + `function_expression` | **Function** | key heuristic | +| binding with non-lambda value | `binding` | Variable / Constant | literal RHS → Constant | +| returned attrset keys | `attrset_expression` → `binding` | Field | Public | +| `rec { ... }` attrs | `rec_attrset_expression` | Field | self-referential scope | +| lambda params `{ a, b ? d, ... }:` | `formals` / `formal` | Parameter | `@`-pattern binds whole set | +| `inherit a;` / `inherit (src) a;` | `inherit` / `inherit_from` | Variable (+ref to src) | one node → many bindings | +| `import ./x.nix`, `<nixpkgs>` | `apply_expression` + `path_expression` / `spath_expression` | Import | dynamic interpolation = best-effort | +| `f x`, `callPackage ./p.nix {}` | `apply_expression` | call relationship | resolve `function` field | +| `a.b.c` | `select_expression` + `attrpath` | reference | def-vs-ref by position | + +**Visibility:** no keywords → `let`/`formals` = `Private`; returned attrset attrs = `Public`. +**Traits/inheritance:** none → `supports_traits() = false`, reuse `GenericInheritanceResolver`. + +--- + +## 5. Parser traversal logic + +`extract_symbols_from_node` decision flow (every visited node is registered with `NodeTracker` +to drive the audit report).
+ +```mermaid +flowchart TD + START(["visit node"]) --> REG["register node in NodeTracker"] + REG --> K{"node kind?"} + K -->|binding| B{"value is function_expression?"} + B -->|yes| FN["emit Function symbol"] + B -->|no| VAR["emit Variable or Constant"] + K -->|attrset or rec_attrset| ATTR["enter attrset scope, emit Field per binding"] + K -->|let_expression| LET["enter let scope, bindings are Private"] + K -->|function_expression| LAM["enter lambda scope, emit Parameter per formal"] + K -->|inherit or inherit_from| INH["emit one Variable per name, record ref to source"] + K -->|apply_expression| APP{"function is import?"} + APP -->|yes| IMP["record Import"] + APP -->|no| CALL["record call relationship"] + K -->|select_expression| SEL["record attrpath reference"] + K -->|ERROR| ERR["do not skip, recurse into children"] + K -->|other| OTH["pass through"] + FN --> CH["recurse children"] + VAR --> CH + ATTR --> CH + LET --> CH + LAM --> CH + INH --> CH + IMP --> CH + CALL --> CH + SEL --> CH + ERR --> CH + OTH --> CH + CH --> EXIT["exit scope if entered"] + EXIT --> DONE(["return"]) +``` + +--- + +## 6. Scope resolution order + +```mermaid +flowchart TD + Q(["resolve identifier"]) --> L{"in let or formals local scope?"} + L -->|yes| HIT(["resolved"]) + L -->|no| R{"in enclosing rec attrset?"} + R -->|yes| HIT + R -->|no| W{"in a with namespace?"} + W -->|maybe| WUN["mark UNRESOLVED, with is context-sensitive"] + W -->|no| T{"file top-level binding?"} + T -->|yes| HIT + T -->|no| I{"imported symbol?"} + I -->|yes| HIT + I -->|no| MISS(["unresolved"]) + WUN --> MISS +``` + +> `with expr;` cannot be resolved statically (its bindings depend on a runtime value, and it +> never shadows other bindings). Treat such references as unresolved — a documented limitation, +> same class codanna already tolerates for dynamic dispatch. + +--- + +## 7. Execution phases + +```mermaid +flowchart LR + P0["Phase 0
AST discovery
comprehensive.nix
+ explore test"] + P1["Phase 1
scaffold + dep
6 files from Lua"] + P2["Phase 2
parser.rs
symbol extraction"] + P3["Phase 3
behavior +
resolution"] + P4["Phase 4
register
mod/registry/language"] + P5["Phase 5
tests + audit
>70% coverage"] + P6["Phase 6
e2e verify
clippy/fmt/docs"] + P0 --> P1 --> P2 --> P3 --> P4 --> P5 --> P6 +``` + +All commands run in the flake (`nix develop -c ...`). + +### Phase 0 — AST node discovery (do first; do not guess node names) +- Write `examples/nix/comprehensive.nix` covering: lambdas, `let`, `rec`, `inherit` / + `inherit (x)`, `with`, `if`, `assert`, attrsets, lists, `import`, `<nixpkgs>`, string + interpolation, paths, `a.b.c`, `@`-patterns, defaults, `...`. +- Add a throwaway `explore_nix_abi15` test that loads `tree_sitter_nix::LANGUAGE`, parses it, + and prints `node.kind()` + `kind_id()` (reuse `discover_nodes` from `lua/audit.rs`). + `nix develop -c cargo test explore_nix_abi15 -- --nocapture` +- Record findings → `contributing/parsers/nix/NODE_MAPPING.md` + `node-types.json`. +- Alternative: add `pkgs.tree-sitter` + `pkgs.nodejs` to the flake devShell and use + `contributing/tree-sitter/scripts/`. + +### Phase 1 — Scaffold + dependency +- `Cargo.toml` += `tree-sitter-nix = "0.3.0"` (after the `tree-sitter-clojure-orchard` line). +- `mkdir src/parsing/nix`, copy the six `lua/*.rs` files as skeletons, rename + `Lua`→`Nix`, `tree_sitter_lua`→`tree_sitter_nix`. +- Implementation order: `definition.rs` → `parser.rs` → `behavior.rs` → `resolution.rs` → + `audit.rs` → `mod.rs`. + +### Phase 2 — parser.rs +- `extract_symbols_from_node` per the traversal diagram. Minimum methods: `parse`, + `find_calls`, `find_imports`, `extract_doc_comment`, `as_any`, `language()` → `Language::Nix`. +- Register every node in `NodeTracker`. Handle `ERROR` by recursing. Zero-copy slices. + +### Phase 3 — behavior.rs + resolution.rs +- behavior: `module_separator() = "."`, file-based `module_path_from_file`, + `parse_visibility` (Public default), `supports_traits() = false`, + `get_language()` = `tree_sitter_nix::LANGUAGE.into()`. +- resolution: `NixResolutionContext` with the scope order above; `with` → unresolved.
+ +### Phase 4 — registration +- `src/parsing/mod.rs`: `pub mod nix;` + `pub use nix::{NixBehavior, NixParser};` +- `src/parsing/registry.rs`: `super::nix::register(registry);` in `initialize_registry()`, + and `"nix" => "nix",` in the `Deserialize` match. +- `src/parsing/language.rs`: `Nix` variant + arms in `to_language_id`, `from_language_id`, + `from_extension` fallback, `extensions`, `config_key`, `name` (extension `"nix"`). +- Decide `default_enabled()` — **recommend `false` during dev, flip to `true` at release**. + +### Phase 5 — tests + audit +- `audit.rs` (copy Lua, swap key-node list). +- `tests/parsers/nix/` + gateway `#[path = "parsers/nix/..."]` in `tests/parsers_tests.rs`. +- `nix develop -c cargo test nix` +- `nix develop -c cargo test audit_nix -- --nocapture` (target >70% key-node coverage) + +### Phase 6 — end-to-end verify + polish +``` +nix develop -c cargo build +nix develop -c cargo clippy --fix +nix develop -c cargo fmt +nix develop -c bash -c 'cargo run -- init && cargo run -- index . && cargo run -- retrieve search "mkDerivation"' +``` +- Update supported-list in `language-support.md` and add the `nix` entry to + `grammar-versions.lock`. + +--- + +## 8. Risks & open decisions + +| Item | Disposition | +|---|---| +| `with expr;` scoping | statically unresolvable → documented limitation, do not block | +| string/path interpolation `${...}` | extract inner identifier as ref; dynamic target best-effort | +| `attrpath` def vs ref | disambiguate by binding (LHS) vs expression (RHS) position | +| flake lacks `tree-sitter` CLI | use Rust exploration test (Phase 0), or add `tree-sitter`+`nodejs` to flake | +| `default_enabled` true/false at merge | **open** — recommend `false` until audit coverage is solid | + +--- + +## 9. 
Touch-point checklist + +- [ ] `Cargo.toml` — `tree-sitter-nix = "0.3.0"` +- [ ] `src/parsing/nix/{mod,definition,parser,behavior,resolution,audit}.rs` +- [ ] `src/parsing/mod.rs` — module + re-export +- [ ] `src/parsing/registry.rs` — `initialize_registry()` + `Deserialize` arm +- [ ] `src/parsing/language.rs` — `Language::Nix` + 6 match arms +- [ ] `examples/nix/comprehensive.nix` (+ `main.nix`) +- [ ] `tests/parsers/nix/` + `tests/parsers_tests.rs` gateway +- [ ] `contributing/parsers/nix/NODE_MAPPING.md` + `node-types.json` +- [ ] `contributing/parsers/grammar-versions.lock` — nix entry +- [ ] `config.rs` — **no edit** (auto from registry) diff --git a/contributing/parsers/nix/NODE_MAPPING.md b/contributing/parsers/nix/NODE_MAPPING.md new file mode 100644 index 00000000..6ec2703a --- /dev/null +++ b/contributing/parsers/nix/NODE_MAPPING.md @@ -0,0 +1,88 @@ +# Nix AST Node Mapping + +Discovered from `tree-sitter-nix = "0.3.0"` via `explore_nix_abi15` test (Phase 0). +63 distinct node kinds observed in `examples/nix/comprehensive.nix`.
+ +## Root + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `source_code` | 62 | root; has single `expression:` field | + +## Bindings + +| tree-sitter node | ID | field names | codanna symbol | +|---|---|---|---| +| `binding_set` | 91 | `binding:` (multiple) | container; recurse | +| `binding` | 92 | `attrpath:`, `expression:` | Function / Variable / Constant depending on RHS | +| `attrpath` | 95 | `attr:` (identifier) | key of binding | + +## Attrsets + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `attrset_expression` | 86 | enter Class scope, recurse | +| `rec_attrset_expression` | 88 | enter Class scope (self-referential), recurse | + +## Let + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `let_expression` | 73 | enter Block scope; bindings are Private | + +## Functions / Lambdas + +| tree-sitter node | ID | field names | codanna symbol | +|---|---|---|---| +| `function_expression` | 68 | `universal:` (simple `x:`) OR `formals:` + `body:` | enter Function scope | +| `formals` | 69 | `formal:` (multiple) | iterate for parameters | +| `formal` | 70 | `name:` (identifier), `default:` (optional expr) | Parameter | + +> **`universal`** is the field name for a simple single-identifier lambda parameter (`x: body`). +> **`formals`** is the field name for destructuring pattern (`{ a, b ? 1, ... }:`). +> The `@`-pattern sibling identifier appears at the `function_expression` level as an unnamed child. 
+ +## Inherit + +| tree-sitter node | ID | codanna symbol | +|---|---|---| +| `inherit` | 50 | Variable per name in `inherited_attrs` | +| `inherit_from` | 94 | Variable per name in `inherited_attrs` (source in parentheses) | +| `inherited_attrs` | 96 | container for the names | + +## Control flow / other expressions + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `apply_expression` | 81 | `function:` + `argument:` — recurse; detect `import` calls | +| `select_expression` | 83 | `expression:` + `attrpath:` (+ optional `default:`) — recurse | +| `if_expression` | 75 | recurse | +| `assert_expression` | 71 | recurse | +| `with_expression` | 72 | recurse (bindings statically unresolvable) | +| `let_expression` | 73 | enter Block scope | +| `binary_expression` | 79 | recurse | +| `parenthesized_expression` | 85 | recurse | +| `list_expression` | 99 | recurse | + +## Literals / leaf nodes + +| tree-sitter node | ID | notes | +|---|---|---| +| `identifier` | 2 | bare name (used inside attrpath, formal, etc.) | +| `variable_expression` | 64 | wraps `identifier` in expression position; has `name:` field | +| `integer_expression` | 3 | literal integer | +| `string_expression` | 89 | `"..."` string | +| `indented_string_expression` | 90 | `''...''` multiline string | +| `path_expression` | 65 | `/absolute/path` | +| `path_fragment` | 59 | segment inside a path | +| `spath_expression` | 6 | `<nixpkgs>` angle-bracket path | +| `interpolation` | 98 | `${...}` inside strings | +| `string_fragment` | 56 | plain text inside a string | +| `comment` | 55 | `# ...` comment | +| `ellipses` | 14 | `...` in formals | + +## Known limitations + +- `with expr;` bindings are statically unresolvable; references inside `with_expression` are left unresolved. +- String interpolation `${...}` paths in imports are treated as best-effort (raw text recorded). +- Complex `attrpath` bindings like `a.b.c = ...` are not emitted as symbols (skipped).
diff --git a/contributing/parsers/nix/node-types.json b/contributing/parsers/nix/node-types.json new file mode 100644 index 00000000..6432314c --- /dev/null +++ b/contributing/parsers/nix/node-types.json @@ -0,0 +1,111 @@ +[ + { + "type": "source_code", + "named": true, + "fields": { + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "binding", + "named": true, + "fields": { + "attrpath": { "multiple": false, "required": true, "types": [{ "type": "attrpath", "named": true }] }, + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "binding_set", "named": true }, + { + "type": "attrpath", + "named": true, + "fields": { + "attr": { "multiple": true, "required": true, "types": [{ "type": "identifier", "named": true }, { "type": "string_expression", "named": true }, { "type": "interpolation", "named": true }] } + } + }, + { "type": "attrset_expression", "named": true }, + { "type": "rec_attrset_expression", "named": true }, + { + "type": "let_expression", + "named": true, + "fields": { + "body": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "function_expression", + "named": true, + "fields": { + "universal": { "multiple": false, "required": false, "types": [{ "type": "identifier", "named": true }] }, + "formals": { "multiple": false, "required": false, "types": [{ "type": "formals", "named": true }] }, + "body": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "formals", + "named": true, + "children": { "multiple": true, "required": false, "types": [{ "type": "formal", "named": true }] } + }, + { + "type": "formal", + "named": true, + "fields": { + "name": { "multiple": false, "required": true, "types": [{ "type": "identifier", "named": true }] }, + "default": { "multiple": false, "required": false, 
"types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "apply_expression", + "named": true, + "fields": { + "function": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "argument": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "select_expression", + "named": true, + "fields": { + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "attrpath": { "multiple": false, "required": true, "types": [{ "type": "attrpath", "named": true }] }, + "default": { "multiple": false, "required": false, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "inherit", "named": true }, + { "type": "inherit_from", "named": true }, + { "type": "inherited_attrs", "named": true }, + { + "type": "if_expression", + "named": true, + "fields": { + "condition": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "consequence": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "alternative": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "assert_expression", "named": true }, + { "type": "with_expression", "named": true }, + { "type": "binary_expression", "named": true }, + { "type": "unary_expression", "named": true }, + { "type": "has_attr_expression", "named": true }, + { "type": "parenthesized_expression", "named": true }, + { "type": "list_expression", "named": true }, + { + "type": "variable_expression", + "named": true, + "fields": { + "name": { "multiple": false, "required": true, "types": [{ "type": "identifier", "named": true }] } + } + }, + { "type": "identifier", "named": true }, + { "type": "integer_expression", "named": true }, + { "type": "float_expression", "named": true }, + { "type": "string_expression", "named": true }, + { "type": 
"indented_string_expression", "named": true }, + { "type": "path_expression", "named": true }, + { "type": "spath_expression", "named": true }, + { "type": "interpolation", "named": true }, + { "type": "string_fragment", "named": true }, + { "type": "path_fragment", "named": true }, + { "type": "comment", "named": true }, + { "type": "ellipses", "named": true } +] diff --git a/contributing/parsers/nix/node_discovery.txt b/contributing/parsers/nix/node_discovery.txt new file mode 100644 index 00000000..098f1b6e --- /dev/null +++ b/contributing/parsers/nix/node_discovery.txt @@ -0,0 +1,87 @@ +=== Nix Language NODE MAPPING === + Generated: 2026-05-21 12:23:48 UTC + ABI Version: 13 + Node kind count: 63 + +=== ROOT NODES === + + source_code -> ID: 62 + +=== BINDING NODES === + + binding_set -> ID: 91 + + binding -> ID: 92 + + attrpath -> ID: 95 + + identifier -> ID: 2 + +=== ATTRSET NODES === + + attrset_expression -> ID: 86 + + rec_attrset_expression -> ID: 88 + +=== FUNCTION NODES === + + function_expression -> ID: 68 + + formals -> ID: 69 + + formal -> ID: 70 + + apply_expression -> ID: 81 + +=== SCOPE NODES === + + let_expression -> ID: 73 + + with_expression -> ID: 72 + + inherit -> ID: 50 + + inherit_from -> ID: 94 + + inherited_attrs -> ID: 96 + +=== EXPRESSION NODES === + + if_expression -> ID: 75 + + assert_expression -> ID: 71 + + select_expression -> ID: 83 + + binary_expression -> ID: 79 + + parenthesized_expression -> ID: 85 + + list_expression -> ID: 99 + + variable_expression -> ID: 64 + +=== LITERAL NODES === + + integer_expression -> ID: 3 + + string_expression -> ID: 89 + + indented_string_expression -> ID: 90 + + path_expression -> ID: 65 + + spath_expression -> ID: 6 + + interpolation -> ID: 98 + +=== COMMENT NODES === + + comment -> ID: 55 + +=== UNCATEGORIZED NODES === + + != -> ID: 26 + + " -> ID: 44 + + ${ -> ID: 51 + + '' -> ID: 47 + + ( -> ID: 41 + + ) -> ID: 42 + + * -> ID: 34 + + + -> ID: 33 + + , -> ID: 12 + + . 
-> ID: 39 + + / -> ID: 35 + + : -> ID: 8 + + ; -> ID: 16 + + < -> ID: 27 + + = -> ID: 49 + + == -> ID: 25 + + ? -> ID: 13 + + @ -> ID: 9 + + [ -> ID: 53 + + ] -> ID: 54 + + assert -> ID: 15 + + ellipses -> ID: 14 + + else -> ID: 22 + + if -> ID: 20 + + in -> ID: 19 + + let -> ID: 18 + + path_fragment -> ID: 59 + + rec -> ID: 43 + + string_fragment -> ID: 56 + + then -> ID: 21 + + with -> ID: 17 + + { -> ID: 10 + + } -> ID: 11 + +Legend: + = found in file, o = in grammar but not in file, x = not in grammar diff --git a/examples/nix/comprehensive.nix b/examples/nix/comprehensive.nix new file mode 100644 index 00000000..e7400d43 --- /dev/null +++ b/examples/nix/comprehensive.nix @@ -0,0 +1,127 @@ +# Comprehensive Nix example — one attrset at the top level (valid Nix) +{ + # --- simple lambda --- + identity = x: x; + + # --- nested lambda (curried) --- + add = a: b: a + b; + + # --- attrset binding --- + config = { + host = "localhost"; + port = 8080; + debug = false; + }; + + # --- rec attrset (self-referential) --- + defaults = rec { + base = "/var/lib"; + data = "${base}/data"; + logs = "${base}/logs"; + }; + + # --- let expression --- + result = + let + x = 10; + y = 20; + inner = v: v * 2; + in + inner x + y; + + # --- formals with defaults and ellipsis --- + mkService = { name, port ? 8080, debug ? false, ... }: { + inherit name port debug; + description = "Service: ${name}"; + }; + + # --- @ pattern (bind whole set + named fields) --- + mkDerivation = args @ { name, src, buildInputs ? [], ... 
}: + derivation { + inherit name src buildInputs; + system = builtins.currentSystem; + builder = "/bin/sh"; + }; + + # --- inherit from source --- + pkgAttrs = + let pkgs = import {}; + in { + inherit (pkgs) stdenv fetchurl; + lib = pkgs.lib; + }; + + # --- with expression --- + withExample = with builtins; [ + (toString 42) + (typeOf "hello") + (length [ 1 2 3 ]) + ]; + + # --- if expression --- + classify = n: + if n < 0 then "negative" + else if n == 0 then "zero" + else "positive"; + + # --- assert --- + safeDivide = a: b: + assert b != 0; + a / b; + + # --- import with path --- + nixpkgsLib = import ; + + # --- select expression (attrpath) --- + version = builtins.currentSystem; + + # --- list --- + items = [ 1 2 3 "four" true ]; + + # --- string interpolation --- + greeting = name: "Hello, ${name}!"; + + # --- multiline string --- + script = '' + #!/bin/bash + echo "hello" + exit 0 + ''; + + # --- path expression --- + configPath = /etc/nixos/configuration.nix; + + # --- spath (angle-bracket path) --- + nixpkgsPath = ; + + # --- inherit without source --- + passThrough = { a, b, c }: { + inherit a b c; + }; + + # --- nested attrset access (select_expression) --- + deep = { + a = { + b = { + c = 42; + }; + }; + }; + + # --- function as attrset value (common nixpkgs pattern) --- + lib = { + mkOption = { type, default, description ? 
"" }: { + _type = "option"; + inherit type default description; + }; + + types = rec { + str = { name = "str"; check = builtins.isString; }; + int = { name = "int"; check = builtins.isInt; }; + listOf = element: { + name = "listOf"; + check = x: builtins.isList x; + }; + }; + }; +} diff --git a/examples/nix/main.nix b/examples/nix/main.nix new file mode 100644 index 00000000..c9ce6789 --- /dev/null +++ b/examples/nix/main.nix @@ -0,0 +1,26 @@ +# Entry point example — a minimal NixOS/nixpkgs-style package set +let + pkgs = import {}; +in +{ + # A simple derivation + hello = pkgs.stdenv.mkDerivation { + name = "hello-1.0"; + src = ./src; + buildPhase = '' + gcc -o hello main.c + ''; + installPhase = '' + mkdir -p $out/bin + cp hello $out/bin/ + ''; + }; + + # A shell for development + devShell = pkgs.mkShell { + buildInputs = [ pkgs.gcc pkgs.gnumake ]; + shellHook = '' + echo "Dev shell ready" + ''; + }; +} diff --git a/flake.lock b/flake.lock index fe873248..5feb4391 100644 --- a/flake.lock +++ b/flake.lock @@ -109,11 +109,11 @@ ] }, "locked": { - "lastModified": 1774062094, - "narHash": "sha256-ba3c+hS7KzEiwtZRGHagIAYdcmdY3rCSWVCyn64rx7s=", + "lastModified": 1779333539, + "narHash": "sha256-lpmN2lrBDZDPjov2cbD3bOOJsI0fkKolKXasYPCqSys=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "c807e83cc2e32adc35f51138b3bdef722c0812ab", + "rev": "672fa5fc5608d5cd82286a6f69aaf84a40b4fe41", "type": "github" }, "original": { diff --git a/src/io/parse.rs b/src/io/parse.rs index b059d337..89c342ef 100644 --- a/src/io/parse.rs +++ b/src/io/parse.rs @@ -264,6 +264,7 @@ pub fn execute_parse( Language::Java => tree_sitter_java::LANGUAGE.into(), Language::Kotlin => tree_sitter_kotlin::language(), Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::Nix => tree_sitter_nix::LANGUAGE.into(), Language::Swift => tree_sitter_swift::LANGUAGE.into(), }; diff --git a/src/parsing/factory.rs b/src/parsing/factory.rs index d35037a3..6678694f 100644 --- a/src/parsing/factory.rs +++ 
b/src/parsing/factory.rs @@ -7,9 +7,9 @@ use super::{ CBehavior, CParser, CSharpBehavior, CSharpParser, ClojureBehavior, ClojureParser, CppBehavior, CppParser, GdscriptBehavior, GdscriptParser, GoBehavior, GoParser, JavaBehavior, JavaParser, JavaScriptBehavior, JavaScriptParser, KotlinBehavior, KotlinParser, Language, LanguageBehavior, - LanguageId, LanguageParser, LuaBehavior, LuaParser, PhpBehavior, PhpParser, PythonBehavior, - PythonParser, RustBehavior, RustParser, SwiftBehavior, SwiftParser, TypeScriptBehavior, - TypeScriptParser, get_registry, + LanguageId, LanguageParser, LuaBehavior, LuaParser, NixBehavior, NixParser, PhpBehavior, + PhpParser, PythonBehavior, PythonParser, RustBehavior, RustParser, SwiftBehavior, SwiftParser, + TypeScriptBehavior, TypeScriptParser, get_registry, }; use crate::{IndexError, IndexResult, Settings}; use std::sync::Arc; @@ -186,6 +186,10 @@ impl ParserFactory { let parser = LuaParser::new().map_err(|e| IndexError::General(e.to_string()))?; Ok(Box::new(parser)) } + Language::Nix => { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + Ok(Box::new(parser)) + } Language::Swift => { let parser = SwiftParser::new().map_err(|e| IndexError::General(e.to_string()))?; Ok(Box::new(parser)) @@ -329,6 +333,13 @@ impl ParserFactory { behavior: Box::new(LuaBehavior::new()), } } + Language::Nix => { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + ParserWithBehavior { + parser: Box::new(parser), + behavior: Box::new(NixBehavior::new()), + } + } Language::Swift => { let parser = SwiftParser::new().map_err(|e| IndexError::General(e.to_string()))?; ParserWithBehavior { @@ -373,6 +384,7 @@ impl ParserFactory { Language::JavaScript, Language::Kotlin, Language::Lua, + Language::Nix, Language::Php, Language::Python, Language::Rust, diff --git a/src/parsing/language.rs b/src/parsing/language.rs index e0d85092..23471fc8 100644 --- a/src/parsing/language.rs +++ b/src/parsing/language.rs 
@@ -22,6 +22,7 @@ pub enum Language { Java, Kotlin, Lua, + Nix, Swift, } @@ -47,6 +48,7 @@ impl Language { Language::Java => super::LanguageId::new("java"), Language::Kotlin => super::LanguageId::new("kotlin"), Language::Lua => super::LanguageId::new("lua"), + Language::Nix => super::LanguageId::new("nix"), Language::Swift => super::LanguageId::new("swift"), } } @@ -110,6 +112,7 @@ impl Language { "java" => Some(Language::Java), "kt" | "kts" => Some(Language::Kotlin), "lua" => Some(Language::Lua), + "nix" => Some(Language::Nix), "swift" => Some(Language::Swift), _ => None, } @@ -141,6 +144,7 @@ impl Language { Language::Java => &["java"], Language::Kotlin => &["kt", "kts"], Language::Lua => &["lua"], + Language::Nix => &["nix"], Language::Swift => &["swift"], } } @@ -162,6 +166,7 @@ impl Language { Language::Java => "java", Language::Kotlin => "kotlin", Language::Lua => "lua", + Language::Nix => "nix", Language::Swift => "swift", } } @@ -183,6 +188,7 @@ impl Language { Language::Java => "Java", Language::Kotlin => "Kotlin", Language::Lua => "Lua", + Language::Nix => "Nix", Language::Swift => "Swift", } } diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 2837f68c..c3adc442 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -15,6 +15,7 @@ pub mod language; pub mod language_behavior; pub mod lua; pub mod method_call; +pub mod nix; pub mod parser; pub mod paths; pub mod php; @@ -43,6 +44,7 @@ pub use language_behavior::{ }; pub use lua::{LuaBehavior, LuaParser}; pub use method_call::{MethodCall, MethodCallResolver}; +pub use nix::{NixBehavior, NixParser}; pub use parser::{ HandledNode, LanguageParser, NodeTracker, NodeTrackingState, safe_substring_window, safe_truncate_str, truncate_for_display, diff --git a/src/parsing/nix/audit.rs b/src/parsing/nix/audit.rs new file mode 100644 index 00000000..0f345437 --- /dev/null +++ b/src/parsing/nix/audit.rs @@ -0,0 +1,169 @@ +use super::NixParser; +use crate::parsing::{LanguageParser, NodeTracker}; +use 
crate::types::FileId; +use std::collections::{HashMap, HashSet}; +use thiserror::Error; +use tree_sitter::{Node, Parser}; + +#[derive(Error, Debug)] +pub enum AuditError { + #[error("IO error: {0}")] + FileRead(#[from] std::io::Error), + #[error("Language setup error: {0}")] + LanguageSetup(String), + #[error("Parse failure")] + ParseFailure, + #[error("Parser creation error: {0}")] + ParserCreation(String), +} + +pub struct NixParserAudit { + pub grammar_nodes: HashMap, + pub implemented_nodes: HashSet, + pub extracted_symbol_kinds: HashSet, +} + +impl NixParserAudit { + pub fn audit_file(file_path: &str) -> Result { + let code = std::fs::read_to_string(file_path)?; + Self::audit_code(&code) + } + + pub fn audit_code(code: &str) -> Result { + let mut parser = Parser::new(); + let language = tree_sitter_nix::LANGUAGE.into(); + parser + .set_language(&language) + .map_err(|e| AuditError::LanguageSetup(e.to_string()))?; + + let tree = parser.parse(code, None).ok_or(AuditError::ParseFailure)?; + + let mut grammar_nodes = HashMap::new(); + discover_nodes(tree.root_node(), &mut grammar_nodes); + + let mut nix_parser = + NixParser::new().map_err(|e| AuditError::ParserCreation(e.to_string()))?; + let file_id = FileId::new(1).unwrap(); + let mut symbol_counter = crate::types::SymbolCounter::new(); + let symbols = nix_parser.parse(code, file_id, &mut symbol_counter); + + let mut extracted_symbol_kinds = HashSet::new(); + for symbol in &symbols { + extracted_symbol_kinds.insert(format!("{:?}", symbol.kind)); + } + + let implemented_nodes: HashSet = nix_parser + .get_handled_nodes() + .iter() + .map(|n| n.name.clone()) + .collect(); + + Ok(Self { + grammar_nodes, + implemented_nodes, + extracted_symbol_kinds, + }) + } + + pub fn generate_report(&self) -> String { + let key_nodes = vec![ + "source_code", + "binding", + "attrset_expression", + "rec_attrset_expression", + "let_expression", + "function_expression", + "formals", + "formal", + "inherit", + "inherit_from", + 
"apply_expression", + "select_expression", + "attrpath", + "identifier", + "if_expression", + "assert_expression", + "with_expression", + "comment", + ]; + + let key_implemented = key_nodes + .iter() + .filter(|n| self.implemented_nodes.contains(**n)) + .count(); + + let mut report = String::new(); + report.push_str("# Nix Parser Symbol Extraction Coverage Report\n\n"); + report.push_str("## Summary\n"); + report.push_str(&format!( + "- Key nodes: {}/{} ({:.0}%)\n", + key_implemented, + key_nodes.len(), + (key_implemented as f64 / key_nodes.len() as f64) * 100.0 + )); + report.push_str(&format!( + "- Total grammar nodes: {}\n", + self.grammar_nodes.len() + )); + report.push_str(&format!( + "- Total implemented: {}\n", + self.implemented_nodes.len() + )); + report.push_str(&format!( + "- Symbol kinds extracted: {:?}\n\n", + self.extracted_symbol_kinds + )); + + report.push_str("## Key Nodes Coverage\n"); + for node in &key_nodes { + let status = if self.implemented_nodes.contains(*node) { + "✓" + } else { + "✗" + }; + report.push_str(&format!("- [{status}] {node}\n")); + } + report + } +} + +pub fn discover_nodes(node: Node, registry: &mut HashMap) { + let mut stack = vec![node]; + while let Some(current) = stack.pop() { + registry.insert(current.kind().to_string(), current.kind_id()); + let mut cursor = current.walk(); + for child in current.children(&mut cursor) { + stack.push(child); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn audit_nix_comprehensive() { + let code = std::fs::read_to_string(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/nix/comprehensive.nix" + )) + .unwrap_or_else(|_| r#"{ x = 1; add = a: b: a + b; inherit x; }"#.to_string()); + + let audit = NixParserAudit::audit_code(&code).unwrap(); + let report = audit.generate_report(); + println!("{report}"); + + assert!( + !audit.grammar_nodes.is_empty(), + "Should have discovered grammar nodes" + ); + } + + #[test] + fn test_audit_simple_nix() { + let code = r#"{ x = 1; 
add = a: b: a + b; }"#; + let audit = NixParserAudit::audit_code(code).unwrap(); + assert!(audit.grammar_nodes.contains_key("binding")); + } +} diff --git a/src/parsing/nix/behavior.rs b/src/parsing/nix/behavior.rs new file mode 100644 index 00000000..6c943b9a --- /dev/null +++ b/src/parsing/nix/behavior.rs @@ -0,0 +1,224 @@ +use crate::Visibility; +use crate::parsing::LanguageBehavior; +use crate::parsing::behavior_state::{BehaviorState, StatefulBehavior}; +use crate::parsing::resolution::{InheritanceResolver, ResolutionScope}; +use crate::types::FileId; +use std::path::{Path, PathBuf}; +use tree_sitter::Language; + +use super::resolution::{NixInheritanceResolver, NixResolutionContext}; + +#[derive(Clone)] +pub struct NixBehavior { + state: BehaviorState, +} + +impl NixBehavior { + pub fn new() -> Self { + Self { + state: BehaviorState::new(), + } + } +} + +impl Default for NixBehavior { + fn default() -> Self { + Self::new() + } +} + +impl StatefulBehavior for NixBehavior { + fn state(&self) -> &BehaviorState { + &self.state + } +} + +impl LanguageBehavior for NixBehavior { + fn language_id(&self) -> crate::parsing::registry::LanguageId { + crate::parsing::registry::LanguageId::new("nix") + } + + fn format_module_path(&self, base_path: &str, _symbol_name: &str) -> String { + base_path.to_string() + } + + fn get_language(&self) -> Language { + tree_sitter_nix::LANGUAGE.into() + } + + fn module_separator(&self) -> &'static str { + "." + } + + fn format_path_as_module(&self, components: &[&str]) -> Option { + if components.is_empty() { + Some(".".to_string()) + } else { + Some(components.join(".")) + } + } + + fn module_path_from_file( + &self, + file_path: &Path, + project_root: &Path, + extensions: &[&str], + ) -> Option { + use crate::parsing::paths::strip_extension; + + let relative_path = if file_path.is_absolute() { + file_path.strip_prefix(project_root).ok()? 
+ } else { + file_path + }; + + let path = relative_path.to_str()?; + let path_clean = path.trim_start_matches("./"); + let module_path = strip_extension(path_clean, extensions); + let module_path = module_path.replace(['/', '\\'], "."); + + if module_path.is_empty() { + Some(".".to_string()) + } else { + Some(module_path) + } + } + + fn parse_visibility(&self, _signature: &str) -> Visibility { + // Nix has no visibility keywords — callers set Public/Private based on context. + Visibility::Public + } + + fn supports_traits(&self) -> bool { + false + } + + fn supports_inherent_methods(&self) -> bool { + false + } + + fn create_resolution_context(&self, file_id: FileId) -> Box { + Box::new(NixResolutionContext::new(file_id)) + } + + fn create_inheritance_resolver(&self) -> Box { + Box::new(NixInheritanceResolver::new()) + } + + fn inheritance_relation_name(&self) -> &'static str { + "extends" + } + + fn map_relationship(&self, language_specific: &str) -> crate::relationship::RelationKind { + use crate::relationship::RelationKind; + match language_specific { + "extends" => RelationKind::Extends, + "uses" => RelationKind::Uses, + "calls" => RelationKind::Calls, + "defines" => RelationKind::Defines, + _ => RelationKind::References, + } + } + + fn register_file(&self, path: PathBuf, file_id: FileId, module_path: String) { + self.register_file_with_state(path, file_id, module_path); + } + + fn add_import(&self, import: crate::parsing::Import) { + self.add_import_with_state(import); + } + + fn get_imports_for_file(&self, file_id: FileId) -> Vec { + self.get_imports_from_state(file_id) + } + + fn is_resolvable_symbol(&self, symbol: &crate::Symbol) -> bool { + use crate::SymbolKind; + use crate::symbol::ScopeContext; + + let module_level = matches!( + symbol.kind, + SymbolKind::Function + | SymbolKind::Class + | SymbolKind::Constant + | SymbolKind::Variable + | SymbolKind::Field + ); + if module_level { + return true; + } + + if let Some(ref scope_context) = 
symbol.scope_context { + match scope_context { + ScopeContext::Module | ScopeContext::Global | ScopeContext::Package => true, + ScopeContext::Local { .. } | ScopeContext::Parameter => false, + ScopeContext::ClassMember { .. } => { + matches!(symbol.visibility, Visibility::Public) + } + } + } else { + false + } + } + + fn get_module_path_for_file(&self, file_id: FileId) -> Option { + self.state.get_module_path(file_id) + } + + fn configure_symbol(&self, symbol: &mut crate::Symbol, module_path: Option<&str>) { + if let Some(path) = module_path { + symbol.module_path = Some(path.to_string().into()); + } + if symbol.module_path.is_none() { + symbol.module_path = Some(".".to_string().into()); + } + } + + fn import_matches_symbol( + &self, + import_path: &str, + symbol_module_path: &str, + _importing_module: Option<&str>, + ) -> bool { + if import_path == symbol_module_path { + return true; + } + let normalized = import_path.replace(['/', '\\'], "."); + normalized == symbol_module_path + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_module_separator() { + assert_eq!(NixBehavior::new().module_separator(), "."); + } + + #[test] + fn test_supports_traits() { + assert!(!NixBehavior::new().supports_traits()); + } + + #[test] + fn test_module_path_from_file() { + let behavior = NixBehavior::new(); + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + let file = root.join("pkgs/hello.nix"); + assert_eq!( + behavior.module_path_from_file(&file, root, &["nix"]), + Some("pkgs.hello".to_string()) + ); + + let file = root.join("default.nix"); + assert_eq!( + behavior.module_path_from_file(&file, root, &["nix"]), + Some("default".to_string()) + ); + } +} diff --git a/src/parsing/nix/definition.rs b/src/parsing/nix/definition.rs new file mode 100644 index 00000000..eab901f2 --- /dev/null +++ b/src/parsing/nix/definition.rs @@ -0,0 +1,89 @@ +use crate::parsing::{ + LanguageBehavior, LanguageDefinition, LanguageId, 
LanguageParser, LanguageRegistry, +}; +use crate::{IndexError, IndexResult, Settings}; +use std::sync::Arc; + +use super::{NixBehavior, NixParser}; + +pub struct NixLanguage; + +impl LanguageDefinition for NixLanguage { + fn id(&self) -> LanguageId { + LanguageId::new("nix") + } + + fn name(&self) -> &'static str { + "Nix" + } + + fn extensions(&self) -> &'static [&'static str] { + &["nix"] + } + + fn create_parser(&self, _settings: &Settings) -> IndexResult> { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + Ok(Box::new(parser)) + } + + fn create_behavior(&self) -> Box { + Box::new(NixBehavior::new()) + } + + fn default_enabled(&self) -> bool { + false + } + + fn is_enabled(&self, settings: &Settings) -> bool { + settings + .languages + .get(self.id().as_str()) + .map(|config| config.enabled) + .unwrap_or(self.default_enabled()) + } +} + +pub(crate) fn register(registry: &mut LanguageRegistry) { + registry.register(Arc::new(NixLanguage)); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nix_language_id() { + assert_eq!(NixLanguage.id(), LanguageId::new("nix")); + } + + #[test] + fn test_nix_language_name() { + assert_eq!(NixLanguage.name(), "Nix"); + } + + #[test] + fn test_nix_file_extensions() { + assert_eq!(NixLanguage.extensions(), &["nix"]); + } + + #[test] + fn test_nix_disabled_by_default() { + assert!(!NixLanguage.default_enabled()); + } + + #[test] + fn test_nix_parser_creation() { + let settings = Settings::default(); + let result = NixLanguage.create_parser(&settings); + assert!(result.is_ok()); + assert_eq!(result.unwrap().language(), crate::parsing::Language::Nix); + } + + #[test] + fn test_nix_language_registry_registration() { + use crate::parsing::LanguageRegistry; + let mut registry = LanguageRegistry::new(); + register(&mut registry); + assert!(registry.get(LanguageId::new("nix")).is_some()); + } +} diff --git a/src/parsing/nix/mod.rs b/src/parsing/nix/mod.rs new file mode 100644 index 
00000000..833d9f6c --- /dev/null +++ b/src/parsing/nix/mod.rs @@ -0,0 +1,11 @@ +pub mod audit; +mod behavior; +mod definition; +mod parser; +mod resolution; + +pub use behavior::NixBehavior; +pub use definition::NixLanguage; +pub(crate) use definition::register; +pub use parser::NixParser; +pub use resolution::{NixInheritanceResolver, NixResolutionContext}; diff --git a/src/parsing/nix/parser.rs b/src/parsing/nix/parser.rs new file mode 100644 index 00000000..f26c3a56 --- /dev/null +++ b/src/parsing/nix/parser.rs @@ -0,0 +1,743 @@ +use crate::parsing::parser::check_recursion_depth; +use crate::parsing::{ + HandledNode, Import, LanguageParser, NodeTracker, NodeTrackingState, ParserContext, ScopeType, +}; +use crate::types::SymbolCounter; +use crate::{FileId, Range, Symbol, SymbolKind, Visibility}; +use std::any::Any; +use tree_sitter::{Node, Parser}; + +pub struct NixParser { + parser: Parser, + context: ParserContext, + node_tracker: NodeTrackingState, +} + +fn range_from_node(node: &Node) -> Range { + let start = node.start_position(); + let end = node.end_position(); + Range::new( + start.row as u32, + start.column as u16, + end.row as u32, + end.column as u16, + ) +} + +impl NixParser { + pub fn new() -> Result { + let mut parser = Parser::new(); + let lang = tree_sitter_nix::LANGUAGE; + parser + .set_language(&lang.into()) + .map_err(|e| format!("Failed to set Nix language: {e}"))?; + + Ok(Self { + parser, + context: ParserContext::new(), + node_tracker: NodeTrackingState::new(), + }) + } + + fn create_symbol( + &self, + id: crate::types::SymbolId, + name: String, + kind: SymbolKind, + file_id: FileId, + range: Range, + signature: Option, + doc_comment: Option, + module_path: &str, + visibility: Visibility, + ) -> Symbol { + let mut symbol = Symbol::new(id, name, kind, file_id, range); + if let Some(sig) = signature { + symbol = symbol.with_signature(sig); + } + if let Some(doc) = doc_comment { + symbol = symbol.with_doc(doc); + } + if !module_path.is_empty() { 
+ symbol = symbol.with_module_path(module_path); + } + symbol = symbol.with_visibility(visibility); + symbol.scope_context = Some(self.context.current_scope_context()); + symbol + } + + fn node_text<'a>(&self, node: &Node, code: &'a str) -> &'a str { + &code[node.byte_range()] + } + + /// Check whether a binding's `expression` child is a function_expression (lambda). + fn value_is_function(node: Node) -> bool { + if let Some(expr) = node.child_by_field_name("expression") { + let kind = expr.kind(); + if kind == "function_expression" { + return true; + } + if kind == "parenthesized_expression" { + let mut cursor = expr.walk(); + for child in expr.children(&mut cursor) { + if child.kind() == "function_expression" { + return true; + } + } + } + } + false + } + + fn extract_symbols_from_node( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + if !check_recursion_depth(depth, node) { + return; + } + + self.node_tracker + .register_handled_node(node.kind(), node.kind_id()); + + match node.kind() { + // ── root ───────────────────────────────────────────────────────── + "source_code" => { + // source_code has field `expression:` pointing to the root expr + if let Some(expr) = node.child_by_field_name("expression") { + self.extract_symbols_from_node( + expr, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } else { + self.recurse_children( + node, + code, + file_id, + counter, + symbols, + module_path, + depth, + ); + } + } + + // ── binding_set (container inside attrsets / let) ──────────────── + "binding_set" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── binding ────────────────────────────────────────────────────── + "binding" => { + self.process_binding(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── attrset_expression 
──────────────────────────────────────────── + "attrset_expression" => { + self.context.enter_scope(ScopeType::Class); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── rec_attrset_expression ──────────────────────────────────────── + "rec_attrset_expression" => { + self.context.enter_scope(ScopeType::Class); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── let_expression ──────────────────────────────────────────────── + "let_expression" => { + self.context.enter_scope(ScopeType::Block); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── function_expression (lambda) ────────────────────────────────── + // Fields confirmed: `universal:` (simple `x:` param), `formals:`, `body:` + "function_expression" => { + self.context.enter_scope(ScopeType::hoisting_function()); + + if let Some(formals) = node.child_by_field_name("formals") { + self.process_formals(formals, code, file_id, counter, symbols, module_path); + } else if let Some(param) = node.child_by_field_name("universal") { + // simple `x: body` form — param is an identifier + if param.kind() == "identifier" { + let name = self.node_text(¶m, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(¶m), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + } + } + + if let Some(body) = node.child_by_field_name("body") { + self.extract_symbols_from_node( + body, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + self.context.exit_scope(); + } + + // ── inherit ─────────────────────────────────────────────────────── + // `inherit a b c;` — names inside `inherited_attrs` + "inherit" => { + let mut cursor = node.walk(); + for child in 
node.children(&mut cursor) { + if child.kind() == "inherited_attrs" { + let mut c2 = child.walk(); + for attr in child.children(&mut c2) { + if attr.kind() == "identifier" { + let name = self.node_text(&attr, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&attr), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } else if child.kind() == "identifier" { + // some grammar versions put identifiers directly + let name = self.node_text(&child, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&child), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } + + // ── inherit_from ────────────────────────────────────────────────── + // `inherit (src) a b c;` + "inherit_from" => { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "inherited_attrs" { + let mut c2 = child.walk(); + for attr in child.children(&mut c2) { + if attr.kind() == "identifier" { + let name = self.node_text(&attr, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&attr), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } + } + } + + // ── apply_expression (function call / import) ───────────────────── + "apply_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── select_expression (a.b.c) ───────────────────────────────────── + "select_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── with_expression ─────────────────────────────────────────────── + "with_expression" => { + self.recurse_children(node, code, file_id, counter, 
symbols, module_path, depth); + } + + // ── if_expression ───────────────────────────────────────────────── + "if_expression" | "assert_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── variable_expression — leaf, no symbols emitted ──────────────── + "variable_expression" => {} + + // ── ERROR — recurse ─────────────────────────────────────────────── + "ERROR" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── everything else — pass through ──────────────────────────────── + _ => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + } + } + + fn recurse_children( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_symbols_from_node( + child, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + } + + fn process_binding( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + // Key field is `attrpath:`, value field is `expression:` + let Some(key_node) = node.child_by_field_name("attrpath") else { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + return; + }; + + let name = self.node_text(&key_node, code).to_string(); + // Only emit symbols for simple single-component names; skip `a.b.c` paths + if name.contains('.') || name.contains('"') || name.contains('$') { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + return; + } + + let is_func = Self::value_is_function(node); + let (kind, visibility) = if is_func { + (SymbolKind::Function, Visibility::Public) + } else if name.chars().all(|c| c.is_uppercase() || c == '_') && 
name.len() > 1 { + (SymbolKind::Constant, Visibility::Public) + } else { + (SymbolKind::Variable, Visibility::Public) + }; + + let doc_comment = self.extract_nix_doc_comment(&node, code); + let range = range_from_node(&node); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + kind, + file_id, + range, + Some(name), + doc_comment, + module_path, + visibility, + ); + symbols.push(sym); + + // Recurse into value so nested structures are also visited + if let Some(value) = node.child_by_field_name("expression") { + self.extract_symbols_from_node( + value, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + } + + fn process_formals( + &mut self, + formals: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + ) { + self.node_tracker + .register_handled_node(formals.kind(), formals.kind_id()); + + let mut cursor = formals.walk(); + for child in formals.children(&mut cursor) { + if child.kind() == "formal" { + self.node_tracker + .register_handled_node(child.kind(), child.kind_id()); + // formal has field `name:` (identifier) + if let Some(name_node) = child.child_by_field_name("name") { + let name = self.node_text(&name_node, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(&name_node), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + } else { + // fallback: first identifier child + let mut c2 = child.walk(); + for fc in child.children(&mut c2) { + if fc.kind() == "identifier" { + let name = self.node_text(&fc, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(&fc), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + break; + } + } + } + } + } + } + + fn extract_nix_doc_comment(&self, node: &Node, code: &str) 
-> Option { + let mut prev = node.prev_sibling(); + let mut comments = Vec::new(); + + while let Some(sibling) = prev { + if sibling.kind() == "comment" { + let text = &code[sibling.byte_range()]; + let trimmed = text.trim_start_matches('#').trim(); + if !trimmed.is_empty() { + comments.push(trimmed.to_string()); + } + prev = sibling.prev_sibling(); + } else { + break; + } + } + + if comments.is_empty() { + return None; + } + comments.reverse(); + Some(comments.join("\n")) + } +} + +impl NodeTracker for NixParser { + fn get_handled_nodes(&self) -> &std::collections::HashSet { + self.node_tracker.get_handled_nodes() + } + + fn register_handled_node(&mut self, node_kind: &str, node_id: u16) { + self.node_tracker.register_handled_node(node_kind, node_id); + } +} + +impl LanguageParser for NixParser { + fn parse( + &mut self, + code: &str, + file_id: FileId, + symbol_counter: &mut SymbolCounter, + ) -> Vec { + self.context = ParserContext::new(); + let mut symbols = Vec::new(); + + if let Some(tree) = self.parser.parse(code, None) { + let root_node = tree.root_node(); + self.extract_symbols_from_node( + root_node, + code, + file_id, + symbol_counter, + &mut symbols, + "", + 0, + ); + } + + symbols + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn extract_doc_comment(&self, node: &Node, code: &str) -> Option { + self.extract_nix_doc_comment(node, code) + } + + fn find_calls<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + let Some(tree) = self.parser.parse(code, None) else { + return Vec::new(); + }; + + let mut results = Vec::new(); + Self::collect_calls(tree.root_node(), code, &mut results); + results + } + + fn find_implementations<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_uses<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_defines<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_imports(&mut 
self, code: &str, file_id: FileId) -> Vec { + let Some(tree) = self.parser.parse(code, None) else { + return Vec::new(); + }; + + let mut imports = Vec::new(); + Self::collect_imports(tree.root_node(), code, file_id, &mut imports); + imports + } + + fn language(&self) -> crate::parsing::Language { + crate::parsing::Language::Nix + } +} + +impl NixParser { + fn collect_calls<'a>(node: Node, code: &'a str, results: &mut Vec<(&'a str, &'a str, Range)>) { + if node.kind() == "apply_expression" { + if let Some(func) = node.child_by_field_name("function") { + if matches!(func.kind(), "variable_expression" | "select_expression") { + let callee = &code[func.byte_range()]; + results.push(("", callee, range_from_node(&node))); + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + Self::collect_calls(child, code, results); + } + } + + fn collect_imports(node: Node, code: &str, file_id: FileId, imports: &mut Vec) { + // `import ./path` or `import ` + if node.kind() == "apply_expression" { + if let Some(func) = node.child_by_field_name("function") { + let func_text = &code[func.byte_range()]; + if func_text == "import" { + if let Some(arg) = node.child_by_field_name("argument") { + let raw = &code[arg.byte_range()]; + let path = raw + .trim_matches('<') + .trim_matches('>') + .trim_matches('"') + .to_string(); + if !path.is_empty() { + imports.push(Import { + path, + alias: None, + file_id, + is_glob: false, + is_type_only: false, + }); + return; + } + } + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + Self::collect_imports(child, code, file_id, imports); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::SymbolCounter; + use std::collections::HashMap; + + /// Phase 0 — AST node discovery for tree-sitter-nix. 
+ /// Run with: `cargo test explore_nix_abi15 -- --nocapture` + #[test] + fn explore_nix_abi15() { + let code = std::fs::read_to_string(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/nix/comprehensive.nix" + )) + .unwrap_or_else(|_| r#"{ x = 1; add = a: b: a + b; }"#.to_string()); + + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_nix::LANGUAGE.into()) + .expect("failed to set Nix language"); + + let tree = parser.parse(&code, None).expect("parse failed"); + + let mut registry: HashMap = HashMap::new(); + discover_nodes(tree.root_node(), &mut registry); + + let mut sorted: Vec<_> = registry.iter().collect(); + sorted.sort_by_key(|(k, _)| k.as_str()); + + println!( + "\n=== tree-sitter-nix node kinds ({} total) ===", + sorted.len() + ); + for (kind, id) in &sorted { + println!(" [{id:4}] {kind}"); + } + + println!("\n=== Parse tree (first 80 fragments) ==="); + let s_expr = tree.root_node().to_sexp(); + for (i, fragment) in s_expr.split('(').take(80).enumerate() { + println!("{i:3} ({fragment}"); + } + + assert!(!registry.is_empty()); + assert!( + registry.contains_key("source_code"), + "Expected source_code root node" + ); + assert!(registry.contains_key("binding"), "Expected binding node"); + } + + fn discover_nodes(node: tree_sitter::Node, registry: &mut HashMap) { + let mut stack = vec![node]; + while let Some(current) = stack.pop() { + registry.insert(current.kind().to_string(), current.kind_id()); + let mut cursor = current.walk(); + for child in current.children(&mut cursor) { + stack.push(child); + } + } + } + + #[test] + fn test_nix_parser_creation() { + assert!(NixParser::new().is_ok()); + } + + #[test] + fn test_nix_parse_attrset_bindings() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ x = 1; y = 2; }"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| 
s.name.as_ref()).collect(); + println!("symbols from attrset: {names:?}"); + assert!(names.contains(&"x"), "Expected x in {names:?}"); + assert!(names.contains(&"y"), "Expected y in {names:?}"); + } + + #[test] + fn test_nix_parse_function_binding() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ add = a: b: a + b; name = "hello"; }"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| s.name.as_ref()).collect(); + println!("symbols from attrset with lambda: {names:?}"); + + let add_sym = symbols.iter().find(|s| s.name.as_ref() == "add"); + assert!(add_sym.is_some(), "Expected add symbol"); + assert_eq!(add_sym.unwrap().kind, SymbolKind::Function); + } + + #[test] + fn test_nix_parse_let_expression() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"let x = 1; f = a: a + 1; in f x"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| s.name.as_ref()).collect(); + println!("symbols from let: {names:?}"); + assert!(names.contains(&"x"), "Expected x"); + assert!(names.contains(&"f"), "Expected f"); + } + + #[test] + fn test_nix_find_imports() { + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ pkgs = import {}; local = import ./local.nix; }"#; + let imports = parser.find_imports(code, file_id); + + println!("imports: {imports:?}"); + assert!(!imports.is_empty(), "Expected at least one import"); + } + + #[test] + fn test_nix_language() { + let parser = NixParser::new().unwrap(); + assert_eq!(parser.language(), crate::parsing::Language::Nix); + } +} diff --git a/src/parsing/nix/resolution.rs b/src/parsing/nix/resolution.rs new file mode 100644 index 00000000..a0d87a48 --- /dev/null +++ 
b/src/parsing/nix/resolution.rs @@ -0,0 +1,234 @@ +use crate::parsing::{InheritanceResolver, ResolutionScope, ScopeLevel, ScopeType}; +use crate::symbol::ScopeContext; +use crate::{FileId, SymbolId}; +use std::any::Any; +use std::collections::HashMap; + +#[derive(Debug)] +pub struct NixResolutionContext { + scope_stack: Vec<NixScope>, + imports: HashMap<String, SymbolId>, + global_symbols: HashMap<String, SymbolId>, + module_symbols: HashMap<String, SymbolId>, +} + +#[derive(Debug)] +struct NixScope { + symbols: HashMap<String, SymbolId>, + #[allow(dead_code)] + scope_type: ScopeType, +} + +impl Default for NixResolutionContext { + fn default() -> Self { + Self { + scope_stack: vec![NixScope { + symbols: HashMap::new(), + scope_type: ScopeType::Module, + }], + imports: HashMap::new(), + global_symbols: HashMap::new(), + module_symbols: HashMap::new(), + } + } +} + +impl NixResolutionContext { + pub fn new(_file_id: FileId) -> Self { + Self::default() + } + + pub fn add_import_symbol(&mut self, name: String, symbol_id: SymbolId, _is_type_only: bool) { + self.imports.insert(name, symbol_id); + } + + pub fn add_symbol_with_context( + &mut self, + name: String, + symbol_id: SymbolId, + scope_context: Option<&ScopeContext>, + ) { + let scope_level = match scope_context { + Some(ScopeContext::Global) => ScopeLevel::Global, + Some(ScopeContext::Module) | Some(ScopeContext::Package) => ScopeLevel::Module, + Some(ScopeContext::Local { hoisted: true, .. }) => ScopeLevel::Module, + Some(ScopeContext::Local { hoisted: false, .. }) => ScopeLevel::Local, + Some(ScopeContext::Parameter) => ScopeLevel::Local, + Some(ScopeContext::ClassMember { .. 
}) => ScopeLevel::Module, + None => ScopeLevel::Module, + }; + self.add_symbol(name, symbol_id, scope_level); + } +} + +impl ResolutionScope for NixResolutionContext { + fn add_symbol(&mut self, name: String, symbol_id: SymbolId, scope_level: ScopeLevel) { + match scope_level { + ScopeLevel::Global => { + self.global_symbols.insert(name, symbol_id); + } + ScopeLevel::Module | ScopeLevel::Package => { + self.module_symbols.insert(name, symbol_id); + } + ScopeLevel::Local => { + if let Some(current_scope) = self.scope_stack.last_mut() { + current_scope.symbols.insert(name, symbol_id); + } + } + } + } + + fn resolve(&self, name: &str) -> Option<SymbolId> { + for scope in self.scope_stack.iter().rev() { + if let Some(id) = scope.symbols.get(name) { + return Some(*id); + } + } + if let Some(id) = self.imports.get(name) { + return Some(*id); + } + if let Some(id) = self.module_symbols.get(name) { + return Some(*id); + } + if let Some(id) = self.global_symbols.get(name) { + return Some(*id); + } + None + } + + fn clear_local_scope(&mut self) { + if let Some(scope) = self.scope_stack.last_mut() { + scope.symbols.clear(); + } + } + + fn enter_scope(&mut self, scope_type: ScopeType) { + self.scope_stack.push(NixScope { + symbols: HashMap::new(), + scope_type, + }); + } + + fn exit_scope(&mut self) { + if self.scope_stack.len() > 1 { + self.scope_stack.pop(); + } + } + + fn symbols_in_scope(&self) -> Vec<(String, SymbolId, ScopeLevel)> { + let mut result = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for scope in self.scope_stack.iter().rev() { + for (name, id) in &scope.symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Local)); + } + } + } + for (name, id) in &self.module_symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Module)); + } + } + for (name, id) in &self.global_symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Global)); + } + } + result + 
} + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[derive(Debug, Default)] +pub struct NixInheritanceResolver { + inheritance: HashMap<String, Vec<(String, String)>>, + type_methods: HashMap<String, Vec<String>>, +} + +impl NixInheritanceResolver { + pub fn new() -> Self { + Self::default() + } +} + +impl InheritanceResolver for NixInheritanceResolver { + fn add_inheritance(&mut self, child: String, parent: String, kind: &str) { + self.inheritance + .entry(child) + .or_default() + .push((parent, kind.to_string())); + } + + fn resolve_method(&self, type_name: &str, method: &str) -> Option<String> { + let mut to_visit = vec![type_name.to_string()]; + let mut visited = std::collections::HashSet::new(); + + while let Some(current) = to_visit.pop() { + if !visited.insert(current.clone()) { + continue; + } + if let Some(methods) = self.type_methods.get(&current) { + if methods.iter().any(|m| m == method) { + return Some(current); + } + } + if let Some(parents) = self.inheritance.get(&current) { + for (parent, _) in parents { + to_visit.push(parent.clone()); + } + } + } + None + } + + fn get_inheritance_chain(&self, type_name: &str) -> Vec<String> { + let mut chain = vec![type_name.to_string()]; + let mut visited = std::collections::HashSet::new(); + visited.insert(type_name.to_string()); + let mut to_visit = vec![type_name.to_string()]; + + while let Some(current) = to_visit.pop() { + if let Some(parents) = self.inheritance.get(&current) { + for (parent, _) in parents { + if visited.insert(parent.clone()) { + chain.push(parent.clone()); + to_visit.push(parent.clone()); + } + } + } + } + chain + } + + fn is_subtype(&self, child: &str, parent: &str) -> bool { + if child == parent { + return true; + } + self.get_inheritance_chain(child) + .contains(&parent.to_string()) + } + + fn add_type_methods(&mut self, type_name: String, methods: Vec<String>) { + self.type_methods.insert(type_name, methods); + } + + fn get_all_methods(&self, type_name: &str) -> Vec<String> { + let mut methods = Vec::new(); + let mut seen = std::collections::HashSet::new(); + for 
ancestor in self.get_inheritance_chain(type_name) { + if let Some(type_methods) = self.type_methods.get(&ancestor) { + for method in type_methods { + if seen.insert(method.clone()) { + methods.push(method.clone()); + } + } + } + } + methods + } +} diff --git a/src/parsing/registry.rs b/src/parsing/registry.rs index 9a3fda11..cad08d0e 100644 --- a/src/parsing/registry.rs +++ b/src/parsing/registry.rs @@ -85,6 +85,7 @@ impl<'de> Deserialize<'de> for LanguageId { "javascript" => "javascript", "kotlin" => "kotlin", "lua" => "lua", + "nix" => "nix", "php" => "php", "python" => "python", "rust" => "rust", @@ -391,6 +392,7 @@ fn initialize_registry(registry: &mut LanguageRegistry) { super::kotlin::register(registry); super::clojure::register(registry); super::lua::register(registry); + super::nix::register(registry); super::swift::register(registry); } diff --git a/tests/exploration/abi15_grammar_audit/mod.rs b/tests/exploration/abi15_grammar_audit/mod.rs index 7be20ac8..c77077fb 100644 --- a/tests/exploration/abi15_grammar_audit/mod.rs +++ b/tests/exploration/abi15_grammar_audit/mod.rs @@ -29,6 +29,7 @@ mod java; mod javascript; mod kotlin; mod lua; +mod nix; mod php; mod python; mod rust_lang; diff --git a/tests/exploration/abi15_grammar_audit/nix.rs b/tests/exploration/abi15_grammar_audit/nix.rs new file mode 100644 index 00000000..7cf1d100 --- /dev/null +++ b/tests/exploration/abi15_grammar_audit/nix.rs @@ -0,0 +1,91 @@ +//! Nix grammar audit. 
+ +use super::helpers::{AuditData, LanguageAuditConfig, run_comprehensive_analysis}; +use codanna::parsing::nix::audit::NixParserAudit; + +const CONFIG: LanguageAuditConfig = LanguageAuditConfig { + language_name: "Nix", + file_extension: "nix", + grammar_json_path: "contributing/parsers/nix/node-types.json", + example_file_path: "examples/nix/comprehensive.nix", + output_dir: "contributing/parsers/nix", +}; + +fn node_categories() -> Vec<(&'static str, Vec<&'static str>)> { + vec![ + ("ROOT NODES", vec!["source_code"]), + ( + "BINDING NODES", + vec!["binding_set", "binding", "attrpath", "identifier"], + ), + ( + "ATTRSET NODES", + vec!["attrset_expression", "rec_attrset_expression"], + ), + ( + "FUNCTION NODES", + vec![ + "function_expression", + "formals", + "formal", + "apply_expression", + ], + ), + ( + "SCOPE NODES", + vec![ + "let_expression", + "with_expression", + "inherit", + "inherit_from", + "inherited_attrs", + ], + ), + ( + "EXPRESSION NODES", + vec![ + "if_expression", + "assert_expression", + "select_expression", + "binary_expression", + "parenthesized_expression", + "list_expression", + "variable_expression", + ], + ), + ( + "LITERAL NODES", + vec![ + "integer_expression", + "string_expression", + "indented_string_expression", + "path_expression", + "spath_expression", + "interpolation", + ], + ), + ("COMMENT NODES", vec!["comment"]), + ] +} + +#[test] +fn comprehensive_nix_analysis() { + run_comprehensive_analysis( + &CONFIG, + tree_sitter_nix::LANGUAGE.into(), + "{ x = 1; f = a: a + 1; }\n", + &node_categories(), + |path| { + let audit = NixParserAudit::audit_file(path).map_err(|e| e.to_string())?; + let report = audit.generate_report(); + Ok(( + AuditData::new( + audit.grammar_nodes, + audit.implemented_nodes, + audit.extracted_symbol_kinds, + ), + report, + )) + }, + ); +} diff --git a/tests/fixtures/nix/basic.nix b/tests/fixtures/nix/basic.nix new file mode 100644 index 00000000..7013589e --- /dev/null +++ b/tests/fixtures/nix/basic.nix @@ -0,0 
+1,26 @@ +# Basic Nix fixture: simple attrset with functions and values +{ + # Plain value bindings + host = "localhost"; + port = 8080; + debug = false; + + # Lambda bindings + identity = x: x; + add = a: b: a + b; + greet = name: "Hello, ${name}"; + + # Nested attrset + config = { + timeout = 30; + retries = 3; + }; + + # Let expression + computed = + let + base = 10; + factor = 2; + in + base * factor; +} diff --git a/tests/fixtures/nix/functions.nix b/tests/fixtures/nix/functions.nix new file mode 100644 index 00000000..d8c30a3e --- /dev/null +++ b/tests/fixtures/nix/functions.nix @@ -0,0 +1,30 @@ +# Nix fixture: various function patterns +{ + # Simple curried lambda + add = a: b: a + b; + multiply = a: b: a * b; + + # Formals (destructuring) + mkService = { name, port ? 8080, debug ? false }: { + inherit name port debug; + }; + + # Formals with ellipsis + mkPkg = { name, src, buildInputs ? [], ... }: derivation { + inherit name src buildInputs; + system = builtins.currentSystem; + builder = "/bin/sh"; + }; + + # @ pattern + withExtras = args @ { name, ... 
}: { + fullArgs = args; + inherit name; + }; + + # Higher-order function + compose = f: g: x: f (g x); + + # Nested lambdas + applyTwice = f: x: f (f x); +} diff --git a/tests/fixtures/nix/imports.nix b/tests/fixtures/nix/imports.nix new file mode 100644 index 00000000..0f76f175 --- /dev/null +++ b/tests/fixtures/nix/imports.nix @@ -0,0 +1,11 @@ +# Nix fixture: import patterns +let + localLib = import ./lib.nix; + nixpkgs = import {}; + pinned = import (fetchTarball "https://example.com/nixpkgs.tar.gz") {}; +in +{ + inherit (nixpkgs) stdenv fetchurl; + lib = localLib; + pkgs = nixpkgs; +} diff --git a/tests/parsers/nix/mod.rs b/tests/parsers/nix/mod.rs new file mode 100644 index 00000000..b8c5a052 --- /dev/null +++ b/tests/parsers/nix/mod.rs @@ -0,0 +1,2 @@ +mod test_symbol_extraction; +mod test_call_tracking; diff --git a/tests/parsers/nix/test_call_tracking.rs b/tests/parsers/nix/test_call_tracking.rs new file mode 100644 index 00000000..71ef6ba7 --- /dev/null +++ b/tests/parsers/nix/test_call_tracking.rs @@ -0,0 +1,92 @@ +use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; + +fn find_calls(code: &str) -> Vec<(String, String)> { + let mut parser = NixParser::new().expect("Failed to create NixParser"); + parser + .find_calls(code) + .into_iter() + .map(|(caller, callee, _)| (caller.to_string(), callee.to_string())) + .collect() +} + +#[test] +fn test_simple_apply_expression() { + let code = r#"{ result = builtins.toString 42; }"#; + let calls = find_calls(code); + println!("calls: {calls:?}"); + assert!( + calls.iter().any(|(_, callee)| callee.contains("toString")), + "expected toString call, got {calls:?}" + ); +} + +#[test] +fn test_callpackage_pattern() { + let code = r#" +{ + hello = pkgs.callPackage ./hello.nix {}; + world = pkgs.callPackage ./world.nix { inherit stdenv; }; +} +"#; + let calls = find_calls(code); + println!("callPackage calls: {calls:?}"); + assert!( + calls + .iter() + .any(|(_, callee)| 
callee.contains("callPackage")), + "expected callPackage call, got {calls:?}" + ); +} + +#[test] +fn test_nested_apply_expressions() { + let code = r#"{ x = builtins.toString (builtins.length [ 1 2 3 ]); }"#; + let calls = find_calls(code); + println!("nested calls: {calls:?}"); + // Should detect both function applications + assert!(calls.len() >= 2, "expected at least 2 calls, got {calls:?}"); +} + +#[test] +fn test_import_not_counted_as_call() { + // import is handled via find_imports, not find_calls + let code = r#"{ pkgs = import {}; }"#; + let calls = find_calls(code); + println!("calls for import expr: {calls:?}"); + // The apply_expression `import ` will appear — that's fine + // just document current behaviour +} + +#[test] +fn test_find_imports_basic() { + use codanna::types::FileId; + let code = r#" +{ + nixpkgs = import {}; + local = import ./local.nix; +} +"#; + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + let imports = parser.find_imports(code, file_id); + println!("imports: {imports:?}"); + assert!( + !imports.is_empty(), + "expected at least one import, got none" + ); +} + +#[test] +fn test_find_imports_from_fixture() { + use codanna::types::FileId; + let code = include_str!("../../fixtures/nix/imports.nix"); + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + let imports = parser.find_imports(code, file_id); + println!("imports from fixture: {imports:?}"); + assert!( + imports.iter().any(|i| i.path.contains("lib.nix")), + "expected ./lib.nix import, got {imports:?}" + ); +} diff --git a/tests/parsers/nix/test_symbol_extraction.rs b/tests/parsers/nix/test_symbol_extraction.rs new file mode 100644 index 00000000..896da3b1 --- /dev/null +++ b/tests/parsers/nix/test_symbol_extraction.rs @@ -0,0 +1,176 @@ +use codanna::SymbolKind; +use codanna::parsing::Language; +use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; +use codanna::types::{FileId, 
SymbolCounter}; + +fn parse(code: &str) -> Vec { + let mut parser = NixParser::new().expect("Failed to create NixParser"); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + parser.parse(code, file_id, &mut counter) +} + +// ── language identity ──────────────────────────────────────────────────────── + +#[test] +fn test_nix_language_identity() { + let parser = NixParser::new().unwrap(); + assert_eq!(parser.language(), Language::Nix); +} + +// ── basic attrset bindings ─────────────────────────────────────────────────── + +#[test] +fn test_attrset_simple_values() { + let symbols = parse(r#"{ host = "localhost"; port = 8080; debug = false; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"host"), "expected host, got {names:?}"); + assert!(names.contains(&"port"), "expected port, got {names:?}"); + assert!(names.contains(&"debug"), "expected debug, got {names:?}"); +} + +#[test] +fn test_attrset_function_binding_kind() { + let symbols = parse(r#"{ add = a: b: a + b; }"#); + let add = symbols.iter().find(|s| s.name.as_ref() == "add").unwrap(); + assert_eq!(add.kind, SymbolKind::Function, "add should be Function"); +} + +#[test] +fn test_attrset_value_binding_kind() { + let symbols = parse(r#"{ x = 42; }"#); + let x = symbols.iter().find(|s| s.name.as_ref() == "x").unwrap(); + assert_eq!(x.kind, SymbolKind::Variable, "x should be Variable"); +} + +// ── lambda parameters ──────────────────────────────────────────────────────── + +#[test] +fn test_simple_lambda_param() { + let symbols = parse(r#"{ f = x: x + 1; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"x"), "expected param x, got {params:?}"); +} + +#[test] +fn test_formals_params() { + let symbols = parse(r#"{ f = { a, b ? 0, c ? 
1 }: a + b + c; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"a"), "expected param a, got {params:?}"); + assert!(params.contains(&"b"), "expected param b, got {params:?}"); + assert!(params.contains(&"c"), "expected param c, got {params:?}"); +} + +#[test] +fn test_curried_lambda_params() { + let symbols = parse(r#"{ add = a: b: a + b; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"a"), "expected param a, got {params:?}"); + assert!(params.contains(&"b"), "expected param b, got {params:?}"); +} + +// ── let expressions ────────────────────────────────────────────────────────── + +#[test] +fn test_let_bindings() { + let symbols = parse(r#"let x = 1; y = 2; in x + y"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"x"), "expected x, got {names:?}"); + assert!(names.contains(&"y"), "expected y, got {names:?}"); +} + +#[test] +fn test_let_function_binding() { + let symbols = parse(r#"let double = x: x * 2; in double 5"#); + let double = symbols + .iter() + .find(|s| s.name.as_ref() == "double") + .unwrap(); + assert_eq!(double.kind, SymbolKind::Function); +} + +// ── rec attrset ────────────────────────────────────────────────────────────── + +#[test] +fn test_rec_attrset_bindings() { + let symbols = parse(r#"rec { base = "/var"; data = "${base}/data"; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"base"), "expected base, got {names:?}"); + assert!(names.contains(&"data"), "expected data, got {names:?}"); +} + +// ── inherit ────────────────────────────────────────────────────────────────── + +#[test] +fn test_inherit_emits_variables() { + let symbols = parse(r#"{ inherit stdenv fetchurl; }"#); + let names: 
Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); + assert!( + names.contains(&"fetchurl"), + "expected fetchurl, got {names:?}" + ); +} + +#[test] +fn test_inherit_from_emits_variables() { + let symbols = parse(r#"{ inherit (pkgs) stdenv fetchurl; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); + assert!( + names.contains(&"fetchurl"), + "expected fetchurl, got {names:?}" + ); +} + +// ── nested attrsets ────────────────────────────────────────────────────────── + +#[test] +fn test_nested_attrset_outer_binding() { + let symbols = parse(r#"{ config = { host = "localhost"; port = 8080; }; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"config"), "expected config, got {names:?}"); + // inner bindings also visible + assert!(names.contains(&"host"), "expected host, got {names:?}"); + assert!(names.contains(&"port"), "expected port, got {names:?}"); +} + +// ── fixture files ──────────────────────────────────────────────────────────── + +#[test] +fn test_basic_fixture() { + let code = include_str!("../../fixtures/nix/basic.nix"); + let symbols = parse(code); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + + assert!(names.contains(&"host"), "expected host"); + assert!(names.contains(&"add"), "expected add"); + assert!(names.contains(&"config"), "expected config"); + + let add = symbols.iter().find(|s| s.name.as_ref() == "add").unwrap(); + assert_eq!(add.kind, SymbolKind::Function); +} + +#[test] +fn test_functions_fixture() { + let code = include_str!("../../fixtures/nix/functions.nix"); + let symbols = parse(code); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + + assert!(names.contains(&"add"), "expected add"); + assert!(names.contains(&"mkService"), "expected 
mkService"); + assert!(names.contains(&"compose"), "expected compose"); +} diff --git a/tests/parsers_tests.rs b/tests/parsers_tests.rs index 19376871..b10829e3 100644 --- a/tests/parsers_tests.rs +++ b/tests/parsers_tests.rs @@ -145,6 +145,12 @@ mod test_c_method_call_static; #[path = "parsers/lua/test_method_call_static.rs"] mod test_lua_method_call_static; +#[path = "parsers/nix/test_symbol_extraction.rs"] +mod test_nix_symbol_extraction; + +#[path = "parsers/nix/test_call_tracking.rs"] +mod test_nix_call_tracking; + #[path = "parsers/gdscript/test_method_call_static.rs"] mod test_gdscript_method_call_static;