From d59502a0867bb024f6f48c5b65cacea460f2a914 Mon Sep 17 00:00:00 2001 From: Anton Vasiljev Date: Thu, 21 May 2026 14:14:00 +0300 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=93=9C=20docs:=20add=20Nix=20parser?= =?UTF-8?q?=20implementation=20plan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Execution plan for adding Nix language support to codanna under the per-parser docs convention (contributing/parsers/nix/). - Compatibility verdict for tree-sitter-nix 0.3.0 (LanguageFn / ABI-15, compatible with tree-sitter 0.26) - Module + wiring map, trait architecture, and Nix->symbol mapping - Parser traversal and scope-resolution logic - 6-phase execution plan with touch-point checklist - 5 Mermaid diagrams (GitHub-render-safe, quoted labels) --- .../parsers/nix/IMPLEMENTATION_PLAN.md | 313 ++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 contributing/parsers/nix/IMPLEMENTATION_PLAN.md diff --git a/contributing/parsers/nix/IMPLEMENTATION_PLAN.md b/contributing/parsers/nix/IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..508d480d --- /dev/null +++ b/contributing/parsers/nix/IMPLEMENTATION_PLAN.md @@ -0,0 +1,313 @@ +# Nix Language Parser — Implementation Plan + +> Status: **planning** · Branch: `feature/parser-for-nix-lang` · Target grammar: `tree-sitter-nix 0.3.0` +> +> This document is the execution plan for adding Nix expression-language support to +> codanna. It follows the conventions in +> [`contributing/development/language-support.md`](../../development/language-support.md) +> and uses **Lua** as the closest reference parser (dynamic, no traits, attrset/scope-centric). + +--- + +## 1. 
Compatibility verdict + +| Item | Value | +|---|---| +| Grammar crate | `tree-sitter-nix = "0.3.0"` (nix-community, Jul 2025) | +| Binding style | modern `LANGUAGE: tree_sitter_language::LanguageFn` (ABI-14/15) | +| Core dep | `tree-sitter-language = "0.1.0"` (no direct `tree-sitter` dep) | +| codanna core | `tree-sitter 0.26.9` — **compatible** | +| Wiring | identical to Lua: `tree_sitter_nix::LANGUAGE.into()` → `parser.set_language(&lang)` | + +No version conflict: the grammar exposes the same `LanguageFn` constant codanna already +consumes for Lua/Clojure/etc. + +--- + +## 2. Module + wiring map + +Six **new** files in `src/parsing/nix/` plus a small set of **existing** files to edit. +`config.rs` is intentionally NOT edited — `generate_language_defaults()` auto-populates from +the registry. + +```mermaid +flowchart LR + subgraph NEW["NEW — src/parsing/nix/ (6 files)"] + direction TB + DEF["definition.rs
LanguageDefinition"] + PAR["parser.rs
LanguageParser + NodeTracker"] + BEH["behavior.rs
LanguageBehavior"] + RES["resolution.rs
ResolutionScope"] + AUD["audit.rs
ABI-15 coverage"] + MOD["mod.rs
re-exports + register"] + end + + subgraph EDIT["EDIT — existing files"] + direction TB + CARGO["Cargo.toml
+ tree-sitter-nix = 0.3.0"] + PMOD["parsing/mod.rs
pub mod nix; pub use ..."] + REG["parsing/registry.rs
initialize_registry()
+ Deserialize match arm"] + LANG["parsing/language.rs
enum Language::Nix
+ 6 match arms"] + TESTS["tests/parsers_tests.rs
gateway #path entries"] + LOCK["parsers/grammar-versions.lock
nix entry (optional)"] + end + + subgraph SKIP["NO EDIT NEEDED"] + CFG["config.rs
auto from registry.iter_all()"] + end + + MOD --> PMOD + DEF --> REG + PAR --> LANG + DEF -. "tree_sitter_nix::LANGUAGE" .-> CARGO + AUD --> TESTS + REG -. registers .-> MOD + CFG -. reads .-> REG +``` + +> `language.rs` is **required**, not optional: `Language::from_extension` calls +> `from_language_id("nix")`; without the `Nix` arm it returns `None` and `.nix` files are +> never detected. + +--- + +## 3. Trait architecture + +The four traits each new file implements, and the shared types they touch. + +```mermaid +classDiagram + class LanguageDefinition { + <<trait>> + +id() LanguageId + +name() str + +extensions() slice + +create_parser(settings) LanguageParser + +create_behavior() LanguageBehavior + +default_enabled() bool + } + class LanguageParser { + <<trait>> + +parse(code, file_id, counter) Vec~Symbol~ + +find_calls(code) Vec + +find_imports(code, file_id) Vec~Import~ + +extract_doc_comment(node, code) Option + +language() Language + +as_any() Any + } + class LanguageBehavior { + <<trait>> + +module_separator() str + +parse_visibility(sig) Visibility + +supports_traits() bool + +get_language() TsLanguage + +create_resolution_context(file_id) ResolutionScope + } + class ResolutionScope { + <<trait>> + +resolve(name) Option~SymbolId~ + +add_symbol(name, id, level) + +enter_scope(kind) + +exit_scope() + } + + class NixLanguage + class NixParser + class NixBehavior + class NixResolutionContext + class GenericInheritanceResolver + + NixLanguage ..|> LanguageDefinition + NixParser ..|> LanguageParser + NixBehavior ..|> LanguageBehavior + NixResolutionContext ..|> ResolutionScope + + NixLanguage --> NixParser : creates + NixLanguage --> NixBehavior : creates + NixBehavior --> NixResolutionContext : creates + NixBehavior --> GenericInheritanceResolver : no traits, reuse no-op +``` + +--- + +## 4. Nix → codanna symbol mapping + +Node names use the `_expression` suffix convention of tree-sitter-nix and **must be confirmed +in Phase 0** (AST discovery). 
+ +| Nix construct | tree-sitter-nix node (confirm) | SymbolKind | Notes | +|---|---|---|---| +| `.nix` file | root (`source_code`) | Module | file-based path, separator `.` | +| binding whose value is a lambda | `binding` + `function_expression` | **Function** | key heuristic | +| binding with non-lambda value | `binding` | Variable / Constant | literal RHS → Constant | +| returned attrset keys | `attrset_expression` → `binding` | Field | Public | +| `rec { ... }` attrs | `rec_attrset_expression` | Field | self-referential scope | +| lambda params `{ a, b ? d, ... }:` | `formals` / `formal` | Parameter | `@`-pattern binds whole set | +| `inherit a;` / `inherit (src) a;` | `inherit` / `inherit_from` | Variable (+ref to src) | one node → many bindings | +| `import ./x.nix`, `<nixpkgs>` | `apply_expression` + `path_expression` / `spath_expression` | Import | dynamic interpolation = best-effort | +| `f x`, `callPackage ./p.nix {}` | `apply_expression` | call relationship | resolve `function` field | +| `a.b.c` | `select_expression` + `attrpath` | reference | def-vs-ref by position | + +**Visibility:** no keywords → `let`/`formals` = `Private`; returned attrset attrs = `Public`. +**Traits/inheritance:** none → `supports_traits() = false`, reuse `GenericInheritanceResolver`. + +--- + +## 5. Parser traversal logic + +`extract_symbols_from_node` decision flow (every visited node is registered with `NodeTracker` +to drive the audit report). 
+ +```mermaid +flowchart TD + START(["visit node"]) --> REG["register node in NodeTracker"] + REG --> K{"node kind?"} + K -->|binding| B{"value is function_expression?"} + B -->|yes| FN["emit Function symbol"] + B -->|no| VAR["emit Variable or Constant"] + K -->|attrset or rec_attrset| ATTR["enter attrset scope, emit Field per binding"] + K -->|let_expression| LET["enter let scope, bindings are Private"] + K -->|function_expression| LAM["enter lambda scope, emit Parameter per formal"] + K -->|inherit or inherit_from| INH["emit one Variable per name, record ref to source"] + K -->|apply_expression| APP{"function is import?"} + APP -->|yes| IMP["record Import"] + APP -->|no| CALL["record call relationship"] + K -->|select_expression| SEL["record attrpath reference"] + K -->|ERROR| ERR["do not skip, recurse into children"] + K -->|other| OTH["pass through"] + FN --> CH["recurse children"] + VAR --> CH + ATTR --> CH + LET --> CH + LAM --> CH + INH --> CH + IMP --> CH + CALL --> CH + SEL --> CH + ERR --> CH + OTH --> CH + CH --> EXIT["exit scope if entered"] + EXIT --> DONE(["return"]) +``` + +--- + +## 6. Scope resolution order + +```mermaid +flowchart TD + Q(["resolve identifier"]) --> L{"in let or formals local scope?"} + L -->|yes| HIT(["resolved"]) + L -->|no| R{"in enclosing rec attrset?"} + R -->|yes| HIT + R -->|no| W{"in a with namespace?"} + W -->|maybe| WUN["mark UNRESOLVED, with is context-sensitive"] + W -->|no| T{"file top-level binding?"} + T -->|yes| HIT + T -->|no| I{"imported symbol?"} + I -->|yes| HIT + I -->|no| MISS(["unresolved"]) + WUN --> MISS +``` + +> `with expr;` cannot be resolved statically (its bindings depend on a runtime value, and it +> never shadows other bindings). Treat such references as unresolved — a documented limitation, +> same class codanna already tolerates for dynamic dispatch. + +--- + +## 7. Execution phases + +```mermaid +flowchart LR + P0["Phase 0
AST discovery
comprehensive.nix
+ explore test"] + P1["Phase 1
scaffold + dep
6 files from Lua"] + P2["Phase 2
parser.rs
symbol extraction"] + P3["Phase 3
behavior +
resolution"] + P4["Phase 4
register
mod/registry/language"] + P5["Phase 5
tests + audit
>70% coverage"] + P6["Phase 6
e2e verify
clippy/fmt/docs"] + P0 --> P1 --> P2 --> P3 --> P4 --> P5 --> P6 +``` + +All commands run in the flake (`nix develop -c ...`). + +### Phase 0 — AST node discovery (do first; do not guess node names) +- Write `examples/nix/comprehensive.nix` covering: lambdas, `let`, `rec`, `inherit` / + `inherit (x)`, `with`, `if`, `assert`, attrsets, lists, `import`, `<nixpkgs>`, string + interpolation, paths, `a.b.c`, `@`-patterns, defaults, `...`. +- Add a throwaway `explore_nix_abi15` test that loads `tree_sitter_nix::LANGUAGE`, parses it, + and prints `node.kind()` + `kind_id()` (reuse `discover_nodes` from `lua/audit.rs`). + `nix develop -c cargo test explore_nix_abi15 -- --nocapture` +- Record findings → `contributing/parsers/nix/NODE_MAPPING.md` + `node-types.json`. +- Alternative: add `pkgs.tree-sitter` + `pkgs.nodejs` to the flake devShell and use + `contributing/tree-sitter/scripts/`. + +### Phase 1 — Scaffold + dependency +- `Cargo.toml` += `tree-sitter-nix = "0.3.0"` (after the `tree-sitter-clojure-orchard` line). +- `mkdir src/parsing/nix`, copy the six `lua/*.rs` files as skeletons, rename + `Lua`→`Nix`, `tree_sitter_lua`→`tree_sitter_nix`. +- Implementation order: `definition.rs` → `parser.rs` → `behavior.rs` → `resolution.rs` → + `audit.rs` → `mod.rs`. + +### Phase 2 — parser.rs +- `extract_symbols_from_node` per the traversal diagram. Minimum methods: `parse`, + `find_calls`, `find_imports`, `extract_doc_comment`, `as_any`, `language()` → `Language::Nix`. +- Register every node in `NodeTracker`. Handle `ERROR` by recursing. Zero-copy slices. + +### Phase 3 — behavior.rs + resolution.rs +- behavior: `module_separator() = "."`, file-based `module_path_from_file`, + `parse_visibility` (Public default), `supports_traits() = false`, + `get_language()` = `tree_sitter_nix::LANGUAGE.into()`. +- resolution: `NixResolutionContext` with the scope order above; `with` → unresolved. 
+ +### Phase 4 — registration +- `src/parsing/mod.rs`: `pub mod nix;` + `pub use nix::{NixBehavior, NixParser};` +- `src/parsing/registry.rs`: `super::nix::register(registry);` in `initialize_registry()`, + and `"nix" => "nix",` in the `Deserialize` match. +- `src/parsing/language.rs`: `Nix` variant + arms in `to_language_id`, `from_language_id`, + `from_extension` fallback, `extensions`, `config_key`, `name` (extension `"nix"`). +- Decide `default_enabled()` — **recommend `false` during dev, flip to `true` at release**. + +### Phase 5 — tests + audit +- `audit.rs` (copy Lua, swap key-node list). +- `tests/parsers/nix/` + gateway `#[path = "parsers/nix/..."]` in `tests/parsers_tests.rs`. +- `nix develop -c cargo test nix` +- `nix develop -c cargo test audit_nix -- --nocapture` (target >70% key-node coverage) + +### Phase 6 — end-to-end verify + polish +``` +nix develop -c cargo build +nix develop -c cargo clippy --fix +nix develop -c cargo fmt +nix develop -c bash -c 'cargo run -- init && cargo run -- index . && cargo run -- retrieve search "mkDerivation"' +``` +- Update supported-list in `language-support.md` and add the `nix` entry to + `grammar-versions.lock`. + +--- + +## 8. Risks & open decisions + +| Item | Disposition | +|---|---| +| `with expr;` scoping | statically unresolvable → documented limitation, do not block | +| string/path interpolation `${...}` | extract inner identifier as ref; dynamic target best-effort | +| `attrpath` def vs ref | disambiguate by binding (LHS) vs expression (RHS) position | +| flake lacks `tree-sitter` CLI | use Rust exploration test (Phase 0), or add `tree-sitter`+`nodejs` to flake | +| `default_enabled` true/false at merge | **open** — recommend `false` until audit coverage is solid | + +--- + +## 9. 
Touch-point checklist + +- [ ] `Cargo.toml` — `tree-sitter-nix = "0.3.0"` +- [ ] `src/parsing/nix/{mod,definition,parser,behavior,resolution,audit}.rs` +- [ ] `src/parsing/mod.rs` — module + re-export +- [ ] `src/parsing/registry.rs` — `initialize_registry()` + `Deserialize` arm +- [ ] `src/parsing/language.rs` — `Language::Nix` + 6 match arms +- [ ] `examples/nix/comprehensive.nix` (+ `main.nix`) +- [ ] `tests/parsers/nix/` + `tests/parsers_tests.rs` gateway +- [ ] `contributing/parsers/nix/NODE_MAPPING.md` + `node-types.json` +- [ ] `contributing/parsers/grammar-versions.lock` — nix entry +- [ ] `config.rs` — **no edit** (auto from registry) From ea5d23eb907bf549bd274449f34a187ecbea380d Mon Sep 17 00:00:00 2001 From: Anton Vasiljev Date: Thu, 21 May 2026 18:15:38 +0300 Subject: [PATCH 2/4] =?UTF-8?q?=F0=9F=8E=89=20feat(parsing):=20add=20Nix?= =?UTF-8?q?=20language=20parser?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - tree-sitter-nix 0.3.0 via nix-community (ABI-15) - Parses attrsets, rec attrsets, let expressions, lambdas (universal + formals), inherit / inherit_from, apply_expression, select_expression - 100% key-node coverage (18/18) in audit - Language::Nix wired into registry, factory, language.rs, io/parse.rs - default_enabled = false (flip to true at release) - flake.lock → rustc 1.95.0 (required by sysinfo 0.39.2) - 40 new tests across unit, integration, and abi15 audit --- Cargo.lock | 11 + Cargo.toml | 1 + contributing/parsers/grammar-versions.lock | 6 + contributing/parsers/nix/AUDIT_REPORT.md | 27 + contributing/parsers/nix/GRAMMAR_ANALYSIS.md | 90 +++ contributing/parsers/nix/NODE_MAPPING.md | 88 +++ contributing/parsers/nix/node-types.json | 111 +++ contributing/parsers/nix/node_discovery.txt | 87 +++ examples/nix/comprehensive.nix | 127 ++++ examples/nix/main.nix | 26 + flake.lock | 6 +- src/io/parse.rs | 1 + src/parsing/factory.rs | 18 +- src/parsing/language.rs | 6 + src/parsing/mod.rs | 2 + 
src/parsing/nix/audit.rs | 169 +++++ src/parsing/nix/behavior.rs | 224 ++++++ src/parsing/nix/definition.rs | 89 +++ src/parsing/nix/mod.rs | 11 + src/parsing/nix/parser.rs | 743 +++++++++++++++++++ src/parsing/nix/resolution.rs | 234 ++++++ src/parsing/registry.rs | 2 + tests/exploration/abi15_grammar_audit/mod.rs | 1 + tests/exploration/abi15_grammar_audit/nix.rs | 102 +++ tests/fixtures/nix/basic.nix | 26 + tests/fixtures/nix/functions.nix | 30 + tests/fixtures/nix/imports.nix | 11 + tests/parsers/nix/mod.rs | 2 + tests/parsers/nix/test_call_tracking.rs | 87 +++ tests/parsers/nix/test_symbol_extraction.rs | 167 +++++ tests/parsers_tests.rs | 6 + 31 files changed, 2505 insertions(+), 6 deletions(-) create mode 100644 contributing/parsers/nix/AUDIT_REPORT.md create mode 100644 contributing/parsers/nix/GRAMMAR_ANALYSIS.md create mode 100644 contributing/parsers/nix/NODE_MAPPING.md create mode 100644 contributing/parsers/nix/node-types.json create mode 100644 contributing/parsers/nix/node_discovery.txt create mode 100644 examples/nix/comprehensive.nix create mode 100644 examples/nix/main.nix create mode 100644 src/parsing/nix/audit.rs create mode 100644 src/parsing/nix/behavior.rs create mode 100644 src/parsing/nix/definition.rs create mode 100644 src/parsing/nix/mod.rs create mode 100644 src/parsing/nix/parser.rs create mode 100644 src/parsing/nix/resolution.rs create mode 100644 tests/exploration/abi15_grammar_audit/nix.rs create mode 100644 tests/fixtures/nix/basic.nix create mode 100644 tests/fixtures/nix/functions.nix create mode 100644 tests/fixtures/nix/imports.nix create mode 100644 tests/parsers/nix/mod.rs create mode 100644 tests/parsers/nix/test_call_tracking.rs create mode 100644 tests/parsers/nix/test_symbol_extraction.rs diff --git a/Cargo.lock b/Cargo.lock index d1cbc7ba..40361559 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -838,6 +838,7 @@ dependencies = [ "tree-sitter-javascript", "tree-sitter-kotlin-codanna", "tree-sitter-lua", + "tree-sitter-nix", 
"tree-sitter-php", "tree-sitter-python", "tree-sitter-rust", @@ -5383,6 +5384,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-nix" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-php" version = "0.24.2" diff --git a/Cargo.toml b/Cargo.toml index 62cb5cf9..2f77d7eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -100,6 +100,7 @@ serde_json5 = "0.2.1" tree-sitter-swift = "0.7.2" tree-sitter-lua = "0.5.0" tree-sitter-clojure-orchard = "0.2.5" +tree-sitter-nix = "0.3.0" glob = "0.3.3" async-trait = "0.1.89" reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } diff --git a/contributing/parsers/grammar-versions.lock b/contributing/parsers/grammar-versions.lock index 34557889..85da42b5 100644 --- a/contributing/parsers/grammar-versions.lock +++ b/contributing/parsers/grammar-versions.lock @@ -74,6 +74,12 @@ "updated": "2025-07-27T12:20:37-07:00", "abi_version": "15" }, + "nix": { + "repo": "https://github.com/nix-community/tree-sitter-nix", + "version": "0.3.0", + "updated": "2026-05-21", + "abi_version": "15" + }, "typescript": { "repo": "https://github.com/tree-sitter/tree-sitter-typescript", "commit": "75b3874edb2dc714fb1fd77a32013d0f8699989f", diff --git a/contributing/parsers/nix/AUDIT_REPORT.md b/contributing/parsers/nix/AUDIT_REPORT.md new file mode 100644 index 00000000..b7f2c12f --- /dev/null +++ b/contributing/parsers/nix/AUDIT_REPORT.md @@ -0,0 +1,27 @@ +# Nix Parser Symbol Extraction Coverage Report + +## Summary +- Key nodes: 18/18 (100%) +- Total grammar nodes: 63 +- Total implemented: 56 +- Symbol kinds extracted: {"Variable", "Parameter", "Function"} + +## Key Nodes Coverage +- [✓] source_code +- [✓] binding +- [✓] attrset_expression +- [✓] rec_attrset_expression +- [✓] let_expression 
+- [✓] function_expression +- [✓] formals +- [✓] formal +- [✓] inherit +- [✓] inherit_from +- [✓] apply_expression +- [✓] select_expression +- [✓] attrpath +- [✓] identifier +- [✓] if_expression +- [✓] assert_expression +- [✓] with_expression +- [✓] comment diff --git a/contributing/parsers/nix/GRAMMAR_ANALYSIS.md b/contributing/parsers/nix/GRAMMAR_ANALYSIS.md new file mode 100644 index 00000000..1511b9c4 --- /dev/null +++ b/contributing/parsers/nix/GRAMMAR_ANALYSIS.md @@ -0,0 +1,90 @@ +# Nix Grammar Analysis + +*Generated: 2026-05-21 12:23:48 UTC* + +## Statistics +- Total nodes in grammar JSON: 36 +- Nodes found in comprehensive.nix: 63 +- Nodes handled by parser: 56 +- Symbol kinds extracted: 3 + +## Successfully Handled Nodes +These nodes are in examples and handled by parser: +- != +- " +- ${ +- '' +- ( +- ) +- * +- + +- . +- / +- ; +- < +- == +- [ +- ] +- apply_expression +- assert +- assert_expression +- attrpath +- attrset_expression +- binary_expression +- binding +- binding_set +- comment +- else +- formal +- formals +- function_expression +- identifier +- if +- if_expression +- in +- indented_string_expression +- inherit +- inherit_from +- integer_expression +- interpolation +- let +- let_expression +- list_expression +- parenthesized_expression +- path_expression +- path_fragment +- rec +- rec_attrset_expression +- select_expression +- source_code +- spath_expression +- string_expression +- string_fragment +- then +- variable_expression +- with +- with_expression +- { +- } + +## Implementation Gaps +These nodes appear in comprehensive.nix but aren't handled: +- , +- : +- = +- ? 
+- @ +- ellipses +- inherited_attrs + +## Missing from Examples +These grammar nodes aren't in comprehensive.nix: +- float_expression +- has_attr_expression +- unary_expression + +## Symbol Kinds Extracted +- Function +- Parameter +- Variable + diff --git a/contributing/parsers/nix/NODE_MAPPING.md b/contributing/parsers/nix/NODE_MAPPING.md new file mode 100644 index 00000000..6ec2703a --- /dev/null +++ b/contributing/parsers/nix/NODE_MAPPING.md @@ -0,0 +1,88 @@ +# Nix AST Node Mapping + +Discovered from `tree-sitter-nix = "0.3.0"` via `explore_nix_abi15` test (Phase 0). +63 total grammar nodes. + +## Root + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `source_code` | 62 | root; has single `expression:` field | + +## Bindings + +| tree-sitter node | ID | field names | codanna symbol | +|---|---|---|---| +| `binding_set` | 91 | `binding:` (multiple) | container; recurse | +| `binding` | 92 | `attrpath:`, `expression:` | Function / Variable / Constant depending on RHS | +| `attrpath` | 95 | `attr:` (identifier) | key of binding | + +## Attrsets + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `attrset_expression` | 86 | enter Class scope, recurse | +| `rec_attrset_expression` | 88 | enter Class scope (self-referential), recurse | + +## Let + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `let_expression` | 73 | enter Block scope; bindings are Private | + +## Functions / Lambdas + +| tree-sitter node | ID | field names | codanna symbol | +|---|---|---|---| +| `function_expression` | 68 | `universal:` (simple `x:`) OR `formals:` + `body:` | enter Function scope | +| `formals` | 69 | `formal:` (multiple) | iterate for parameters | +| `formal` | 70 | `name:` (identifier), `default:` (optional expr) | Parameter | + +> **`universal`** is the field name for a simple single-identifier lambda parameter (`x: body`). +> **`formals`** is the field name for destructuring pattern (`{ a, b ? 1, ... }:`). 
+> The `@`-pattern sibling identifier appears at the `function_expression` level as an unnamed child. + +## Inherit + +| tree-sitter node | ID | codanna symbol | +|---|---|---| +| `inherit` | 50 | Variable per name in `inherited_attrs` | +| `inherit_from` | 94 | Variable per name in `inherited_attrs` (source in parentheses) | +| `inherited_attrs` | 96 | container for the names | + +## Control flow / other expressions + +| tree-sitter node | ID | codanna handling | +|---|---|---| +| `apply_expression` | 81 | `function:` + `argument:` — recurse; detect `import` calls | +| `select_expression` | 83 | `expression:` + `attrpath:` — recurse | +| `if_expression` | 75 | recurse | +| `assert_expression` | 71 | recurse | +| `with_expression` | 72 | recurse (bindings statically unresolvable) | +| `let_expression` | 73 | enter Block scope | +| `binary_expression` | 79 | recurse | +| `parenthesized_expression` | 85 | recurse | +| `list_expression` | 99 | recurse | + +## Literals / leaf nodes + +| tree-sitter node | ID | notes | +|---|---|---| +| `identifier` | 2 | bare name (used inside attrpath, formal, etc.) | +| `variable_expression` | 64 | wraps `identifier` in expression position; has `name:` field | +| `integer_expression` | 3 | literal integer | +| `string_expression` | 89 | `"..."` string | +| `indented_string_expression` | 90 | `''...''` multiline string | +| `path_expression` | 65 | `/absolute/path` | +| `path_fragment` | 59 | segment inside a path | +| `spath_expression` | 6 | `<nixpkgs>` angle-bracket path | +| `interpolation` | 98 | `${...}` inside strings | +| `string_fragment` | 56 | plain text inside a string | +| `comment` | 55 | `# ...` comment | +| `ellipses` | 14 | `...` in formals | + +## Known limitations + +- `with expr;` bindings are statically unresolvable; references inside `with_expression` are left unresolved. +- String interpolation `${...}` paths in imports are treated as best-effort (raw text recorded). 
+- Complex `attrpath` bindings like `a.b.c = ...` are not emitted as symbols (skipped). diff --git a/contributing/parsers/nix/node-types.json b/contributing/parsers/nix/node-types.json new file mode 100644 index 00000000..6432314c --- /dev/null +++ b/contributing/parsers/nix/node-types.json @@ -0,0 +1,111 @@ +[ + { + "type": "source_code", + "named": true, + "fields": { + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "binding", + "named": true, + "fields": { + "attrpath": { "multiple": false, "required": true, "types": [{ "type": "attrpath", "named": true }] }, + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "binding_set", "named": true }, + { + "type": "attrpath", + "named": true, + "fields": { + "attr": { "multiple": true, "required": true, "types": [{ "type": "identifier", "named": true }, { "type": "string_expression", "named": true }, { "type": "interpolation", "named": true }] } + } + }, + { "type": "attrset_expression", "named": true }, + { "type": "rec_attrset_expression", "named": true }, + { + "type": "let_expression", + "named": true, + "fields": { + "body": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "function_expression", + "named": true, + "fields": { + "universal": { "multiple": false, "required": false, "types": [{ "type": "identifier", "named": true }] }, + "formals": { "multiple": false, "required": false, "types": [{ "type": "formals", "named": true }] }, + "body": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "formals", + "named": true, + "children": { "multiple": true, "required": false, "types": [{ "type": "formal", "named": true }] } + }, + { + "type": "formal", + "named": true, + "fields": { + "name": { "multiple": false, "required": true, "types": [{ "type": 
"identifier", "named": true }] }, + "default": { "multiple": false, "required": false, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "apply_expression", + "named": true, + "fields": { + "function": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "argument": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { + "type": "select_expression", + "named": true, + "fields": { + "expression": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "attrpath": { "multiple": false, "required": true, "types": [{ "type": "attrpath", "named": true }] }, + "default": { "multiple": false, "required": false, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "inherit", "named": true }, + { "type": "inherit_from", "named": true }, + { "type": "inherited_attrs", "named": true }, + { + "type": "if_expression", + "named": true, + "fields": { + "condition": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "consequence": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] }, + "alternative": { "multiple": false, "required": true, "types": [{ "type": "_expr", "named": true }] } + } + }, + { "type": "assert_expression", "named": true }, + { "type": "with_expression", "named": true }, + { "type": "binary_expression", "named": true }, + { "type": "unary_expression", "named": true }, + { "type": "has_attr_expression", "named": true }, + { "type": "parenthesized_expression", "named": true }, + { "type": "list_expression", "named": true }, + { + "type": "variable_expression", + "named": true, + "fields": { + "name": { "multiple": false, "required": true, "types": [{ "type": "identifier", "named": true }] } + } + }, + { "type": "identifier", "named": true }, + { "type": "integer_expression", "named": true }, + { "type": "float_expression", "named": true }, + 
{ "type": "string_expression", "named": true }, + { "type": "indented_string_expression", "named": true }, + { "type": "path_expression", "named": true }, + { "type": "spath_expression", "named": true }, + { "type": "interpolation", "named": true }, + { "type": "string_fragment", "named": true }, + { "type": "path_fragment", "named": true }, + { "type": "comment", "named": true }, + { "type": "ellipses", "named": true } +] diff --git a/contributing/parsers/nix/node_discovery.txt b/contributing/parsers/nix/node_discovery.txt new file mode 100644 index 00000000..098f1b6e --- /dev/null +++ b/contributing/parsers/nix/node_discovery.txt @@ -0,0 +1,87 @@ +=== Nix Language NODE MAPPING === + Generated: 2026-05-21 12:23:48 UTC + ABI Version: 13 + Node kind count: 63 + +=== ROOT NODES === + + source_code -> ID: 62 + +=== BINDING NODES === + + binding_set -> ID: 91 + + binding -> ID: 92 + + attrpath -> ID: 95 + + identifier -> ID: 2 + +=== ATTRSET NODES === + + attrset_expression -> ID: 86 + + rec_attrset_expression -> ID: 88 + +=== FUNCTION NODES === + + function_expression -> ID: 68 + + formals -> ID: 69 + + formal -> ID: 70 + + apply_expression -> ID: 81 + +=== SCOPE NODES === + + let_expression -> ID: 73 + + with_expression -> ID: 72 + + inherit -> ID: 50 + + inherit_from -> ID: 94 + + inherited_attrs -> ID: 96 + +=== EXPRESSION NODES === + + if_expression -> ID: 75 + + assert_expression -> ID: 71 + + select_expression -> ID: 83 + + binary_expression -> ID: 79 + + parenthesized_expression -> ID: 85 + + list_expression -> ID: 99 + + variable_expression -> ID: 64 + +=== LITERAL NODES === + + integer_expression -> ID: 3 + + string_expression -> ID: 89 + + indented_string_expression -> ID: 90 + + path_expression -> ID: 65 + + spath_expression -> ID: 6 + + interpolation -> ID: 98 + +=== COMMENT NODES === + + comment -> ID: 55 + +=== UNCATEGORIZED NODES === + + != -> ID: 26 + + " -> ID: 44 + + ${ -> ID: 51 + + '' -> ID: 47 + + ( -> ID: 41 + + ) -> ID: 42 + + * -> ID: 34 + + + 
-> ID: 33 + + , -> ID: 12 + + . -> ID: 39 + + / -> ID: 35 + + : -> ID: 8 + + ; -> ID: 16 + + < -> ID: 27 + + = -> ID: 49 + + == -> ID: 25 + + ? -> ID: 13 + + @ -> ID: 9 + + [ -> ID: 53 + + ] -> ID: 54 + + assert -> ID: 15 + + ellipses -> ID: 14 + + else -> ID: 22 + + if -> ID: 20 + + in -> ID: 19 + + let -> ID: 18 + + path_fragment -> ID: 59 + + rec -> ID: 43 + + string_fragment -> ID: 56 + + then -> ID: 21 + + with -> ID: 17 + + { -> ID: 10 + + } -> ID: 11 + +Legend: + = found in file, o = in grammar but not in file, x = not in grammar diff --git a/examples/nix/comprehensive.nix b/examples/nix/comprehensive.nix new file mode 100644 index 00000000..e7400d43 --- /dev/null +++ b/examples/nix/comprehensive.nix @@ -0,0 +1,127 @@ +# Comprehensive Nix example — one attrset at the top level (valid Nix) +{ + # --- simple lambda --- + identity = x: x; + + # --- nested lambda (curried) --- + add = a: b: a + b; + + # --- attrset binding --- + config = { + host = "localhost"; + port = 8080; + debug = false; + }; + + # --- rec attrset (self-referential) --- + defaults = rec { + base = "/var/lib"; + data = "${base}/data"; + logs = "${base}/logs"; + }; + + # --- let expression --- + result = + let + x = 10; + y = 20; + inner = v: v * 2; + in + inner x + y; + + # --- formals with defaults and ellipsis --- + mkService = { name, port ? 8080, debug ? false, ... }: { + inherit name port debug; + description = "Service: ${name}"; + }; + + # --- @ pattern (bind whole set + named fields) --- + mkDerivation = args @ { name, src, buildInputs ? [], ... 
}: + derivation { + inherit name src buildInputs; + system = builtins.currentSystem; + builder = "/bin/sh"; + }; + + # --- inherit from source --- + pkgAttrs = + let pkgs = import <nixpkgs> {}; + in { + inherit (pkgs) stdenv fetchurl; + lib = pkgs.lib; + }; + + # --- with expression --- + withExample = with builtins; [ + (toString 42) + (typeOf "hello") + (length [ 1 2 3 ]) + ]; + + # --- if expression --- + classify = n: + if n < 0 then "negative" + else if n == 0 then "zero" + else "positive"; + + # --- assert --- + safeDivide = a: b: + assert b != 0; + a / b; + + # --- import with path --- + nixpkgsLib = import <nixpkgs/lib>; + + # --- select expression (attrpath) --- + version = builtins.currentSystem; + + # --- list --- + items = [ 1 2 3 "four" true ]; + + # --- string interpolation --- + greeting = name: "Hello, ${name}!"; + + # --- multiline string --- + script = '' + #!/bin/bash + echo "hello" + exit 0 + ''; + + # --- path expression --- + configPath = /etc/nixos/configuration.nix; + + # --- spath (angle-bracket path) --- + nixpkgsPath = <nixpkgs>; + + # --- inherit without source --- + passThrough = { a, b, c }: { + inherit a b c; + }; + + # --- nested attrset access (select_expression) --- + deep = { + a = { + b = { + c = 42; + }; + }; + }; + + # --- function as attrset value (common nixpkgs pattern) --- + lib = { + mkOption = { type, default, description ? 
"" }: { + _type = "option"; + inherit type default description; + }; + + types = rec { + str = { name = "str"; check = builtins.isString; }; + int = { name = "int"; check = builtins.isInt; }; + listOf = element: { + name = "listOf"; + check = x: builtins.isList x; + }; + }; + }; +} diff --git a/examples/nix/main.nix b/examples/nix/main.nix new file mode 100644 index 00000000..c9ce6789 --- /dev/null +++ b/examples/nix/main.nix @@ -0,0 +1,26 @@ +# Entry point example — a minimal NixOS/nixpkgs-style package set +let + pkgs = import {}; +in +{ + # A simple derivation + hello = pkgs.stdenv.mkDerivation { + name = "hello-1.0"; + src = ./src; + buildPhase = '' + gcc -o hello main.c + ''; + installPhase = '' + mkdir -p $out/bin + cp hello $out/bin/ + ''; + }; + + # A shell for development + devShell = pkgs.mkShell { + buildInputs = [ pkgs.gcc pkgs.gnumake ]; + shellHook = '' + echo "Dev shell ready" + ''; + }; +} diff --git a/flake.lock b/flake.lock index fe873248..5feb4391 100644 --- a/flake.lock +++ b/flake.lock @@ -109,11 +109,11 @@ ] }, "locked": { - "lastModified": 1774062094, - "narHash": "sha256-ba3c+hS7KzEiwtZRGHagIAYdcmdY3rCSWVCyn64rx7s=", + "lastModified": 1779333539, + "narHash": "sha256-lpmN2lrBDZDPjov2cbD3bOOJsI0fkKolKXasYPCqSys=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "c807e83cc2e32adc35f51138b3bdef722c0812ab", + "rev": "672fa5fc5608d5cd82286a6f69aaf84a40b4fe41", "type": "github" }, "original": { diff --git a/src/io/parse.rs b/src/io/parse.rs index b059d337..89c342ef 100644 --- a/src/io/parse.rs +++ b/src/io/parse.rs @@ -264,6 +264,7 @@ pub fn execute_parse( Language::Java => tree_sitter_java::LANGUAGE.into(), Language::Kotlin => tree_sitter_kotlin::language(), Language::Lua => tree_sitter_lua::LANGUAGE.into(), + Language::Nix => tree_sitter_nix::LANGUAGE.into(), Language::Swift => tree_sitter_swift::LANGUAGE.into(), }; diff --git a/src/parsing/factory.rs b/src/parsing/factory.rs index d35037a3..6678694f 100644 --- a/src/parsing/factory.rs +++ 
b/src/parsing/factory.rs @@ -7,9 +7,9 @@ use super::{ CBehavior, CParser, CSharpBehavior, CSharpParser, ClojureBehavior, ClojureParser, CppBehavior, CppParser, GdscriptBehavior, GdscriptParser, GoBehavior, GoParser, JavaBehavior, JavaParser, JavaScriptBehavior, JavaScriptParser, KotlinBehavior, KotlinParser, Language, LanguageBehavior, - LanguageId, LanguageParser, LuaBehavior, LuaParser, PhpBehavior, PhpParser, PythonBehavior, - PythonParser, RustBehavior, RustParser, SwiftBehavior, SwiftParser, TypeScriptBehavior, - TypeScriptParser, get_registry, + LanguageId, LanguageParser, LuaBehavior, LuaParser, NixBehavior, NixParser, PhpBehavior, + PhpParser, PythonBehavior, PythonParser, RustBehavior, RustParser, SwiftBehavior, SwiftParser, + TypeScriptBehavior, TypeScriptParser, get_registry, }; use crate::{IndexError, IndexResult, Settings}; use std::sync::Arc; @@ -186,6 +186,10 @@ impl ParserFactory { let parser = LuaParser::new().map_err(|e| IndexError::General(e.to_string()))?; Ok(Box::new(parser)) } + Language::Nix => { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + Ok(Box::new(parser)) + } Language::Swift => { let parser = SwiftParser::new().map_err(|e| IndexError::General(e.to_string()))?; Ok(Box::new(parser)) @@ -329,6 +333,13 @@ impl ParserFactory { behavior: Box::new(LuaBehavior::new()), } } + Language::Nix => { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + ParserWithBehavior { + parser: Box::new(parser), + behavior: Box::new(NixBehavior::new()), + } + } Language::Swift => { let parser = SwiftParser::new().map_err(|e| IndexError::General(e.to_string()))?; ParserWithBehavior { @@ -373,6 +384,7 @@ impl ParserFactory { Language::JavaScript, Language::Kotlin, Language::Lua, + Language::Nix, Language::Php, Language::Python, Language::Rust, diff --git a/src/parsing/language.rs b/src/parsing/language.rs index e0d85092..23471fc8 100644 --- a/src/parsing/language.rs +++ b/src/parsing/language.rs 
@@ -22,6 +22,7 @@ pub enum Language { Java, Kotlin, Lua, + Nix, Swift, } @@ -47,6 +48,7 @@ impl Language { Language::Java => super::LanguageId::new("java"), Language::Kotlin => super::LanguageId::new("kotlin"), Language::Lua => super::LanguageId::new("lua"), + Language::Nix => super::LanguageId::new("nix"), Language::Swift => super::LanguageId::new("swift"), } } @@ -110,6 +112,7 @@ impl Language { "java" => Some(Language::Java), "kt" | "kts" => Some(Language::Kotlin), "lua" => Some(Language::Lua), + "nix" => Some(Language::Nix), "swift" => Some(Language::Swift), _ => None, } @@ -141,6 +144,7 @@ impl Language { Language::Java => &["java"], Language::Kotlin => &["kt", "kts"], Language::Lua => &["lua"], + Language::Nix => &["nix"], Language::Swift => &["swift"], } } @@ -162,6 +166,7 @@ impl Language { Language::Java => "java", Language::Kotlin => "kotlin", Language::Lua => "lua", + Language::Nix => "nix", Language::Swift => "swift", } } @@ -183,6 +188,7 @@ impl Language { Language::Java => "Java", Language::Kotlin => "Kotlin", Language::Lua => "Lua", + Language::Nix => "Nix", Language::Swift => "Swift", } } diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs index 2837f68c..c3adc442 100644 --- a/src/parsing/mod.rs +++ b/src/parsing/mod.rs @@ -15,6 +15,7 @@ pub mod language; pub mod language_behavior; pub mod lua; pub mod method_call; +pub mod nix; pub mod parser; pub mod paths; pub mod php; @@ -43,6 +44,7 @@ pub use language_behavior::{ }; pub use lua::{LuaBehavior, LuaParser}; pub use method_call::{MethodCall, MethodCallResolver}; +pub use nix::{NixBehavior, NixParser}; pub use parser::{ HandledNode, LanguageParser, NodeTracker, NodeTrackingState, safe_substring_window, safe_truncate_str, truncate_for_display, diff --git a/src/parsing/nix/audit.rs b/src/parsing/nix/audit.rs new file mode 100644 index 00000000..0f345437 --- /dev/null +++ b/src/parsing/nix/audit.rs @@ -0,0 +1,169 @@ +use super::NixParser; +use crate::parsing::{LanguageParser, NodeTracker}; +use 
crate::types::FileId; +use std::collections::{HashMap, HashSet}; +use thiserror::Error; +use tree_sitter::{Node, Parser}; + +#[derive(Error, Debug)] +pub enum AuditError { + #[error("IO error: {0}")] + FileRead(#[from] std::io::Error), + #[error("Language setup error: {0}")] + LanguageSetup(String), + #[error("Parse failure")] + ParseFailure, + #[error("Parser creation error: {0}")] + ParserCreation(String), +} + +pub struct NixParserAudit { + pub grammar_nodes: HashMap, + pub implemented_nodes: HashSet, + pub extracted_symbol_kinds: HashSet, +} + +impl NixParserAudit { + pub fn audit_file(file_path: &str) -> Result { + let code = std::fs::read_to_string(file_path)?; + Self::audit_code(&code) + } + + pub fn audit_code(code: &str) -> Result { + let mut parser = Parser::new(); + let language = tree_sitter_nix::LANGUAGE.into(); + parser + .set_language(&language) + .map_err(|e| AuditError::LanguageSetup(e.to_string()))?; + + let tree = parser.parse(code, None).ok_or(AuditError::ParseFailure)?; + + let mut grammar_nodes = HashMap::new(); + discover_nodes(tree.root_node(), &mut grammar_nodes); + + let mut nix_parser = + NixParser::new().map_err(|e| AuditError::ParserCreation(e.to_string()))?; + let file_id = FileId::new(1).unwrap(); + let mut symbol_counter = crate::types::SymbolCounter::new(); + let symbols = nix_parser.parse(code, file_id, &mut symbol_counter); + + let mut extracted_symbol_kinds = HashSet::new(); + for symbol in &symbols { + extracted_symbol_kinds.insert(format!("{:?}", symbol.kind)); + } + + let implemented_nodes: HashSet = nix_parser + .get_handled_nodes() + .iter() + .map(|n| n.name.clone()) + .collect(); + + Ok(Self { + grammar_nodes, + implemented_nodes, + extracted_symbol_kinds, + }) + } + + pub fn generate_report(&self) -> String { + let key_nodes = vec![ + "source_code", + "binding", + "attrset_expression", + "rec_attrset_expression", + "let_expression", + "function_expression", + "formals", + "formal", + "inherit", + "inherit_from", + 
"apply_expression", + "select_expression", + "attrpath", + "identifier", + "if_expression", + "assert_expression", + "with_expression", + "comment", + ]; + + let key_implemented = key_nodes + .iter() + .filter(|n| self.implemented_nodes.contains(**n)) + .count(); + + let mut report = String::new(); + report.push_str("# Nix Parser Symbol Extraction Coverage Report\n\n"); + report.push_str("## Summary\n"); + report.push_str(&format!( + "- Key nodes: {}/{} ({:.0}%)\n", + key_implemented, + key_nodes.len(), + (key_implemented as f64 / key_nodes.len() as f64) * 100.0 + )); + report.push_str(&format!( + "- Total grammar nodes: {}\n", + self.grammar_nodes.len() + )); + report.push_str(&format!( + "- Total implemented: {}\n", + self.implemented_nodes.len() + )); + report.push_str(&format!( + "- Symbol kinds extracted: {:?}\n\n", + self.extracted_symbol_kinds + )); + + report.push_str("## Key Nodes Coverage\n"); + for node in &key_nodes { + let status = if self.implemented_nodes.contains(*node) { + "✓" + } else { + "✗" + }; + report.push_str(&format!("- [{status}] {node}\n")); + } + report + } +} + +pub fn discover_nodes(node: Node, registry: &mut HashMap) { + let mut stack = vec![node]; + while let Some(current) = stack.pop() { + registry.insert(current.kind().to_string(), current.kind_id()); + let mut cursor = current.walk(); + for child in current.children(&mut cursor) { + stack.push(child); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn audit_nix_comprehensive() { + let code = std::fs::read_to_string(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/nix/comprehensive.nix" + )) + .unwrap_or_else(|_| r#"{ x = 1; add = a: b: a + b; inherit x; }"#.to_string()); + + let audit = NixParserAudit::audit_code(&code).unwrap(); + let report = audit.generate_report(); + println!("{report}"); + + assert!( + !audit.grammar_nodes.is_empty(), + "Should have discovered grammar nodes" + ); + } + + #[test] + fn test_audit_simple_nix() { + let code = r#"{ x = 1; 
add = a: b: a + b; }"#; + let audit = NixParserAudit::audit_code(code).unwrap(); + assert!(audit.grammar_nodes.contains_key("binding")); + } +} diff --git a/src/parsing/nix/behavior.rs b/src/parsing/nix/behavior.rs new file mode 100644 index 00000000..6c943b9a --- /dev/null +++ b/src/parsing/nix/behavior.rs @@ -0,0 +1,224 @@ +use crate::Visibility; +use crate::parsing::LanguageBehavior; +use crate::parsing::behavior_state::{BehaviorState, StatefulBehavior}; +use crate::parsing::resolution::{InheritanceResolver, ResolutionScope}; +use crate::types::FileId; +use std::path::{Path, PathBuf}; +use tree_sitter::Language; + +use super::resolution::{NixInheritanceResolver, NixResolutionContext}; + +#[derive(Clone)] +pub struct NixBehavior { + state: BehaviorState, +} + +impl NixBehavior { + pub fn new() -> Self { + Self { + state: BehaviorState::new(), + } + } +} + +impl Default for NixBehavior { + fn default() -> Self { + Self::new() + } +} + +impl StatefulBehavior for NixBehavior { + fn state(&self) -> &BehaviorState { + &self.state + } +} + +impl LanguageBehavior for NixBehavior { + fn language_id(&self) -> crate::parsing::registry::LanguageId { + crate::parsing::registry::LanguageId::new("nix") + } + + fn format_module_path(&self, base_path: &str, _symbol_name: &str) -> String { + base_path.to_string() + } + + fn get_language(&self) -> Language { + tree_sitter_nix::LANGUAGE.into() + } + + fn module_separator(&self) -> &'static str { + "." + } + + fn format_path_as_module(&self, components: &[&str]) -> Option { + if components.is_empty() { + Some(".".to_string()) + } else { + Some(components.join(".")) + } + } + + fn module_path_from_file( + &self, + file_path: &Path, + project_root: &Path, + extensions: &[&str], + ) -> Option { + use crate::parsing::paths::strip_extension; + + let relative_path = if file_path.is_absolute() { + file_path.strip_prefix(project_root).ok()? 
+ } else { + file_path + }; + + let path = relative_path.to_str()?; + let path_clean = path.trim_start_matches("./"); + let module_path = strip_extension(path_clean, extensions); + let module_path = module_path.replace(['/', '\\'], "."); + + if module_path.is_empty() { + Some(".".to_string()) + } else { + Some(module_path) + } + } + + fn parse_visibility(&self, _signature: &str) -> Visibility { + // Nix has no visibility keywords — callers set Public/Private based on context. + Visibility::Public + } + + fn supports_traits(&self) -> bool { + false + } + + fn supports_inherent_methods(&self) -> bool { + false + } + + fn create_resolution_context(&self, file_id: FileId) -> Box { + Box::new(NixResolutionContext::new(file_id)) + } + + fn create_inheritance_resolver(&self) -> Box { + Box::new(NixInheritanceResolver::new()) + } + + fn inheritance_relation_name(&self) -> &'static str { + "extends" + } + + fn map_relationship(&self, language_specific: &str) -> crate::relationship::RelationKind { + use crate::relationship::RelationKind; + match language_specific { + "extends" => RelationKind::Extends, + "uses" => RelationKind::Uses, + "calls" => RelationKind::Calls, + "defines" => RelationKind::Defines, + _ => RelationKind::References, + } + } + + fn register_file(&self, path: PathBuf, file_id: FileId, module_path: String) { + self.register_file_with_state(path, file_id, module_path); + } + + fn add_import(&self, import: crate::parsing::Import) { + self.add_import_with_state(import); + } + + fn get_imports_for_file(&self, file_id: FileId) -> Vec { + self.get_imports_from_state(file_id) + } + + fn is_resolvable_symbol(&self, symbol: &crate::Symbol) -> bool { + use crate::SymbolKind; + use crate::symbol::ScopeContext; + + let module_level = matches!( + symbol.kind, + SymbolKind::Function + | SymbolKind::Class + | SymbolKind::Constant + | SymbolKind::Variable + | SymbolKind::Field + ); + if module_level { + return true; + } + + if let Some(ref scope_context) = 
symbol.scope_context { + match scope_context { + ScopeContext::Module | ScopeContext::Global | ScopeContext::Package => true, + ScopeContext::Local { .. } | ScopeContext::Parameter => false, + ScopeContext::ClassMember { .. } => { + matches!(symbol.visibility, Visibility::Public) + } + } + } else { + false + } + } + + fn get_module_path_for_file(&self, file_id: FileId) -> Option { + self.state.get_module_path(file_id) + } + + fn configure_symbol(&self, symbol: &mut crate::Symbol, module_path: Option<&str>) { + if let Some(path) = module_path { + symbol.module_path = Some(path.to_string().into()); + } + if symbol.module_path.is_none() { + symbol.module_path = Some(".".to_string().into()); + } + } + + fn import_matches_symbol( + &self, + import_path: &str, + symbol_module_path: &str, + _importing_module: Option<&str>, + ) -> bool { + if import_path == symbol_module_path { + return true; + } + let normalized = import_path.replace(['/', '\\'], "."); + normalized == symbol_module_path + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_module_separator() { + assert_eq!(NixBehavior::new().module_separator(), "."); + } + + #[test] + fn test_supports_traits() { + assert!(!NixBehavior::new().supports_traits()); + } + + #[test] + fn test_module_path_from_file() { + let behavior = NixBehavior::new(); + let temp_dir = TempDir::new().unwrap(); + let root = temp_dir.path(); + + let file = root.join("pkgs/hello.nix"); + assert_eq!( + behavior.module_path_from_file(&file, root, &["nix"]), + Some("pkgs.hello".to_string()) + ); + + let file = root.join("default.nix"); + assert_eq!( + behavior.module_path_from_file(&file, root, &["nix"]), + Some("default".to_string()) + ); + } +} diff --git a/src/parsing/nix/definition.rs b/src/parsing/nix/definition.rs new file mode 100644 index 00000000..eab901f2 --- /dev/null +++ b/src/parsing/nix/definition.rs @@ -0,0 +1,89 @@ +use crate::parsing::{ + LanguageBehavior, LanguageDefinition, LanguageId, 
LanguageParser, LanguageRegistry, +}; +use crate::{IndexError, IndexResult, Settings}; +use std::sync::Arc; + +use super::{NixBehavior, NixParser}; + +pub struct NixLanguage; + +impl LanguageDefinition for NixLanguage { + fn id(&self) -> LanguageId { + LanguageId::new("nix") + } + + fn name(&self) -> &'static str { + "Nix" + } + + fn extensions(&self) -> &'static [&'static str] { + &["nix"] + } + + fn create_parser(&self, _settings: &Settings) -> IndexResult> { + let parser = NixParser::new().map_err(|e| IndexError::General(e.to_string()))?; + Ok(Box::new(parser)) + } + + fn create_behavior(&self) -> Box { + Box::new(NixBehavior::new()) + } + + fn default_enabled(&self) -> bool { + false + } + + fn is_enabled(&self, settings: &Settings) -> bool { + settings + .languages + .get(self.id().as_str()) + .map(|config| config.enabled) + .unwrap_or(self.default_enabled()) + } +} + +pub(crate) fn register(registry: &mut LanguageRegistry) { + registry.register(Arc::new(NixLanguage)); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_nix_language_id() { + assert_eq!(NixLanguage.id(), LanguageId::new("nix")); + } + + #[test] + fn test_nix_language_name() { + assert_eq!(NixLanguage.name(), "Nix"); + } + + #[test] + fn test_nix_file_extensions() { + assert_eq!(NixLanguage.extensions(), &["nix"]); + } + + #[test] + fn test_nix_disabled_by_default() { + assert!(!NixLanguage.default_enabled()); + } + + #[test] + fn test_nix_parser_creation() { + let settings = Settings::default(); + let result = NixLanguage.create_parser(&settings); + assert!(result.is_ok()); + assert_eq!(result.unwrap().language(), crate::parsing::Language::Nix); + } + + #[test] + fn test_nix_language_registry_registration() { + use crate::parsing::LanguageRegistry; + let mut registry = LanguageRegistry::new(); + register(&mut registry); + assert!(registry.get(LanguageId::new("nix")).is_some()); + } +} diff --git a/src/parsing/nix/mod.rs b/src/parsing/nix/mod.rs new file mode 100644 index 
00000000..833d9f6c --- /dev/null +++ b/src/parsing/nix/mod.rs @@ -0,0 +1,11 @@ +pub mod audit; +mod behavior; +mod definition; +mod parser; +mod resolution; + +pub use behavior::NixBehavior; +pub use definition::NixLanguage; +pub(crate) use definition::register; +pub use parser::NixParser; +pub use resolution::{NixInheritanceResolver, NixResolutionContext}; diff --git a/src/parsing/nix/parser.rs b/src/parsing/nix/parser.rs new file mode 100644 index 00000000..f26c3a56 --- /dev/null +++ b/src/parsing/nix/parser.rs @@ -0,0 +1,743 @@ +use crate::parsing::parser::check_recursion_depth; +use crate::parsing::{ + HandledNode, Import, LanguageParser, NodeTracker, NodeTrackingState, ParserContext, ScopeType, +}; +use crate::types::SymbolCounter; +use crate::{FileId, Range, Symbol, SymbolKind, Visibility}; +use std::any::Any; +use tree_sitter::{Node, Parser}; + +pub struct NixParser { + parser: Parser, + context: ParserContext, + node_tracker: NodeTrackingState, +} + +fn range_from_node(node: &Node) -> Range { + let start = node.start_position(); + let end = node.end_position(); + Range::new( + start.row as u32, + start.column as u16, + end.row as u32, + end.column as u16, + ) +} + +impl NixParser { + pub fn new() -> Result { + let mut parser = Parser::new(); + let lang = tree_sitter_nix::LANGUAGE; + parser + .set_language(&lang.into()) + .map_err(|e| format!("Failed to set Nix language: {e}"))?; + + Ok(Self { + parser, + context: ParserContext::new(), + node_tracker: NodeTrackingState::new(), + }) + } + + fn create_symbol( + &self, + id: crate::types::SymbolId, + name: String, + kind: SymbolKind, + file_id: FileId, + range: Range, + signature: Option, + doc_comment: Option, + module_path: &str, + visibility: Visibility, + ) -> Symbol { + let mut symbol = Symbol::new(id, name, kind, file_id, range); + if let Some(sig) = signature { + symbol = symbol.with_signature(sig); + } + if let Some(doc) = doc_comment { + symbol = symbol.with_doc(doc); + } + if !module_path.is_empty() { 
+ symbol = symbol.with_module_path(module_path); + } + symbol = symbol.with_visibility(visibility); + symbol.scope_context = Some(self.context.current_scope_context()); + symbol + } + + fn node_text<'a>(&self, node: &Node, code: &'a str) -> &'a str { + &code[node.byte_range()] + } + + /// Check whether a binding's `expression` child is a function_expression (lambda). + fn value_is_function(node: Node) -> bool { + if let Some(expr) = node.child_by_field_name("expression") { + let kind = expr.kind(); + if kind == "function_expression" { + return true; + } + if kind == "parenthesized_expression" { + let mut cursor = expr.walk(); + for child in expr.children(&mut cursor) { + if child.kind() == "function_expression" { + return true; + } + } + } + } + false + } + + fn extract_symbols_from_node( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + if !check_recursion_depth(depth, node) { + return; + } + + self.node_tracker + .register_handled_node(node.kind(), node.kind_id()); + + match node.kind() { + // ── root ───────────────────────────────────────────────────────── + "source_code" => { + // source_code has field `expression:` pointing to the root expr + if let Some(expr) = node.child_by_field_name("expression") { + self.extract_symbols_from_node( + expr, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } else { + self.recurse_children( + node, + code, + file_id, + counter, + symbols, + module_path, + depth, + ); + } + } + + // ── binding_set (container inside attrsets / let) ──────────────── + "binding_set" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── binding ────────────────────────────────────────────────────── + "binding" => { + self.process_binding(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── attrset_expression 
──────────────────────────────────────────── + "attrset_expression" => { + self.context.enter_scope(ScopeType::Class); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── rec_attrset_expression ──────────────────────────────────────── + "rec_attrset_expression" => { + self.context.enter_scope(ScopeType::Class); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── let_expression ──────────────────────────────────────────────── + "let_expression" => { + self.context.enter_scope(ScopeType::Block); + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + self.context.exit_scope(); + } + + // ── function_expression (lambda) ────────────────────────────────── + // Fields confirmed: `universal:` (simple `x:` param), `formals:`, `body:` + "function_expression" => { + self.context.enter_scope(ScopeType::hoisting_function()); + + if let Some(formals) = node.child_by_field_name("formals") { + self.process_formals(formals, code, file_id, counter, symbols, module_path); + } else if let Some(param) = node.child_by_field_name("universal") { + // simple `x: body` form — param is an identifier + if param.kind() == "identifier" { + let name = self.node_text(¶m, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(¶m), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + } + } + + if let Some(body) = node.child_by_field_name("body") { + self.extract_symbols_from_node( + body, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + self.context.exit_scope(); + } + + // ── inherit ─────────────────────────────────────────────────────── + // `inherit a b c;` — names inside `inherited_attrs` + "inherit" => { + let mut cursor = node.walk(); + for child in 
node.children(&mut cursor) { + if child.kind() == "inherited_attrs" { + let mut c2 = child.walk(); + for attr in child.children(&mut c2) { + if attr.kind() == "identifier" { + let name = self.node_text(&attr, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&attr), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } else if child.kind() == "identifier" { + // some grammar versions put identifiers directly + let name = self.node_text(&child, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&child), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } + + // ── inherit_from ────────────────────────────────────────────────── + // `inherit (src) a b c;` + "inherit_from" => { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.kind() == "inherited_attrs" { + let mut c2 = child.walk(); + for attr in child.children(&mut c2) { + if attr.kind() == "identifier" { + let name = self.node_text(&attr, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Variable, + file_id, + range_from_node(&attr), + Some(name), + None, + module_path, + Visibility::Public, + ); + symbols.push(sym); + } + } + } + } + } + + // ── apply_expression (function call / import) ───────────────────── + "apply_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── select_expression (a.b.c) ───────────────────────────────────── + "select_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── with_expression ─────────────────────────────────────────────── + "with_expression" => { + self.recurse_children(node, code, file_id, counter, 
symbols, module_path, depth); + } + + // ── if_expression ───────────────────────────────────────────────── + "if_expression" | "assert_expression" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── variable_expression — leaf, no symbols emitted ──────────────── + "variable_expression" => {} + + // ── ERROR — recurse ─────────────────────────────────────────────── + "ERROR" => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + + // ── everything else — pass through ──────────────────────────────── + _ => { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + } + } + } + + fn recurse_children( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + self.extract_symbols_from_node( + child, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + } + + fn process_binding( + &mut self, + node: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + depth: usize, + ) { + // Key field is `attrpath:`, value field is `expression:` + let Some(key_node) = node.child_by_field_name("attrpath") else { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + return; + }; + + let name = self.node_text(&key_node, code).to_string(); + // Only emit symbols for simple single-component names; skip `a.b.c` paths + if name.contains('.') || name.contains('"') || name.contains('$') { + self.recurse_children(node, code, file_id, counter, symbols, module_path, depth); + return; + } + + let is_func = Self::value_is_function(node); + let (kind, visibility) = if is_func { + (SymbolKind::Function, Visibility::Public) + } else if name.chars().all(|c| c.is_uppercase() || c == '_') && 
name.len() > 1 { + (SymbolKind::Constant, Visibility::Public) + } else { + (SymbolKind::Variable, Visibility::Public) + }; + + let doc_comment = self.extract_nix_doc_comment(&node, code); + let range = range_from_node(&node); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + kind, + file_id, + range, + Some(name), + doc_comment, + module_path, + visibility, + ); + symbols.push(sym); + + // Recurse into value so nested structures are also visited + if let Some(value) = node.child_by_field_name("expression") { + self.extract_symbols_from_node( + value, + code, + file_id, + counter, + symbols, + module_path, + depth + 1, + ); + } + } + + fn process_formals( + &mut self, + formals: Node, + code: &str, + file_id: FileId, + counter: &mut SymbolCounter, + symbols: &mut Vec, + module_path: &str, + ) { + self.node_tracker + .register_handled_node(formals.kind(), formals.kind_id()); + + let mut cursor = formals.walk(); + for child in formals.children(&mut cursor) { + if child.kind() == "formal" { + self.node_tracker + .register_handled_node(child.kind(), child.kind_id()); + // formal has field `name:` (identifier) + if let Some(name_node) = child.child_by_field_name("name") { + let name = self.node_text(&name_node, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(&name_node), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + } else { + // fallback: first identifier child + let mut c2 = child.walk(); + for fc in child.children(&mut c2) { + if fc.kind() == "identifier" { + let name = self.node_text(&fc, code).to_string(); + let sym = self.create_symbol( + counter.next_id(), + name.clone(), + SymbolKind::Parameter, + file_id, + range_from_node(&fc), + Some(name), + None, + module_path, + Visibility::Private, + ); + symbols.push(sym); + break; + } + } + } + } + } + } + + fn extract_nix_doc_comment(&self, node: &Node, code: &str) 
-> Option { + let mut prev = node.prev_sibling(); + let mut comments = Vec::new(); + + while let Some(sibling) = prev { + if sibling.kind() == "comment" { + let text = &code[sibling.byte_range()]; + let trimmed = text.trim_start_matches('#').trim(); + if !trimmed.is_empty() { + comments.push(trimmed.to_string()); + } + prev = sibling.prev_sibling(); + } else { + break; + } + } + + if comments.is_empty() { + return None; + } + comments.reverse(); + Some(comments.join("\n")) + } +} + +impl NodeTracker for NixParser { + fn get_handled_nodes(&self) -> &std::collections::HashSet { + self.node_tracker.get_handled_nodes() + } + + fn register_handled_node(&mut self, node_kind: &str, node_id: u16) { + self.node_tracker.register_handled_node(node_kind, node_id); + } +} + +impl LanguageParser for NixParser { + fn parse( + &mut self, + code: &str, + file_id: FileId, + symbol_counter: &mut SymbolCounter, + ) -> Vec { + self.context = ParserContext::new(); + let mut symbols = Vec::new(); + + if let Some(tree) = self.parser.parse(code, None) { + let root_node = tree.root_node(); + self.extract_symbols_from_node( + root_node, + code, + file_id, + symbol_counter, + &mut symbols, + "", + 0, + ); + } + + symbols + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn extract_doc_comment(&self, node: &Node, code: &str) -> Option { + self.extract_nix_doc_comment(node, code) + } + + fn find_calls<'a>(&mut self, code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + let Some(tree) = self.parser.parse(code, None) else { + return Vec::new(); + }; + + let mut results = Vec::new(); + Self::collect_calls(tree.root_node(), code, &mut results); + results + } + + fn find_implementations<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_uses<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_defines<'a>(&mut self, _code: &'a str) -> Vec<(&'a str, &'a str, Range)> { + Vec::new() + } + + fn find_imports(&mut 
self, code: &str, file_id: FileId) -> Vec { + let Some(tree) = self.parser.parse(code, None) else { + return Vec::new(); + }; + + let mut imports = Vec::new(); + Self::collect_imports(tree.root_node(), code, file_id, &mut imports); + imports + } + + fn language(&self) -> crate::parsing::Language { + crate::parsing::Language::Nix + } +} + +impl NixParser { + fn collect_calls<'a>(node: Node, code: &'a str, results: &mut Vec<(&'a str, &'a str, Range)>) { + if node.kind() == "apply_expression" { + if let Some(func) = node.child_by_field_name("function") { + if matches!(func.kind(), "variable_expression" | "select_expression") { + let callee = &code[func.byte_range()]; + results.push(("", callee, range_from_node(&node))); + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + Self::collect_calls(child, code, results); + } + } + + fn collect_imports(node: Node, code: &str, file_id: FileId, imports: &mut Vec) { + // `import ./path` or `import ` + if node.kind() == "apply_expression" { + if let Some(func) = node.child_by_field_name("function") { + let func_text = &code[func.byte_range()]; + if func_text == "import" { + if let Some(arg) = node.child_by_field_name("argument") { + let raw = &code[arg.byte_range()]; + let path = raw + .trim_matches('<') + .trim_matches('>') + .trim_matches('"') + .to_string(); + if !path.is_empty() { + imports.push(Import { + path, + alias: None, + file_id, + is_glob: false, + is_type_only: false, + }); + return; + } + } + } + } + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + Self::collect_imports(child, code, file_id, imports); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::SymbolCounter; + use std::collections::HashMap; + + /// Phase 0 — AST node discovery for tree-sitter-nix. 
+ /// Run with: `cargo test explore_nix_abi15 -- --nocapture` + #[test] + fn explore_nix_abi15() { + let code = std::fs::read_to_string(concat!( + env!("CARGO_MANIFEST_DIR"), + "/examples/nix/comprehensive.nix" + )) + .unwrap_or_else(|_| r#"{ x = 1; add = a: b: a + b; }"#.to_string()); + + let mut parser = tree_sitter::Parser::new(); + parser + .set_language(&tree_sitter_nix::LANGUAGE.into()) + .expect("failed to set Nix language"); + + let tree = parser.parse(&code, None).expect("parse failed"); + + let mut registry: HashMap = HashMap::new(); + discover_nodes(tree.root_node(), &mut registry); + + let mut sorted: Vec<_> = registry.iter().collect(); + sorted.sort_by_key(|(k, _)| k.as_str()); + + println!( + "\n=== tree-sitter-nix node kinds ({} total) ===", + sorted.len() + ); + for (kind, id) in &sorted { + println!(" [{id:4}] {kind}"); + } + + println!("\n=== Parse tree (first 80 fragments) ==="); + let s_expr = tree.root_node().to_sexp(); + for (i, fragment) in s_expr.split('(').take(80).enumerate() { + println!("{i:3} ({fragment}"); + } + + assert!(!registry.is_empty()); + assert!( + registry.contains_key("source_code"), + "Expected source_code root node" + ); + assert!(registry.contains_key("binding"), "Expected binding node"); + } + + fn discover_nodes(node: tree_sitter::Node, registry: &mut HashMap) { + let mut stack = vec![node]; + while let Some(current) = stack.pop() { + registry.insert(current.kind().to_string(), current.kind_id()); + let mut cursor = current.walk(); + for child in current.children(&mut cursor) { + stack.push(child); + } + } + } + + #[test] + fn test_nix_parser_creation() { + assert!(NixParser::new().is_ok()); + } + + #[test] + fn test_nix_parse_attrset_bindings() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ x = 1; y = 2; }"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| 
s.name.as_ref()).collect(); + println!("symbols from attrset: {names:?}"); + assert!(names.contains(&"x"), "Expected x in {names:?}"); + assert!(names.contains(&"y"), "Expected y in {names:?}"); + } + + #[test] + fn test_nix_parse_function_binding() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ add = a: b: a + b; name = "hello"; }"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| s.name.as_ref()).collect(); + println!("symbols from attrset with lambda: {names:?}"); + + let add_sym = symbols.iter().find(|s| s.name.as_ref() == "add"); + assert!(add_sym.is_some(), "Expected add symbol"); + assert_eq!(add_sym.unwrap().kind, SymbolKind::Function); + } + + #[test] + fn test_nix_parse_let_expression() { + let mut parser = NixParser::new().unwrap(); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"let x = 1; f = a: a + 1; in f x"#; + let symbols = parser.parse(code, file_id, &mut counter); + + let names: Vec<_> = symbols.iter().map(|s| s.name.as_ref()).collect(); + println!("symbols from let: {names:?}"); + assert!(names.contains(&"x"), "Expected x"); + assert!(names.contains(&"f"), "Expected f"); + } + + #[test] + fn test_nix_find_imports() { + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + + let code = r#"{ pkgs = import {}; local = import ./local.nix; }"#; + let imports = parser.find_imports(code, file_id); + + println!("imports: {imports:?}"); + assert!(!imports.is_empty(), "Expected at least one import"); + } + + #[test] + fn test_nix_language() { + let parser = NixParser::new().unwrap(); + assert_eq!(parser.language(), crate::parsing::Language::Nix); + } +} diff --git a/src/parsing/nix/resolution.rs b/src/parsing/nix/resolution.rs new file mode 100644 index 00000000..a0d87a48 --- /dev/null +++ 
b/src/parsing/nix/resolution.rs @@ -0,0 +1,234 @@ +use crate::parsing::{InheritanceResolver, ResolutionScope, ScopeLevel, ScopeType}; +use crate::symbol::ScopeContext; +use crate::{FileId, SymbolId}; +use std::any::Any; +use std::collections::HashMap; + +#[derive(Debug)] +pub struct NixResolutionContext { + scope_stack: Vec<NixScope>, + imports: HashMap<String, SymbolId>, + global_symbols: HashMap<String, SymbolId>, + module_symbols: HashMap<String, SymbolId>, +} + +#[derive(Debug)] +struct NixScope { + symbols: HashMap<String, SymbolId>, + #[allow(dead_code)] + scope_type: ScopeType, +} + +impl Default for NixResolutionContext { + fn default() -> Self { + Self { + scope_stack: vec![NixScope { + symbols: HashMap::new(), + scope_type: ScopeType::Module, + }], + imports: HashMap::new(), + global_symbols: HashMap::new(), + module_symbols: HashMap::new(), + } + } +} + +impl NixResolutionContext { + pub fn new(_file_id: FileId) -> Self { + Self::default() + } + + pub fn add_import_symbol(&mut self, name: String, symbol_id: SymbolId, _is_type_only: bool) { + self.imports.insert(name, symbol_id); + } + + pub fn add_symbol_with_context( + &mut self, + name: String, + symbol_id: SymbolId, + scope_context: Option<&ScopeContext>, + ) { + let scope_level = match scope_context { + Some(ScopeContext::Global) => ScopeLevel::Global, + Some(ScopeContext::Module) | Some(ScopeContext::Package) => ScopeLevel::Module, + Some(ScopeContext::Local { hoisted: true, .. }) => ScopeLevel::Module, + Some(ScopeContext::Local { hoisted: false, .. }) => ScopeLevel::Local, + Some(ScopeContext::Parameter) => ScopeLevel::Local, + Some(ScopeContext::ClassMember { ..
}) => ScopeLevel::Module, + None => ScopeLevel::Module, + }; + self.add_symbol(name, symbol_id, scope_level); + } +} + +impl ResolutionScope for NixResolutionContext { + fn add_symbol(&mut self, name: String, symbol_id: SymbolId, scope_level: ScopeLevel) { + match scope_level { + ScopeLevel::Global => { + self.global_symbols.insert(name, symbol_id); + } + ScopeLevel::Module | ScopeLevel::Package => { + self.module_symbols.insert(name, symbol_id); + } + ScopeLevel::Local => { + if let Some(current_scope) = self.scope_stack.last_mut() { + current_scope.symbols.insert(name, symbol_id); + } + } + } + } + + fn resolve(&self, name: &str) -> Option<SymbolId> { + for scope in self.scope_stack.iter().rev() { + if let Some(id) = scope.symbols.get(name) { + return Some(*id); + } + } + if let Some(id) = self.imports.get(name) { + return Some(*id); + } + if let Some(id) = self.module_symbols.get(name) { + return Some(*id); + } + if let Some(id) = self.global_symbols.get(name) { + return Some(*id); + } + None + } + + fn clear_local_scope(&mut self) { + if let Some(scope) = self.scope_stack.last_mut() { + scope.symbols.clear(); + } + } + + fn enter_scope(&mut self, scope_type: ScopeType) { + self.scope_stack.push(NixScope { + symbols: HashMap::new(), + scope_type, + }); + } + + fn exit_scope(&mut self) { + if self.scope_stack.len() > 1 { + self.scope_stack.pop(); + } + } + + fn symbols_in_scope(&self) -> Vec<(String, SymbolId, ScopeLevel)> { + let mut result = Vec::new(); + let mut seen = std::collections::HashSet::new(); + + for scope in self.scope_stack.iter().rev() { + for (name, id) in &scope.symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Local)); + } + } + } + for (name, id) in &self.module_symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Module)); + } + } + for (name, id) in &self.global_symbols { + if seen.insert(name.clone()) { + result.push((name.clone(), *id, ScopeLevel::Global)); + } + } + result +
} + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +#[derive(Debug, Default)] +pub struct NixInheritanceResolver { + inheritance: HashMap<String, Vec<(String, String)>>, + type_methods: HashMap<String, Vec<String>>, +} + +impl NixInheritanceResolver { + pub fn new() -> Self { + Self::default() + } +} + +impl InheritanceResolver for NixInheritanceResolver { + fn add_inheritance(&mut self, child: String, parent: String, kind: &str) { + self.inheritance + .entry(child) + .or_default() + .push((parent, kind.to_string())); + } + + fn resolve_method(&self, type_name: &str, method: &str) -> Option<String> { + let mut to_visit = vec![type_name.to_string()]; + let mut visited = std::collections::HashSet::new(); + + while let Some(current) = to_visit.pop() { + if !visited.insert(current.clone()) { + continue; + } + if let Some(methods) = self.type_methods.get(&current) { + if methods.iter().any(|m| m == method) { + return Some(current); + } + } + if let Some(parents) = self.inheritance.get(&current) { + for (parent, _) in parents { + to_visit.push(parent.clone()); + } + } + } + None + } + + fn get_inheritance_chain(&self, type_name: &str) -> Vec<String> { + let mut chain = vec![type_name.to_string()]; + let mut visited = std::collections::HashSet::new(); + visited.insert(type_name.to_string()); + let mut to_visit = vec![type_name.to_string()]; + + while let Some(current) = to_visit.pop() { + if let Some(parents) = self.inheritance.get(&current) { + for (parent, _) in parents { + if visited.insert(parent.clone()) { + chain.push(parent.clone()); + to_visit.push(parent.clone()); + } + } + } + } + chain + } + + fn is_subtype(&self, child: &str, parent: &str) -> bool { + if child == parent { + return true; + } + self.get_inheritance_chain(child) + .contains(&parent.to_string()) + } + + fn add_type_methods(&mut self, type_name: String, methods: Vec<String>) { + self.type_methods.insert(type_name, methods); + } + + fn get_all_methods(&self, type_name: &str) -> Vec<String> { + let mut methods = Vec::new(); + let mut seen = std::collections::HashSet::new(); + for
ancestor in self.get_inheritance_chain(type_name) { + if let Some(type_methods) = self.type_methods.get(&ancestor) { + for method in type_methods { + if seen.insert(method.clone()) { + methods.push(method.clone()); + } + } + } + } + methods + } +} diff --git a/src/parsing/registry.rs b/src/parsing/registry.rs index 9a3fda11..cad08d0e 100644 --- a/src/parsing/registry.rs +++ b/src/parsing/registry.rs @@ -85,6 +85,7 @@ impl<'de> Deserialize<'de> for LanguageId { "javascript" => "javascript", "kotlin" => "kotlin", "lua" => "lua", + "nix" => "nix", "php" => "php", "python" => "python", "rust" => "rust", @@ -391,6 +392,7 @@ fn initialize_registry(registry: &mut LanguageRegistry) { super::kotlin::register(registry); super::clojure::register(registry); super::lua::register(registry); + super::nix::register(registry); super::swift::register(registry); } diff --git a/tests/exploration/abi15_grammar_audit/mod.rs b/tests/exploration/abi15_grammar_audit/mod.rs index 7be20ac8..c77077fb 100644 --- a/tests/exploration/abi15_grammar_audit/mod.rs +++ b/tests/exploration/abi15_grammar_audit/mod.rs @@ -29,6 +29,7 @@ mod java; mod javascript; mod kotlin; mod lua; +mod nix; mod php; mod python; mod rust_lang; diff --git a/tests/exploration/abi15_grammar_audit/nix.rs b/tests/exploration/abi15_grammar_audit/nix.rs new file mode 100644 index 00000000..87b06c0e --- /dev/null +++ b/tests/exploration/abi15_grammar_audit/nix.rs @@ -0,0 +1,102 @@ +//! Nix grammar audit. 
+ +use super::helpers::{AuditData, LanguageAuditConfig, run_comprehensive_analysis}; +use codanna::parsing::nix::audit::NixParserAudit; + +const CONFIG: LanguageAuditConfig = LanguageAuditConfig { + language_name: "Nix", + file_extension: "nix", + grammar_json_path: "contributing/parsers/nix/node-types.json", + example_file_path: "examples/nix/comprehensive.nix", + output_dir: "contributing/parsers/nix", +}; + +fn node_categories() -> Vec<(&'static str, Vec<&'static str>)> { + vec![ + ( + "ROOT NODES", + vec!["source_code"], + ), + ( + "BINDING NODES", + vec![ + "binding_set", + "binding", + "attrpath", + "identifier", + ], + ), + ( + "ATTRSET NODES", + vec![ + "attrset_expression", + "rec_attrset_expression", + ], + ), + ( + "FUNCTION NODES", + vec![ + "function_expression", + "formals", + "formal", + "apply_expression", + ], + ), + ( + "SCOPE NODES", + vec![ + "let_expression", + "with_expression", + "inherit", + "inherit_from", + "inherited_attrs", + ], + ), + ( + "EXPRESSION NODES", + vec![ + "if_expression", + "assert_expression", + "select_expression", + "binary_expression", + "parenthesized_expression", + "list_expression", + "variable_expression", + ], + ), + ( + "LITERAL NODES", + vec![ + "integer_expression", + "string_expression", + "indented_string_expression", + "path_expression", + "spath_expression", + "interpolation", + ], + ), + ("COMMENT NODES", vec!["comment"]), + ] +} + +#[test] +fn comprehensive_nix_analysis() { + run_comprehensive_analysis( + &CONFIG, + tree_sitter_nix::LANGUAGE.into(), + "{ x = 1; f = a: a + 1; }\n", + &node_categories(), + |path| { + let audit = NixParserAudit::audit_file(path).map_err(|e| e.to_string())?; + let report = audit.generate_report(); + Ok(( + AuditData::new( + audit.grammar_nodes, + audit.implemented_nodes, + audit.extracted_symbol_kinds, + ), + report, + )) + }, + ); +} diff --git a/tests/fixtures/nix/basic.nix b/tests/fixtures/nix/basic.nix new file mode 100644 index 00000000..7013589e --- /dev/null +++ 
b/tests/fixtures/nix/basic.nix @@ -0,0 +1,26 @@ +# Basic Nix fixture: simple attrset with functions and values +{ + # Plain value bindings + host = "localhost"; + port = 8080; + debug = false; + + # Lambda bindings + identity = x: x; + add = a: b: a + b; + greet = name: "Hello, ${name}"; + + # Nested attrset + config = { + timeout = 30; + retries = 3; + }; + + # Let expression + computed = + let + base = 10; + factor = 2; + in + base * factor; +} diff --git a/tests/fixtures/nix/functions.nix b/tests/fixtures/nix/functions.nix new file mode 100644 index 00000000..d8c30a3e --- /dev/null +++ b/tests/fixtures/nix/functions.nix @@ -0,0 +1,30 @@ +# Nix fixture: various function patterns +{ + # Simple curried lambda + add = a: b: a + b; + multiply = a: b: a * b; + + # Formals (destructuring) + mkService = { name, port ? 8080, debug ? false }: { + inherit name port debug; + }; + + # Formals with ellipsis + mkPkg = { name, src, buildInputs ? [], ... }: derivation { + inherit name src buildInputs; + system = builtins.currentSystem; + builder = "/bin/sh"; + }; + + # @ pattern + withExtras = args @ { name, ... 
}: { + fullArgs = args; + inherit name; + }; + + # Higher-order function + compose = f: g: x: f (g x); + + # Nested lambdas + applyTwice = f: x: f (f x); +} diff --git a/tests/fixtures/nix/imports.nix b/tests/fixtures/nix/imports.nix new file mode 100644 index 00000000..0f76f175 --- /dev/null +++ b/tests/fixtures/nix/imports.nix @@ -0,0 +1,11 @@ +# Nix fixture: import patterns +let + localLib = import ./lib.nix; + nixpkgs = import {}; + pinned = import (fetchTarball "https://example.com/nixpkgs.tar.gz") {}; +in +{ + inherit (nixpkgs) stdenv fetchurl; + lib = localLib; + pkgs = nixpkgs; +} diff --git a/tests/parsers/nix/mod.rs b/tests/parsers/nix/mod.rs new file mode 100644 index 00000000..b8c5a052 --- /dev/null +++ b/tests/parsers/nix/mod.rs @@ -0,0 +1,2 @@ +mod test_symbol_extraction; +mod test_call_tracking; diff --git a/tests/parsers/nix/test_call_tracking.rs b/tests/parsers/nix/test_call_tracking.rs new file mode 100644 index 00000000..fe7d193b --- /dev/null +++ b/tests/parsers/nix/test_call_tracking.rs @@ -0,0 +1,87 @@ +use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; + +fn find_calls(code: &str) -> Vec<(String, String)> { + let mut parser = NixParser::new().expect("Failed to create NixParser"); + parser + .find_calls(code) + .into_iter() + .map(|(caller, callee, _)| (caller.to_string(), callee.to_string())) + .collect() +} + +#[test] +fn test_simple_apply_expression() { + let code = r#"{ result = builtins.toString 42; }"#; + let calls = find_calls(code); + println!("calls: {calls:?}"); + assert!( + calls.iter().any(|(_, callee)| callee.contains("toString")), + "expected toString call, got {calls:?}" + ); +} + +#[test] +fn test_callpackage_pattern() { + let code = r#" +{ + hello = pkgs.callPackage ./hello.nix {}; + world = pkgs.callPackage ./world.nix { inherit stdenv; }; +} +"#; + let calls = find_calls(code); + println!("callPackage calls: {calls:?}"); + assert!( + calls.iter().any(|(_, callee)| callee.contains("callPackage")), + 
"expected callPackage call, got {calls:?}" + ); +} + +#[test] +fn test_nested_apply_expressions() { + let code = r#"{ x = builtins.toString (builtins.length [ 1 2 3 ]); }"#; + let calls = find_calls(code); + println!("nested calls: {calls:?}"); + // Should detect both function applications + assert!(calls.len() >= 2, "expected at least 2 calls, got {calls:?}"); +} + +#[test] +fn test_import_not_counted_as_call() { + // import is handled via find_imports, not find_calls + let code = r#"{ pkgs = import {}; }"#; + let calls = find_calls(code); + println!("calls for import expr: {calls:?}"); + // The apply_expression `import ` will appear — that's fine + // just document current behaviour +} + +#[test] +fn test_find_imports_basic() { + use codanna::types::FileId; + let code = r#" +{ + nixpkgs = import {}; + local = import ./local.nix; +} +"#; + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + let imports = parser.find_imports(code, file_id); + println!("imports: {imports:?}"); + assert!(!imports.is_empty(), "expected at least one import, got none"); +} + +#[test] +fn test_find_imports_from_fixture() { + use codanna::types::FileId; + let code = include_str!("../../fixtures/nix/imports.nix"); + let mut parser = NixParser::new().unwrap(); + let file_id = FileId::new(1).unwrap(); + let imports = parser.find_imports(code, file_id); + println!("imports from fixture: {imports:?}"); + assert!( + imports.iter().any(|i| i.path.contains("lib.nix")), + "expected ./lib.nix import, got {imports:?}" + ); +} diff --git a/tests/parsers/nix/test_symbol_extraction.rs b/tests/parsers/nix/test_symbol_extraction.rs new file mode 100644 index 00000000..ae87f9b4 --- /dev/null +++ b/tests/parsers/nix/test_symbol_extraction.rs @@ -0,0 +1,167 @@ +use codanna::parsing::Language; +use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; +use codanna::types::{FileId, SymbolCounter}; +use codanna::SymbolKind; + +fn parse(code: &str) -> Vec { 
+ let mut parser = NixParser::new().expect("Failed to create NixParser"); + let mut counter = SymbolCounter::new(); + let file_id = FileId::new(1).unwrap(); + parser.parse(code, file_id, &mut counter) +} + +// ── language identity ──────────────────────────────────────────────────────── + +#[test] +fn test_nix_language_identity() { + let parser = NixParser::new().unwrap(); + assert_eq!(parser.language(), Language::Nix); +} + +// ── basic attrset bindings ─────────────────────────────────────────────────── + +#[test] +fn test_attrset_simple_values() { + let symbols = parse(r#"{ host = "localhost"; port = 8080; debug = false; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"host"), "expected host, got {names:?}"); + assert!(names.contains(&"port"), "expected port, got {names:?}"); + assert!(names.contains(&"debug"), "expected debug, got {names:?}"); +} + +#[test] +fn test_attrset_function_binding_kind() { + let symbols = parse(r#"{ add = a: b: a + b; }"#); + let add = symbols.iter().find(|s| s.name.as_ref() == "add").unwrap(); + assert_eq!(add.kind, SymbolKind::Function, "add should be Function"); +} + +#[test] +fn test_attrset_value_binding_kind() { + let symbols = parse(r#"{ x = 42; }"#); + let x = symbols.iter().find(|s| s.name.as_ref() == "x").unwrap(); + assert_eq!(x.kind, SymbolKind::Variable, "x should be Variable"); +} + +// ── lambda parameters ──────────────────────────────────────────────────────── + +#[test] +fn test_simple_lambda_param() { + let symbols = parse(r#"{ f = x: x + 1; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"x"), "expected param x, got {params:?}"); +} + +#[test] +fn test_formals_params() { + let symbols = parse(r#"{ f = { a, b ? 0, c ? 
1 }: a + b + c; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"a"), "expected param a, got {params:?}"); + assert!(params.contains(&"b"), "expected param b, got {params:?}"); + assert!(params.contains(&"c"), "expected param c, got {params:?}"); +} + +#[test] +fn test_curried_lambda_params() { + let symbols = parse(r#"{ add = a: b: a + b; }"#); + let params: Vec<&str> = symbols + .iter() + .filter(|s| s.kind == SymbolKind::Parameter) + .map(|s| s.name.as_ref()) + .collect(); + assert!(params.contains(&"a"), "expected param a, got {params:?}"); + assert!(params.contains(&"b"), "expected param b, got {params:?}"); +} + +// ── let expressions ────────────────────────────────────────────────────────── + +#[test] +fn test_let_bindings() { + let symbols = parse(r#"let x = 1; y = 2; in x + y"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"x"), "expected x, got {names:?}"); + assert!(names.contains(&"y"), "expected y, got {names:?}"); +} + +#[test] +fn test_let_function_binding() { + let symbols = parse(r#"let double = x: x * 2; in double 5"#); + let double = symbols.iter().find(|s| s.name.as_ref() == "double").unwrap(); + assert_eq!(double.kind, SymbolKind::Function); +} + +// ── rec attrset ────────────────────────────────────────────────────────────── + +#[test] +fn test_rec_attrset_bindings() { + let symbols = parse(r#"rec { base = "/var"; data = "${base}/data"; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"base"), "expected base, got {names:?}"); + assert!(names.contains(&"data"), "expected data, got {names:?}"); +} + +// ── inherit ────────────────────────────────────────────────────────────────── + +#[test] +fn test_inherit_emits_variables() { + let symbols = parse(r#"{ inherit stdenv fetchurl; }"#); + let names: Vec<&str> = 
symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); + assert!(names.contains(&"fetchurl"), "expected fetchurl, got {names:?}"); +} + +#[test] +fn test_inherit_from_emits_variables() { + let symbols = parse(r#"{ inherit (pkgs) stdenv fetchurl; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); + assert!(names.contains(&"fetchurl"), "expected fetchurl, got {names:?}"); +} + +// ── nested attrsets ────────────────────────────────────────────────────────── + +#[test] +fn test_nested_attrset_outer_binding() { + let symbols = parse(r#"{ config = { host = "localhost"; port = 8080; }; }"#); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + assert!(names.contains(&"config"), "expected config, got {names:?}"); + // inner bindings also visible + assert!(names.contains(&"host"), "expected host, got {names:?}"); + assert!(names.contains(&"port"), "expected port, got {names:?}"); +} + +// ── fixture files ──────────────────────────────────────────────────────────── + +#[test] +fn test_basic_fixture() { + let code = include_str!("../../fixtures/nix/basic.nix"); + let symbols = parse(code); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + + assert!(names.contains(&"host"), "expected host"); + assert!(names.contains(&"add"), "expected add"); + assert!(names.contains(&"config"), "expected config"); + + let add = symbols.iter().find(|s| s.name.as_ref() == "add").unwrap(); + assert_eq!(add.kind, SymbolKind::Function); +} + +#[test] +fn test_functions_fixture() { + let code = include_str!("../../fixtures/nix/functions.nix"); + let symbols = parse(code); + let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); + + assert!(names.contains(&"add"), "expected add"); + assert!(names.contains(&"mkService"), "expected mkService"); + 
assert!(names.contains(&"compose"), "expected compose"); +} diff --git a/tests/parsers_tests.rs b/tests/parsers_tests.rs index 19376871..b10829e3 100644 --- a/tests/parsers_tests.rs +++ b/tests/parsers_tests.rs @@ -145,6 +145,12 @@ mod test_c_method_call_static; #[path = "parsers/lua/test_method_call_static.rs"] mod test_lua_method_call_static; +#[path = "parsers/nix/test_symbol_extraction.rs"] +mod test_nix_symbol_extraction; + +#[path = "parsers/nix/test_call_tracking.rs"] +mod test_nix_call_tracking; + #[path = "parsers/gdscript/test_method_call_static.rs"] mod test_gdscript_method_call_static; From e98860c51fdb079cd9893f2ec0eff9a65962ba8a Mon Sep 17 00:00:00 2001 From: Anton Vasiljev Date: Thu, 21 May 2026 19:53:46 +0300 Subject: [PATCH 3/4] style: apply rustfmt to nix parser files Fix cargo fmt check failures on CI. --- src/parsing/nix/behavior.rs | 6 +++--- src/parsing/nix/mod.rs | 2 +- tests/exploration/abi15_grammar_audit/nix.rs | 19 ++++--------------- tests/parsers/nix/test_call_tracking.rs | 11 ++++++++--- tests/parsers/nix/test_symbol_extraction.rs | 17 +++++++++++++---- 5 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/parsing/nix/behavior.rs b/src/parsing/nix/behavior.rs index 6c943b9a..ea45c69d 100644 --- a/src/parsing/nix/behavior.rs +++ b/src/parsing/nix/behavior.rs @@ -1,8 +1,8 @@ -use crate::Visibility; -use crate::parsing::LanguageBehavior; use crate::parsing::behavior_state::{BehaviorState, StatefulBehavior}; use crate::parsing::resolution::{InheritanceResolver, ResolutionScope}; +use crate::parsing::LanguageBehavior; use crate::types::FileId; +use crate::Visibility; use std::path::{Path, PathBuf}; use tree_sitter::Language; @@ -133,8 +133,8 @@ impl LanguageBehavior for NixBehavior { } fn is_resolvable_symbol(&self, symbol: &crate::Symbol) -> bool { - use crate::SymbolKind; use crate::symbol::ScopeContext; + use crate::SymbolKind; let module_level = matches!( symbol.kind, diff --git a/src/parsing/nix/mod.rs 
b/src/parsing/nix/mod.rs index 833d9f6c..ddce7c1c 100644 --- a/src/parsing/nix/mod.rs +++ b/src/parsing/nix/mod.rs @@ -5,7 +5,7 @@ mod parser; mod resolution; pub use behavior::NixBehavior; -pub use definition::NixLanguage; pub(crate) use definition::register; +pub use definition::NixLanguage; pub use parser::NixParser; pub use resolution::{NixInheritanceResolver, NixResolutionContext}; diff --git a/tests/exploration/abi15_grammar_audit/nix.rs b/tests/exploration/abi15_grammar_audit/nix.rs index 87b06c0e..14aa44eb 100644 --- a/tests/exploration/abi15_grammar_audit/nix.rs +++ b/tests/exploration/abi15_grammar_audit/nix.rs @@ -1,6 +1,6 @@ //! Nix grammar audit. -use super::helpers::{AuditData, LanguageAuditConfig, run_comprehensive_analysis}; +use super::helpers::{run_comprehensive_analysis, AuditData, LanguageAuditConfig}; use codanna::parsing::nix::audit::NixParserAudit; const CONFIG: LanguageAuditConfig = LanguageAuditConfig { @@ -13,25 +13,14 @@ const CONFIG: LanguageAuditConfig = LanguageAuditConfig { fn node_categories() -> Vec<(&'static str, Vec<&'static str>)> { vec![ - ( - "ROOT NODES", - vec!["source_code"], - ), + ("ROOT NODES", vec!["source_code"]), ( "BINDING NODES", - vec![ - "binding_set", - "binding", - "attrpath", - "identifier", - ], + vec!["binding_set", "binding", "attrpath", "identifier"], ), ( "ATTRSET NODES", - vec![ - "attrset_expression", - "rec_attrset_expression", - ], + vec!["attrset_expression", "rec_attrset_expression"], ), ( "FUNCTION NODES", diff --git a/tests/parsers/nix/test_call_tracking.rs b/tests/parsers/nix/test_call_tracking.rs index fe7d193b..12d0eec0 100644 --- a/tests/parsers/nix/test_call_tracking.rs +++ b/tests/parsers/nix/test_call_tracking.rs @@ -1,5 +1,5 @@ -use codanna::parsing::LanguageParser; use codanna::parsing::nix::NixParser; +use codanna::parsing::LanguageParser; fn find_calls(code: &str) -> Vec<(String, String)> { let mut parser = NixParser::new().expect("Failed to create NixParser"); @@ -32,7 +32,9 @@ fn 
test_callpackage_pattern() { let calls = find_calls(code); println!("callPackage calls: {calls:?}"); assert!( - calls.iter().any(|(_, callee)| callee.contains("callPackage")), + calls + .iter() + .any(|(_, callee)| callee.contains("callPackage")), "expected callPackage call, got {calls:?}" ); } @@ -69,7 +71,10 @@ fn test_find_imports_basic() { let file_id = FileId::new(1).unwrap(); let imports = parser.find_imports(code, file_id); println!("imports: {imports:?}"); - assert!(!imports.is_empty(), "expected at least one import, got none"); + assert!( + !imports.is_empty(), + "expected at least one import, got none" + ); } #[test] diff --git a/tests/parsers/nix/test_symbol_extraction.rs b/tests/parsers/nix/test_symbol_extraction.rs index ae87f9b4..7f5dac66 100644 --- a/tests/parsers/nix/test_symbol_extraction.rs +++ b/tests/parsers/nix/test_symbol_extraction.rs @@ -1,6 +1,6 @@ +use codanna::parsing::nix::NixParser; use codanna::parsing::Language; use codanna::parsing::LanguageParser; -use codanna::parsing::nix::NixParser; use codanna::types::{FileId, SymbolCounter}; use codanna::SymbolKind; @@ -95,7 +95,10 @@ fn test_let_bindings() { #[test] fn test_let_function_binding() { let symbols = parse(r#"let double = x: x * 2; in double 5"#); - let double = symbols.iter().find(|s| s.name.as_ref() == "double").unwrap(); + let double = symbols + .iter() + .find(|s| s.name.as_ref() == "double") + .unwrap(); assert_eq!(double.kind, SymbolKind::Function); } @@ -116,7 +119,10 @@ fn test_inherit_emits_variables() { let symbols = parse(r#"{ inherit stdenv fetchurl; }"#); let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); - assert!(names.contains(&"fetchurl"), "expected fetchurl, got {names:?}"); + assert!( + names.contains(&"fetchurl"), + "expected fetchurl, got {names:?}" + ); } #[test] @@ -124,7 +130,10 @@ fn test_inherit_from_emits_variables() { let symbols = parse(r#"{ inherit (pkgs) 
stdenv fetchurl; }"#); let names: Vec<&str> = symbols.iter().map(|s| s.name.as_ref()).collect(); assert!(names.contains(&"stdenv"), "expected stdenv, got {names:?}"); - assert!(names.contains(&"fetchurl"), "expected fetchurl, got {names:?}"); + assert!( + names.contains(&"fetchurl"), + "expected fetchurl, got {names:?}" + ); } // ── nested attrsets ────────────────────────────────────────────────────────── From acde35416f4902b903747f3ee704e35d100a00d5 Mon Sep 17 00:00:00 2001 From: Anton Vasiljev Date: Thu, 21 May 2026 20:00:14 +0300 Subject: [PATCH 4/4] style: fix import ordering with cargo fmt via nix develop Prior commit used standalone rustfmt which sorts imports differently from cargo fmt run inside the nix devshell. Use nix develop going forward to match CI exactly. --- src/parsing/nix/behavior.rs | 6 +++--- src/parsing/nix/mod.rs | 2 +- tests/exploration/abi15_grammar_audit/nix.rs | 2 +- tests/parsers/nix/test_call_tracking.rs | 2 +- tests/parsers/nix/test_symbol_extraction.rs | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parsing/nix/behavior.rs b/src/parsing/nix/behavior.rs index ea45c69d..6c943b9a 100644 --- a/src/parsing/nix/behavior.rs +++ b/src/parsing/nix/behavior.rs @@ -1,8 +1,8 @@ +use crate::Visibility; +use crate::parsing::LanguageBehavior; use crate::parsing::behavior_state::{BehaviorState, StatefulBehavior}; use crate::parsing::resolution::{InheritanceResolver, ResolutionScope}; -use crate::parsing::LanguageBehavior; use crate::types::FileId; -use crate::Visibility; use std::path::{Path, PathBuf}; use tree_sitter::Language; @@ -133,8 +133,8 @@ impl LanguageBehavior for NixBehavior { } fn is_resolvable_symbol(&self, symbol: &crate::Symbol) -> bool { - use crate::symbol::ScopeContext; use crate::SymbolKind; + use crate::symbol::ScopeContext; let module_level = matches!( symbol.kind, diff --git a/src/parsing/nix/mod.rs b/src/parsing/nix/mod.rs index ddce7c1c..833d9f6c 100644 --- a/src/parsing/nix/mod.rs +++ 
b/src/parsing/nix/mod.rs @@ -5,7 +5,7 @@ mod parser; mod resolution; pub use behavior::NixBehavior; -pub(crate) use definition::register; pub use definition::NixLanguage; +pub(crate) use definition::register; pub use parser::NixParser; pub use resolution::{NixInheritanceResolver, NixResolutionContext}; diff --git a/tests/exploration/abi15_grammar_audit/nix.rs b/tests/exploration/abi15_grammar_audit/nix.rs index 14aa44eb..7cf1d100 100644 --- a/tests/exploration/abi15_grammar_audit/nix.rs +++ b/tests/exploration/abi15_grammar_audit/nix.rs @@ -1,6 +1,6 @@ //! Nix grammar audit. -use super::helpers::{run_comprehensive_analysis, AuditData, LanguageAuditConfig}; +use super::helpers::{AuditData, LanguageAuditConfig, run_comprehensive_analysis}; use codanna::parsing::nix::audit::NixParserAudit; const CONFIG: LanguageAuditConfig = LanguageAuditConfig { diff --git a/tests/parsers/nix/test_call_tracking.rs b/tests/parsers/nix/test_call_tracking.rs index 12d0eec0..71ef6ba7 100644 --- a/tests/parsers/nix/test_call_tracking.rs +++ b/tests/parsers/nix/test_call_tracking.rs @@ -1,5 +1,5 @@ -use codanna::parsing::nix::NixParser; use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; fn find_calls(code: &str) -> Vec<(String, String)> { let mut parser = NixParser::new().expect("Failed to create NixParser"); diff --git a/tests/parsers/nix/test_symbol_extraction.rs b/tests/parsers/nix/test_symbol_extraction.rs index 7f5dac66..896da3b1 100644 --- a/tests/parsers/nix/test_symbol_extraction.rs +++ b/tests/parsers/nix/test_symbol_extraction.rs @@ -1,8 +1,8 @@ -use codanna::parsing::nix::NixParser; +use codanna::SymbolKind; use codanna::parsing::Language; use codanna::parsing::LanguageParser; +use codanna::parsing::nix::NixParser; use codanna::types::{FileId, SymbolCounter}; -use codanna::SymbolKind; fn parse(code: &str) -> Vec { let mut parser = NixParser::new().expect("Failed to create NixParser");