From 0b3951851862f4d88f99ed70d6614290a5831339 Mon Sep 17 00:00:00 2001 From: Dimitrije Jankovic Date: Mon, 8 Jun 2026 16:37:44 -0400 Subject: [PATCH] Tree-sitter queries: highlights, locals, tags Author the three shared query files against the frozen grammar node set (D3) and register them with the grammar package: - highlights.scm: keywords, Create types, command heads, member fields, resource/variable names, literals, labels, operators, punctuation. - locals.scm: a single file scope; Create / function / parameter / For-loop definitions and identifier references, for go-to-definition and find-references. - tags.scm: resource and GmatFunction definitions plus command and output-binding-call references, for the symbol outline and navigation. - tree-sitter.json / package.json: list locals alongside highlights (tags is auto-discovered); all three pack with the npm grammar. Add tests/test_queries.py: every query loads against the vendored compiled grammar, highlight captures land on the expected nodes, resource definition/reference resolution works on a fixture, and tags expose the resource and command symbols. --- tests/test_queries.py | 237 ++++++++++++++++++++++++ tree-sitter-gmat/package.json | 3 +- tree-sitter-gmat/queries/highlights.scm | 99 +++++++++- tree-sitter-gmat/queries/locals.scm | 33 ++++ tree-sitter-gmat/queries/tags.scm | 39 ++++ tree-sitter-gmat/tree-sitter.json | 3 + 6 files changed, 410 insertions(+), 4 deletions(-) create mode 100644 tests/test_queries.py create mode 100644 tree-sitter-gmat/queries/locals.scm create mode 100644 tree-sitter-gmat/queries/tags.scm diff --git a/tests/test_queries.py b/tests/test_queries.py new file mode 100644 index 0000000..dce3ae8 --- /dev/null +++ b/tests/test_queries.py @@ -0,0 +1,237 @@ +"""The tree-sitter query layer: highlights / locals / tags load against the compiled grammar and +capture the right nodes (D1 / D3). 
+ +The three query files ship in ``tree-sitter-gmat/queries/`` and are packed with the npm grammar; +they are the shared layer behind editor highlighting, go-to-definition / find-references, and the +document-symbol outline. These tests assert the definition-of-done: + +* all three load against the *vendored, compiled* grammar (the same language the wheel ships); +* highlight captures land on the expected nodes (keywords, types, fields, names, literals, …); +* resource definition / reference resolution works via ``locals.scm`` — a name's definition is at + its ``Create`` and each use resolves to it; +* ``tags.scm`` exposes resource and command symbols. +""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest +from tree_sitter import Language, Query, QueryCursor + +from gmat_script._grammar import language +from gmat_script.ast.base import node_text +from gmat_script.parser import parse + +if TYPE_CHECKING: + from tree_sitter import Node + +_QUERIES_DIR = Path(__file__).parent.parent / "tree-sitter-gmat" / "queries" +_QUERY_FILES = ("highlights.scm", "locals.scm", "tags.scm") + + +def _gmat_language() -> Language: + """The vendored, compiled GMAT grammar — the language the queries must load against.""" + return Language(language()) + + +def _load_query(name: str) -> Query: + source = (_QUERIES_DIR / name).read_text(encoding="utf-8") + return Query(_gmat_language(), source) + + +def _captures(name: str, source: str) -> dict[str, list[Node]]: + """Run query *name* over parsed *source* and return its capture-name → nodes mapping.""" + cursor = QueryCursor(_load_query(name)) + return cursor.captures(parse(source).root_node) + + +def _matches(name: str, source: str) -> list[dict[str, list[Node]]]: + """Run query *name* over parsed *source*, returning per-match capture maps. 
+ + Unlike :func:`_captures`, this groups each match's captures together, so a wrapper capture + (``@definition.class``) and the ``@name`` inside it stay associated. + """ + cursor = QueryCursor(_load_query(name)) + return [match for _, match in cursor.matches(parse(source).root_node)] + + +def _names_for(matches: list[dict[str, list[Node]]], capture: str) -> set[str]: + """The ``@name`` texts of every match that carries *capture* (e.g. ``definition.class``).""" + return { + node_text(name) for match in matches if capture in match for name in match.get("name", []) + } + + +def _texts(captures: dict[str, list[Node]]) -> dict[str, list[str]]: + return {name: [node_text(node) for node in nodes] for name, nodes in captures.items()} + + +# -------------------------------------------------------------------------------------------------- +# Every query loads against the compiled grammar (DoD: "queries load ... without error"). +# -------------------------------------------------------------------------------------------------- + + +@pytest.mark.parametrize("name", _QUERY_FILES) +def test_query_loads_against_compiled_grammar(name: str) -> None: + # Query() validates every node type, field, and anonymous token against the grammar and raises + # QueryError on an unknown one, so constructing and running it is the load assertion; a + # non-empty result confirms the patterns match the grammar's real node set. 
+ assert _captures(name, _HIGHLIGHT_FIXTURE) + + +# -------------------------------------------------------------------------------------------------- +# highlights.scm +# -------------------------------------------------------------------------------------------------- + +_HIGHLIGHT_FIXTURE = """% a leading comment +#Include 'helpers.script' +Create Spacecraft Sat +Create Variable count +BeginMissionSequence +GMAT Sat.SMA = 7000.5 +Propagate 'Step one' Prop(Sat) {Sat.ElapsedSecs = 60} +If Sat.TA > 90 + Stop +EndIf +""" + + +def test_highlights_capture_each_category() -> None: + texts = _texts(_captures("highlights.scm", _HIGHLIGHT_FIXTURE)) + + # Keywords: the structural words only — resource types and command heads are *not* keywords. + for kw in ("#Include", "Create", "GMAT", "BeginMissionSequence", "If", "EndIf"): + assert kw in texts["keyword"], f"{kw!r} should be a keyword" + + # Resource types in a Create declaration — exactly the two type positions. + assert sorted(texts["type"]) == ["Spacecraft", "Variable"] + + # Command heads highlight as functions (Propagate, Stop), not plain variables. + for head in ("Propagate", "Stop"): + assert head in texts["function"] + + # Dotted field / property access. + for field in ("SMA", "ElapsedSecs", "TA"): + assert field in texts["property"] + + # Resource / variable names fall through to the catch-all. + for name in ("Sat", "count"): + assert name in texts["variable"] + + assert "% a leading comment" in texts["comment"] + assert "'helpers.script'" in texts["string"] + assert "'Step one'" in texts["label"] + assert "7000.5" in texts["number"] and "60" in texts["number"] + assert "=" in texts["operator"] and ">" in texts["operator"] + assert "{" in texts["punctuation.bracket"] and "(" in texts["punctuation.bracket"] + + +def test_command_label_is_not_a_plain_string() -> None: + texts = _texts(_captures("highlights.scm", _HIGHLIGHT_FIXTURE)) + # The mission-step label is a @label, distinct from the #Include path @string. 
+ assert "'Step one'" in texts["label"] + assert "'Step one'" not in texts.get("string", []) + + +# -------------------------------------------------------------------------------------------------- +# locals.scm — definition / reference resolution +# -------------------------------------------------------------------------------------------------- + +_LOCALS_FIXTURE = """Create Spacecraft Sat +Create ForceModel FM + +BeginMissionSequence +GMAT Sat.SMA = 7000 +Sat.Coord = FM +Propagate Prop(Sat) +""" + + +def _resolve_locals(source: str) -> tuple[dict[str, Node], list[Node]]: + """Mirror the locals model: a node captured as a definition is not also counted a reference. + + Returns ``(definitions_by_name, references)`` where *references* excludes the definition nodes + (the trailing catch-all ``(identifier) @local.reference`` matches them too, but the earlier, + more specific ``@local.definition`` pattern wins — the first-match precedence a host applies). + """ + captures = _captures("locals.scm", source) + definition_nodes = captures.get("local.definition", []) + definition_ranges = {node.byte_range for node in definition_nodes} + definitions = {node_text(node): node for node in definition_nodes} + references = [ + node + for node in captures.get("local.reference", []) + if node.byte_range not in definition_ranges + ] + return definitions, references + + +def test_locals_marks_create_as_the_definition() -> None: + definitions, _ = _resolve_locals(_LOCALS_FIXTURE) + + assert set(definitions) == {"Sat", "FM"} + # Each definition node is the name in its Create declaration. + for node in definitions.values(): + assert node.parent is not None + assert node.parent.type == "create_command" + + +def test_locals_resolves_each_use_to_its_definition() -> None: + definitions, references = _resolve_locals(_LOCALS_FIXTURE) + + # Find-references for Sat: Sat.SMA, Sat.Coord, and Prop(Sat) — three uses, none of them the + # declaration itself. 
+ sat_uses = [node for node in references if node_text(node) == "Sat"] + assert len(sat_uses) == 3 + + # FM is used once, as the right-hand side of Sat.Coord = FM. + assert sum(node_text(node) == "FM" for node in references) == 1 + + # Go-to-definition: every reference whose name is a declared resource resolves to that Create. + for use in sat_uses: + target = definitions[node_text(use)] + assert target.parent is not None and target.parent.type == "create_command" + assert target.start_byte < use.start_byte # the declaration precedes the use here + + +def test_locals_does_not_treat_field_names_as_resource_definitions() -> None: + # SMA / Coord are member properties, never declared with Create, so they define nothing. + definitions, _ = _resolve_locals(_LOCALS_FIXTURE) + assert "SMA" not in definitions + assert "Coord" not in definitions + + +# -------------------------------------------------------------------------------------------------- +# tags.scm — symbol outline / navigation +# -------------------------------------------------------------------------------------------------- + +_TAGS_FIXTURE = """Create Spacecraft Sat +Create ForceModel FM + +BeginMissionSequence +Propagate Prop(Sat) +Maneuver TOI +[range] = ComputeRange(Sat) +[now] = Python.time.time() +""" + +_GMF_FIXTURE = "function [out] = helper(a, b)\n" + + +def test_tags_define_resources() -> None: + matches = _matches("tags.scm", _TAGS_FIXTURE) + assert _names_for(matches, "definition.class") == {"Sat", "FM"} + + +def test_tags_reference_commands_and_calls() -> None: + matches = _matches("tags.scm", _TAGS_FIXTURE) + # Generic mission commands plus the output-binding calls (bare and dotted-through-call). 
+ expected = {"Propagate", "Maneuver", "ComputeRange", "time"} + assert expected <= _names_for(matches, "reference.call") + + +def test_tags_define_gmat_functions() -> None: + matches = _matches("tags.scm", _GMF_FIXTURE) + assert _names_for(matches, "definition.function") == {"helper"} diff --git a/tree-sitter-gmat/package.json b/tree-sitter-gmat/package.json index 6b7fb4b..48b8d43 100644 --- a/tree-sitter-gmat/package.json +++ b/tree-sitter-gmat/package.json @@ -32,7 +32,8 @@ "script", "gmf" ], - "highlights": "queries/highlights.scm" + "highlights": "queries/highlights.scm", + "locals": "queries/locals.scm" } ] } diff --git a/tree-sitter-gmat/queries/highlights.scm b/tree-sitter-gmat/queries/highlights.scm index 32e8e13..d2c7180 100644 --- a/tree-sitter-gmat/queries/highlights.scm +++ b/tree-sitter-gmat/queries/highlights.scm @@ -1,5 +1,98 @@ ; Syntax-highlighting queries for the GMAT grammar. ; -; Placeholder. The highlight, locals, and tags queries are authored once the grammar's node set is -; in place (the editor-tooling milestone); this directory is created now so the grammar package is -; structurally complete. See docs/design/decisions.md (D1). +; Node names follow the frozen CST taxonomy (docs/design/decisions.md, D3). Captures use the +; standard tree-sitter highlight names so any host theme colours them without remapping. Patterns +; are ordered specific-first: a host that resolves overlaps by first match (the tree-sitter +; convention) gives the earlier, more specific capture precedence over the trailing catch-alls. + +; ---- comments / literals -------------------------------------------------------------------------- + +(comment) @comment + +(number) @number +(string) @string + +; A single-quoted command / mission-step label (`Propagate 'Raise apogee' …`) — distinct from a +; string-valued argument. 
+(command_label) @label + +; ---- keywords ------------------------------------------------------------------------------------- + +; Structural keywords: the configuration / sequence boundary words, control-flow and solver block +; delimiters, and the GmatFunction header. Resource types and command keywords are *not* keywords — +; the grammar parses them generically (D3) — so they are not enumerable here. +[ + "Create" + "GMAT" + "If" + "Else" + "EndIf" + "For" + "EndFor" + "While" + "EndWhile" + "Target" + "EndTarget" + "Optimize" + "EndOptimize" + "BeginScript" + "EndScript" + "function" +] @keyword + +"#Include" @keyword +(begin_mission_sequence) @keyword + +; ---- operators / punctuation ---------------------------------------------------------------------- + +[ + "=" + "+" + "-" + "*" + "/" + "^" + "<" + "<=" + ">" + ">=" + "==" + "~=" + "&" + "|" +] @operator + +[ + "(" + ")" + "[" + "]" + "{" + "}" +] @punctuation.bracket + +[ + "," + ";" + ":" + "." +] @punctuation.delimiter + +; ---- names ---------------------------------------------------------------------------------------- + +; The resource type in a `Create <Type> <Name>` declaration. +(create_command + type: (identifier) @type) + +; A command head — the mission operation (`Propagate`, `Maneuver`, `Report`, …). Generic by design +; (D3), so highlighted by position rather than by an enumerated keyword set. +(command + name: (identifier) @function) + +; A dotted field / property access — the trailing names of `Sat.SMA`, `FM.GravityField.Earth`. +(member_expression + property: (identifier) @property) + +; Catch-all: every other name is a resource / variable reference. Last, so the specific captures +; above win on hosts that resolve overlaps by first match. 
+(identifier) @variable diff --git a/tree-sitter-gmat/queries/locals.scm b/tree-sitter-gmat/queries/locals.scm new file mode 100644 index 0000000..0c383b6 --- /dev/null +++ b/tree-sitter-gmat/queries/locals.scm @@ -0,0 +1,33 @@ +; Scope / definition / reference queries for the GMAT grammar. +; +; These power go-to-definition and find-references: a `@local.definition` introduces a name in the +; enclosing `@local.scope`, and a `@local.reference` resolves to the nearest such definition of the +; same text. Node names follow the frozen CST taxonomy (docs/design/decisions.md, D3). + +; GMAT resources are file-global — a resource may be referenced before its `Create` appears, and +; there is no lexical block scoping for names — so the whole script is a single scope. +(source_file) @local.scope + +; ---- definitions ---------------------------------------------------------------------------------- + +; A resource is defined by its `Create` declaration: `Create Spacecraft Sat` defines `Sat`. +(create_command + name: (identifier) @local.definition) + +; A GmatFunction (.gmf) header defines the function name and its parameters. +(function_definition + name: (identifier) @local.definition) + +(parameter_list + (identifier) @local.definition) + +; A `For` loop binds its iteration variable. +(for_statement + variable: (identifier) @local.definition) + +; ---- references ----------------------------------------------------------------------------------- + +; Every name used anywhere is a reference. A member-access `property` is also an aliased identifier +; and is matched here too; naming no declared resource, it simply resolves to nothing — the safe +; direction (an unresolved reference is harmless; a missed one is not). 
+(identifier) @local.reference diff --git a/tree-sitter-gmat/queries/tags.scm b/tree-sitter-gmat/queries/tags.scm new file mode 100644 index 0000000..713b812 --- /dev/null +++ b/tree-sitter-gmat/queries/tags.scm @@ -0,0 +1,39 @@ +; Symbol-tag queries for the GMAT grammar — the document-symbol outline and code navigation. +; +; A `@definition.*` capture names a symbol declared in the file; a `@reference.*` capture names a +; use / call site. The `@name` capture inside each marks the symbol's name span. Node names follow +; the frozen CST taxonomy (docs/design/decisions.md, D3). + +; ---- definitions ---------------------------------------------------------------------------------- + +; Each resource declared with `Create` is a top-level symbol (`Create Spacecraft Sat` → `Sat`). +(create_command + name: (identifier) @name) @definition.class + +; A GmatFunction (.gmf) header defines a function. +(function_definition + name: (identifier) @name) @definition.function + +; ---- references ----------------------------------------------------------------------------------- + +; Mission commands — the operations invoked in the sequence (`Propagate`, `Maneuver`, …). +(command + name: (identifier) @name) @reference.call + +; Output-binding function calls — `[out] = Func(args)`. The function reference may be a bare name, a +; dotted path, or a parenthesised call; tag the leaf name in each shape. 
+(function_call_command + function: (identifier) @name) @reference.call + +(function_call_command + function: (member_expression + property: (identifier) @name)) @reference.call + +(function_call_command + function: (call_expression + function: (identifier) @name)) @reference.call + +(function_call_command + function: (call_expression + function: (member_expression + property: (identifier) @name))) @reference.call diff --git a/tree-sitter-gmat/tree-sitter.json b/tree-sitter-gmat/tree-sitter.json index 39212e9..141c55c 100644 --- a/tree-sitter-gmat/tree-sitter.json +++ b/tree-sitter-gmat/tree-sitter.json @@ -10,6 +10,9 @@ ], "highlights": [ "queries/highlights.scm" + ], + "locals": [ + "queries/locals.scm" ] } ],