From 19941735a84f457237789e12ed2ad9cfad8b6396 Mon Sep 17 00:00:00 2001 From: cssbruno Date: Thu, 11 Jun 2026 16:32:32 -0300 Subject: [PATCH] feat(compiler): harden M2 compiler and language contract Implements the M2 compiler/language-contract additions tracked by #294-#299. - #299: ADR 0010 commits to a shared tokenizer plus recursive-descent parser with error recovery, migrated behind the stable gwdkast AST seam. Wires the decision into the decisions index and the pipeline doc. - #294: source.SourcePosition carries a 0-based byte Offset, with PositionAt and OffsetOf rune-accurate conversion helpers as the exact-span substrate. - #296: the formatter tracks brace depth with the parser's string- and comment-aware scanner, so braces inside string literals, comments, and template literals no longer skew indentation. - #297: conflict diagnostics (duplicate_route and route_method_conflict, including contract-route conflicts) carry a related "first declared here" location. check --json gains an additive `related` array and the LSP emits relatedInformation. - #295: a machine-checked .gwdk conformance corpus (accept/reject cases with golden diagnostic codes) pins the language contract, referenced from the spec, grammar, and syntax-contributor docs. - #298: a per-construct stability and deprecation table, with metadata keywords DRYed into lang.MetadataKeywords and view.SupportedDirectiveNames exported, guarded against drift by a cross-check test. Tests: go test ./internal/... ./cmd/... . all pass; gofmt and go vet clean; node --test editors/vscode/*.test.js passes (39). 
--- CHANGELOG.md | 27 ++++ docs/compiler/pipeline.md | 6 + docs/compiler/syntax-contributors.md | 5 + ...0010-tokenizer-recursive-descent-parser.md | 139 ++++++++++++++++++ docs/engineering/decisions/README.md | 3 + docs/language/README.md | 2 + docs/language/conformance.md | 51 +++++++ docs/language/grammar.md | 4 + docs/language/spec.md | 10 ++ docs/language/stability.md | 111 ++++++++++++++ internal/compiler/routes.go | 20 +++ internal/compiler/routes_related_test.go | 77 ++++++++++ internal/compiler/validate.go | 7 +- internal/lang/conformance_test.go | 119 +++++++++++++++ internal/lang/diagnostic.go | 26 +++- internal/lang/format.go | 22 +-- internal/lang/format_test.go | 31 ++++ internal/lang/keywords.go | 37 +++++ internal/lang/lexer.go | 7 +- internal/lang/stability_doc_test.go | 35 +++++ .../conformance/accept/component.cmp.gwdk | 7 + .../conformance/accept/minimal_page.gwdk | 10 ++ .../accept/page_with_metadata.gwdk | 11 ++ .../conformance/reject/malformed_use.gwdk | 10 ++ .../conformance/reject/old_action_block.gwdk | 12 ++ .../conformance/reject/old_api_block.gwdk | 12 ++ .../reject/unknown_top_level_block.gwdk | 12 ++ internal/lang/tools.go | 17 +++ internal/lsp/diagnostics.go | 34 ++++- internal/lsp/notifications.go | 2 +- internal/lsp/protocol_types.go | 14 +- internal/parser/braces.go | 29 ++++ internal/source/source.go | 72 +++++++++ internal/source/source_test.go | 52 +++++++ internal/view/directives.go | 14 ++ 35 files changed, 1011 insertions(+), 36 deletions(-) create mode 100644 docs/engineering/decisions/0010-tokenizer-recursive-descent-parser.md create mode 100644 docs/language/conformance.md create mode 100644 docs/language/stability.md create mode 100644 internal/compiler/routes_related_test.go create mode 100644 internal/lang/conformance_test.go create mode 100644 internal/lang/keywords.go create mode 100644 internal/lang/stability_doc_test.go create mode 100644 internal/lang/testdata/conformance/accept/component.cmp.gwdk create mode 
100644 internal/lang/testdata/conformance/accept/minimal_page.gwdk create mode 100644 internal/lang/testdata/conformance/accept/page_with_metadata.gwdk create mode 100644 internal/lang/testdata/conformance/reject/malformed_use.gwdk create mode 100644 internal/lang/testdata/conformance/reject/old_action_block.gwdk create mode 100644 internal/lang/testdata/conformance/reject/old_api_block.gwdk create mode 100644 internal/lang/testdata/conformance/reject/unknown_top_level_block.gwdk diff --git a/CHANGELOG.md b/CHANGELOG.md index 01881acd..28474cdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,33 @@ packages, and tooling contracts may change before a stable release. ### Changed +- Conflict diagnostics (`duplicate_route`, `route_method_conflict`, including + contract-route conflicts) now carry a related source location pointing at the + first declaration. `gowdk check --json` gains an additive `related` array per + diagnostic, and the language server reports it as `relatedInformation`. +- The formatter now tracks brace depth with the parser's string- and + comment-aware scanner, so braces inside string literals, comments, and + template literals (for example `title "a { b"`) no longer skew indentation. + +### Implemented + +- A machine-checked `.gwdk` conformance corpus + (`internal/lang/testdata/conformance/`) pins the language contract: `accept/` + cases must check clean and `reject/` cases must produce their declared stable + diagnostic codes. See `docs/language/conformance.md`. +- A per-construct stability and deprecation table + (`docs/language/stability.md`) documents which blocks, metadata keywords, and + `g:` directives are stable, partial, planned, or deprecated, guarded against + drift from the code registries by a test. +- `source.SourcePosition` carries a byte `Offset`, with `source.PositionAt` and + `source.OffsetOf` conversion helpers, as the exact substrate for future + AST-backed formatting and precise editor edits. 
+- ADR 0010 records the decision to replace the line-oriented parser with a + shared tokenizer and a recursive-descent parser with error recovery, migrated + behind the stable `gwdkast` AST seam. + +### Changed + - A page that declares no `guard` is no longer a build error. `guard` is now optional, but a page is not public by default: `missing_page_guard` is now a **warning** and the page's route is denied (403) at request time until the diff --git a/docs/compiler/pipeline.md b/docs/compiler/pipeline.md index 688344ce..e71ba278 100644 --- a/docs/compiler/pipeline.md +++ b/docs/compiler/pipeline.md @@ -70,3 +70,9 @@ project config Future build work should expand from the current generated-output slice while keeping downstream passes on `internal/gwdkir.Program`. + +The `lex/parse full AST` front-end is the line-oriented parser today. The +decision to replace it with a shared tokenizer and a recursive-descent parser +with error recovery, migrated behind the stable `internal/gwdkast` AST seam, is +recorded in +`docs/engineering/decisions/0010-tokenizer-recursive-descent-parser.md`. diff --git a/docs/compiler/syntax-contributors.md b/docs/compiler/syntax-contributors.md index 1a4a23ca..68829d73 100644 --- a/docs/compiler/syntax-contributors.md +++ b/docs/compiler/syntax-contributors.md @@ -44,6 +44,11 @@ language contract. - LSP/editor: `go test ./internal/lsp` plus editor checks when touched. - CLI report changes: update `cmd/gowdk/testdata/*_golden` and run `go test ./cmd/gowdk`. +8. Add a conformance corpus case: + - Accepted syntax: an `accept/` file under + `internal/lang/testdata/conformance/` that exercises it. + - A rejection or new diagnostic: a `reject/` file with a leading + `// expect: code` directive. See `docs/language/conformance.md`. 
## Guardrails diff --git a/docs/engineering/decisions/0010-tokenizer-recursive-descent-parser.md b/docs/engineering/decisions/0010-tokenizer-recursive-descent-parser.md new file mode 100644 index 00000000..8462b150 --- /dev/null +++ b/docs/engineering/decisions/0010-tokenizer-recursive-descent-parser.md @@ -0,0 +1,139 @@ +# ADR 0010: Tokenizer and Recursive-Descent Parser Direction + +Date: 2026-06-11 + +## Status + +Accepted + +## Context + +The compiler front-end is line-oriented. `internal/parser.ParseSyntax` reads +source with a `bufio.Scanner`, matches patterns against each trimmed line +(`internal/parser/patterns.go` `lexLine`), tracks nesting with a separate stateful +brace scanner (`internal/parser/braces.go`), and returns on the first syntax +error with no recovery. Source positions are 1-based line/column with no byte +offset, so many spans are line-wide approximations (`sourceLineSpan`). The +formatter (`internal/lang/format.go`) is independent whitespace-only string +manipulation that counts braces without skipping strings or comments. + +This single foundation is the upstream constraint behind most of the deferred +parser/formatter/diagnostics work (#250): error recovery, an AST-backed +formatter, exact token spans, and granular per-construct diagnostic codes are all +downstream of having a real token stream and a node-producing parser. Right now +the line-oriented parser is deferred by omission rather than by an explicit +decision. + +Two facts make the direction clear rather than open-ended: + +1. The documented target pipeline (`docs/compiler/pipeline.md`) already names a + `lex/parse full AST -> semantic analysis -> stable internal IR` front-end. + This ADR makes explicit the parser-internals decision that target already + implies. +2. A real character-level tokenizer already exists. `internal/lang.Lex` + (`internal/lang/lexer.go`) scans runes into typed tokens with line/column + positions, but only editor and CLI tooling consume it. 
The compiler parser + ignores it and re-lexes per line. The codebase therefore maintains two + divergent front-ends for the same language. + +Crucially, the typed AST is already a stable seam. `internal/parser.ParseSyntax` +produces the `internal/gwdkast` AST, and every downstream pass +(`internal/gwdkanalysis` lowering to `internal/gwdkir.Program`, validation, and +generation) consumes that AST. The parser can be replaced behind that seam +without disturbing IR, validation, reports, or codegen. + +## Decision + +Commit to a single shared tokenizer and a recursive-descent parser with error +recovery, producing the existing `internal/gwdkast` AST. Migrate incrementally +behind the AST seam. + +Concretely: + +- **One tokenizer.** Promote the `internal/lang` rune scanner into the shared + lexer that both the compiler parser and editor/CLI tooling consume. Retire the + per-line `lexLine` path in `internal/parser`. There is one lexical definition + of `.gwdk`, not two. +- **Recursive-descent parser over tokens.** Parse the token stream into + `gwdkast.File` with explicit declaration, block, and view productions instead + of line-pattern matching. The brace scanner's string/comment/template state + becomes ordinary lexer state rather than a separate counter. +- **Error recovery.** The parser synchronizes at top-level declaration + boundaries and block braces so one syntax error does not hide the rest of the + file. It accumulates diagnostics instead of returning on the first error. +- **Exact spans.** Tokens carry byte offsets (ADR depends on #294), so AST nodes + and diagnostics get exact token ranges instead of line-wide approximations. +- **AST is the frozen seam.** `internal/gwdkast.File` is the contract. The new + parser must produce the same AST as the line-oriented parser for the currently + supported subset; `gwdkanalysis`, `gwdkir`, validation, reports, and codegen do + not change as part of this work. 
+- **Formatter follows.** Once the parser yields full nodes, the AST-backed + formatter deferred in #250 becomes possible and replaces line-oriented + `format.go`. Until then, the line-oriented formatter keeps its documented + limits (see #296). + +Migration is incremental and non-breaking. The line-oriented parser keeps working +while the new parser is built to produce identical `gwdkast.File` output for the +supported subset, gated by golden AST-equivalence tests and the language +conformance corpus (#295). Cutover happens per declaration kind once equivalence +holds, then the line-oriented path and `lexLine` are removed. + +## Consequences + +### Positive + +- One lexical and grammatical definition of `.gwdk` shared by the compiler and + the language server, instead of a line parser plus a separate tooling lexer. +- Error recovery, exact spans, AST-backed formatting, and granular diagnostic + codes become reachable; #250 stops being blocked by the front-end. +- Diagnostics point at tokens rather than whole lines, improving CLI output and + LSP precision. +- Braces inside strings, comments, and template literals are handled by lexer + state, removing a class of parser and formatter miscounts by construction. + +### Negative + +- A recursive-descent parser plus recovery is materially more code than the + current line parser, and the migration must preserve AST output exactly to stay + non-breaking. +- Equivalence testing across every declaration kind is required before cutover; + this is real up-front cost before any user-visible benefit lands. +- Recovery and span precision depend on byte offsets (#294) landing first. + +### Neutral + +- The public language surface does not change. This is a front-end + implementation decision, not a grammar change; the conformance corpus (#295) + pins behavior across the migration. +- Downstream passes are untouched because the AST seam is stable. 
+ +## Alternatives Considered + +- **Keep the line-oriented parser, document its limits.** Lowest cost, but + permanently caps span precision, error recovery, and AST-backed formatting, and + keeps two divergent front-ends. Rejected: it contradicts the already-documented + target pipeline and leaves #250 structurally blocked. +- **Adopt a parser generator or third-party combinator library** (ANTLR, + participle, goyacc). Rejected: adds a dependency and a generated/runtime layer + against the project's lean-dependency stance, and a hand-written + recursive-descent parser gives better control over recovery and diagnostics for + a small surface language. +- **Incremental/streaming parser from day one.** Useful for an editor, but + premature. The AST seam lets an incremental layer be added later without + another front-end decision. + +## Follow-Up + +- #294 (byte offsets in source positions) is the prerequisite; land it first. +- Build the shared tokenizer by promoting `internal/lang`'s scanner; retire + `internal/parser` `lexLine`. +- Build the recursive-descent parser to `gwdkast.File` with recovery, gated by + golden AST-equivalence tests and the conformance corpus (#295). +- Cut over per declaration kind; remove the line-oriented parser when equivalence + holds across the supported subset. +- AST-backed formatter and granular per-construct diagnostic codes (#250) consume + the new parser; #296 is the interim formatter guard. +- Link this ADR from the #250 deferral so the line-oriented limitation is a + conscious choice with a committed exit. +- Keep `docs/compiler/pipeline.md` and `docs/engineering/architecture.md` aligned + as the migration proceeds. 
diff --git a/docs/engineering/decisions/README.md b/docs/engineering/decisions/README.md index ef23427a..92beebab 100644 --- a/docs/engineering/decisions/README.md +++ b/docs/engineering/decisions/README.md @@ -23,3 +23,6 @@ Recommended naming: - `0007-static-first-spa-navigation.md`: accepted static-first SPA navigation and generated JavaScript guardrails. - `0008-bounded-client-language.md`: accepted bounded `client {}` language and page-scoped store boundaries. - `0009-optional-inline-go-authoring.md`: accepted optional inline Go authoring direction, with extraction to normal package Go. +- `0010-tokenizer-recursive-descent-parser.md`: accepted shared tokenizer and + recursive-descent parser with error recovery, migrated behind the stable + `gwdkast` AST seam. diff --git a/docs/language/README.md b/docs/language/README.md index e5f18be4..4aca5e4f 100644 --- a/docs/language/README.md +++ b/docs/language/README.md @@ -51,6 +51,8 @@ component contract and inline package-go-block slices. - `hybrid.md`: hybrid request-time behavior and deferred hybrid capabilities. - `diagnostics.md`: current diagnostic shape and known codes. - `formatting.md`: current formatter behavior. +- `stability.md`: per-construct stability and deprecation tiers. +- `conformance.md`: machine-checked accept/reject corpus that pins the contract. ## File Kinds diff --git a/docs/language/conformance.md b/docs/language/conformance.md new file mode 100644 index 00000000..b61cf850 --- /dev/null +++ b/docs/language/conformance.md @@ -0,0 +1,51 @@ +# .gwdk Conformance Corpus + +The conformance corpus is the machine-checked source of truth for the `.gwdk` +language contract. The prose in `docs/language/spec.md` and +`docs/language/grammar.md` describes the language; the corpus *pins* it, so a +parser or validator change that silently accepts or rejects different syntax +fails a test instead of drifting from the docs. 
+ +## Location + +```text +internal/lang/testdata/conformance/ + accept/ # files that must check clean (no error-severity diagnostics) + reject/ # files that must produce specific stable diagnostic codes +``` + +The runner is `TestConformanceCorpusAccept` and `TestConformanceCorpusReject` in +`internal/lang/conformance_test.go`. Each file is checked with +`lang.CheckSource`, the same single-file path the editor and `gowdk check` use, +so cases are hermetic and need no project layout. + +## Accept cases + +Any `.gwdk` file under `accept/` must produce no error-severity diagnostics. +Warnings (for example `missing_img_alt`) are allowed, because they do not fail a +build. File-kind classification follows the filename suffix, so a component case +is named `*.cmp.gwdk` and a layout case `*.layout.gwdk`. + +## Reject cases + +Any `.gwdk` file under `reject/` must declare the stable diagnostic codes it is +expected to produce in a leading directive comment: + +```gwdk +// expect: old_action_block_syntax +package pages +... +``` + +Multiple codes may be comma- or space-separated. The test asserts every named +code appears among the diagnostics for that file. Diagnostic codes are the ones +registered in `internal/diagnostics/registry.go` and documented in +`docs/reference/diagnostic-codes.md`. + +## Adding a corpus case + +New or changed `.gwdk` syntax must come with a corpus case. Adding accepted +syntax means an `accept/` file exercising it; adding a rejection or a new +diagnostic means a `reject/` file with the expected code. This requirement is +part of the syntax contributor checklist in +`docs/compiler/syntax-contributors.md`. diff --git a/docs/language/grammar.md b/docs/language/grammar.md index cc8cc2f8..265472b5 100644 --- a/docs/language/grammar.md +++ b/docs/language/grammar.md @@ -2,6 +2,10 @@ This is the grammar accepted by the current metadata parser. It is intentionally line-oriented and incomplete. 
+Accepted and rejected syntax is pinned by the machine-checked conformance corpus +in [Conformance Corpus](conformance.md), which is the contract source of truth +when this grammar drifts. + ```text file = line* line = blank | comment | packageDecl | metadataDecl | importDecl | useDecl | blockDecl | goDecl | actionDecl | apiDecl | unsupportedBlock | other diff --git a/docs/language/spec.md b/docs/language/spec.md index aa4e3bd4..e932f39a 100644 --- a/docs/language/spec.md +++ b/docs/language/spec.md @@ -8,6 +8,16 @@ instead of becoming accidental behavior. Detailed behavior stays in the feature pages linked from [GOWDK Language](README.md). +This prose is pinned by the machine-checked conformance corpus described in +[Conformance Corpus](conformance.md): accepted syntax has an `accept/` case that +must check clean, and rejected syntax has a `reject/` case asserting its stable +diagnostic code. When this spec and the corpus disagree, the corpus is the +contract and one of them is a bug. + +Per-construct stability and deprecation tiers (which blocks, metadata keywords, +and `g:` directives are stable, partial, planned, or deprecated) are published +in [Language Construct Stability](stability.md). + ## Status Terms - Implemented: accepted by the current compiler and covered by tests or a diff --git a/docs/language/stability.md b/docs/language/stability.md new file mode 100644 index 00000000..2a1a6b52 --- /dev/null +++ b/docs/language/stability.md @@ -0,0 +1,111 @@ +# Language Construct Stability + +This table is the per-construct stability and deprecation contract for the +experimental 0.x `.gwdk` language. The diagnostics registry already records a +stability tier per diagnostic code (`internal/diagnostics/registry.go`); this +page does the same for the language constructs themselves, so a user or tooling +author can tell which syntax is safe to depend on and which is still moving. 
+ +It complements, and is pinned by, the machine-checked +[Conformance Corpus](conformance.md): a `Stable` or `Partial` construct should +have an `accept/` case, and a `Planned`/`Deprecated` construct should have a +`reject/` case asserting the diagnostic code named below. + +## Status Tiers + +- **Stable**: accepted by the current compiler and not expected to change shape + within 0.x without a deprecation step. +- **Partial**: accepted for a narrower slice than the final contract; the syntax + is real but its capability will grow. +- **Planned**: not accepted as source behavior yet; using it is rejected with + the listed diagnostic code so it cannot become accidental behavior. +- **Deprecated**: previously accepted spelling that is now rejected with a + migration diagnostic. + +The canonical construct names below are the source of truth in code +(`lang.MetadataKeywords` and `view.SupportedDirectiveNames()`) and are +cross-checked against this page by `TestStabilityTableCoversConstructs`. + +## Top-Level Blocks + +| Construct | Tier | Notes | +| --- | --- | --- | +| `package` | Stable | Required first declaration. | +| `import` | Stable | Go import for colocated blocks. | +| `use` | Stable | Package-scoped component import. | +| `paths {}` | Partial | Literal `=> { field: "value" }` records only. | +| `build {}` | Partial | Literal records and no-argument Go calls. | +| `load {}` | Partial | Request-time data; requires the SSR addon. | +| `view {}` | Stable | Markup; see directives below. | +| `style {}` | Stable | Scoped CSS body. | +| `client {}` | Partial | Bounded component client language. | +| `go {}` / `go ssr {}` / `go client {}` / `go addon.* {}` | Partial | Colocated Go lanes. | +| `store` / `props` / `state` / `emits` | Partial | Component contracts. | +| Unknown top-level block | Planned | Rejected with `unsupported_top_level_block`. | + +## Metadata Keywords + +All metadata keywords are **Stable**. The canonical list is `lang.MetadataKeywords`. 
+ +| Keyword | Tier | +| --- | --- | +| `page` | Stable | +| `route` | Stable | +| `title` | Stable | +| `description` | Stable | +| `canonical` | Stable | +| `image` | Stable | +| `layout` | Stable | +| `cache` | Stable | +| `revalidate` | Stable | +| `error` | Stable | +| `guard` | Stable | +| `css` | Stable | +| `component` | Stable | +| `wasm` | Stable | +| `asset` | Stable | + +Legacy `@`-prefixed metadata is **Deprecated** and rejected with +`malformed_legacy_metadata`. + +## View `g:` Directives + +Supported exact-name directives (the closed set in +`view.SupportedDirectiveNames()`): + +| Directive | Tier | Notes | +| --- | --- | --- | +| `g:if` / `g:else-if` / `g:else` | Stable | Conditional flow. | +| `g:for` / `g:key` | Stable | List rendering. | +| `g:bind:value` / `g:bind:checked` | Partial | Two-way bindings. | +| `g:on:*` | Partial | Event handlers with `.prevent`/`.stop`/`.once`/`.capture`/`.debounce`/`.throttle`. | +| `g:post` / `g:target` / `g:swap` | Partial | Progressive form/fragment submission. | +| `g:message:*` | Partial | `required`, `minlength`, `maxlength`, `pattern`. | +| `g:island` | Partial | `js` or `wasm` island. | +| `g:command` / `g:query` | Partial | Contract web adapters. | +| `g:event` | Partial | Parses to explain backend-owned domain events. | +| `g:html` | Stable | Raw HTML escape hatch; `unsafe_raw_html` is reported. | +| `g:ref` | Partial | Client reference. | +| `g:slot` | Partial | Named/scoped slot. | + +Planned directives are rejected with `unsupported_markup_directive`: + +| Directive family | Tier | Replacement | +| --- | --- | --- | +| `g:transition`, `g:animate` | Planned | CSS transitions or a future addon. | +| `g:window`, `g:document`, `g:body`, `g:head` | Planned | Page metadata or `g:on:*` on elements. | +| `g:await`, `g:async` | Planned | build/load data, actions, APIs, fragments. | +| `g:use`, `g:action`, `g:attach` | Planned | `client {}` with `g:ref`. 
| + +Foreign template syntax (`{#if}`, `{@html}`, and similar) is **Planned/Unsupported** +and rejected with `unsupported_markup_syntax`. + +## Endpoint Declarations + +| Construct | Tier | Notes | +| --- | --- | --- | +| `act POST ""` | Stable | POST only today. | +| `api ""` | Stable | GET/POST/PUT/PATCH/DELETE. | +| Fragment endpoints | Partial | First-slice partial updates. | +| `act { ... }` block form | Deprecated | Rejected with `old_action_block_syntax`. | +| `api { ... }` block form | Deprecated | Rejected with `old_api_block_syntax`. | diff --git a/internal/compiler/routes.go b/internal/compiler/routes.go index 89c76dba..7a961864 100644 --- a/internal/compiler/routes.go +++ b/internal/compiler/routes.go @@ -27,6 +27,11 @@ func validateUniquePageRoutes(pages []gwdkir.Page) []ValidationError { PageID: page.ID, Source: page.Source, Span: page.Spans.Route, + Related: relatedSpan( + first.Source, + first.Spans.Route, + fmt.Sprintf("route %q first declared here", page.Route), + ), Message: duplicateRouteMessage( page.Route, first.ID, @@ -39,6 +44,16 @@ func validateUniquePageRoutes(pages []gwdkir.Page) []ValidationError { return diagnostics } +// relatedSpan returns a single-element related-location slice for a conflict +// diagnostic's earlier declaration, or nil when the earlier span is unset so a +// missing location is never reported as a bogus 1:1 position. 
+func relatedSpan(src string, span source.SourceSpan, message string) []source.RelatedSpan { + if !hasSpan(span) { + return nil + } + return []source.RelatedSpan{{Source: src, Span: span, Message: message}} +} + func duplicateRouteMessage(route, firstID, firstSource, duplicateID, duplicateSource string) string { message := fmt.Sprintf("duplicate page route %q", route) if firstID != "" && duplicateID != "" { @@ -191,6 +206,11 @@ func validateRouteMethodConflicts(pages []gwdkir.Page, endpoints []gwdkir.GoEndp PageID: registration.PageID, Source: registration.Source, Span: registration.Span, + Related: relatedSpan( + previous.Source, + previous.Span, + fmt.Sprintf("%s first declared here", previous.Owner), + ), Message: fmt.Sprintf( "%s %s for %s conflicts with %s", registration.Method, diff --git a/internal/compiler/routes_related_test.go b/internal/compiler/routes_related_test.go new file mode 100644 index 00000000..5b1178f1 --- /dev/null +++ b/internal/compiler/routes_related_test.go @@ -0,0 +1,77 @@ +package compiler + +import ( + "testing" + + "github.com/cssbruno/gowdk/internal/gwdkir" + "github.com/cssbruno/gowdk/internal/source" +) + +func span(line, startColumn, endColumn int) source.SourceSpan { + return source.SourceSpan{ + Start: source.SourcePosition{Line: line, Column: startColumn}, + End: source.SourcePosition{Line: line, Column: endColumn}, + } +} + +func findByCode(diagnostics []ValidationError, code string) (ValidationError, bool) { + for _, diagnostic := range diagnostics { + if diagnostic.Code == code { + return diagnostic, true + } + } + return ValidationError{}, false +} + +func TestDuplicateRouteCarriesRelatedFirstDeclaration(t *testing.T) { + pages := []gwdkir.Page{ + {ID: "home", Source: "home.page.gwdk", Route: "/", Spans: gwdkir.PageSpans{Route: span(2, 1, 9)}}, + {ID: "index", Source: "index.page.gwdk", Route: "/", Spans: gwdkir.PageSpans{Route: span(3, 1, 9)}}, + } + + diagnostic, ok := findByCode(validateUniquePageRoutes(pages), 
"duplicate_route") + if !ok { + t.Fatal("expected a duplicate_route diagnostic") + } + if diagnostic.Source != "index.page.gwdk" { + t.Fatalf("primary diagnostic should point at the duplicate; got %q", diagnostic.Source) + } + if len(diagnostic.Related) != 1 { + t.Fatalf("expected one related location, got %d", len(diagnostic.Related)) + } + related := diagnostic.Related[0] + if related.Source != "home.page.gwdk" { + t.Fatalf("related location should point at the first declaration; got %q", related.Source) + } + if related.Span != span(2, 1, 9) { + t.Fatalf("related span should be the first route span; got %+v", related.Span) + } + if related.Message == "" { + t.Fatal("related location should carry a message") + } +} + +func TestContractRouteConflictCarriesRelatedFirstDeclaration(t *testing.T) { + // Two differently-named query contracts on the same GET route conflict + // through the shared route-registration path; the conflict must point back + // at the first contract's declaration. + refs := []gwdkir.ContractReference{ + {Kind: gwdkir.ContractQuery, Name: "Reports", Method: "GET", Path: "/reports", Source: "reports.gwdk", Span: span(4, 1, 12)}, + {Kind: gwdkir.ContractQuery, Name: "Summary", Method: "GET", Path: "/reports", Source: "summary.gwdk", Span: span(7, 1, 12)}, + } + + diagnostic, ok := findByCode(validateRouteMethodConflicts(nil, nil, refs), "route_method_conflict") + if !ok { + t.Fatal("expected a route_method_conflict diagnostic") + } + if len(diagnostic.Related) != 1 { + t.Fatalf("expected one related location, got %d", len(diagnostic.Related)) + } + related := diagnostic.Related[0] + if related.Source != "reports.gwdk" { + t.Fatalf("related location should point at the first contract; got %q", related.Source) + } + if related.Span != span(4, 1, 12) { + t.Fatalf("related span should be the first contract span; got %+v", related.Span) + } +} diff --git a/internal/compiler/validate.go b/internal/compiler/validate.go index 0e2923a1..4613e752 100644 
--- a/internal/compiler/validate.go +++ b/internal/compiler/validate.go @@ -24,8 +24,11 @@ type ValidationError struct { ComponentName string Source string Span source.SourceSpan - Message string - Severity Severity + // Related carries secondary source locations, such as the first declaration + // that a conflict diagnostic also points at. It is optional and additive. + Related []source.RelatedSpan + Message string + Severity Severity } func (err ValidationError) Error() string { diff --git a/internal/lang/conformance_test.go b/internal/lang/conformance_test.go new file mode 100644 index 00000000..bbc28dcb --- /dev/null +++ b/internal/lang/conformance_test.go @@ -0,0 +1,119 @@ +package lang + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/cssbruno/gowdk" +) + +// The conformance corpus pins the .gwdk language contract: every file under +// testdata/conformance/accept must check clean (no error-severity +// diagnostics), and every file under testdata/conformance/reject must produce +// the stable diagnostic codes named in its leading `// expect: code[, code]` +// directive. New language syntax should add a corpus case here. See +// docs/language/conformance.md. 
+ +const conformanceExpectPrefix = "// expect:" + +func TestConformanceCorpusAccept(t *testing.T) { + dir := filepath.FromSlash("testdata/conformance/accept") + for _, name := range conformanceFiles(t, dir) { + t.Run(name, func(t *testing.T) { + path := filepath.Join(dir, name) + source := readConformanceFile(t, path) + _, diagnostics := CheckSource(gowdk.Config{}, path, source) + for _, diagnostic := range diagnostics { + if diagnostic.Severity == "error" { + t.Errorf("accept case produced error %q: %s", diagnostic.Code, diagnostic.Message) + } + } + }) + } +} + +func TestConformanceCorpusReject(t *testing.T) { + dir := filepath.FromSlash("testdata/conformance/reject") + for _, name := range conformanceFiles(t, dir) { + t.Run(name, func(t *testing.T) { + path := filepath.Join(dir, name) + source := readConformanceFile(t, path) + expected := conformanceExpectedCodes(source) + if len(expected) == 0 { + t.Fatalf("reject case %s is missing a %q directive", name, conformanceExpectPrefix) + } + _, diagnostics := CheckSource(gowdk.Config{}, path, source) + got := conformanceCodes(diagnostics) + for _, code := range expected { + if !containsCode(got, code) { + t.Errorf("reject case %s expected diagnostic %q; got %v", name, code, got) + } + } + }) + } +} + +func conformanceFiles(t *testing.T, dir string) []string { + t.Helper() + entries, err := os.ReadDir(dir) + if err != nil { + t.Fatalf("read corpus dir %s: %v", dir, err) + } + var names []string + for _, entry := range entries { + if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".gwdk") { + continue + } + names = append(names, entry.Name()) + } + if len(names) == 0 { + t.Fatalf("corpus dir %s has no .gwdk cases", dir) + } + return names +} + +func readConformanceFile(t *testing.T, path string) []byte { + t.Helper() + source, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + return source +} + +func conformanceExpectedCodes(source []byte) []string { + for _, line := range strings.Split(string(source), 
"\n") { + trimmed := strings.TrimSpace(line) + if !strings.HasPrefix(trimmed, conformanceExpectPrefix) { + continue + } + rest := strings.TrimSpace(strings.TrimPrefix(trimmed, conformanceExpectPrefix)) + var codes []string + for _, field := range strings.FieldsFunc(rest, func(r rune) bool { return r == ',' || r == ' ' }) { + if field != "" { + codes = append(codes, field) + } + } + return codes + } + return nil +} + +func conformanceCodes(diagnostics Diagnostics) []string { + codes := make([]string, 0, len(diagnostics)) + for _, diagnostic := range diagnostics { + codes = append(codes, diagnostic.Code) + } + return codes +} + +func containsCode(codes []string, want string) bool { + for _, code := range codes { + if code == want { + return true + } + } + return false +} diff --git a/internal/lang/diagnostic.go b/internal/lang/diagnostic.go index 08f1adaa..604f8a9f 100644 --- a/internal/lang/diagnostic.go +++ b/internal/lang/diagnostic.go @@ -20,16 +20,26 @@ type Range struct { End Position `json:"end"` } +// RelatedLocation is a secondary source location attached to a diagnostic, such +// as the first declaration that a conflict diagnostic also points at. +type RelatedLocation struct { + File string `json:"file,omitempty"` + Pos Position `json:"pos"` + Range *Range `json:"range,omitempty"` + Message string `json:"message,omitempty"` +} + // Diagnostic describes a language-tool finding. 
type Diagnostic struct { - File string `json:"file"` - Code string `json:"code,omitempty"` - Pos Position `json:"pos"` - Range *Range `json:"range,omitempty"` - Severity string `json:"severity"` - Fix *diagnostics.Fix `json:"fix,omitempty"` - Message string `json:"message"` - Suggestion string `json:"suggestion,omitempty"` + File string `json:"file"` + Code string `json:"code,omitempty"` + Pos Position `json:"pos"` + Range *Range `json:"range,omitempty"` + Severity string `json:"severity"` + Fix *diagnostics.Fix `json:"fix,omitempty"` + Message string `json:"message"` + Suggestion string `json:"suggestion,omitempty"` + Related []RelatedLocation `json:"related,omitempty"` } func (diagnostic Diagnostic) String() string { diff --git a/internal/lang/format.go b/internal/lang/format.go index 5bece8dd..391e67d3 100644 --- a/internal/lang/format.go +++ b/internal/lang/format.go @@ -2,13 +2,19 @@ package lang import ( "strings" + + "github.com/cssbruno/gowdk/internal/parser" ) -// Format normalizes whitespace for top-level .gwdk metadata and blocks. +// Format normalizes whitespace for top-level .gwdk metadata and blocks. Brace +// depth is tracked with the parser's string/comment-aware scanner so braces +// inside string literals, comments, and template literals do not skew +// indentation (for example `title "a { b"` or `// note about }`). func Format(source []byte) []byte { var out []string blankPending := false depth := 0 + braces := parser.NewBraceDepth() for _, raw := range strings.Split(string(source), "\n") { line := strings.TrimSpace(raw) @@ -22,12 +28,15 @@ func Format(source []byte) []byte { } blankPending = false + // A line that is textually a closing brace dedents itself, unless we are + // inside a multi-line literal/comment where that "}" is body content. 
+ inMultiline := braces.InMultiline() indent := depth - if strings.HasPrefix(line, "}") && indent > 0 { + if !inMultiline && strings.HasPrefix(line, "}") && indent > 0 { indent-- } out = append(out, strings.Repeat(" ", indent)+line) - depth += strings.Count(line, "{") - strings.Count(line, "}") + depth += braces.Delta(line) if depth < 0 { depth = 0 } @@ -48,10 +57,5 @@ func isTopLevelMetadataLine(line string) bool { if len(fields) == 0 { return false } - switch fields[0] { - case "page", "route", "title", "description", "canonical", "image", "layout", "cache", "revalidate", "error", "guard", "css", "component", "wasm", "asset": - return true - default: - return false - } + return IsMetadataKeyword(fields[0]) } diff --git a/internal/lang/format_test.go b/internal/lang/format_test.go index 7901beb7..3a41246c 100644 --- a/internal/lang/format_test.go +++ b/internal/lang/format_test.go @@ -59,6 +59,37 @@ func TestFormatKeepsElseBranchesAligned(t *testing.T) { } } +func TestFormatIgnoresBracesInStrings(t *testing.T) { + // The brace inside the title string must not open a nesting level; the + // following route stays at top level. A naive brace count would indent it. + source := []byte("page home\ntitle \"a { b\"\nroute \"/\"\n") + got := string(Format(source)) + want := "page home\ntitle \"a { b\"\nroute \"/\"\n" + if got != want { + t.Fatalf("brace in string changed indentation:\n--- got ---\n%s--- want ---\n%s", got, want) + } +} + +func TestFormatIgnoresBracesInComments(t *testing.T) { + // The unbalanced brace in the comment must not change depth; the sibling + // statement stays indented inside the block. 
+ source := []byte("go {\n// closes here }\na()\n}\n") + got := string(Format(source)) + want := "go {\n // closes here }\n a()\n}\n" + if got != want { + t.Fatalf("brace in comment changed indentation:\n--- got ---\n%s--- want ---\n%s", got, want) + } +} + +func TestFormatIgnoresBracesInTemplateLiterals(t *testing.T) { + source := []byte("client {\nconst t = `a ${x} }`\nrun()\n}\n") + got := string(Format(source)) + want := "client {\n const t = `a ${x} }`\n run()\n}\n" + if got != want { + t.Fatalf("brace in template literal changed indentation:\n--- got ---\n%s--- want ---\n%s", got, want) + } +} + func TestFormatIsIdempotentForSupportedShapes(t *testing.T) { tests := map[string]string{ "page": `package app diff --git a/internal/lang/keywords.go b/internal/lang/keywords.go new file mode 100644 index 00000000..184c985b --- /dev/null +++ b/internal/lang/keywords.go @@ -0,0 +1,37 @@ +package lang + +// MetadataKeywords is the canonical, ordered set of top-level metadata keywords +// the current parser recognizes. It is the single source of truth for metadata +// classification (the lexer and the formatter both consume it) and is +// cross-checked against the published stability table in +// docs/language/stability.md by TestStabilityTableCoversConstructs. +var MetadataKeywords = []string{ + "page", + "route", + "title", + "description", + "canonical", + "image", + "layout", + "cache", + "revalidate", + "error", + "guard", + "css", + "component", + "wasm", + "asset", +} + +var metadataKeywordSet = func() map[string]bool { + set := make(map[string]bool, len(MetadataKeywords)) + for _, keyword := range MetadataKeywords { + set[keyword] = true + } + return set +}() + +// IsMetadataKeyword reports whether value is a top-level metadata keyword. 
+func IsMetadataKeyword(value string) bool { + return metadataKeywordSet[value] +} diff --git a/internal/lang/lexer.go b/internal/lang/lexer.go index ee623270..d902b386 100644 --- a/internal/lang/lexer.go +++ b/internal/lang/lexer.go @@ -206,10 +206,5 @@ func (scanner *scanner) isLineLeading(start int) bool { } func isMetadataLexeme(value string) bool { - switch value { - case "page", "route", "title", "description", "canonical", "image", "layout", "cache", "revalidate", "error", "guard", "css", "component", "wasm", "asset": - return true - default: - return false - } + return IsMetadataKeyword(value) } diff --git a/internal/lang/stability_doc_test.go b/internal/lang/stability_doc_test.go new file mode 100644 index 00000000..2c9df9ba --- /dev/null +++ b/internal/lang/stability_doc_test.go @@ -0,0 +1,35 @@ +package lang + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/cssbruno/gowdk/internal/view" +) + +// TestStabilityTableCoversConstructs guards against the published stability +// table drifting from the code registries: every metadata keyword and every +// supported g: directive must appear in docs/language/stability.md, so adding a +// construct in code without documenting its tier fails here. 
+func TestStabilityTableCoversConstructs(t *testing.T) { + path := filepath.FromSlash("../../docs/language/stability.md") + content, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read stability table: %v", err) + } + doc := string(content) + + for _, keyword := range MetadataKeywords { + if !strings.Contains(doc, "`"+keyword+"`") { + t.Errorf("metadata keyword %q is missing from %s", keyword, path) + } + } + + for _, directive := range view.SupportedDirectiveNames() { + if !strings.Contains(doc, directive) { + t.Errorf("g: directive %q is missing from %s", directive, path) + } + } +} diff --git a/internal/lang/testdata/conformance/accept/component.cmp.gwdk b/internal/lang/testdata/conformance/accept/component.cmp.gwdk new file mode 100644 index 00000000..d3dc24a7 --- /dev/null +++ b/internal/lang/testdata/conformance/accept/component.cmp.gwdk @@ -0,0 +1,7 @@ +package widgets + +component Badge + +view { + Badge +} diff --git a/internal/lang/testdata/conformance/accept/minimal_page.gwdk b/internal/lang/testdata/conformance/accept/minimal_page.gwdk new file mode 100644 index 00000000..dfb9c88a --- /dev/null +++ b/internal/lang/testdata/conformance/accept/minimal_page.gwdk @@ -0,0 +1,10 @@ +// Minimal static page: package, route, and a view. +package pages + +route "/" + +view { +
+

Hello

+
+} diff --git a/internal/lang/testdata/conformance/accept/page_with_metadata.gwdk b/internal/lang/testdata/conformance/accept/page_with_metadata.gwdk new file mode 100644 index 00000000..f7fa9f3c --- /dev/null +++ b/internal/lang/testdata/conformance/accept/page_with_metadata.gwdk @@ -0,0 +1,11 @@ +package pages + +route "/about" +title "About" +description "About this site" + +view { +
+

About us.

+
+} diff --git a/internal/lang/testdata/conformance/reject/malformed_use.gwdk b/internal/lang/testdata/conformance/reject/malformed_use.gwdk new file mode 100644 index 00000000..703c02d6 --- /dev/null +++ b/internal/lang/testdata/conformance/reject/malformed_use.gwdk @@ -0,0 +1,10 @@ +// expect: malformed_gowdk_use +package pages + +use widgets + +route "/" + +view { +
+} diff --git a/internal/lang/testdata/conformance/reject/old_action_block.gwdk b/internal/lang/testdata/conformance/reject/old_action_block.gwdk new file mode 100644 index 00000000..68551d03 --- /dev/null +++ b/internal/lang/testdata/conformance/reject/old_action_block.gwdk @@ -0,0 +1,12 @@ +// expect: old_action_block_syntax +package pages + +route "/login" + +act Login { + method POST +} + +view { +
+} diff --git a/internal/lang/testdata/conformance/reject/old_api_block.gwdk b/internal/lang/testdata/conformance/reject/old_api_block.gwdk new file mode 100644 index 00000000..c057d161 --- /dev/null +++ b/internal/lang/testdata/conformance/reject/old_api_block.gwdk @@ -0,0 +1,12 @@ +// expect: old_api_block_syntax +package pages + +route "/data" + +api Items { + method GET +} + +view { +
+} diff --git a/internal/lang/testdata/conformance/reject/unknown_top_level_block.gwdk b/internal/lang/testdata/conformance/reject/unknown_top_level_block.gwdk new file mode 100644 index 00000000..97cd8713 --- /dev/null +++ b/internal/lang/testdata/conformance/reject/unknown_top_level_block.gwdk @@ -0,0 +1,12 @@ +// expect: unsupported_top_level_block +package pages + +route "/" + +frobnicate { + doStuff() +} + +view { +
+} diff --git a/internal/lang/tools.go b/internal/lang/tools.go index 9dfd2e0d..acada9a3 100644 --- a/internal/lang/tools.go +++ b/internal/lang/tools.go @@ -447,6 +447,7 @@ func compilerDiagnostics(err error, ir gwdkir.Program) Diagnostics { Severity: severity, Message: validation.Error(), Suggestion: diagnosticSuggestion(validation), + Related: relatedLocations(validation.Related), }) } return diagnostics @@ -521,6 +522,22 @@ func sourcePosition(position source.SourcePosition) Position { return Position{Line: position.Line, Column: position.Column} } +func relatedLocations(related []source.RelatedSpan) []RelatedLocation { + if len(related) == 0 { + return nil + } + locations := make([]RelatedLocation, 0, len(related)) + for _, item := range related { + locations = append(locations, RelatedLocation{ + File: item.Source, + Pos: sourcePosition(item.Span.Start), + Range: sourceSpanRange(item.Span), + Message: item.Message, + }) + } + return locations +} + func sourceSpanRange(span source.SourceSpan) *Range { if span.Start.Line <= 0 || span.Start.Column <= 0 || span.End.Line <= 0 || span.End.Column <= 0 { return nil diff --git a/internal/lsp/diagnostics.go b/internal/lsp/diagnostics.go index e38c5e28..279331ba 100644 --- a/internal/lsp/diagnostics.go +++ b/internal/lsp/diagnostics.go @@ -8,20 +8,42 @@ import ( "github.com/cssbruno/gowdk/internal/source" ) -func diagnosticFromLang(item lang.Diagnostic, body string) diagnostic { +func diagnosticFromLang(item lang.Diagnostic, uri string, body string) diagnostic { severity := diagnosticSeverityError if item.Severity == "warning" { severity = diagnosticSeverityWarning } return diagnostic{ - Range: rangeFromLangDiagnostic(item, body), - Severity: severity, - Code: item.Code, - Source: "gowdk", - Message: lang.RedactMessage(item.Message), + Range: rangeFromLangDiagnostic(item, body), + Severity: severity, + Code: item.Code, + Source: "gowdk", + Message: lang.RedactMessage(item.Message), + RelatedInformation: 
relatedInformationFromLang(item.Related, uri, body), } } +// relatedInformationFromLang maps a diagnostic's secondary locations to LSP +// relatedInformation. The current single-document check surfaces same-file +// conflicts, so ranges are computed against body and the document uri is used. +func relatedInformationFromLang(related []lang.RelatedLocation, uri string, body string) []diagnosticRelatedInformation { + if len(related) == 0 { + return nil + } + information := make([]diagnosticRelatedInformation, 0, len(related)) + for _, item := range related { + rng := rangeFromPosition(item.Pos, body) + if item.Range != nil { + rng = rangeFromLangRange(*item.Range, body) + } + information = append(information, diagnosticRelatedInformation{ + Location: location{URI: uri, Range: rng}, + Message: lang.RedactMessage(item.Message), + }) + } + return information +} + func rangeFromLangDiagnostic(item lang.Diagnostic, body string) lspRange { if item.Range != nil { return rangeFromLangRange(*item.Range, body) diff --git a/internal/lsp/notifications.go b/internal/lsp/notifications.go index 09fe3618..4dccd4db 100644 --- a/internal/lsp/notifications.go +++ b/internal/lsp/notifications.go @@ -74,7 +74,7 @@ func (server *Server) publishDiagnostics(doc document) []byte { _, diagnostics := lang.CheckSource(server.config, doc.Path, []byte(doc.Text)) items := make([]diagnostic, 0, len(diagnostics)) for _, item := range diagnostics { - items = append(items, diagnosticFromLang(item, doc.Text)) + items = append(items, diagnosticFromLang(item, doc.URI, doc.Text)) } return publishDiagnostics(doc.URI, items) } diff --git a/internal/lsp/protocol_types.go b/internal/lsp/protocol_types.go index 6dabc321..6fbc8168 100644 --- a/internal/lsp/protocol_types.go +++ b/internal/lsp/protocol_types.go @@ -184,10 +184,16 @@ type publishDiagnosticsParams struct { } type diagnostic struct { - Range lspRange `json:"range"` - Severity int `json:"severity,omitempty"` - Code string `json:"code,omitempty"` - 
Source string `json:"source,omitempty"` + Range lspRange `json:"range"` + Severity int `json:"severity,omitempty"` + Code string `json:"code,omitempty"` + Source string `json:"source,omitempty"` + Message string `json:"message"` + RelatedInformation []diagnosticRelatedInformation `json:"relatedInformation,omitempty"` +} + +type diagnosticRelatedInformation struct { + Location location `json:"location"` Message string `json:"message"` } diff --git a/internal/parser/braces.go b/internal/parser/braces.go index b36c3f4d..27bec559 100644 --- a/internal/parser/braces.go +++ b/internal/parser/braces.go @@ -88,6 +88,35 @@ func (s *braceScanner) delta(line string) int { return delta } +// BraceDepth tracks net brace depth across the lines of a .gwdk file for +// tooling such as the formatter, skipping braces that appear inside string +// literals, comments, Go raw strings, and JS template literals. It carries +// multi-line state across Delta calls. It uses Go lexical rules, which cover the +// top-level `.gwdk` surface and Go/JS block bodies; the one accepted edge is a +// `//` sequence inside a CSS value (e.g. a `url(http://...)`), which truncates +// brace counting for the rest of that line only. +type BraceDepth struct { + scanner braceScanner +} + +// NewBraceDepth returns a brace-depth tracker using Go lexical rules. +func NewBraceDepth() *BraceDepth { + return &BraceDepth{scanner: braceScanner{lang: braceLangGo}} +} + +// Delta scans one line and returns the net change in brace depth it +// contributes, skipping braces inside strings and comments. +func (b *BraceDepth) Delta(line string) int { + return b.scanner.delta(line) +} + +// InMultiline reports whether the tracker is currently inside a multi-line +// construct (block comment, Go raw string, or JS template literal). A line that +// is textually "}" while InMultiline is body content, not a block terminator. 
+func (b *BraceDepth) InMultiline() bool { + return b.scanner.inMultiline() +} + // blockScanLang maps a top-level block kind to the lexical rules used to scan // its body for brace depth. Kinds whose bodies are not brace-scanned default to // Go rules, which is harmless because their scanner is never fed. diff --git a/internal/source/source.go b/internal/source/source.go index 5bfec55e..81c11511 100644 --- a/internal/source/source.go +++ b/internal/source/source.go @@ -16,12 +16,73 @@ import ( "fmt" "path" "strings" + "unicode/utf8" ) // SourcePosition is a 1-based source location in a parsed .gwdk file. +// +// Offset is the 0-based byte offset of the position into the source buffer. It +// is the exact substrate for AST-backed formatting, precise LSP edits, and +// exact diagnostic ranges, none of which should re-derive offsets from +// line/column. Offset is best-effort: the current line-oriented parser does not +// populate it, so a position produced by that parser leaves Offset at its zero +// value while Line/Column are set. Use PositionAt/OffsetOf to convert against a +// source buffer when an exact offset is required. Set-ness of a position is +// determined by Line/Column being positive, not by Offset, because byte offset +// 0 is a valid first-byte position. type SourcePosition struct { Line int Column int + Offset int +} + +// PositionAt returns the 1-based line/column (column counted in runes, matching +// the parser's rune-column spans) for a 0-based byte offset into src, with +// Offset set to that byte offset. The offset is clamped to the buffer bounds. 
+func PositionAt(src []byte, offset int) SourcePosition { + if offset < 0 { + offset = 0 + } + if offset > len(src) { + offset = len(src) + } + line, column := 1, 1 + for index := 0; index < offset; { + r, size := utf8.DecodeRune(src[index:]) + if r == '\n' { + line++ + column = 1 + } else { + column++ + } + index += size + } + return SourcePosition{Line: line, Column: column, Offset: offset} +} + +// OffsetOf returns the 0-based byte offset into src for a 1-based line/column +// position (column counted in runes). An unset position (non-positive line or +// column) maps to 0, and a position past the end of src is clamped to len(src). +// It is the inverse of PositionAt for in-bounds, rune-aligned positions. +func OffsetOf(src []byte, pos SourcePosition) int { + if pos.Line <= 0 || pos.Column <= 0 { + return 0 + } + line, column := 1, 1 + for index := 0; index < len(src); { + if line == pos.Line && column == pos.Column { + return index + } + r, size := utf8.DecodeRune(src[index:]) + if r == '\n' { + line++ + column = 1 + } else { + column++ + } + index += size + } + return len(src) } // SourceSpan is a 1-based source range. End is exclusive. @@ -36,6 +97,17 @@ type NamedSpan struct { Span SourceSpan } +// RelatedSpan is a secondary source location attached to a diagnostic, such as +// the first declaration that a conflict diagnostic also points at. Source is the +// owning file label (matching a diagnostic's primary Source) and may be empty +// for a same-file relation. Message is a short note shown alongside the location +// (for example "first declared here"). +type RelatedSpan struct { + Source string + Span SourceSpan + Message string +} + // RouteParam describes one dynamic route parameter and its declared scalar // type. Empty Type means string for compatibility with legacy {name} syntax. 
type RouteParam struct { diff --git a/internal/source/source_test.go b/internal/source/source_test.go index 09b20f5a..44cd2b47 100644 --- a/internal/source/source_test.go +++ b/internal/source/source_test.go @@ -38,6 +38,58 @@ func TestValidateBackendRoutePath(t *testing.T) { } } +func TestPositionAtAndOffsetOf(t *testing.T) { + // Multi-line, multi-byte (the euro sign is 3 bytes) so rune columns and byte + // offsets diverge. + src := []byte("ab\ncd€f\ngh") + + cases := []struct { + offset int + line int + column int + }{ + {0, 1, 1}, // 'a' + {1, 1, 2}, // 'b' + {2, 1, 3}, // '\n' at end of line 1 + {3, 2, 1}, // 'c' + {5, 2, 3}, // start of the 3-byte euro rune + {8, 2, 4}, // 'f', immediately after the euro rune + {10, 3, 1}, // 'g' on line 3 + {12, 3, 3}, // end of buffer + } + + for _, tc := range cases { + got := PositionAt(src, tc.offset) + if got.Line != tc.line || got.Column != tc.column || got.Offset != tc.offset { + t.Fatalf("PositionAt(%d) = {Line:%d Column:%d Offset:%d}, want {Line:%d Column:%d Offset:%d}", + tc.offset, got.Line, got.Column, got.Offset, tc.line, tc.column, tc.offset) + } + if back := OffsetOf(src, got); back != tc.offset { + t.Fatalf("OffsetOf(PositionAt(%d)) = %d, want %d", tc.offset, back, tc.offset) + } + } +} + +func TestPositionAtClampsBounds(t *testing.T) { + src := []byte("abc") + if got := PositionAt(src, -5); got.Offset != 0 || got.Line != 1 || got.Column != 1 { + t.Fatalf("PositionAt(-5) = %+v, want clamped to start", got) + } + if got := PositionAt(src, 99); got.Offset != len(src) { + t.Fatalf("PositionAt(99) Offset = %d, want %d", got.Offset, len(src)) + } +} + +func TestOffsetOfUnsetPosition(t *testing.T) { + src := []byte("abc") + if got := OffsetOf(src, SourcePosition{}); got != 0 { + t.Fatalf("OffsetOf(unset) = %d, want 0", got) + } + if got := OffsetOf(src, SourcePosition{Line: 9, Column: 9}); got != len(src) { + t.Fatalf("OffsetOf(out-of-range) = %d, want clamp %d", got, len(src)) + } +} + func 
TestBackendRouteMethod(t *testing.T) { if got := BackendRouteMethod(" post "); got != "POST" { t.Fatalf("expected normalized method POST, got %q", got) diff --git a/internal/view/directives.go b/internal/view/directives.go index dfc5f0ac..295f2e28 100644 --- a/internal/view/directives.go +++ b/internal/view/directives.go @@ -2,6 +2,7 @@ package view import ( "fmt" + "sort" "strings" ) @@ -40,6 +41,19 @@ var supportedMessageDirectives = map[string]bool{ "g:message:pattern": true, } +// SupportedDirectiveNames returns the sorted closed set of exact-name g: +// directives owned by the current view contract (excluding the g:on:* event +// family and the g:message:* rules, which are validated separately). It is the +// source of truth cross-checked against docs/language/stability.md. +func SupportedDirectiveNames() []string { + names := make([]string, 0, len(supportedDirectiveNames)) + for name := range supportedDirectiveNames { + names = append(names, name) + } + sort.Strings(names) + return names +} + func isSupportedDirectiveName(name string) bool { if supportedDirectiveNames[name] { return true