From 270b09b150b45eaf68b87352573a1bb8185505d2 Mon Sep 17 00:00:00 2001 From: cssbruno Date: Thu, 11 Jun 2026 18:43:30 -0300 Subject: [PATCH 1/3] feat(lang): expand corpus coverage and add tokenizer byte offsets Continues the remaining M2 work. #304 (conformance corpus): add styled-page and slot-component accept cases and document the inherent single-file scope. Reactive g: directives, endpoints, layouts, wasm, and assets need project context (a Go-typed state contract, Go handlers, sibling files) that single-file CheckSource cannot resolve, so they stay covered by package/build-level tests rather than the corpus; this is now stated in docs/language/conformance.md. #306 (ADR 0010 phase 1): the shared lang tokenizer now records each token's 0-based byte Offset, tracked alongside the existing rune scan. A test verifies the offsets are byte-accurate and consistent with source.OffsetOf. This is the substrate the planned recursive-descent parser consumes; phases 2-3 (the parser itself and the per-declaration cutover) remain open on #306. Tests: go test ./internal/... ./cmd/... . pass; gofmt/vet clean; node 39/0. --- docs/language/conformance.md | 13 ++++++ internal/lang/lexer.go | 45 +++++++++++-------- internal/lang/lexer_offset_test.go | 42 +++++++++++++++++ .../accept/slot_component.cmp.gwdk | 14 ++++++ .../conformance/accept/styled_page.gwdk | 15 +++++++ internal/lang/token.go | 6 ++- 6 files changed, 116 insertions(+), 19 deletions(-) create mode 100644 internal/lang/lexer_offset_test.go create mode 100644 internal/lang/testdata/conformance/accept/slot_component.cmp.gwdk create mode 100644 internal/lang/testdata/conformance/accept/styled_page.gwdk diff --git a/docs/language/conformance.md b/docs/language/conformance.md index d788977..fb2ab4a 100644 --- a/docs/language/conformance.md +++ b/docs/language/conformance.md @@ -42,6 +42,19 @@ code appears among the diagnostics for that file. 
Diagnostic codes are the ones registered in `internal/diagnostics/registry.go` and documented in `docs/reference/diagnostic-codes.md`. +## Scope and limits + +The corpus uses single-file `CheckSource`, so it pins what one file can verify +without a project: package and metadata declarations, route forms, `view {}` +markup, `style {}`, literal `build {}`, slots, and the rejection contracts below. + +It cannot cleanly cover constructs that require project context: reactive `g:` +directives (`g:if`/`g:on`/`g:bind`) reference a Go-typed `state` contract that +does not resolve single-file; endpoint forms (`act`/`api`) need exported Go +handlers; and `layout`/`wasm`/`asset`/`css` need sibling files or config. Those +are exercised by the package- and build-level tests instead. Expanding the +corpus to a project-level harness for them is tracked separately. + ## Coverage `TestConformanceCorpusCoversRejectionContracts` fails when a rejection contract diff --git a/internal/lang/lexer.go b/internal/lang/lexer.go index d902b38..b4f2f93 100644 --- a/internal/lang/lexer.go +++ b/internal/lang/lexer.go @@ -1,6 +1,9 @@ package lang -import "unicode" +import ( + "unicode" + "unicode/utf8" +) // Lex tokenizes .gwdk source for editor and CLI tooling. 
func Lex(source string) ([]Token, Diagnostics) { @@ -13,10 +16,11 @@ func Lex(source string) ([]Token, Diagnostics) { } type scanner struct { - source []rune - index int - line int - column int + source []rune + index int + byteIndex int + line int + column int } func (scanner *scanner) scan() ([]Token, Diagnostics) { @@ -26,13 +30,14 @@ func (scanner *scanner) scan() ([]Token, Diagnostics) { for !scanner.done() { ch := scanner.peek() pos := scanner.position() + offset := scanner.byteIndex switch { case ch == '\r': scanner.advance() case ch == '\n': scanner.advance() - tokens = append(tokens, Token{Kind: TokenNewline, Lexeme: "\n", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenNewline, Lexeme: "\n", Pos: pos, Offset: offset}) case unicode.IsSpace(ch): scanner.advance() case ch == '/' && scanner.peekNext() == '/': @@ -47,47 +52,49 @@ func (scanner *scanner) scan() ([]Token, Diagnostics) { } case ch == '{': scanner.advance() - tokens = append(tokens, Token{Kind: TokenLBrace, Lexeme: "{", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenLBrace, Lexeme: "{", Pos: pos, Offset: offset}) case ch == '}': scanner.advance() - tokens = append(tokens, Token{Kind: TokenRBrace, Lexeme: "}", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenRBrace, Lexeme: "}", Pos: pos, Offset: offset}) case ch == ',': scanner.advance() - tokens = append(tokens, Token{Kind: TokenComma, Lexeme: ",", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenComma, Lexeme: ",", Pos: pos, Offset: offset}) case ch == ':': scanner.advance() - tokens = append(tokens, Token{Kind: TokenColon, Lexeme: ":", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenColon, Lexeme: ":", Pos: pos, Offset: offset}) case ch == '?': scanner.advance() - tokens = append(tokens, Token{Kind: TokenQuestion, Lexeme: "?", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenQuestion, Lexeme: "?", Pos: pos, Offset: offset}) case ch == '=' && scanner.peekNext() == '>': scanner.advance() scanner.advance() - 
tokens = append(tokens, Token{Kind: TokenArrow, Lexeme: "=>", Pos: pos}) + tokens = append(tokens, Token{Kind: TokenArrow, Lexeme: "=>", Pos: pos, Offset: offset}) default: tokens = append(tokens, scanner.text()) } } - tokens = append(tokens, Token{Kind: TokenEOF, Pos: scanner.position()}) + tokens = append(tokens, Token{Kind: TokenEOF, Pos: scanner.position(), Offset: scanner.byteIndex}) return tokens, diagnostics } func (scanner *scanner) identifier() Token { pos := scanner.position() + offset := scanner.byteIndex start := scanner.index for !scanner.done() && (isIdentPart(scanner.peek()) || scanner.peek() == '.' || scanner.peek() == '-') { scanner.advance() } lexeme := string(scanner.source[start:scanner.index]) if scanner.isLineLeading(start) && isMetadataLexeme(lexeme) { - return Token{Kind: TokenMetadata, Lexeme: lexeme, Pos: pos} + return Token{Kind: TokenMetadata, Lexeme: lexeme, Pos: pos, Offset: offset} } - return Token{Kind: TokenIdentifier, Lexeme: lexeme, Pos: pos} + return Token{Kind: TokenIdentifier, Lexeme: lexeme, Pos: pos, Offset: offset} } func (scanner *scanner) quotedString() (Token, Diagnostic) { pos := scanner.position() + offset := scanner.byteIndex start := scanner.index scanner.advance() for !scanner.done() { @@ -101,14 +108,14 @@ func (scanner *scanner) quotedString() (Token, Diagnostic) { } if ch == '"' { scanner.advance() - return Token{Kind: TokenString, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos}, Diagnostic{} + return Token{Kind: TokenString, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos, Offset: offset}, Diagnostic{} } if ch == '\n' { break } scanner.advance() } - return Token{Kind: TokenIllegal, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos}, Diagnostic{ + return Token{Kind: TokenIllegal, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos, Offset: offset}, Diagnostic{ Pos: pos, Range: sourceRange(pos, scanner.position()), Code: "unterminated_string", @@ -129,6 +136,7 @@ 
func sourceRange(start, end Position) *Range { func (scanner *scanner) text() Token { pos := scanner.position() + offset := scanner.byteIndex start := scanner.index for !scanner.done() { ch := scanner.peek() @@ -140,7 +148,7 @@ func (scanner *scanner) text() Token { } scanner.advance() } - return Token{Kind: TokenText, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos} + return Token{Kind: TokenText, Lexeme: string(scanner.source[start:scanner.index]), Pos: pos, Offset: offset} } func (scanner *scanner) skipLineComment() { @@ -170,6 +178,7 @@ func (scanner *scanner) peekNext() rune { func (scanner *scanner) advance() rune { ch := scanner.source[scanner.index] scanner.index++ + scanner.byteIndex += utf8.RuneLen(ch) if ch == '\n' { scanner.line++ scanner.column = 1 diff --git a/internal/lang/lexer_offset_test.go b/internal/lang/lexer_offset_test.go new file mode 100644 index 0000000..aa8b787 --- /dev/null +++ b/internal/lang/lexer_offset_test.go @@ -0,0 +1,42 @@ +package lang + +import ( + "testing" + + "github.com/cssbruno/gowdk/internal/source" +) + +// TestLexTokenOffsetsAreByteAccurate verifies the tokenizer records each token's +// 0-based byte offset and that it stays consistent with the token's line/column +// via the source conversion helpers, including across a multi-byte rune. This is +// the substrate contract the recursive-descent parser (ADR 0010) depends on. +func TestLexTokenOffsetsAreByteAccurate(t *testing.T) { + // The euro sign is three bytes, so byte offsets and rune columns diverge + // after it. + src := "page home\ntitle \"€\"\nroute \"/\"\n" + tokens, _ := Lex(src) + + buffer := []byte(src) + for _, token := range tokens { + if token.Kind == TokenEOF { + continue + } + // The token's recorded byte offset must point at its lexeme in the + // source buffer. 
+ if token.Offset < 0 || token.Offset > len(buffer) { + t.Fatalf("token %q offset %d out of bounds", token.Lexeme, token.Offset) + } + if token.Kind != TokenNewline && token.Lexeme != "" { + got := string(buffer[token.Offset : token.Offset+len(token.Lexeme)]) + if got != token.Lexeme { + t.Fatalf("token %q at offset %d points at %q", token.Lexeme, token.Offset, got) + } + } + // The byte offset and the line/column must describe the same position. + want := source.SourcePosition{Line: token.Pos.Line, Column: token.Pos.Column} + if off := source.OffsetOf(buffer, want); off != token.Offset { + t.Fatalf("token %q: OffsetOf(line %d,col %d)=%d, token offset=%d", + token.Lexeme, token.Pos.Line, token.Pos.Column, off, token.Offset) + } + } +} diff --git a/internal/lang/testdata/conformance/accept/slot_component.cmp.gwdk b/internal/lang/testdata/conformance/accept/slot_component.cmp.gwdk new file mode 100644 index 0000000..8bfb487 --- /dev/null +++ b/internal/lang/testdata/conformance/accept/slot_component.cmp.gwdk @@ -0,0 +1,14 @@ +package components + +component Card + +props { + title string +} + +view { +
+

{title}

+ +
+} diff --git a/internal/lang/testdata/conformance/accept/styled_page.gwdk b/internal/lang/testdata/conformance/accept/styled_page.gwdk new file mode 100644 index 0000000..c535cf1 --- /dev/null +++ b/internal/lang/testdata/conformance/accept/styled_page.gwdk @@ -0,0 +1,15 @@ +package pages + +route "/styled" +title "Styled" + +style { + main { padding: 1rem; } + h1 { color: #222; } +} + +view { +
+

Styled

+
+} diff --git a/internal/lang/token.go b/internal/lang/token.go index 7a176c5..9532b95 100644 --- a/internal/lang/token.go +++ b/internal/lang/token.go @@ -19,9 +19,13 @@ const ( TokenText TokenKind = "text" ) -// Token is a lexical token with source location. +// Token is a lexical token with source location. Offset is the 0-based byte +// offset of the token start in the source, the exact substrate the planned +// recursive-descent parser (ADR 0010) uses to build spans without re-deriving +// positions from line/column. type Token struct { Kind TokenKind Lexeme string Pos Position + Offset int } From e4282008c64d82be0e4b148e63d9270c9a39caf4 Mon Sep 17 00:00:00 2001 From: cssbruno Date: Thu, 11 Jun 2026 18:59:34 -0300 Subject: [PATCH 2/3] fix(lang): anchor token byte offsets to the original buffer Codex review: deriving Token.Offset by summing utf8.RuneLen drifts on malformed UTF-8. []rune turns each invalid byte into a 3-byte U+FFFD, so RuneLen reports 3 for a byte that occupied 1, pushing every later token's offset past its true position and breaking the exact-span contract. Derive byte offsets from ranging the original string (which reports true byte positions) into a per-rune byteOffsets table the scanner indexes, instead of accumulating RuneLen. Add a malformed-UTF-8 test that asserts offsets stay anchored to the byte buffer. Tests: go test ./internal/... ./cmd/... . pass; gofmt/vet clean. --- internal/lang/lexer.go | 56 ++++++++++++++++++++---------- internal/lang/lexer_offset_test.go | 34 ++++++++++++++++++ 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/internal/lang/lexer.go b/internal/lang/lexer.go index b4f2f93..3dd95b6 100644 --- a/internal/lang/lexer.go +++ b/internal/lang/lexer.go @@ -1,26 +1,47 @@ package lang -import ( - "unicode" - "unicode/utf8" -) +import "unicode" // Lex tokenizes .gwdk source for editor and CLI tooling. 
func Lex(source string) ([]Token, Diagnostics) { + runes := []rune(source) + // byteOffsets[i] is the 0-based byte offset of rune i in the original + // source; the final entry is the total byte length. Offsets are taken from + // ranging the original string (which reports true byte positions) rather + // than summing utf8.RuneLen, so malformed UTF-8 — where []rune turns each + // bad byte into a 3-byte U+FFFD — does not drift token offsets. + byteOffsets := make([]int, len(runes)+1) + runeIndex := 0 + for byteIndex := range source { + byteOffsets[runeIndex] = byteIndex + runeIndex++ + } + byteOffsets[len(runes)] = len(source) + lexer := scanner{ - source: []rune(source), - line: 1, - column: 1, + source: runes, + byteOffsets: byteOffsets, + line: 1, + column: 1, } return lexer.scan() } type scanner struct { - source []rune - index int - byteIndex int - line int - column int + source []rune + byteOffsets []int + index int + line int + column int +} + +// offset returns the 0-based byte offset of the current rune in the original +// source. 
+func (scanner *scanner) offset() int { + if scanner.index < len(scanner.byteOffsets) { + return scanner.byteOffsets[scanner.index] + } + return scanner.byteOffsets[len(scanner.byteOffsets)-1] } func (scanner *scanner) scan() ([]Token, Diagnostics) { @@ -30,7 +51,7 @@ func (scanner *scanner) scan() ([]Token, Diagnostics) { for !scanner.done() { ch := scanner.peek() pos := scanner.position() - offset := scanner.byteIndex + offset := scanner.offset() switch { case ch == '\r': @@ -74,13 +95,13 @@ func (scanner *scanner) scan() ([]Token, Diagnostics) { } } - tokens = append(tokens, Token{Kind: TokenEOF, Pos: scanner.position(), Offset: scanner.byteIndex}) + tokens = append(tokens, Token{Kind: TokenEOF, Pos: scanner.position(), Offset: scanner.offset()}) return tokens, diagnostics } func (scanner *scanner) identifier() Token { pos := scanner.position() - offset := scanner.byteIndex + offset := scanner.offset() start := scanner.index for !scanner.done() && (isIdentPart(scanner.peek()) || scanner.peek() == '.' 
|| scanner.peek() == '-') { scanner.advance() @@ -94,7 +115,7 @@ func (scanner *scanner) identifier() Token { func (scanner *scanner) quotedString() (Token, Diagnostic) { pos := scanner.position() - offset := scanner.byteIndex + offset := scanner.offset() start := scanner.index scanner.advance() for !scanner.done() { @@ -136,7 +157,7 @@ func sourceRange(start, end Position) *Range { func (scanner *scanner) text() Token { pos := scanner.position() - offset := scanner.byteIndex + offset := scanner.offset() start := scanner.index for !scanner.done() { ch := scanner.peek() @@ -178,7 +199,6 @@ func (scanner *scanner) peekNext() rune { func (scanner *scanner) advance() rune { ch := scanner.source[scanner.index] scanner.index++ - scanner.byteIndex += utf8.RuneLen(ch) if ch == '\n' { scanner.line++ scanner.column = 1 diff --git a/internal/lang/lexer_offset_test.go b/internal/lang/lexer_offset_test.go index aa8b787..80b4e0e 100644 --- a/internal/lang/lexer_offset_test.go +++ b/internal/lang/lexer_offset_test.go @@ -40,3 +40,37 @@ func TestLexTokenOffsetsAreByteAccurate(t *testing.T) { } } } + +// TestLexTokenOffsetsSurviveMalformedUTF8 guards against offset drift after an +// invalid byte: []rune turns a malformed byte into a 3-byte U+FFFD, so deriving +// offsets from utf8.RuneLen would push every later token two bytes past its true +// position. Offsets must stay anchored to the original byte buffer. +func TestLexTokenOffsetsSurviveMalformedUTF8(t *testing.T) { + // "x" then a lone 0xff byte, a newline, then "y": bytes x=0, 0xff=1, \n=2, y=3. + src := "x\xff\ny" + buffer := []byte(src) + tokens, _ := Lex(src) + + for _, token := range tokens { + // Offset and line/column must agree against the real buffer (OffsetOf + // ranges the bytes, so it reports true positions even past a bad byte). 
+ want := source.SourcePosition{Line: token.Pos.Line, Column: token.Pos.Column} + if off := source.OffsetOf(buffer, want); off != token.Offset { + t.Fatalf("token %q (kind %s): OffsetOf=%d, token offset=%d", token.Lexeme, token.Kind, off, token.Offset) + } + } + + // The trailing valid token must land at byte 3, not 5 (the drifted value). + var found bool + for _, token := range tokens { + if token.Kind == TokenIdentifier && token.Lexeme == "y" { + found = true + if token.Offset != 3 { + t.Fatalf("trailing token y offset = %d, want 3", token.Offset) + } + } + } + if !found { + t.Fatal("expected to find the trailing identifier token y") + } +} From afe12aefb8824013e45cb4697fc09409104e37c4 Mon Sep 17 00:00:00 2001 From: cssbruno Date: Thu, 11 Jun 2026 19:06:24 -0300 Subject: [PATCH 3/3] feat(lsp): add document outline via recursive-descent parser (#306 phase 2) Builds the first real consumer of the ADR 0010 parser direction on top of the phase-1 tokenizer, so the byte-offset work is no longer dormant substrate. - internal/lang/outline.go: a recursive-descent pass over the shared tokenizer that parses the top-level declaration structure (package, metadata, imports, uses, blocks, endpoints, component/page) into a flat outline with byte-offset spans. It recovers from unrecognized lines by skipping to the next line, so a malformed line never hides the rest of the outline, and block ranges span to the matching close brace counted over tokens (string literals are single tokens, so braces inside strings never miscount). - internal/lsp: a textDocument/documentSymbol provider consuming the outline, with the documentSymbolProvider capability, mapping outline kinds to LSP SymbolKinds. Tests cover outline parsing, error recovery, brace-aware block ranges, offset spans, and the LSP handler end to end. Tests: go test ./internal/... ./cmd/... . pass; gofmt/vet clean; node 39/0. 
--- docs/product/language-server.md | 5 + internal/lang/outline.go | 205 +++++++++++++++++++++++++++++++ internal/lang/outline_test.go | 101 +++++++++++++++ internal/lsp/document_symbols.go | 55 +++++++++ internal/lsp/protocol_types.go | 14 +++ internal/lsp/requests.go | 7 ++ internal/lsp/server_test.go | 38 ++++++ 7 files changed, 425 insertions(+) create mode 100644 internal/lang/outline.go create mode 100644 internal/lang/outline_test.go create mode 100644 internal/lsp/document_symbols.go diff --git a/docs/product/language-server.md b/docs/product/language-server.md index 3cc1e26..48bcd4d 100644 --- a/docs/product/language-server.md +++ b/docs/product/language-server.md @@ -55,6 +55,9 @@ Developers editing `.gwdk` files need live feedback from the same language tooli missing GOWDK `use` aliases. - Return full-document semantic tokens for `.gwdk` decorators, identifiers, strings, and operators. +- Return a document outline (top-level package, metadata, imports, uses, blocks, + endpoints, and component/page declarations) from the recursive-descent outline + pass over the shared tokenizer. ### Non-Functional @@ -78,6 +81,8 @@ Developers editing `.gwdk` files need live feedback from the same language tooli - [x] `textDocument/references` returns open-document references for page IDs, routes, components, stores, and guards. - [x] `textDocument/codeAction` returns quick fixes for old endpoint syntax and missing GOWDK use aliases. - [x] `textDocument/semanticTokens/full` returns encoded token data for open `.gwdk` buffers. +- [x] `textDocument/documentSymbol` returns a top-level outline parsed by the + recursive-descent outline pass over the shared tokenizer (ADR 0010). - [x] `go test ./...` and `go build ./cmd/gowdk` pass. 
## Edge Cases diff --git a/internal/lang/outline.go b/internal/lang/outline.go new file mode 100644 index 0000000..7bfb2f4 --- /dev/null +++ b/internal/lang/outline.go @@ -0,0 +1,205 @@ +package lang + +import ( + "strings" + + "github.com/cssbruno/gowdk/internal/source" +) + +// OutlineKind classifies a top-level .gwdk declaration for a document outline. +type OutlineKind string + +const ( + OutlineKindPackage OutlineKind = "package" + OutlineKindMetadata OutlineKind = "metadata" + OutlineKindImport OutlineKind = "import" + OutlineKindUse OutlineKind = "use" + OutlineKindBlock OutlineKind = "block" + OutlineKindEndpoint OutlineKind = "endpoint" + OutlineKindComponent OutlineKind = "component" + OutlineKindPage OutlineKind = "page" +) + +// OutlineSymbol is one entry in a document outline. +type OutlineSymbol struct { + Kind OutlineKind + Name string + Detail string + Span source.SourceSpan +} + +// Outline parses the top-level declaration structure of .gwdk source into a flat +// document outline. It is a recursive-descent pass over the shared tokenizer — +// the first consumer of the ADR 0010 parser direction — and recovers from +// unrecognized lines by skipping to the next line, so a malformed line never +// hides the rest of the outline. Block ranges span to the matching close brace, +// counted over tokens (string literals are single tokens, so braces inside +// strings never miscount). 
+func Outline(src string) []OutlineSymbol { + tokens, _ := Lex(src) + var symbols []OutlineSymbol + + index := 0 + for index < len(tokens) { + token := tokens[index] + if token.Kind == TokenEOF { + break + } + if token.Kind == TokenNewline { + index++ + continue + } + + lineEnd, hasBrace := lineExtent(tokens, index) + line := tokens[index:lineEnd] + + if hasBrace { + closeIndex := matchBrace(tokens, index) + symbols = append(symbols, OutlineSymbol{ + Kind: OutlineKindBlock, + Name: blockName(line), + Span: spanOf(tokens[index], tokens[closeIndex]), + }) + index = closeIndex + 1 + continue + } + + if symbol, ok := classifyLine(line); ok { + symbols = append(symbols, symbol) + } + index = lineEnd + } + + return symbols +} + +// lineExtent returns the index that ends the logical line starting at from (the +// next newline or EOF) and whether the line contains a block-opening brace. +func lineExtent(tokens []Token, from int) (int, bool) { + hasBrace := false + index := from + for index < len(tokens) && tokens[index].Kind != TokenNewline && tokens[index].Kind != TokenEOF { + if tokens[index].Kind == TokenLBrace { + hasBrace = true + } + index++ + } + return index, hasBrace +} + +// matchBrace returns the index of the close brace that balances the first open +// brace at or after from. An unbalanced block recovers to the last token before +// EOF so the outline still terminates. 
+func matchBrace(tokens []Token, from int) int { + depth := 0 + for index := from; index < len(tokens); index++ { + switch tokens[index].Kind { + case TokenLBrace: + depth++ + case TokenRBrace: + depth-- + if depth == 0 { + return index + } + case TokenEOF: + if index > from { + return index - 1 + } + return index + } + } + return len(tokens) - 1 +} + +func blockName(line []Token) string { + var parts []string + for _, token := range line { + if token.Kind == TokenLBrace { + break + } + if token.Kind == TokenIdentifier || token.Kind == TokenMetadata { + parts = append(parts, token.Lexeme) + } + } + return strings.Join(parts, " ") +} + +func classifyLine(line []Token) (OutlineSymbol, bool) { + first := line[0] + span := spanOf(first, line[len(line)-1]) + + switch { + case first.Kind == TokenIdentifier && first.Lexeme == "package": + return OutlineSymbol{Kind: OutlineKindPackage, Name: "package " + nextLexeme(line, 0), Span: span}, true + case first.Kind == TokenIdentifier && first.Lexeme == "import": + return OutlineSymbol{Kind: OutlineKindImport, Name: "import", Detail: lineValue(line, 1), Span: span}, true + case first.Kind == TokenIdentifier && first.Lexeme == "use": + return OutlineSymbol{Kind: OutlineKindUse, Name: "use " + nextLexeme(line, 0), Detail: lineValue(line, 2), Span: span}, true + case first.Kind == TokenIdentifier && (first.Lexeme == "act" || first.Lexeme == "api"): + return OutlineSymbol{Kind: OutlineKindEndpoint, Name: first.Lexeme + " " + nextLexeme(line, 0), Detail: lineValue(line, 2), Span: span}, true + case first.Kind == TokenMetadata: + return classifyMetadata(first, line, span), true + default: + return OutlineSymbol{}, false + } +} + +func classifyMetadata(first Token, line []Token, span source.SourceSpan) OutlineSymbol { + name := nextLexeme(line, 0) + switch first.Lexeme { + case "component": + if name != "" { + return OutlineSymbol{Kind: OutlineKindComponent, Name: "component " + name, Span: span} + } + case "page": + if name != "" { + 
return OutlineSymbol{Kind: OutlineKindPage, Name: "page " + name, Span: span} + } + } + return OutlineSymbol{Kind: OutlineKindMetadata, Name: first.Lexeme, Detail: lineValue(line, 1), Span: span} +} + +// nextLexeme returns the lexeme of the first identifier or string after position +// at in the line, unquoted. +func nextLexeme(line []Token, at int) string { + for index := at + 1; index < len(line); index++ { + switch line[index].Kind { + case TokenIdentifier, TokenText: + return line[index].Lexeme + case TokenString: + return unquote(line[index].Lexeme) + } + } + return "" +} + +// lineValue joins the lexemes from position at to the end of the line into a +// short detail string. +func lineValue(line []Token, at int) string { + var parts []string + for index := at; index < len(line); index++ { + lexeme := line[index].Lexeme + if line[index].Kind == TokenString { + lexeme = unquote(lexeme) + } + if strings.TrimSpace(lexeme) != "" { + parts = append(parts, lexeme) + } + } + return strings.Join(parts, " ") +} + +func unquote(lexeme string) string { + return strings.Trim(lexeme, "\"") +} + +func spanOf(first, last Token) source.SourceSpan { + return source.SourceSpan{ + Start: source.SourcePosition{Line: first.Pos.Line, Column: first.Pos.Column, Offset: first.Offset}, + End: source.SourcePosition{ + Line: last.Pos.Line, + Column: last.Pos.Column + len([]rune(last.Lexeme)), + Offset: last.Offset + len(last.Lexeme), + }, + } +} diff --git a/internal/lang/outline_test.go b/internal/lang/outline_test.go new file mode 100644 index 0000000..db54372 --- /dev/null +++ b/internal/lang/outline_test.go @@ -0,0 +1,101 @@ +package lang + +import "testing" + +func symbolNames(symbols []OutlineSymbol) []string { + names := make([]string, 0, len(symbols)) + for _, symbol := range symbols { + names = append(names, symbol.Name) + } + return names +} + +func findSymbol(symbols []OutlineSymbol, name string) (OutlineSymbol, bool) { + for _, symbol := range symbols { + if symbol.Name == 
name { + return symbol, true + } + } + return OutlineSymbol{}, false +} + +func TestOutlineParsesTopLevelDeclarations(t *testing.T) { + src := `package pages + +route "/" +title "Home" + +view { +
+

{title}

+
+} + +style { + main { padding: 1rem; } +} +` + symbols := Outline(src) + + for _, want := range []string{"package pages", "route", "title", "view", "style"} { + if _, ok := findSymbol(symbols, want); !ok { + t.Errorf("expected outline symbol %q; got %v", want, symbolNames(symbols)) + } + } + + // The view block's range must extend past the interpolation braces to the + // real closing brace, not stop at the {title} interpolation. + view, _ := findSymbol(symbols, "view") + if view.Span.End.Line < 10 { + t.Fatalf("view block range ended too early at line %d (interpolation miscounted?)", view.Span.End.Line) + } +} + +func TestOutlineRecoversFromUnknownLines(t *testing.T) { + // A junk line sits between valid declarations; the parser must skip it and + // still surface the declarations after it. + src := `package pages + +@@@ not valid !!! + +route "/" + +view { +
+} +` + symbols := Outline(src) + for _, want := range []string{"package pages", "route", "view"} { + if _, ok := findSymbol(symbols, want); !ok { + t.Errorf("recovery failed: expected %q after a junk line; got %v", want, symbolNames(symbols)) + } + } +} + +func TestOutlineIncludesEndpointsAndComponents(t *testing.T) { + src := `package widgets + +component Counter + +api Items GET "/items" +` + symbols := Outline(src) + if symbol, ok := findSymbol(symbols, "component Counter"); !ok || symbol.Kind != OutlineKindComponent { + t.Errorf("expected a component symbol; got %v", symbolNames(symbols)) + } + if symbol, ok := findSymbol(symbols, "api Items"); !ok || symbol.Kind != OutlineKindEndpoint { + t.Errorf("expected an endpoint symbol; got %v", symbolNames(symbols)) + } +} + +func TestOutlineSpansCarryOffsets(t *testing.T) { + src := "package pages\nroute \"/\"\n" + symbols := Outline(src) + pkg, ok := findSymbol(symbols, "package pages") + if !ok { + t.Fatalf("expected package symbol; got %v", symbolNames(symbols)) + } + if pkg.Span.Start.Offset != 0 { + t.Fatalf("package symbol start offset = %d, want 0", pkg.Span.Start.Offset) + } +} diff --git a/internal/lsp/document_symbols.go b/internal/lsp/document_symbols.go new file mode 100644 index 0000000..bb3b2cd --- /dev/null +++ b/internal/lsp/document_symbols.go @@ -0,0 +1,55 @@ +package lsp + +import "github.com/cssbruno/gowdk/internal/lang" + +// LSP SymbolKind values (subset) from the language server protocol. +const ( + symbolKindModule = 2 + symbolKindPackage = 4 + symbolKindClass = 5 + symbolKindMethod = 6 + symbolKindProperty = 7 + symbolKindField = 8 +) + +// documentSymbols returns the top-level outline of a .gwdk document, parsed by +// the recursive-descent outline pass over the shared tokenizer. 
+func (server *Server) documentSymbols(params documentSymbolParams) []documentSymbol { + doc, ok := server.documents[params.TextDocument.URI] + if !ok { + return []documentSymbol{} + } + + outline := lang.Outline(doc.Text) + symbols := make([]documentSymbol, 0, len(outline)) + for _, item := range outline { + rng := lspRangeFromSourceSpan(item.Span, doc.Text) + symbols = append(symbols, documentSymbol{ + Name: item.Name, + Detail: item.Detail, + Kind: outlineSymbolKind(item.Kind), + Range: rng, + SelectionRange: rng, + }) + } + return symbols +} + +func outlineSymbolKind(kind lang.OutlineKind) int { + switch kind { + case lang.OutlineKindPackage: + return symbolKindPackage + case lang.OutlineKindComponent: + return symbolKindClass + case lang.OutlineKindPage: + return symbolKindModule + case lang.OutlineKindEndpoint: + return symbolKindMethod + case lang.OutlineKindBlock: + return symbolKindField + case lang.OutlineKindImport, lang.OutlineKindUse: + return symbolKindModule + default: + return symbolKindProperty + } +} diff --git a/internal/lsp/protocol_types.go b/internal/lsp/protocol_types.go index 6fbc816..f675dc6 100644 --- a/internal/lsp/protocol_types.go +++ b/internal/lsp/protocol_types.go @@ -49,10 +49,24 @@ type serverCapabilities struct { ReferencesProvider bool `json:"referencesProvider"` CodeActionProvider bool `json:"codeActionProvider"` DocumentFormattingProvider bool `json:"documentFormattingProvider"` + DocumentSymbolProvider bool `json:"documentSymbolProvider"` CompletionProvider completionOptions `json:"completionProvider"` SemanticTokensProvider semanticTokensOptions `json:"semanticTokensProvider"` } +type documentSymbolParams struct { + TextDocument textDocumentIdentifier `json:"textDocument"` +} + +type documentSymbol struct { + Name string `json:"name"` + Detail string `json:"detail,omitempty"` + Kind int `json:"kind"` + Range lspRange `json:"range"` + SelectionRange lspRange `json:"selectionRange"` + Children []documentSymbol 
`json:"children,omitempty"` +} + type textDocumentSyncOptions struct { OpenClose bool `json:"openClose"` Change int `json:"change"` diff --git a/internal/lsp/requests.go b/internal/lsp/requests.go index 5448a72..c5c2bd5 100644 --- a/internal/lsp/requests.go +++ b/internal/lsp/requests.go @@ -21,6 +21,7 @@ func (server *Server) handleRequest(request rpcRequest) [][]byte { ReferencesProvider: true, CodeActionProvider: true, DocumentFormattingProvider: true, + DocumentSymbolProvider: true, CompletionProvider: completionOptions{ TriggerCharacters: []string{"@", ":", "<", " "}, }, @@ -96,6 +97,12 @@ func (server *Server) handleRequest(request rpcRequest) [][]byte { return singleMessage(errorResponse(request.ID, invalidParams, err.Error())) } return singleMessage(response(request.ID, server.semanticTokens(params))) + case "textDocument/documentSymbol": + var params documentSymbolParams + if err := decodeParams(request.Params, &params); err != nil { + return singleMessage(errorResponse(request.ID, invalidParams, err.Error())) + } + return singleMessage(response(request.ID, server.documentSymbols(params))) default: return singleMessage(errorResponse(request.ID, methodNotFound, fmt.Sprintf("method not found: %s", request.Method))) } diff --git a/internal/lsp/server_test.go b/internal/lsp/server_test.go index 71e08e6..3d176d5 100644 --- a/internal/lsp/server_test.go +++ b/internal/lsp/server_test.go @@ -463,6 +463,44 @@ func TestServerReturnsSemanticTokens(t *testing.T) { }) } +func TestServerReturnsDocumentSymbols(t *testing.T) { + uri := "file:///tmp/home.page.gwdk" + input := framed(`{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}`) + + framed(`{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"`+uri+`","languageId":"gwdk","version":1,"text":"package app\n\nroute \"/\"\ntitle \"Home\"\n\nview {\n
\n}\n"}}}`) + + framed(`{"jsonrpc":"2.0","id":2,"method":"textDocument/documentSymbol","params":{"textDocument":{"uri":"`+uri+`"}}}`) + + framed(`{"jsonrpc":"2.0","id":3,"method":"shutdown","params":null}`) + + framed(`{"jsonrpc":"2.0","method":"exit"}`) + + var output bytes.Buffer + server := NewServer(gowdk.Config{}) + server.log = nil + if err := server.Serve(stringsReader(input), &output); err != nil { + t.Fatal(err) + } + + messages := readOutputMessages(t, output.Bytes()) + capabilities := messages[0]["result"].(map[string]any)["capabilities"].(map[string]any) + if capabilities["documentSymbolProvider"] != true { + t.Fatalf("expected documentSymbolProvider capability, got %#v", capabilities["documentSymbolProvider"]) + } + + assertResponseID(t, messages[2], float64(2)) + result, ok := messages[2]["result"].([]any) + if !ok { + t.Fatalf("expected a document-symbol array, got %#v", messages[2]["result"]) + } + names := map[string]bool{} + for _, item := range result { + symbol := item.(map[string]any) + names[symbol["name"].(string)] = true + } + for _, want := range []string{"package app", "route", "title", "view"} { + if !names[want] { + t.Fatalf("expected document symbol %q, got %#v", want, names) + } + } +} + func TestServerReturnsMethodNotFoundForUnknownRequests(t *testing.T) { input := framed(`{"jsonrpc":"2.0","id":"x","method":"gowdk/unknown","params":{}}`) + framed(`{"jsonrpc":"2.0","method":"exit"}`)