From 9bde75e6d96f4b01c5502ec7f201e191fb61d70a Mon Sep 17 00:00:00 2001 From: Jonathan Marc Bearak Date: Mon, 4 May 2026 19:15:32 -0400 Subject: [PATCH 1/3] fix: register syntax option implicit locals as lowercase Stata's `syntax` command uses uppercase letters in option names to declare a minimum abbreviation (e.g. `Cache(string)` accepts `cache(...)`, `Cac(...)`, ..., `C(...)`), but the implicit local it creates at runtime is always the lowercase form. The analyzer was registering the local under the original casing, so references like `` `cache' `` inside the program body were flagged as undefined for any program declaring `Cache(...)` in its syntax line. --- src/analyzer/index.ts | 17 ++--- .../syntax-command-analyzer.prop.test.ts | 10 ++- .../syntax-option-capitalization.test.ts | 71 +++++++++++++++++++ 3 files changed, 87 insertions(+), 11 deletions(-) create mode 100644 tests/unit/analyzer/syntax-option-capitalization.test.ts diff --git a/src/analyzer/index.ts b/src/analyzer/index.ts index a0769176..3b093c04 100644 --- a/src/analyzer/index.ts +++ b/src/analyzer/index.ts @@ -968,12 +968,14 @@ export class SemanticAnalyzer { } } - // Register each option as an implicit local + // Register each option as an implicit local. Stata's `syntax` command + // uses uppercase letters in option names to declare a minimum + // abbreviation (e.g. `Cache(string)`), but the implicit local Stata + // creates at runtime is always lowercased. for (const opt of signature.options) { - // Check if macro already exists (first definition wins) - const existing_macro = symbols.localMacros.get(opt.name); + const local_name = opt.name.toLowerCase(); + const existing_macro = symbols.localMacros.get(local_name); if (existing_macro) { - // Add to additional_definitions array if (!existing_macro.additional_definitions) { existing_macro.additional_definitions = []; } @@ -983,9 +985,8 @@ export class SemanticAnalyzer { location: { uri: this.uri, range: opt.range } }); } else { - // Create new macro with first definition const macro_symbol: MacroSymbol = { - name: opt.name, + name: local_name, scope: 'local', location: { uri: this.uri, range: opt.range }, sourceUri: this.uri, @@ -994,8 +995,8 @@ export class SemanticAnalyzer { definition_line: opt.range.start.line, }; - current_scope.localMacros.set(opt.name, macro_symbol); - symbols.localMacros.set(opt.name, macro_symbol); + current_scope.localMacros.set(local_name, macro_symbol); + symbols.localMacros.set(local_name, macro_symbol); } } } diff --git a/tests/property/syntax-command-analyzer.prop.test.ts b/tests/property/syntax-command-analyzer.prop.test.ts index 36a2261e..49f69255 100644 --- a/tests/property/syntax-command-analyzer.prop.test.ts +++ b/tests/property/syntax-command-analyzer.prop.test.ts @@ -92,10 +92,14 @@ describe('Syntax Command Analyzer Property Tests', () => { // Analyze const my_result = analyze_program(my_source); - // Verify each option is registered as a local macro + // Verify each option is registered as a local macro. Stata uses + // uppercase letters in option names only to declare a minimum + // abbreviation; the implicit local it creates at runtime is the + // lowercase form of the name. for (const my_opt_name of my_option_names) { - expect(my_result.symbols.localMacros.has(my_opt_name)).toBe(true); - const my_macro = my_result.symbols.localMacros.get(my_opt_name); + const my_runtime_name = my_opt_name.toLowerCase(); + expect(my_result.symbols.localMacros.has(my_runtime_name)).toBe(true); + const my_macro = my_result.symbols.localMacros.get(my_runtime_name); expect(my_macro?.scope).toBe('local'); } } diff --git a/tests/unit/analyzer/syntax-option-capitalization.test.ts b/tests/unit/analyzer/syntax-option-capitalization.test.ts new file mode 100644 index 00000000..299c7181 --- /dev/null +++ b/tests/unit/analyzer/syntax-option-capitalization.test.ts @@ -0,0 +1,71 @@ +/** + * Stata's `syntax` command lets option names use mixed case to declare a + * minimum abbreviation (e.g. `Cache(string)` allows `cache(...)`, `Cac(...)`, + * `Ca(...)`, `C(...)`). At runtime, the implicit local Stata creates is the + * lowercase form of the option name. References like `` `cache' `` therefore + * must not be flagged as undefined inside the program body. + */ + +import { describe, it, expect, beforeEach } from 'bun:test'; +import { SemanticAnalyzer } from '../../../src/analyzer/index'; +import { StataLexer } from '../../../src/lexer'; +import { StataParser } from '../../../src/parser'; +import { StataDiagnosticCode } from '../../../src/types'; + +describe('Syntax option capitalization → implicit locals', () => { + let analyzer: SemanticAnalyzer; + let lexer: StataLexer; + let parser: StataParser; + + beforeEach(() => { + analyzer = new SemanticAnalyzer(); + lexer = new StataLexer(); + parser = new StataParser(); + }); + + function analyze_document(my_source: string) { + const my_lex_result = lexer.tokenize(my_source); + const my_parse_result = parser.parse(my_lex_result.tokens); + return analyzer.analyze( + my_parse_result.ast, + 'file:///test.do', + undefined, + { undefined_macro_enabled: true }, + my_lex_result.tokens + ); + } + + function undefined_macro_messages(my_source: string): string[] { + const my_result = analyze_document(my_source); + return my_result.diagnostics + .filter((my_diag) => my_diag.code === StataDiagnosticCode.UNDEFINED_MACRO) + .map((my_diag) => my_diag.message); + } + + it('treats Cache(string) as defining a local named `cache`', () => { + const my_source = `program define format_wpp_5yr + syntax, Cache(string) Prefix(string) Outpath(string) [Suffix(string)] + display "\`cache'" + display "\`prefix'" + display "\`outpath'" + display "\`suffix'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); + + it('still treats lowercase option names normally', () => { + const my_source = `program define lower + syntax, cache(string) + display "\`cache'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); + + it('treats fully uppercase options as defining a lowercase local', () => { + const my_source = `program define upper + syntax, OUT(string) + display "\`out'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); +}); From 2eb33284ce73f9c64e688c9098f7ba28cfd346b0 Mon Sep 17 00:00:00 2001 From: Jonathan Marc Bearak Date: Mon, 4 May 2026 19:23:18 -0400 Subject: [PATCH 2/3] test: tighten capitalization assertions; document scope of fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - syntax-option-capitalization.test.ts: assert that a wrong-case reference (`Cache') still produces an undefined-macro diagnostic. Locks in the semantics in both directions and prevents accidentally registering both casings. - syntax-command-analyzer.prop.test.ts: deduplicate generated option names by their lowercase form before asserting, and confirm every distinct lowercase name maps to exactly one entry. Previously the generator could produce inputs that collapse to one runtime local (e.g. `Foo`, `foo`), and the assertion looked stronger than it was. - analyzer/index.ts: document why extract_syntax_option_names preserves original casing — full case-insensitive matching of macro-creating options also requires lowercasing call-site option matching, which is broader than the implicit-local fix. --- src/analyzer/index.ts | 6 ++++++ .../syntax-command-analyzer.prop.test.ts | 19 ++++++++++++++++--- .../syntax-option-capitalization.test.ts | 14 ++++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/analyzer/index.ts b/src/analyzer/index.ts index 3b093c04..2d7fef84 100644 --- a/src/analyzer/index.ts +++ b/src/analyzer/index.ts @@ -736,6 +736,12 @@ export class SemanticAnalyzer { * Returns a Set of option names declared in the program's syntax signature. */ private extract_syntax_option_names(nodes: StataNode[]): Set { + // Returns the names verbatim. The c_local-pattern matcher in + // extract_macro_creating_option_patterns compares against the literal + // text inside `\`name'`, so casing must match here too. End-to-end + // correctness for capitalised option names also requires lowercasing + // call-site option matching — a broader change tracked separately + // from the implicit-local fix. const option_names = new Set(); for (const node of nodes) { if (node.type === 'syntax') { diff --git a/tests/property/syntax-command-analyzer.prop.test.ts b/tests/property/syntax-command-analyzer.prop.test.ts index 49f69255..d77d00c9 100644 --- a/tests/property/syntax-command-analyzer.prop.test.ts +++ b/tests/property/syntax-command-analyzer.prop.test.ts @@ -95,13 +95,26 @@ describe('Syntax Command Analyzer Property Tests', () => { // Verify each option is registered as a local macro. Stata uses // uppercase letters in option names only to declare a minimum // abbreviation; the implicit local it creates at runtime is the - // lowercase form of the name. - for (const my_opt_name of my_option_names) { - const my_runtime_name = my_opt_name.toLowerCase(); + // lowercase form of the name. Multiple options that differ only + // in case (e.g. `Foo` and `foo`) collapse onto one runtime local, + // so we deduplicate by the lowercase form before asserting. + const the_runtime_names = new Set( + my_option_names.map((my_name) => my_name.toLowerCase()) + ); + for (const my_runtime_name of the_runtime_names) { expect(my_result.symbols.localMacros.has(my_runtime_name)).toBe(true); const my_macro = my_result.symbols.localMacros.get(my_runtime_name); expect(my_macro?.scope).toBe('local'); } + // Also confirm we did not silently drop options: every distinct + // lowercase name should map to exactly one entry in localMacros. + let the_matching_local_count = 0; + for (const my_runtime_name of the_runtime_names) { + if (my_result.symbols.localMacros.has(my_runtime_name)) { + the_matching_local_count++; + } + } + expect(the_matching_local_count).toBe(the_runtime_names.size); } ), { numRuns: 100 } diff --git a/tests/unit/analyzer/syntax-option-capitalization.test.ts b/tests/unit/analyzer/syntax-option-capitalization.test.ts index 299c7181..52855432 100644 --- a/tests/unit/analyzer/syntax-option-capitalization.test.ts +++ b/tests/unit/analyzer/syntax-option-capitalization.test.ts @@ -68,4 +68,18 @@ end`; end`; expect(undefined_macro_messages(my_source)).toEqual([]); }); + + it('still flags references that use the wrong case (Stata is case-sensitive)', () => { + // Stata only creates the lowercase implicit local. A reference to + // \`Cache' (with capitals) is genuinely undefined and must still + // produce a diagnostic — this guards against accidentally registering + // both casings. + const my_source = `program define wrong_case + syntax, Cache(string) + display "\`Cache'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([ + "Undefined local macro: \`Cache'", + ]); + }); }); From 72eb4c1a08dad22e6466a1de656ae7edd89aa18f Mon Sep 17 00:00:00 2001 From: Jonathan Marc Bearak Date: Mon, 4 May 2026 20:56:49 -0400 Subject: [PATCH 3/3] Fix analyzer test variable names --- .../syntax-option-capitalization.test.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/unit/analyzer/syntax-option-capitalization.test.ts b/tests/unit/analyzer/syntax-option-capitalization.test.ts index 52855432..351c2f1c 100644 --- a/tests/unit/analyzer/syntax-option-capitalization.test.ts +++ b/tests/unit/analyzer/syntax-option-capitalization.test.ts @@ -13,20 +13,20 @@ import { StataParser } from '../../../src/parser'; import { StataDiagnosticCode } from '../../../src/types'; describe('Syntax option capitalization → implicit locals', () => { - let analyzer: SemanticAnalyzer; - let lexer: StataLexer; - let parser: StataParser; + let my_analyzer: SemanticAnalyzer; + let my_lexer: StataLexer; + let my_parser: StataParser; beforeEach(() => { - analyzer = new SemanticAnalyzer(); - lexer = new StataLexer(); - parser = new StataParser(); + my_analyzer = new SemanticAnalyzer(); + my_lexer = new StataLexer(); + my_parser = new StataParser(); }); function analyze_document(my_source: string) { - const my_lex_result = lexer.tokenize(my_source); - const my_parse_result = parser.parse(my_lex_result.tokens); - return analyzer.analyze( + const my_lex_result = my_lexer.tokenize(my_source); + const my_parse_result = my_parser.parse(my_lex_result.tokens); + return my_analyzer.analyze( my_parse_result.ast, 'file:///test.do', undefined,