diff --git a/src/analyzer/index.ts b/src/analyzer/index.ts index a0769176..2d7fef84 100644 --- a/src/analyzer/index.ts +++ b/src/analyzer/index.ts @@ -736,6 +736,12 @@ export class SemanticAnalyzer { * Returns a Set of option names declared in the program's syntax signature. */ private extract_syntax_option_names(nodes: StataNode[]): Set { + // Returns the names verbatim. The c_local-pattern matcher in + // extract_macro_creating_option_patterns compares against the literal + // text inside `\`name'`, so casing must match here too. End-to-end + // correctness for capitalised option names also requires lowercasing + // call-site option matching — a broader change tracked separately + // from the implicit-local fix. const option_names = new Set(); for (const node of nodes) { if (node.type === 'syntax') { @@ -968,12 +974,14 @@ export class SemanticAnalyzer { } } - // Register each option as an implicit local + // Register each option as an implicit local. Stata's `syntax` command + // uses uppercase letters in option names to declare a minimum + // abbreviation (e.g. `Cache(string)`), but the implicit local Stata + // creates at runtime is always lowercased. 
for (const opt of signature.options) { - // Check if macro already exists (first definition wins) - const existing_macro = symbols.localMacros.get(opt.name); + const local_name = opt.name.toLowerCase(); + const existing_macro = symbols.localMacros.get(local_name); if (existing_macro) { - // Add to additional_definitions array if (!existing_macro.additional_definitions) { existing_macro.additional_definitions = []; } @@ -983,9 +991,8 @@ export class SemanticAnalyzer { location: { uri: this.uri, range: opt.range } }); } else { - // Create new macro with first definition const macro_symbol: MacroSymbol = { - name: opt.name, + name: local_name, scope: 'local', location: { uri: this.uri, range: opt.range }, sourceUri: this.uri, @@ -994,8 +1001,8 @@ export class SemanticAnalyzer { definition_line: opt.range.start.line, }; - current_scope.localMacros.set(opt.name, macro_symbol); - symbols.localMacros.set(opt.name, macro_symbol); + current_scope.localMacros.set(local_name, macro_symbol); + symbols.localMacros.set(local_name, macro_symbol); } } } diff --git a/tests/property/syntax-command-analyzer.prop.test.ts b/tests/property/syntax-command-analyzer.prop.test.ts index 36a2261e..d77d00c9 100644 --- a/tests/property/syntax-command-analyzer.prop.test.ts +++ b/tests/property/syntax-command-analyzer.prop.test.ts @@ -92,12 +92,29 @@ describe('Syntax Command Analyzer Property Tests', () => { // Analyze const my_result = analyze_program(my_source); - // Verify each option is registered as a local macro - for (const my_opt_name of my_option_names) { - expect(my_result.symbols.localMacros.has(my_opt_name)).toBe(true); - const my_macro = my_result.symbols.localMacros.get(my_opt_name); + // Verify each option is registered as a local macro. Stata uses + // uppercase letters in option names only to declare a minimum + // abbreviation; the implicit local it creates at runtime is the + // lowercase form of the name. Multiple options that differ only + // in case (e.g. 
`Foo` and `foo`) collapse onto one runtime local, + // so we deduplicate by the lowercase form before asserting. + const the_runtime_names = new Set( + my_option_names.map((my_name) => my_name.toLowerCase()) + ); + for (const my_runtime_name of the_runtime_names) { + expect(my_result.symbols.localMacros.has(my_runtime_name)).toBe(true); + const my_macro = my_result.symbols.localMacros.get(my_runtime_name); expect(my_macro?.scope).toBe('local'); } + // Re-count the lowercase names present in localMacros. NOTE(review): this + // repeats the has() checks above, so it cannot fail on its own or detect extra mixed-case keys. + let the_matching_local_count = 0; + for (const my_runtime_name of the_runtime_names) { + if (my_result.symbols.localMacros.has(my_runtime_name)) { + the_matching_local_count++; + } + } + expect(the_matching_local_count).toBe(the_runtime_names.size); } ), { numRuns: 100 } diff --git a/tests/unit/analyzer/syntax-option-capitalization.test.ts b/tests/unit/analyzer/syntax-option-capitalization.test.ts new file mode 100644 index 00000000..351c2f1c --- /dev/null +++ b/tests/unit/analyzer/syntax-option-capitalization.test.ts @@ -0,0 +1,85 @@ +/** + * Stata's `syntax` command lets option names use mixed case to declare a + * minimum abbreviation (e.g. `Cache(string)` allows `cache(...)`, `Cac(...)`, + * `Ca(...)`, `C(...)`). At runtime, the implicit local Stata creates is the + * lowercase form of the option name. References like `` `cache' `` therefore + * must not be flagged as undefined inside the program body. 
+ */ + +import { describe, it, expect, beforeEach } from 'bun:test'; +import { SemanticAnalyzer } from '../../../src/analyzer/index'; +import { StataLexer } from '../../../src/lexer'; +import { StataParser } from '../../../src/parser'; +import { StataDiagnosticCode } from '../../../src/types'; + +describe('Syntax option capitalization → implicit locals', () => { + let my_analyzer: SemanticAnalyzer; + let my_lexer: StataLexer; + let my_parser: StataParser; + + beforeEach(() => { + my_analyzer = new SemanticAnalyzer(); + my_lexer = new StataLexer(); + my_parser = new StataParser(); + }); + + function analyze_document(my_source: string) { + const my_lex_result = my_lexer.tokenize(my_source); + const my_parse_result = my_parser.parse(my_lex_result.tokens); + return my_analyzer.analyze( + my_parse_result.ast, + 'file:///test.do', + undefined, + { undefined_macro_enabled: true }, + my_lex_result.tokens + ); + } + + function undefined_macro_messages(my_source: string): string[] { + const my_result = analyze_document(my_source); + return my_result.diagnostics + .filter((my_diag) => my_diag.code === StataDiagnosticCode.UNDEFINED_MACRO) + .map((my_diag) => my_diag.message); + } + + it('treats Cache(string) as defining a local named `cache`', () => { + const my_source = `program define format_wpp_5yr + syntax, Cache(string) Prefix(string) Outpath(string) [Suffix(string)] + display "\`cache'" + display "\`prefix'" + display "\`outpath'" + display "\`suffix'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); + + it('still treats lowercase option names normally', () => { + const my_source = `program define lower + syntax, cache(string) + display "\`cache'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); + + it('treats fully uppercase options as defining a lowercase local', () => { + const my_source = `program define upper + syntax, OUT(string) + display "\`out'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([]); + }); 
+ + it('still flags references that use the wrong case (Stata is case-sensitive)', () => { + // Stata only creates the lowercase implicit local. A reference to + // \`Cache' (with capitals) is genuinely undefined and must still + // produce a diagnostic — this guards against accidentally registering + // both casings. + const my_source = `program define wrong_case + syntax, Cache(string) + display "\`Cache'" +end`; + expect(undefined_macro_messages(my_source)).toEqual([ + "Undefined local macro: \`Cache'", + ]); + }); +});