Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions src/analyzer/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,12 @@ export class SemanticAnalyzer {
* Returns a Set of option names declared in the program's syntax signature.
*/
private extract_syntax_option_names(nodes: StataNode[]): Set<string> {
// Returns the names verbatim. The c_local-pattern matcher in
// extract_macro_creating_option_patterns compares against the literal
// text inside `\`name'`, so casing must match here too. End-to-end
// correctness for capitalised option names also requires lowercasing
// call-site option matching — a broader change tracked separately
// from the implicit-local fix.
const option_names = new Set<string>();
for (const node of nodes) {
if (node.type === 'syntax') {
Expand Down Expand Up @@ -968,12 +974,14 @@ export class SemanticAnalyzer {
}
}

// Register each option as an implicit local
// Register each option as an implicit local. Stata's `syntax` command
// uses uppercase letters in option names to declare a minimum
// abbreviation (e.g. `Cache(string)`), but the implicit local Stata
// creates at runtime is always lowercased.
for (const opt of signature.options) {
// Check if macro already exists (first definition wins)
const existing_macro = symbols.localMacros.get(opt.name);
const local_name = opt.name.toLowerCase();
const existing_macro = symbols.localMacros.get(local_name);
if (existing_macro) {
// Add to additional_definitions array
if (!existing_macro.additional_definitions) {
existing_macro.additional_definitions = [];
}
Expand All @@ -983,9 +991,8 @@ export class SemanticAnalyzer {
location: { uri: this.uri, range: opt.range }
});
} else {
// Create new macro with first definition
const macro_symbol: MacroSymbol = {
name: opt.name,
name: local_name,
scope: 'local',
location: { uri: this.uri, range: opt.range },
sourceUri: this.uri,
Expand All @@ -994,8 +1001,8 @@ export class SemanticAnalyzer {
definition_line: opt.range.start.line,
};

current_scope.localMacros.set(opt.name, macro_symbol);
symbols.localMacros.set(opt.name, macro_symbol);
current_scope.localMacros.set(local_name, macro_symbol);
symbols.localMacros.set(local_name, macro_symbol);
}
}
}
Expand Down
25 changes: 21 additions & 4 deletions tests/property/syntax-command-analyzer.prop.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,29 @@ describe('Syntax Command Analyzer Property Tests', () => {
// Analyze
const my_result = analyze_program(my_source);

// Verify each option is registered as a local macro
for (const my_opt_name of my_option_names) {
expect(my_result.symbols.localMacros.has(my_opt_name)).toBe(true);
const my_macro = my_result.symbols.localMacros.get(my_opt_name);
// Verify each option is registered as a local macro. Stata uses
// uppercase letters in option names only to declare a minimum
// abbreviation; the implicit local it creates at runtime is the
// lowercase form of the name. Multiple options that differ only
// in case (e.g. `Foo` and `foo`) collapse onto one runtime local,
// so we deduplicate by the lowercase form before asserting.
const the_runtime_names = new Set(
my_option_names.map((my_name) => my_name.toLowerCase())
);
for (const my_runtime_name of the_runtime_names) {
expect(my_result.symbols.localMacros.has(my_runtime_name)).toBe(true);
const my_macro = my_result.symbols.localMacros.get(my_runtime_name);
expect(my_macro?.scope).toBe('local');
}
// Also confirm we did not silently drop options: every distinct
// lowercase name should map to exactly one entry in localMacros.
let the_matching_local_count = 0;
for (const my_runtime_name of the_runtime_names) {
if (my_result.symbols.localMacros.has(my_runtime_name)) {
the_matching_local_count++;
}
}
expect(the_matching_local_count).toBe(the_runtime_names.size);
}
),
{ numRuns: 100 }
Expand Down
85 changes: 85 additions & 0 deletions tests/unit/analyzer/syntax-option-capitalization.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/**
 * Stata's `syntax` command lets option names use mixed case to declare a
 * minimum abbreviation: the capitalised prefix is the shortest form a caller
 * may type, so `Cache(string)` accepts `c(...)`, `ca(...)`, `cac(...)`,
 * `cach(...)` and `cache(...)`. At runtime, the implicit local Stata creates
 * is the lowercase form of the full option name. References like `` `cache' ``
 * therefore must not be flagged as undefined inside the program body.
 */

import { describe, it, expect, beforeEach } from 'bun:test';
import { SemanticAnalyzer } from '../../../src/analyzer/index';
import { StataLexer } from '../../../src/lexer';
import { StataParser } from '../../../src/parser';
import { StataDiagnosticCode } from '../../../src/types';

describe('Syntax option capitalization → implicit locals', () => {
  // Pipeline stages are rebuilt before every test so that no analyzer,
  // lexer or parser state can leak from one case into the next.
  let my_analyzer: SemanticAnalyzer;
  let my_lexer: StataLexer;
  let my_parser: StataParser;

  beforeEach(() => {
    my_analyzer = new SemanticAnalyzer();
    my_lexer = new StataLexer();
    my_parser = new StataParser();
  });

  /**
   * Runs the full lex → parse → analyze pipeline over `my_source` with the
   * undefined-macro check switched on, and returns the messages of every
   * UNDEFINED_MACRO diagnostic in the order the analyzer reported them.
   */
  const undefined_macro_messages = (my_source: string): string[] => {
    const my_lexed = my_lexer.tokenize(my_source);
    const my_parsed = my_parser.parse(my_lexed.tokens);
    const my_analysis = my_analyzer.analyze(
      my_parsed.ast,
      'file:///test.do',
      undefined,
      { undefined_macro_enabled: true },
      my_lexed.tokens
    );

    const my_messages: string[] = [];
    for (const my_diagnostic of my_analysis.diagnostics) {
      if (my_diagnostic.code === StataDiagnosticCode.UNDEFINED_MACRO) {
        my_messages.push(my_diagnostic.message);
      }
    }
    return my_messages;
  };

  // Each entry pairs a test title with a Stata program whose body references
  // only the lowercase implicit locals; none of them may yield a diagnostic.
  const the_clean_programs: ReadonlyArray<readonly [string, string]> = [
    [
      'treats Cache(string) as defining a local named `cache`',
      `program define format_wpp_5yr
syntax, Cache(string) Prefix(string) Outpath(string) [Suffix(string)]
display "\`cache'"
display "\`prefix'"
display "\`outpath'"
display "\`suffix'"
end`,
    ],
    [
      'still treats lowercase option names normally',
      `program define lower
syntax, cache(string)
display "\`cache'"
end`,
    ],
    [
      'treats fully uppercase options as defining a lowercase local',
      `program define upper
syntax, OUT(string)
display "\`out'"
end`,
    ],
  ];

  for (const [my_title, my_program] of the_clean_programs) {
    it(my_title, () => {
      expect(undefined_macro_messages(my_program)).toEqual([]);
    });
  }

  it('still flags references that use the wrong case (Stata is case-sensitive)', () => {
    // Only the lowercase implicit local exists at runtime, so a reference
    // spelled with capitals is genuinely undefined. Expecting the diagnostic
    // here guards against the analyzer registering both casings.
    const my_program = `program define wrong_case
syntax, Cache(string)
display "\`Cache'"
end`;
    const the_expected_messages = ["Undefined local macro: \`Cache'"];
    expect(undefined_macro_messages(my_program)).toEqual(the_expected_messages);
  });
});
Loading