diff --git a/libs/openant-core/parsers/go/go_parser/callgraph.go b/libs/openant-core/parsers/go/go_parser/callgraph.go index 5ff47f12..f8461499 100644 --- a/libs/openant-core/parsers/go/go_parser/callgraph.go +++ b/libs/openant-core/parsers/go/go_parser/callgraph.go @@ -200,6 +200,13 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo { // import table instead of a name-shape heuristic. imports := c.importsByFile[funcInfo.FilePath] + // Track simple func-value aliases (f := helper) so a later call f() + // resolves to the aliased function. Only single, unconditional bindings + // of the form `name := ` / `name = ` are tracked; any + // reassignment (or a non-ident RHS) marks the name ambiguous so we emit + // no false edge — precision over recall. + aliases := c.collectFuncValueAliases(file) + // Walk the AST looking for call expressions ast.Inspect(file, func(n ast.Node) bool { call, ok := n.(*ast.CallExpr) @@ -208,6 +215,13 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo { } callInfo := c.analyzeCallExpr(call, imports) + // Rewrite an unambiguous func-value alias call (f()) to its target + // (helper()) so it resolves like a direct call. + if callInfo.Name != "" && callInfo.Receiver == "" && callInfo.Package == "" { + if target, ok := aliases[callInfo.Name]; ok { + callInfo.Name = target + } + } if callInfo.Name != "" && !c.builtins[callInfo.Name] && !c.builtins[callInfo.Package] { calls = append(calls, callInfo) } @@ -217,6 +231,62 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo { return calls } +// collectFuncValueAliases scans a parsed function body for single, unconditional +// func-value bindings (`f := helper`) and returns name -> target-function-name. +// A name bound more than once, or bound to anything other than a bare identifier, +// is dropped (left out of the map) so a reassigned/conditional alias never +// produces a false edge. +func (c *CallGraphBuilder) collectFuncValueAliases(file *ast.File) map[string]string { + aliases := make(map[string]string) + ambiguous := make(map[string]bool) + + record := func(lhs, rhs ast.Expr) { + lid, ok := lhs.(*ast.Ident) + if !ok { + return + } + if ambiguous[lid.Name] { + return + } + rid, ok := rhs.(*ast.Ident) + if !ok { + // Bound to a non-ident (call, selector, literal, ...) -> ambiguous. + delete(aliases, lid.Name) + ambiguous[lid.Name] = true + return + } + if _, seen := aliases[lid.Name]; seen { + // Second binding of the same name -> ambiguous, drop it. + delete(aliases, lid.Name) + ambiguous[lid.Name] = true + return + } + aliases[lid.Name] = rid.Name + } + + ast.Inspect(file, func(n ast.Node) bool { + assign, ok := n.(*ast.AssignStmt) + if !ok { + return true + } + // Only handle 1:1 bindings (f := helper); skip tuple assignments. + if len(assign.Lhs) != 1 || len(assign.Rhs) != 1 { + // Mark any ident LHS ambiguous so a multi-value rebind can't alias. + for _, lhs := range assign.Lhs { + if lid, ok := lhs.(*ast.Ident); ok { + delete(aliases, lid.Name) + ambiguous[lid.Name] = true + } + } + return true + } + record(assign.Lhs[0], assign.Rhs[0]) + return true + }) + + return aliases +} + func (c *CallGraphBuilder) analyzeCallExpr(call *ast.CallExpr, imports map[string]string) CallInfo { info := CallInfo{} diff --git a/libs/openant-core/parsers/go/go_parser/callgraph_funcvalue_test.go b/libs/openant-core/parsers/go/go_parser/callgraph_funcvalue_test.go new file mode 100644 index 00000000..3bb1813f --- /dev/null +++ b/libs/openant-core/parsers/go/go_parser/callgraph_funcvalue_test.go @@ -0,0 +1,86 @@ +package main + +// Regression test for BUG-NEW-2026-06-04-go-dataflow_loss: a call through a +// function-value alias (f := helper; f()) must emit an edge caller -> helper, +// mirroring the direct call helper(). + +import "testing" + +// buildGraphForFuncs runs the full call-graph build over a synthetic +// AnalyzerOutput (same shape the extractor emits) and returns the call graph. +func buildGraphForFuncs(t *testing.T, funcs map[string]FunctionInfo) map[string][]string { + t.Helper() + builder := NewCallGraphBuilder(".") + analyzer := &AnalyzerOutput{RepoRoot: ".", Functions: funcs} + // Index directly (parseImports reads real files; none of these synthetic + // funcs use package-qualified calls, so an empty import table is fine). + for funcID, funcInfo := range analyzer.Functions { + builder.functionsByName[funcInfo.Name] = append(builder.functionsByName[funcInfo.Name], funcID) + builder.functionsByFile[funcInfo.FilePath] = append(builder.functionsByFile[funcInfo.FilePath], funcID) + if funcInfo.ClassName != "" { + builder.methodsByType[funcInfo.ClassName] = append(builder.methodsByType[funcInfo.ClassName], funcID) + } + } + cg := make(map[string][]string) + for funcID, funcInfo := range analyzer.Functions { + calls := builder.extractCalls(funcInfo) + resolved := builder.resolveCalls(funcID, funcInfo, calls, analyzer) + if len(resolved) > 0 { + cg[funcID] = resolved + } + } + return cg +} + +func hasEdge(cg map[string][]string, from, to string) bool { + for _, t := range cg[from] { + if t == to { + return true + } + } + return false +} + +// Baseline control: a direct call helper() resolves to an edge. +func TestDirectCallEdge(t *testing.T) { + funcs := map[string]FunctionInfo{ + "main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main", + Code: "func helper() int {\n\treturn 42\n}"}, + "main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main", + Code: "func caller() int {\n\treturn helper()\n}"}, + } + cg := buildGraphForFuncs(t, funcs) + if !hasEdge(cg, "main.go:caller", "main.go:helper") { + t.Fatalf("baseline: expected edge main.go:caller -> main.go:helper, got %v", cg) + } +} + +// BUG-19: a func-value alias f := helper; f() must resolve to the same edge. +func TestFuncValueAliasEdge(t *testing.T) { + funcs := map[string]FunctionInfo{ + "main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main", + Code: "func helper() int {\n\treturn 42\n}"}, + "main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main", + Code: "func caller() int {\n\tf := helper\n\treturn f()\n}"}, + } + cg := buildGraphForFuncs(t, funcs) + if !hasEdge(cg, "main.go:caller", "main.go:helper") { + t.Fatalf("alias: expected edge main.go:caller -> main.go:helper, got %v", cg) + } +} + +// Precision guard: a reassigned/conditional alias must NOT produce a false edge. +func TestFuncValueAliasReassignedNoEdge(t *testing.T) { + funcs := map[string]FunctionInfo{ + "main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main", + Code: "func helper() int {\n\treturn 42\n}"}, + "main.go:other": {Name: "other", FilePath: "main.go", Package: "main", + Code: "func other() int {\n\treturn 7\n}"}, + "main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main", + Code: "func caller() int {\n\tf := helper\n\tf = other\n\treturn f()\n}"}, + } + cg := buildGraphForFuncs(t, funcs) + if hasEdge(cg, "main.go:caller", "main.go:helper") { + t.Fatalf("reassigned: must NOT resolve f() to helper after f=other, got %v", cg) + } +} diff --git a/libs/openant-core/parsers/go/go_parser/go_parser b/libs/openant-core/parsers/go/go_parser/go_parser deleted file mode 100755 index 198b846c..00000000 Binary files a/libs/openant-core/parsers/go/go_parser/go_parser and /dev/null differ diff --git a/libs/openant-core/parsers/go/test_pipeline.py b/libs/openant-core/parsers/go/test_pipeline.py index bba7551d..d7d9fe68 100644 --- a/libs/openant-core/parsers/go/test_pipeline.py +++ b/libs/openant-core/parsers/go/test_pipeline.py @@ -52,6 +52,48 @@ from utilities.file_io import open_utf8, read_json, run_utf8, write_json +def normalize_go_function_records(raw_functions: dict) -> dict: + """Normalize Go FunctionInfo records to the snake_case consumer contract. + + The Go parser is a separate Go binary whose FunctionInfo records + (parsers/go/go_parser/types.go) use camelCase json keys + (``unitType``/``startLine``/``endLine``/``isExported``/``filePath``/ + ``className``), while every other parser emits snake_case. The Python + reachability/entry-point consumers (EntryPointDetector, etc.) read + snake_case — e.g. ``func_data.get('unit_type')``. Without normalization the + Go records' snake keys are ``None`` and any unit_type-based logic is + silently broken for Go (BUG-NEW 5). + + This maps the known FunctionInfo fields to snake_case, reading from either + shape so it is idempotent: already-snake records pass through unchanged. + Scope is the call_graph.json ``functions`` map only — the separate + analyzer_output.json camelCase contract is intentionally NOT touched. + """ + normalized: dict = {} + for func_id, fd in raw_functions.items(): + normalized[func_id] = { + 'name': fd.get('name', ''), + 'unit_type': fd.get('unit_type', fd.get('unitType', 'function')), + 'code': fd.get('code', ''), + 'file_path': fd.get('file_path', fd.get('filePath', '')), + 'start_line': fd.get('start_line', fd.get('startLine', 0)), + 'end_line': fd.get('end_line', fd.get('endLine', 0)), + 'package': fd.get('package', ''), + 'receiver': fd.get('receiver', ''), + 'is_exported': fd.get('is_exported', fd.get('isExported', False)), + 'class_name': fd.get('class_name', fd.get('className', '')), + 'decorators': fd.get('decorators', []), + # Schema-completeness (BUG-5 re-verify): carry the remaining + # FunctionInfo fields so a Go func record matches the snake_case + # shape the other parsers emit. No live consumer reads these today, + # but leaving them None is a latent drift for future consumers. + 'parameters': fd.get('parameters', []), + 'returns': fd.get('returns', []), + 'is_async': fd.get('is_async', fd.get('isAsync', False)), + } + return normalized + + def _stdout_supports_unicode() -> bool: """Return True if sys.stdout can emit the symbols we use for status. @@ -305,21 +347,10 @@ def run_go_parser_all(self) -> bool: dataset_for_cg = read_json(self.dataset_file) raw_functions = analyzer.get("functions", {}) - # Normalise to the camelCase shape EntryPointDetector expects. - normalized_functions = { - func_id: { - 'name': fd.get('name', ''), - 'unitType': fd.get('unit_type', fd.get('unitType', 'function')), - 'code': fd.get('code', ''), - 'filePath': fd.get('file_path', fd.get('filePath', '')), - 'startLine': fd.get('start_line', fd.get('startLine', 0)), - 'endLine': fd.get('end_line', fd.get('endLine', 0)), - 'package': fd.get('package', ''), - 'receiver': fd.get('receiver', ''), - 'isExported': fd.get('is_exported', fd.get('isExported', False)), - } - for func_id, fd in raw_functions.items() - } + # Normalise to the snake_case shape the Python consumers read + # (EntryPointDetector reads func_data.get('unit_type'), etc.). + # Idempotent: already-snake records pass through unchanged. + normalized_functions = normalize_go_function_records(raw_functions) call_graph: dict = {} reverse_call_graph: dict = {} @@ -375,21 +406,11 @@ def apply_reachability_filter(self) -> bool: functions = analyzer.get("functions", {}) - # Convert to expected format for EntryPointDetector - # Go parser uses snake_case, EntryPointDetector expects camelCase - normalized_functions = {} - for func_id, func_data in functions.items(): - normalized_functions[func_id] = { - 'name': func_data.get('name', ''), - 'unitType': func_data.get('unit_type', func_data.get('unitType', 'function')), - 'code': func_data.get('code', ''), - 'filePath': func_data.get('file_path', func_data.get('filePath', '')), - 'startLine': func_data.get('start_line', func_data.get('startLine', 0)), - 'endLine': func_data.get('end_line', func_data.get('endLine', 0)), - 'package': func_data.get('package', ''), - 'receiver': func_data.get('receiver', ''), - 'isExported': func_data.get('is_exported', func_data.get('isExported', False)), - } + # Convert to the snake_case shape the Python consumers read. + # The Go binary's analyzer_output uses camelCase FunctionInfo keys; + # EntryPointDetector / ReachabilityAnalyzer read snake_case + # (func_data.get('unit_type'), etc.). Idempotent normalization. + normalized_functions = normalize_go_function_records(functions) # Load call graph from dataset (go_parser puts it in statistics) dataset = read_json(self.dataset_file) diff --git a/libs/openant-core/tests/parsers/go/test_go_schema_completeness.py b/libs/openant-core/tests/parsers/go/test_go_schema_completeness.py new file mode 100644 index 00000000..d764e6ad --- /dev/null +++ b/libs/openant-core/tests/parsers/go/test_go_schema_completeness.py @@ -0,0 +1,188 @@ +"""Schema-completeness contract test for the Go parser (BUG-NEW 5 family guard). + +BUG-NEW 5 is a *cross-parser schema drift*. The Go parser is a separate Go +binary whose `FunctionInfo` records (`parsers/go/go_parser/types.go`) use +**camelCase** json keys (`unitType`, `startLine`, `endLine`, `isExported`, +`filePath`, `className`). Every other parser (python/ruby/php/c/zig) emits +**snake_case** (`unit_type`, `start_line`, ...). The Python reachability / +entry-point consumers read snake_case -- e.g. +`utilities/agentic_enhancer/entry_point_detector.py` reads +`func_data.get('unit_type')`. So for Go records read out of `call_graph.json` +they got `None`, and any unit_type-based logic (entry-point classification, +statistics, the module_level check) was silently broken for Go. + +The fix normalizes the Go function records to **snake_case** at the single +Python ingestion boundary that builds `call_graph.json`'s `functions` map +(`parsers/go/test_pipeline.py`), matching the consumer contract and every +other parser. No Go rebuild; the analyzer_output.json camelCase contract +(consumed by the camelCase-aware analyzer surface) is intentionally left alone. + +Design: + * REACH_SRC is a Go program with a silent `func main()` that calls a helper + (no decorators, no input patterns) -- the ONLY thing that makes `main` an + entry point is its unit_type/name. This exercises BUG-4 (+name:main) and + BUG-5 (unit_type readable) together. + * The real Go binary + real test_pipeline.py produce call_graph.json and the + reachability-filtered dataset.json. + * Tests assert (1) the ingested function records expose snake_case + unit_type/file_path/start_line (not None), and (2) main seeds reachability + with a unit_type-derived entry-point reason and the helper is reachable. + +If the Go toolchain / subprocess is unavailable the end-to-end tests skip and a +normalization unit-test on a representative camelCase record still runs. +""" + +import json +import os +import shutil +import subprocess +import sys +from pathlib import Path + +import pytest + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +_GO_PARSER_DIR = _CORE_ROOT / "parsers" / "go" / "go_parser" +_TEST_PIPELINE = _CORE_ROOT / "parsers" / "go" / "test_pipeline.py" + +# A silent main() calling a helper: nothing but unit_type/name makes main an +# entry point (no decorators, no request.* / argv input patterns). +REACH_SRC = """package main + +import "fmt" + +func helper(x int) int { +\treturn x * 2 +} + +func main() { +\tfmt.Println(helper(21)) +} +""" + +GO_MOD = "module bug5repo\n\ngo 1.21\n" + +# Snake-case keys the Python reachability/entry-point consumers read out of +# call_graph.json's `functions` records. These are None today for Go records. +CONSUMER_SNAKE_KEYS = ["unit_type", "file_path", "start_line", "end_line", "is_exported"] + + +def _go_available(): + return shutil.which("go") is not None or (_GO_PARSER_DIR / "go_parser").exists() + + +@pytest.fixture(scope="module") +def go_pipeline_output(tmp_path_factory): + """Run the real Go binary + real test_pipeline.py; return (call_graph, dataset).""" + if not _go_available(): + pytest.skip("Go toolchain / go_parser binary unavailable") + + repo = tmp_path_factory.mktemp("bug5_repo") + (repo / "main.go").write_text(REACH_SRC) + (repo / "go.mod").write_text(GO_MOD) + out = tmp_path_factory.mktemp("bug5_out") + + cmd = [ + sys.executable, str(_TEST_PIPELINE), str(repo), + "--output", str(out), + "--processing-level", "reachable", + "--skip-tests", + ] + proc = subprocess.run( + cmd, cwd=str(_CORE_ROOT), + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, timeout=300, + ) + cg_path = out / "call_graph.json" + ds_path = out / "dataset.json" + if proc.returncode != 0 or not cg_path.exists() or not ds_path.exists(): + pytest.skip(f"Go pipeline did not produce outputs (env-flaky):\n{proc.stdout[-2000:]}") + + return json.loads(cg_path.read_text()), json.loads(ds_path.read_text()) + + +@pytest.mark.parametrize("snake_key", CONSUMER_SNAKE_KEYS) +def test_go_function_record_exposes_snake_case(go_pipeline_output, snake_key): + """Each ingested Go function record must expose the snake_case key the + Python consumer reads (RED: only camelCase present, so snake key is None).""" + call_graph, _dataset = go_pipeline_output + functions = call_graph.get("functions", {}) + assert functions, "Go pipeline produced no function records" + for func_id, fd in functions.items(): + assert snake_key in fd, ( + f"Go function {func_id!r} missing snake_case key {snake_key!r}; " + f"consumer would read None. keys = {sorted(fd)}" + ) + + +def test_go_main_record_unit_type_is_main(go_pipeline_output): + """The main() record's snake_case unit_type must read 'main' (RED: None).""" + call_graph, _dataset = go_pipeline_output + functions = call_graph.get("functions", {}) + main_recs = [fd for fd in functions.values() if fd.get("name") == "main"] + assert main_recs, f"no main record; ids = {list(functions)}" + assert main_recs[0].get("unit_type") == "main", ( + f"main unit_type not readable as snake_case; got {main_recs[0].get('unit_type')!r} " + f"(camel unitType = {main_recs[0].get('unitType')!r})" + ) + + +def test_go_main_seeds_reachability_via_unit_type(go_pipeline_output): + """End-to-end payoff: a silent func main() calling helper() seeds reachability + with a unit_type-derived entry-point reason, and the helper is reachable.""" + _call_graph, dataset = go_pipeline_output + units = {u["id"]: u for u in dataset.get("units", [])} + main_id = next((i for i in units if i.endswith(":main")), None) + helper_id = next((i for i in units if i.endswith(":helper")), None) + assert main_id and helper_id, f"missing main/helper units; ids = {list(units)}" + + assert units[main_id].get("is_entry_point") is True, "main not seeded as entry point" + # The unit_type-derived reason must be present now that unit_type is readable. + reason = units[main_id].get("entry_point_reason", "") + assert "unit_type:main" in reason, ( + f"main entry-point reason lacks unit_type:main (BUG-5 still drifting); reason = {reason!r}" + ) + assert units[helper_id].get("reachable") is True, "helper not reachable from main" + + +def test_normalize_camel_record_to_snake_full_schema(): + """BUG-5 re-verify (no Go toolchain needed): a representative camelCase Go + FunctionInfo record normalizes to the FULL snake_case consumer schema, + including the parameters / returns / is_async fields that were previously + omitted from normalize_go_function_records.""" + from parsers.go.test_pipeline import normalize_go_function_records + camel = {"f.go:Pkg.M": { + "name": "M", "code": "func ...", "startLine": 10, "endLine": 20, + "unitType": "method", "className": "Pkg", "isExported": True, + "package": "main", "filePath": "f.go", "receiver": "Pkg", + "parameters": ["x int"], "returns": ["error"], "isAsync": True, + "decorators": ["// note"], + }} + out = normalize_go_function_records(camel)["f.go:Pkg.M"] + expected = { + "name": "M", "unit_type": "method", "file_path": "f.go", + "start_line": 10, "end_line": 20, "is_exported": True, + "class_name": "Pkg", "package": "main", "receiver": "Pkg", + "parameters": ["x int"], "returns": ["error"], "is_async": True, + "decorators": ["// note"], + } + for k, v in expected.items(): + assert out.get(k) == v, f"{k!r}: got {out.get(k)!r}, expected {v!r}" + # No camelCase keys leak through. + assert not any(k in out for k in ("unitType", "filePath", "isAsync", "className")), out + + +def test_normalize_is_idempotent_on_snake_records(): + """Already-snake records pass through unchanged (idempotency).""" + from parsers.go.test_pipeline import normalize_go_function_records + snake = {"f.go:h": { + "name": "h", "unit_type": "function", "code": "", "file_path": "f.go", + "start_line": 1, "end_line": 2, "package": "main", "receiver": "", + "is_exported": False, "class_name": "", "decorators": [], + "parameters": [], "returns": [], "is_async": False, + }} + once = normalize_go_function_records(snake) + twice = normalize_go_function_records(once) + assert once == twice, "normalization not idempotent on snake records" + assert once["f.go:h"]["unit_type"] == "function" diff --git a/libs/openant-core/tests/parsers/test_entry_point_detector_u12.py b/libs/openant-core/tests/parsers/test_entry_point_detector_u12.py new file mode 100644 index 00000000..1c6f4ec3 --- /dev/null +++ b/libs/openant-core/tests/parsers/test_entry_point_detector_u12.py @@ -0,0 +1,110 @@ +"""Tests for EntryPointDetector — a silent binary `main` (C/Go) classified +unit_type='main' must be seeded as a reachability entry point. + +Regression for the gap where `main` was missing from ENTRY_POINT_TYPES: the C +and Go extractors correctly classify a program's `main` function as +unit_type='main', but the detector only honored unit types in +ENTRY_POINT_TYPES. A *silent* main (no user-input pattern, no decorator, not +module_level) therefore produced zero entry-point reasons, was never seeded as +an execution root, and every function it transitively calls was falsely marked +unreachable (reachability blackout on CLI/binary programs). + +A program's `main` is an execution root by definition; over-approximating it as +an entry point is safe (a false-unreachable hides exploitable code), and a +library has no `main`, so this does not over-claim. +""" +import sys +from pathlib import Path + +# tests/parsers/ -> parents[2] == libs/openant-core (the dir containing utilities/) +_CORE_ROOT = Path(__file__).resolve().parents[2] +sys.path.insert(0, str(_CORE_ROOT)) + +from utilities.agentic_enhancer.entry_point_detector import ( # noqa: E402 + ENTRY_POINT_TYPES, + EntryPointDetector, +) + + +def _make_detector(func_id: str, func_data: dict) -> EntryPointDetector: + return EntryPointDetector({func_id: func_data}, call_graph={}) + + +def test_main_is_in_entry_point_types(): + """`main` must be a recognized entry-point unit type so the reachability + filter treats a program's execution root as a seed.""" + assert "main" in ENTRY_POINT_TYPES, ( + "'main' must be in ENTRY_POINT_TYPES so a function classified " + "unit_type='main' is seeded as a reachability entry point" + ) + + +def test_silent_c_main_is_entry_point(): + """A silent C `main` (no user-input pattern, no decorator) classified + unit_type='main' must be detected as an entry point.""" + detector = _make_detector( + "main.c:main", + { + "name": "main", + "unit_type": "main", + "code": "int main(void) { helper(); return 0; }", + "decorators": [], + }, + ) + entry_points = detector.detect_entry_points() + assert "main.c:main" in entry_points, ( + "silent C main was filtered out — its callees become falsely unreachable" + ) + + +def test_silent_go_main_is_entry_point(): + """A silent Go `main` classified unit_type='main' must be detected as an + entry point (language-agnostic: same unit_type, same seeding).""" + detector = _make_detector( + "main.go:main", + { + "name": "main", + "unit_type": "main", + "code": "func main() { helper() }", + "decorators": [], + }, + ) + entry_points = detector.detect_entry_points() + assert "main.go:main" in entry_points, ( + "silent Go main was filtered out — its callees become falsely unreachable" + ) + + +def test_main_by_name_is_entry_point(): + """Defensive: a function named `main` is seeded as an entry point even if + the extractor classified its unit_type as something other than 'main' + (e.g. a generic 'function').""" + detector = _make_detector( + "main.c:main", + { + "name": "main", + "unit_type": "function", + "code": "int main(void) { helper(); return 0; }", + "decorators": [], + }, + ) + entry_points = detector.detect_entry_points() + assert "main.c:main" in entry_points, ( + "a function named main must be seeded as an execution root by name" + ) + + +def test_non_main_silent_function_is_not_entry_point(): + """True-negative anchor: an ordinary silent helper (no main name, no entry + unit_type, no input pattern) must NOT be an entry point.""" + detector = _make_detector( + "main.c:helper", + { + "name": "helper", + "unit_type": "function", + "code": "void helper(void) { return; }", + "decorators": [], + }, + ) + entry_points = detector.detect_entry_points() + assert "main.c:helper" not in entry_points diff --git a/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py b/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py index 5b278c56..4ebca1bf 100644 --- a/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py +++ b/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py @@ -213,6 +213,14 @@ def _get_entry_point_reasons(self, func_data: Dict) -> List[str]: if unit_type in ENTRY_POINT_TYPES: reasons.append(f'unit_type:{unit_type}') + # Check 1b: A function named `main` is a program execution root by name, + # even when the extractor classified its unit_type as something else + # (defensive: covers language extractors that emit a generic unit_type + # for main). A program's main is an entry point; over-approximating it + # is reachability-safe. + elif func_data.get('name') == 'main': + reasons.append('name:main') + # Check 2: Decorators indicate entry point decorators = func_data.get('decorators', []) decorators_str = ' '.join(decorators)