Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions libs/openant-core/parsers/go/go_parser/callgraph.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,13 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo {
// import table instead of a name-shape heuristic.
imports := c.importsByFile[funcInfo.FilePath]

// Track simple func-value aliases (f := helper) so a later call f()
// resolves to the aliased function. Only single, unconditional bindings
// of the form `name := <ident>` / `name = <ident>` are tracked; any
// reassignment (or a non-ident RHS) marks the name ambiguous so we emit
// no false edge — precision over recall.
aliases := c.collectFuncValueAliases(file)

// Walk the AST looking for call expressions
ast.Inspect(file, func(n ast.Node) bool {
call, ok := n.(*ast.CallExpr)
Expand All @@ -208,6 +215,13 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo {
}

callInfo := c.analyzeCallExpr(call, imports)
// Rewrite an unambiguous func-value alias call (f()) to its target
// (helper()) so it resolves like a direct call.
if callInfo.Name != "" && callInfo.Receiver == "" && callInfo.Package == "" {
if target, ok := aliases[callInfo.Name]; ok {
callInfo.Name = target
}
}
if callInfo.Name != "" && !c.builtins[callInfo.Name] && !c.builtins[callInfo.Package] {
calls = append(calls, callInfo)
}
Expand All @@ -217,6 +231,62 @@ func (c *CallGraphBuilder) extractCalls(funcInfo FunctionInfo) []CallInfo {
return calls
}

// collectFuncValueAliases walks the parsed source and returns a map from a
// local alias name to the identifier it was bound to, so that `f := helper`
// yields aliases["f"] == "helper".
//
// A name is kept only when it is bound exactly once and the bound value is a
// bare identifier. The following forms count as bindings:
//   - assignments and short declarations: `f := helper`, `f = helper`
//   - initialised declarations: `var f = helper`
//   - range clauses: `for _, f := range handlers` (never a bare identifier,
//     so the name is always dropped)
//
// A declaration without an initialiser (`var f func()`) is not a binding.
// Tuple forms (`a, b := x, y`, `f, err := get()`) drop every identifier on the
// left-hand side. Once a name has been dropped it stays dropped, so a
// reassigned alias never produces an edge.
//
// Bindings are matched purely by name over the whole file; scopes and control
// flow are not modelled. Shadowed names in nested scopes are therefore treated
// as rebinding the same name, which errs on the side of dropping the alias.
func (c *CallGraphBuilder) collectFuncValueAliases(file *ast.File) map[string]string {
	aliases := make(map[string]string)
	ambiguous := make(map[string]bool)

	// drop permanently disqualifies an identifier from aliasing. Non-identifier
	// (or nil) expressions are ignored.
	drop := func(expr ast.Expr) {
		id, ok := expr.(*ast.Ident)
		if !ok {
			return
		}
		delete(aliases, id.Name)
		ambiguous[id.Name] = true
	}

	// record registers a single lhs/rhs pair, dropping the name when the value
	// is not a bare identifier or when the name was already bound.
	record := func(lhs, rhs ast.Expr) {
		lid, ok := lhs.(*ast.Ident)
		if !ok || ambiguous[lid.Name] {
			return
		}
		rid, ok := rhs.(*ast.Ident)
		if !ok {
			// Bound to a call, selector, literal, ... -> ambiguous.
			drop(lid)
			return
		}
		if _, seen := aliases[lid.Name]; seen {
			// Second binding of the same name -> ambiguous.
			drop(lid)
			return
		}
		aliases[lid.Name] = rid.Name
	}

	ast.Inspect(file, func(n ast.Node) bool {
		switch node := n.(type) {
		case *ast.AssignStmt:
			if len(node.Lhs) != 1 || len(node.Rhs) != 1 {
				// Tuple assignment: a multi-value rebind must not alias.
				for _, lhs := range node.Lhs {
					drop(lhs)
				}
				return true
			}
			record(node.Lhs[0], node.Rhs[0])

		case *ast.ValueSpec:
			if len(node.Values) == 0 {
				// `var f func()` only declares the name; nothing is bound yet.
				return true
			}
			if len(node.Names) != 1 || len(node.Values) != 1 {
				for _, name := range node.Names {
					drop(name)
				}
				return true
			}
			record(node.Names[0], node.Values[0])

		case *ast.RangeStmt:
			// The loop variables receive elements of the ranged value, which is
			// never a bare function identifier.
			drop(node.Key)
			drop(node.Value)
		}
		return true
	})

	return aliases
}

func (c *CallGraphBuilder) analyzeCallExpr(call *ast.CallExpr, imports map[string]string) CallInfo {
info := CallInfo{}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package main

// Regression test for BUG-NEW-2026-06-04-go-dataflow_loss: a call through a
// function-value alias (f := helper; f()) must emit an edge caller -> helper,
// mirroring the direct call helper().

import "testing"

// buildGraphForFuncs builds a call graph from an in-memory set of functions.
// It wraps funcs in an AnalyzerOutput, fills the builder's lookup indexes by
// hand, then extracts and resolves calls for every function. The result maps
// each function ID to its resolved callees; functions with no resolved callees
// are left out.
func buildGraphForFuncs(t *testing.T, funcs map[string]FunctionInfo) map[string][]string {
	t.Helper()

	b := NewCallGraphBuilder(".")
	out := &AnalyzerOutput{RepoRoot: ".", Functions: funcs}

	// The indexes are populated directly rather than through the import
	// parser, which reads real files. The synthetic functions make no
	// package-qualified calls, so an empty import table is sufficient.
	for id, fn := range out.Functions {
		b.functionsByName[fn.Name] = append(b.functionsByName[fn.Name], id)
		b.functionsByFile[fn.FilePath] = append(b.functionsByFile[fn.FilePath], id)
		if fn.ClassName == "" {
			continue
		}
		b.methodsByType[fn.ClassName] = append(b.methodsByType[fn.ClassName], id)
	}

	graph := make(map[string][]string)
	for id, fn := range out.Functions {
		edges := b.resolveCalls(id, fn, b.extractCalls(fn), out)
		if len(edges) == 0 {
			continue
		}
		graph[id] = edges
	}
	return graph
}

// hasEdge reports whether the call graph cg lists to among the callees
// recorded for from. A from key that is absent from cg has no edges.
func hasEdge(cg map[string][]string, from, to string) bool {
	callees := cg[from]
	for i := 0; i < len(callees); i++ {
		if callees[i] == to {
			return true
		}
	}
	return false
}

// Baseline control: a direct call helper() resolves to an edge.
func TestDirectCallEdge(t *testing.T) {
funcs := map[string]FunctionInfo{
"main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main",
Code: "func helper() int {\n\treturn 42\n}"},
"main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main",
Code: "func caller() int {\n\treturn helper()\n}"},
}
cg := buildGraphForFuncs(t, funcs)
if !hasEdge(cg, "main.go:caller", "main.go:helper") {
t.Fatalf("baseline: expected edge main.go:caller -> main.go:helper, got %v", cg)
}
}

// BUG-19: a func-value alias f := helper; f() must resolve to the same edge.
func TestFuncValueAliasEdge(t *testing.T) {
funcs := map[string]FunctionInfo{
"main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main",
Code: "func helper() int {\n\treturn 42\n}"},
"main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main",
Code: "func caller() int {\n\tf := helper\n\treturn f()\n}"},
}
cg := buildGraphForFuncs(t, funcs)
if !hasEdge(cg, "main.go:caller", "main.go:helper") {
t.Fatalf("alias: expected edge main.go:caller -> main.go:helper, got %v", cg)
}
}

// Precision guard: a reassigned/conditional alias must NOT produce a false edge.
func TestFuncValueAliasReassignedNoEdge(t *testing.T) {
funcs := map[string]FunctionInfo{
"main.go:helper": {Name: "helper", FilePath: "main.go", Package: "main",
Code: "func helper() int {\n\treturn 42\n}"},
"main.go:other": {Name: "other", FilePath: "main.go", Package: "main",
Code: "func other() int {\n\treturn 7\n}"},
"main.go:caller": {Name: "caller", FilePath: "main.go", Package: "main",
Code: "func caller() int {\n\tf := helper\n\tf = other\n\treturn f()\n}"},
}
cg := buildGraphForFuncs(t, funcs)
if hasEdge(cg, "main.go:caller", "main.go:helper") {
t.Fatalf("reassigned: must NOT resolve f() to helper after f=other, got %v", cg)
}
}
Binary file removed libs/openant-core/parsers/go/go_parser/go_parser
Binary file not shown.
81 changes: 51 additions & 30 deletions libs/openant-core/parsers/go/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,48 @@
from utilities.file_io import open_utf8, read_json, run_utf8, write_json


def normalize_go_function_records(raw_functions: dict) -> dict:
    """Normalize Go FunctionInfo records to the snake_case consumer contract.

    The Go parser is a separate Go binary whose FunctionInfo records
    (parsers/go/go_parser/types.go) use camelCase json keys
    (``unitType``/``startLine``/``endLine``/``isExported``/``filePath``/
    ``className``), while every other parser emits snake_case. The Python
    reachability/entry-point consumers (EntryPointDetector, etc.) read
    snake_case — e.g. ``func_data.get('unit_type')``. Without normalization the
    Go records' snake keys are ``None`` and any unit_type-based logic is
    silently broken for Go (BUG-NEW 5).

    Each output field is read from the snake_case key first, then from the
    camelCase key, then falls back to a default. A key that is present but
    holds ``None`` is treated as missing, so a JSON ``null`` (for example a
    nil Go slice, if the field is emitted without ``omitempty`` — not
    verifiable from here) never leaks into the normalized record.

    Re-normalizing an already-normalized record yields an equal record.
    Keys outside the known FunctionInfo field set are not carried over.
    Scope is the call_graph.json ``functions`` map only — the separate
    analyzer_output.json camelCase contract is intentionally NOT touched.

    Args:
        raw_functions: Mapping of function ID to a FunctionInfo-like dict in
            either key style.

    Returns:
        A new dict mapping each function ID to a new snake_case record.
    """

    def pick(record: dict, default, *keys: str):
        # First non-None value among `keys`; falsy values such as False, 0
        # and '' are real data and must be preserved.
        for key in keys:
            value = record.get(key)
            if value is not None:
                return value
        return default

    normalized: dict = {}
    for func_id, fd in raw_functions.items():
        normalized[func_id] = {
            'name': pick(fd, '', 'name'),
            'unit_type': pick(fd, 'function', 'unit_type', 'unitType'),
            'code': pick(fd, '', 'code'),
            'file_path': pick(fd, '', 'file_path', 'filePath'),
            'start_line': pick(fd, 0, 'start_line', 'startLine'),
            'end_line': pick(fd, 0, 'end_line', 'endLine'),
            'package': pick(fd, '', 'package'),
            'receiver': pick(fd, '', 'receiver'),
            'is_exported': pick(fd, False, 'is_exported', 'isExported'),
            'class_name': pick(fd, '', 'class_name', 'className'),
            # A fresh list literal per record keeps defaults unshared.
            'decorators': pick(fd, [], 'decorators'),
            # Schema-completeness (BUG-5 re-verify): carry the remaining
            # FunctionInfo fields so a Go func record matches the snake_case
            # shape the other parsers emit. No live consumer reads these today,
            # but leaving them None is a latent drift for future consumers.
            'parameters': pick(fd, [], 'parameters'),
            'returns': pick(fd, [], 'returns'),
            'is_async': pick(fd, False, 'is_async', 'isAsync'),
        }
    return normalized


def _stdout_supports_unicode() -> bool:
"""Return True if sys.stdout can emit the symbols we use for status.

Expand Down Expand Up @@ -305,21 +347,10 @@ def run_go_parser_all(self) -> bool:
dataset_for_cg = read_json(self.dataset_file)

raw_functions = analyzer.get("functions", {})
# Normalise to the camelCase shape EntryPointDetector expects.
normalized_functions = {
func_id: {
'name': fd.get('name', ''),
'unitType': fd.get('unit_type', fd.get('unitType', 'function')),
'code': fd.get('code', ''),
'filePath': fd.get('file_path', fd.get('filePath', '')),
'startLine': fd.get('start_line', fd.get('startLine', 0)),
'endLine': fd.get('end_line', fd.get('endLine', 0)),
'package': fd.get('package', ''),
'receiver': fd.get('receiver', ''),
'isExported': fd.get('is_exported', fd.get('isExported', False)),
}
for func_id, fd in raw_functions.items()
}
# Normalise to the snake_case shape the Python consumers read
# (EntryPointDetector reads func_data.get('unit_type'), etc.).
# Idempotent: already-snake records pass through unchanged.
normalized_functions = normalize_go_function_records(raw_functions)

call_graph: dict = {}
reverse_call_graph: dict = {}
Expand Down Expand Up @@ -375,21 +406,11 @@ def apply_reachability_filter(self) -> bool:

functions = analyzer.get("functions", {})

# Convert to expected format for EntryPointDetector
# Go parser uses snake_case, EntryPointDetector expects camelCase
normalized_functions = {}
for func_id, func_data in functions.items():
normalized_functions[func_id] = {
'name': func_data.get('name', ''),
'unitType': func_data.get('unit_type', func_data.get('unitType', 'function')),
'code': func_data.get('code', ''),
'filePath': func_data.get('file_path', func_data.get('filePath', '')),
'startLine': func_data.get('start_line', func_data.get('startLine', 0)),
'endLine': func_data.get('end_line', func_data.get('endLine', 0)),
'package': func_data.get('package', ''),
'receiver': func_data.get('receiver', ''),
'isExported': func_data.get('is_exported', func_data.get('isExported', False)),
}
# Convert to the snake_case shape the Python consumers read.
# The Go binary's analyzer_output uses camelCase FunctionInfo keys;
# EntryPointDetector / ReachabilityAnalyzer read snake_case
# (func_data.get('unit_type'), etc.). Idempotent normalization.
normalized_functions = normalize_go_function_records(functions)

# Load call graph from dataset (go_parser puts it in statistics)
dataset = read_json(self.dataset_file)
Expand Down
Loading
Loading