From 13c34296510bc7d3f0884901fd8819673640be38 Mon Sep 17 00:00:00 2001 From: Kris Kersey Date: Sun, 14 Jun 2026 15:06:05 +0000 Subject: [PATCH 1/2] fix(extract): attribute C/C++ CALLS edges to the enclosing function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A CALLS edge whose caller is a C/C++/CUDA/GLSL function was sourced to the file's Module node instead of the calling Function. "Find callers of X" returned a file path, outbound trace_path returned empty, and (:Function)-[:CALLS]->(:Function) queries missed for these languages. Root cause: the enclosing-function resolvers read only tree-sitter's `name` field, but a `function_definition` node has none — the name lives in the declarator chain (pointer/function/parenthesized/array declarators). So func_node_name() (internal/cbm/helpers.c) and resolve_func_name_node() (internal/cbm/extract_unified.c) returned NULL, the enclosing scope fell back to the module QN, and the edge was attributed to the Module node. This is the C counterpart to #220, which fixed the definition-naming path but not the enclosing-call path. Fix: descend the declarator chain to the innermost name node (mirroring resolve_c_declarator_name in extract_defs.c, including qualified and operator names) when a function_definition lacks a `name` field. Adds the regression test c_caller_attribution asserting a C call's enclosing_func_qn is the function, not the module. Fixes #438 Signed-off-by: Kris Kersey --- internal/cbm/extract_unified.c | 42 ++++++++++++++++++++++++++++++ internal/cbm/helpers.c | 47 ++++++++++++++++++++++++++++++++++ tests/test_extraction.c | 26 +++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index 7274158f0..7c457fccb 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -87,6 +87,44 @@ static const char *compute_wolfram_func_qn(CBMExtractCtx *ctx, TSNode node) { return NULL; } +/* C/C++/CUDA/GLSL: function_definition has no `name` field — the name is nested + * in the declarator chain. Descend the `declarator` field to the innermost name + * node. Without this, the enclosing-function scope for calls made inside a C + * function resolves to NULL and the call is attributed to the module rather than + * the function (issue #438). Mirrors resolve_c_declarator_name() in extract_defs.c. */ +#ifndef CBM_DECLARATOR_DEPTH_LIMIT +#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ +#endif +static TSNode resolve_c_declarator_name_node(TSNode node) { + TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { + return decl; + } + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); + if (!ts_node_is_null(nm)) { + decl = nm; + continue; + } + return decl; + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + TSNode null_node = {0}; + return null_node; +} + // Resolve the name node for a function, handling arrow functions. static TSNode resolve_func_name_node(TSNode node) { TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name")); @@ -101,6 +139,10 @@ static TSNode resolve_func_name_node(TSNode node) { if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_declaration") == 0) { name_node = cbm_find_child_by_kind(node, "simple_identifier"); } + /* C/C++/CUDA/GLSL: function_definition name lives in the declarator chain. */ + if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_definition") == 0) { + name_node = resolve_c_declarator_name_node(node); + } return name_node; } diff --git a/internal/cbm/helpers.c b/internal/cbm/helpers.c index 1efa6b819..fc67f0ae0 100644 --- a/internal/cbm/helpers.c +++ b/internal/cbm/helpers.c @@ -718,6 +718,46 @@ TSNode cbm_find_enclosing_func(TSNode node, CBMLanguage lang) { } // Get the name of a function node (basic: try "name" field) +// C/C++/CUDA/GLSL: function_definition has no "name" field — the function name is +// nested in the declarator chain (pointer/function/parenthesized/array +// declarators wrap it). Descend the `declarator` field to the innermost name +// node. Without this, calls made inside C functions are attributed to the module +// rather than the enclosing function (issue #438). Mirrors resolve_c_declarator_name() +// in extract_defs.c. +#ifndef CBM_DECLARATOR_DEPTH_LIMIT +#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ +#endif +static TSNode c_declarator_name_node(TSNode func_node) { + TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator")); + for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { + const char *dk = ts_node_type(decl); + if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { + return decl; + } + if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { + // out-of-line method def (Foo::bar): take the rightmost name segment + TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); + if (!ts_node_is_null(nm)) { + decl = nm; + continue; + } + return decl; + } + TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); + if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { + inner = ts_node_named_child(decl, 0); + } + if (ts_node_is_null(inner)) { + break; + } + decl = inner; + } + TSNode null_node = {0}; + return null_node; +} + static const char *func_node_name(CBMArena *a, TSNode func_node, const char *source, CBMLanguage lang) { // Wolfram: set_delayed_top/set_top/set_delayed/set — LHS is apply(user_symbol("f"), ...) @@ -752,6 +792,13 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou } } } + // C/C++/CUDA/GLSL: function_definition carries its name in the declarator chain. + if (strcmp(ts_node_type(func_node), "function_definition") == 0) { + TSNode dn = c_declarator_name_node(func_node); + if (!ts_node_is_null(dn)) { + return cbm_node_text(a, dn, source); + } + } return NULL; } diff --git a/tests/test_extraction.c b/tests/test_extraction.c index d06b2a506..9aba9dc7c 100644 --- a/tests/test_extraction.c +++ b/tests/test_extraction.c @@ -1787,6 +1787,31 @@ TEST(wolfram_caller_attribution) { PASS(); } +/* Issue #438: a C function_definition has no `name` field — the name lives in the + * declarator chain. Calls inside a C function must be attributed to the enclosing + * function, not the module. Pre-fix, enclosing_func_qn fell back to the module QN. */ +TEST(c_caller_attribution) { + CBMFileResult *r = extract("int helper(int x) { return x; }\n" + "int caller(void) { return helper(1); }\n", + CBM_LANG_C, "t", "main.c"); + ASSERT_NOT_NULL(r); + ASSERT_FALSE(r->has_error); + ASSERT_GT(r->calls.count, 0); + int saw_helper = 0; + for (int i = 0; i < r->calls.count; i++) { + if (strcmp(r->calls.items[i].callee_name, "helper") == 0) { + saw_helper = 1; + /* enclosing_func_qn must be the function, NOT empty and NOT the module QN. */ + ASSERT_NOT_NULL(r->calls.items[i].enclosing_func_qn); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "") == 0); + ASSERT_FALSE(strcmp(r->calls.items[i].enclosing_func_qn, "t.main") == 0); + } + } + ASSERT(saw_helper); + cbm_free_result(r); + PASS(); +} + /* --- Wolfram parse (simple assignment) --- */ TEST(wolfram_parse) { CBMFileResult *r = extract("x = 42;\ny = x + 1;\n", CBM_LANG_WOLFRAM, "t", "simple.wl"); @@ -3091,6 +3116,7 @@ SUITE(extraction) { RUN_TEST(wolfram_function_extended); RUN_TEST(wolfram_call); RUN_TEST(wolfram_caller_attribution); + RUN_TEST(c_caller_attribution); RUN_TEST(wolfram_parse); RUN_TEST(wolfram_import); RUN_TEST(wolfram_nested_def); From adc8304c57ff5f07dc69f310b17668bc26ee2dc7 Mon Sep 17 00:00:00 2001 From: Kris Kersey Date: Wed, 24 Jun 2026 16:41:06 +0000 Subject: [PATCH 2/2] refactor(extract): dedup C declarator-name walker into one shared helper Addresses #463 review: the declarator-chain name resolver was copied into helpers.c, extract_unified.c, and extract_defs.c, and CBM_DECLARATOR_DEPTH_LIMIT was #defined twice -- the same triplication drift that caused #438. - Add cbm_resolve_c_declarator_name_node() to helpers.{c,h} as the single source of truth, carrying is_c_terminal_name/resolve_qualified_name with it. - Route the defs, calls, and unified extractors through it. - Hoist CBM_DECLARATOR_DEPTH_LIMIT into helpers.h; extract_defs.c's DECLARATOR_DEPTH_LIMIT now derives from it. Canonicalizes on the original extract_defs.c logic (operator/destructor aware) so defs behavior is unchanged and calls/unified now agree with it. Test: full suite green except an unrelated ASan RSS-budget check; clang-format clean. Signed-off-by: Kris Kersey --- internal/cbm/extract_defs.c | 50 ++-------------------------------- internal/cbm/extract_unified.c | 40 +-------------------------- internal/cbm/helpers.c | 49 ++++++++++++++++++--------------- internal/cbm/helpers.h | 14 ++++++++++ 4 files changed, 45 insertions(+), 108 deletions(-) diff --git a/internal/cbm/extract_defs.c b/internal/cbm/extract_defs.c index bfff34fb1..63e77b21f 100644 --- a/internal/cbm/extract_defs.c +++ b/internal/cbm/extract_defs.c @@ -23,7 +23,8 @@ // Tree traversal limits. enum { TEMPLATE_DEPTH_LIMIT = 4, - DECLARATOR_DEPTH_LIMIT = 8, + DECLARATOR_DEPTH_LIMIT = CBM_DECLARATOR_DEPTH_LIMIT, // shared define in helpers.h + EXPORT_ANCESTOR_DEPTH = 4, DECORATOR_SCAN_LIMIT = 3, C_RETURN_WALK_DEPTH = 5, @@ -457,27 +458,6 @@ static TSNode resolve_func_name_fp(TSNode node, CBMLanguage lang, const char *ki return null_node; } -// Check if a node type is a terminal C declarator name. -static bool is_c_terminal_name(const char *dk) { - return strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || - strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0 || - strcmp(dk, "destructor_name") == 0; -} - -// Resolve name from a C++ qualified_identifier/scoped_identifier. -static TSNode resolve_qualified_name(TSNode decl) { - static const char *name_kinds[] = {"operator_name", "operator_cast", "destructor_name", - "identifier", "field_identifier", NULL}; - for (const char **k = name_kinds; *k; k++) { - TSNode found = cbm_find_child_by_kind(decl, *k); - if (!ts_node_is_null(found)) { - return found; - } - } - TSNode null_node = {0}; - return null_node; -} - // C++/CUDA: out-of-line method definitions name the function with a qualified // declarator (`Foo::bar`, or `ns::Foo::bar`). Return the immediate enclosing // class name (the scope segment directly left of the function name, e.g. "Foo"), @@ -528,30 +508,6 @@ static char *cpp_out_of_line_parent_class(CBMArena *a, TSNode node, const char * return (text && text[0]) ? text : NULL; } -// Resolve function name from C/C++/CUDA/GLSL declarator chain. -static TSNode resolve_c_declarator_name(TSNode node) { - TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); - for (int depth = 0; depth < DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { - const char *dk = ts_node_type(decl); - if (is_c_terminal_name(dk)) { - return decl; - } - if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { - return resolve_qualified_name(decl); - } - TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); - if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { - inner = ts_node_named_child(decl, 0); - } - if (ts_node_is_null(inner)) { - break; - } - decl = inner; - } - TSNode null_node = {0}; - return null_node; -} - // R: resolve function_definition name from parent binary_operator lhs. static TSNode resolve_r_func_name(TSNode node) { TSNode parent = ts_node_parent(node); @@ -675,7 +631,7 @@ static TSNode resolve_func_name_c_family(TSNode *node_ptr, CBMLanguage lang, con lang == CBM_LANG_GLSL || lang == CBM_LANG_HLSL || lang == CBM_LANG_ISPC || lang == CBM_LANG_SLANG) && strcmp(kind, "function_definition") == 0) { - return resolve_c_declarator_name(*node_ptr); + return cbm_resolve_c_declarator_name_node(*node_ptr); } TSNode null_node = {0}; return null_node; diff --git a/internal/cbm/extract_unified.c b/internal/cbm/extract_unified.c index 7c457fccb..f65a64bec 100644 --- a/internal/cbm/extract_unified.c +++ b/internal/cbm/extract_unified.c @@ -87,44 +87,6 @@ static const char *compute_wolfram_func_qn(CBMExtractCtx *ctx, TSNode node) { return NULL; } -/* C/C++/CUDA/GLSL: function_definition has no `name` field — the name is nested - * in the declarator chain. Descend the `declarator` field to the innermost name - * node. Without this, the enclosing-function scope for calls made inside a C - * function resolves to NULL and the call is attributed to the module rather than - * the function (issue #438). Mirrors resolve_c_declarator_name() in extract_defs.c. */ -#ifndef CBM_DECLARATOR_DEPTH_LIMIT -#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ -#endif -static TSNode resolve_c_declarator_name_node(TSNode node) { - TSNode decl = ts_node_child_by_field_name(node, TS_FIELD("declarator")); - for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { - const char *dk = ts_node_type(decl); - if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || - strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || - strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { - return decl; - } - if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { - TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); - if (!ts_node_is_null(nm)) { - decl = nm; - continue; - } - return decl; - } - TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); - if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { - inner = ts_node_named_child(decl, 0); - } - if (ts_node_is_null(inner)) { - break; - } - decl = inner; - } - TSNode null_node = {0}; - return null_node; -} - // Resolve the name node for a function, handling arrow functions. static TSNode resolve_func_name_node(TSNode node) { TSNode name_node = ts_node_child_by_field_name(node, TS_FIELD("name")); @@ -141,7 +103,7 @@ static TSNode resolve_func_name_node(TSNode node) { } /* C/C++/CUDA/GLSL: function_definition name lives in the declarator chain. */ if (ts_node_is_null(name_node) && strcmp(ts_node_type(node), "function_definition") == 0) { - name_node = resolve_c_declarator_name_node(node); + name_node = cbm_resolve_c_declarator_name_node(node); } return name_node; } diff --git a/internal/cbm/helpers.c b/internal/cbm/helpers.c index fc67f0ae0..c34be9b7c 100644 --- a/internal/cbm/helpers.c +++ b/internal/cbm/helpers.c @@ -717,33 +717,38 @@ TSNode cbm_find_enclosing_func(TSNode node, CBMLanguage lang) { return null_node; } -// Get the name of a function node (basic: try "name" field) -// C/C++/CUDA/GLSL: function_definition has no "name" field — the function name is -// nested in the declarator chain (pointer/function/parenthesized/array -// declarators wrap it). Descend the `declarator` field to the innermost name -// node. Without this, calls made inside C functions are attributed to the module -// rather than the enclosing function (issue #438). Mirrors resolve_c_declarator_name() -// in extract_defs.c. -#ifndef CBM_DECLARATOR_DEPTH_LIMIT -#define CBM_DECLARATOR_DEPTH_LIMIT 8 /* matches DECLARATOR_DEPTH_LIMIT in extract_defs.c */ -#endif -static TSNode c_declarator_name_node(TSNode func_node) { +// Check if a node type is a terminal C declarator name. +static bool is_c_terminal_name(const char *dk) { + return strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || + strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0 || + strcmp(dk, "destructor_name") == 0; +} + +// Resolve name from a C++ qualified_identifier/scoped_identifier. +static TSNode resolve_qualified_name(TSNode decl) { + static const char *name_kinds[] = {"operator_name", "operator_cast", "destructor_name", + "identifier", "field_identifier", NULL}; + for (const char **k = name_kinds; *k; k++) { + TSNode found = cbm_find_child_by_kind(decl, *k); + if (!ts_node_is_null(found)) { + return found; + } + } + TSNode null_node = {0}; + return null_node; +} + +// Resolve function name from C/C++/CUDA/GLSL declarator chain. Shared canonical +// implementation — see the header for the full rationale (#438). +TSNode cbm_resolve_c_declarator_name_node(TSNode func_node) { TSNode decl = ts_node_child_by_field_name(func_node, TS_FIELD("declarator")); for (int depth = 0; depth < CBM_DECLARATOR_DEPTH_LIMIT && !ts_node_is_null(decl); depth++) { const char *dk = ts_node_type(decl); - if (strcmp(dk, "identifier") == 0 || strcmp(dk, "field_identifier") == 0 || - strcmp(dk, "type_identifier") == 0 || strcmp(dk, "destructor_name") == 0 || - strcmp(dk, "operator_name") == 0 || strcmp(dk, "operator_cast") == 0) { + if (is_c_terminal_name(dk)) { return decl; } if (strcmp(dk, "qualified_identifier") == 0 || strcmp(dk, "scoped_identifier") == 0) { - // out-of-line method def (Foo::bar): take the rightmost name segment - TSNode nm = ts_node_child_by_field_name(decl, TS_FIELD("name")); - if (!ts_node_is_null(nm)) { - decl = nm; - continue; - } - return decl; + return resolve_qualified_name(decl); } TSNode inner = ts_node_child_by_field_name(decl, TS_FIELD("declarator")); if (ts_node_is_null(inner) && ts_node_named_child_count(decl) > 0) { @@ -794,7 +799,7 @@ static const char *func_node_name(CBMArena *a, TSNode func_node, const char *sou } // C/C++/CUDA/GLSL: function_definition carries its name in the declarator chain. if (strcmp(ts_node_type(func_node), "function_definition") == 0) { - TSNode dn = c_declarator_name_node(func_node); + TSNode dn = cbm_resolve_c_declarator_name_node(func_node); if (!ts_node_is_null(dn)) { return cbm_node_text(a, dn, source); } diff --git a/internal/cbm/helpers.h b/internal/cbm/helpers.h index ea8154d4c..35d108920 100644 --- a/internal/cbm/helpers.h +++ b/internal/cbm/helpers.h @@ -36,6 +36,20 @@ const char *cbm_enclosing_func_qn(CBMArena *a, TSNode node, CBMLanguage lang, co // Cached version: uses ctx->ef_cache to avoid repeated parent-chain walks. const char *cbm_enclosing_func_qn_cached(CBMExtractCtx *ctx, TSNode node); +// Max declarator-chain descent depth for C/C++/CUDA/GLSL function-name +// resolution. Single source of truth — extract_defs.c's DECLARATOR_DEPTH_LIMIT +// is derived from this so the three extractors cannot drift. +#define CBM_DECLARATOR_DEPTH_LIMIT 8 + +// Resolve the function-name node for a C/C++/CUDA/GLSL `function_definition`. +// Such nodes have no `name` field — the name is nested in the declarator chain +// (pointer/function/parenthesized/array declarators wrap it; out-of-line method +// definitions name it with a qualified_identifier). Descends the `declarator` +// field to the innermost name node and returns it, or a null node if none is +// found. Shared by the defs, calls, and unified extractors so all three agree on +// enclosing-function attribution — drift between private copies caused #438. +TSNode cbm_resolve_c_declarator_name_node(TSNode func_node); + // Find a child node by kind string. TSNode cbm_find_child_by_kind(TSNode parent, const char *kind);