From 858401ef61e6ed95c8f552acd05e1be9b84ac553 Mon Sep 17 00:00:00 2001 From: Gadi Evron Date: Sun, 14 Jun 2026 10:32:40 +0300 Subject: [PATCH 1/2] fix(parsers/php): keep both same-name defs from conditional/guarded branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP forbids plain function redefinition (fatal), so legal same-(file,name) duplicates arise only from mutually-exclusive conditional branches — an `if/else` or a defensive double `if(!function_exists('x')){ function x(){} }`. Which branch is live is environment-dependent, and the EARLIER branch is often the one that runs (the first `function_exists` guard defines it; the second is skipped). The extractor keyed on `qualified_name` with a plain keep-last store, so the later (dead) branch silently overwrote the earlier (live) one — a false negative for a SAST tool, confirmed against the real `php` interpreter. New `_store_function` keeps BOTH via a deterministic `#L` suffix (the earlier-in-source unit keeps the clean id), mirroring the Python extractor's existing `_store_function`. Collision-only: a unique name keeps its byte-identical `path:name` id. Both qualified-name store sites (function :558, closure :600) route through it; the per-file `__module__` singleton is unaffected. Tests: tests/parsers/php/test_php_conditional_collision.py (if/else both kept, double function_exists guard both kept, unique-name id unchanged): $ pytest tests/parsers/php/test_php_conditional_collision.py -q 3 passed $ pytest tests/parsers/php/ -q 36 passed Co-Authored-By: Claude Opus 4.8 --- .../parsers/php/function_extractor.py | 30 ++++++++- .../php/test_php_conditional_collision.py | 66 +++++++++++++++++++ 2 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 libs/openant-core/tests/parsers/php/test_php_conditional_collision.py diff --git a/libs/openant-core/parsers/php/function_extractor.py b/libs/openant-core/parsers/php/function_extractor.py index 582a689..4e68b9f 100644 --- a/libs/openant-core/parsers/php/function_extractor.py +++ b/libs/openant-core/parsers/php/function_extractor.py @@ -482,7 +482,7 @@ def _process_function_node(self, node, source: bytes, relative_path: str, 'unit_type': unit_type, } - self.functions[func_id] = func_data + self._store_function(func_id, func_data) self.stats['total_functions'] += 1 if class_name: @@ -524,7 +524,7 @@ def _process_closure_node(self, node, source: bytes, relative_path: str, func_id = f"{relative_path}:{qualified_name}" - self.functions[func_id] = { + self._store_function(func_id, { 'name': name, 'qualified_name': qualified_name, 'file_path': relative_path, @@ -536,11 +536,35 @@ def _process_closure_node(self, node, source: bytes, relative_path: str, 'parameters': parameters, 'is_static': False, 'unit_type': 'closure', - } + }) self.stats['total_functions'] += 1 self.stats['standalone_functions'] += 1 self.stats['by_type']['closure'] = self.stats['by_type'].get('closure', 0) + 1 + def _store_function(self, func_id: str, func_data: dict) -> str: + """Insert a function unit, keeping BOTH on a same-(file,name) collision. + + PHP forbids plain redefinition (fatal), so legal duplicates arise only + from mutually-exclusive conditional branches — an `if/else` or a + defensive double `if(!function_exists('x')){...}` — where which branch + is live is environment-dependent and the EARLIER branch may be the one + that runs. Keying solely on `qualified_name` let the later branch + overwrite the earlier (a silent false negative). Disambiguate + DETERMINISTICALLY by source line (`#L`); the earlier-in-source + unit keeps the clean id. Mirrors the Python extractor's `_store_function`. + """ + if func_id not in self.functions: + self.functions[func_id] = func_data + return func_id + line = func_data.get('start_line', 0) + unique_id = f"{func_id}#L{line}" + n = 2 + while unique_id in self.functions: + unique_id = f"{func_id}#L{line}.{n}" + n += 1 + self.functions[unique_id] = func_data + return unique_id + def _extract_module_level_unit(self, tree, source: bytes, relative_path: str) -> None: """Synthesise a `module_level` unit for top-level procedural statements. diff --git a/libs/openant-core/tests/parsers/php/test_php_conditional_collision.py b/libs/openant-core/tests/parsers/php/test_php_conditional_collision.py new file mode 100644 index 0000000..ed698d9 --- /dev/null +++ b/libs/openant-core/tests/parsers/php/test_php_conditional_collision.py @@ -0,0 +1,66 @@ +"""Bug 1 (PHP): same-name functions defined in mutually-exclusive conditional +branches must NOT silently collapse to the last-in-source one. + +Verified harmful (independent + judge, real `php` interpreter): for + if (...) { function k(){ A } } else { function k(){ B } } +and the defensive double `function_exists` guard, the live definition can be the +EARLIER branch, but the extractor kept only the later (else) one — a silent +false negative for a SAST tool. Both branches are env-dependently reachable, so +the fix keeps BOTH (the `#L` disambiguation the Python extractor already +uses), not prefer-first/larger. Collision-only: a unique name keeps its clean id. +""" +import sys +import tempfile +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.php.function_extractor import FunctionExtractor + + +def _extract(php_source: str, filename: str = "cond.php") -> dict: + repo = tempfile.mkdtemp() + Path(repo, filename).write_text(php_source) + return FunctionExtractor(repo).extract_all([filename]) + + +def _named(out: dict, name: str): + return [fid for fid, d in out["functions"].items() if d.get("name") == name] + + +def test_conditional_ifelse_keeps_both_branches(): + src = ( + " Date: Sun, 14 Jun 2026 10:36:14 +0300 Subject: [PATCH 2/2] fix(parsers/ruby): keep both same-name defs from conditional branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A Ruby `def` executes when reached, so same-name defs in mutually-exclusive conditional branches (`if COND then def k;A end else def k;B end end`) are both runtime-reachable depending on the condition, and the EARLIER (if) branch is often the live one. The extractor's plain keep-last store let the later (else) branch silently overwrite the earlier — a false negative for a SAST tool, confirmed against the real `ruby` interpreter. New `_store_function` keeps BOTH via a deterministic `#L` suffix (the earlier-in-source unit keeps the clean id), mirroring the Python/PHP extractors. Unconditional method reopening is last-wins at runtime; keeping both there is the same benign tradeoff Python already accepts (Stage 2 attacker-simulation filters dead overrides). Collision-only: a unique name keeps its byte-identical `path:name` id. Both qualified-name store sites (:549 method, :613 function) route through it. Tests: tests/parsers/ruby/test_ruby_conditional_collision.py (if/else both kept, unique-name id unchanged): $ pytest tests/parsers/ruby/test_ruby_conditional_collision.py -q 2 passed $ pytest tests/parsers/ruby/ -q 47 passed Co-Authored-By: Claude Opus 4.8 --- .../parsers/ruby/function_extractor.py | 33 +++++++++++-- .../ruby/test_ruby_conditional_collision.py | 49 +++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 libs/openant-core/tests/parsers/ruby/test_ruby_conditional_collision.py diff --git a/libs/openant-core/parsers/ruby/function_extractor.py b/libs/openant-core/parsers/ruby/function_extractor.py index 4ee6131..6225795 100644 --- a/libs/openant-core/parsers/ruby/function_extractor.py +++ b/libs/openant-core/parsers/ruby/function_extractor.py @@ -512,7 +512,7 @@ def _emit_synthetic_method(self, node, source: bytes, relative_path: str, qualified_name = name func_id = f"{relative_path}:{qualified_name}" - self.functions[func_id] = { + self._store_function(func_id, { 'name': name, 'qualified_name': qualified_name, 'file_path': relative_path, @@ -525,7 +525,7 @@ def _emit_synthetic_method(self, node, source: bytes, relative_path: str, 'is_singleton': False, 'visibility': visibility, 'unit_type': unit_type, - } + }) self.stats['total_functions'] += 1 if class_name: self.stats['total_methods'] += 1 @@ -533,6 +533,33 @@ def _emit_synthetic_method(self, node, source: bytes, relative_path: str, self.stats['standalone_functions'] += 1 self.stats['by_type'][unit_type] = self.stats['by_type'].get(unit_type, 0) + 1 + + def _store_function(self, func_id: str, func_data: dict) -> str: + """Insert a function unit, keeping BOTH on a same-(file,name) collision. + + Ruby `def` executes when reached, so same-name defs in mutually-exclusive + conditional branches (`if/else`) are both runtime-reachable depending on + the condition, and the EARLIER branch may be the live one. A plain + keep-last store let the later (possibly dead) branch overwrite the + earlier (possibly live) one — a silent false negative, confirmed against + the real `ruby` interpreter. Keep BOTH via a deterministic `#L` + suffix (earlier-in-source keeps the clean id), mirroring the Python + extractor's `_store_function`. Unconditional reopening is last-wins at + runtime; keeping both there is the same benign tradeoff Python accepts. + Collision-only: a unique name keeps its byte-identical `path:name` id. + """ + if func_id not in self.functions: + self.functions[func_id] = func_data + return func_id + line = func_data.get('start_line', 0) + unique_id = f"{func_id}#L{line}" + n = 2 + while unique_id in self.functions: + unique_id = f"{func_id}#L{line}.{n}" + n += 1 + self.functions[unique_id] = func_data + return unique_id + def _process_method_node(self, node, source: bytes, relative_path: str, class_name: Optional[str], module_name: Optional[str], is_singleton: bool, visibility: str = 'public') -> None: @@ -575,7 +602,7 @@ def _process_method_node(self, node, source: bytes, relative_path: str, 'unit_type': unit_type, } - self.functions[func_id] = func_data + self._store_function(func_id, func_data) self.stats['total_functions'] += 1 if class_name: diff --git a/libs/openant-core/tests/parsers/ruby/test_ruby_conditional_collision.py b/libs/openant-core/tests/parsers/ruby/test_ruby_conditional_collision.py new file mode 100644 index 0000000..bc67928 --- /dev/null +++ b/libs/openant-core/tests/parsers/ruby/test_ruby_conditional_collision.py @@ -0,0 +1,49 @@ +"""Bug 1 (Ruby): same-name defs in mutually-exclusive conditional branches must +NOT collapse to the last-in-source one. + +Verified harmful (independent + judge, real `ruby` interpreter): for + if COND then def k; A end else def k; B end end +only the taken branch's `def` executes — and it may be the EARLIER (if) branch, +but the extractor kept only the later (else) one. Both branches are +env-dependently reachable, so the fix keeps BOTH via the `#L` +disambiguation the Python extractor already uses. Collision-only: a unique name +keeps its clean id. (Unconditional reopening stays correct — last-wins — and is +simply also kept-both, the same tradeoff Python accepts.) +""" +import sys +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.ruby.function_extractor import FunctionExtractor + + +def _extract(tmp_path: Path, filename: str, source: str) -> dict: + (tmp_path / filename).write_text(source) + return FunctionExtractor(str(tmp_path)).extract_all([filename]) + + +def _named(out: dict, name: str): + return [fid for fid, d in out["functions"].items() if d.get("name") == name] + + +def test_conditional_ifelse_keeps_both_branches(tmp_path): + src = ( + "if ENV['X']\n" + " def k; if_branch; end\n" + "else\n" + " def k; else_branch; end\n" + "end\n" + ) + out = _extract(tmp_path, "cond.rb", src) + ks = _named(out, "k") + assert len(ks) == 2, f"both conditional branches of k must be kept; got {ks}" + bodies = " ".join(out["functions"][f]["code"] for f in ks) + assert "if_branch" in bodies and "else_branch" in bodies, f"a branch was dropped: {bodies}" + + +def test_unique_name_id_unchanged(tmp_path): + out = _extract(tmp_path, "u.rb", "def solo; 1; end\n") + ids = _named(out, "solo") + assert ids == ["u.rb:solo"], f"unique-name id must stay byte-identical; got {ids}"