From 6cfba555db5fa046812f887942bd120a93e33231 Mon Sep 17 00:00:00 2001 From: Gadi Evron Date: Thu, 28 May 2026 12:58:44 -0700 Subject: [PATCH] fix(js-parser): treat a zero-JS-file repo as an empty result, not a crash A directory with zero JS-family source files made the JavaScript parser abort the entire scan instead of returning a clean empty result. repository_scanner.js succeeds and emits an empty file list (exit 0), but run_typescript_analyzer hit `if not files: return False`, so run_full_pipeline early-returned without results['success'], main() did sys.exit(1), and _parse_javascript raised RuntimeError on the non-zero exit (uncaught at the scan layer => whole scan aborts). The Python, Ruby and Zig parsers all handle an empty repo gracefully; the JS guard was the lone outlier promoting valid empty input to a fatal exit. Mirror the Zig graceful-empty pattern entirely within parsers/javascript/test_pipeline.py: on zero files, write valid empty analyzer_output.json / dataset.json / call_graph.json, record synthetic successful stages, set self.empty_repo, and return True; run_full_pipeline then skips the unit generator and reports success. _parse_javascript reads units_count=0 and returns a valid empty ParseResult. parser_adapter.py and zig/test_pipeline.py are correct as-is and left untouched. Adds regression test tests/parsers/javascript/test_empty_repo.py (RED before, GREEN after). Full openant-core suite: 219 passed, 22 skipped. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parsers/javascript/test_pipeline.py | 81 +++++++++++++- .../parsers/javascript/test_empty_repo.py | 102 ++++++++++++++++++ 2 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 libs/openant-core/tests/parsers/javascript/test_empty_repo.py diff --git a/libs/openant-core/parsers/javascript/test_pipeline.py b/libs/openant-core/parsers/javascript/test_pipeline.py index 2eee6bd8..5d0b730a 100644 --- a/libs/openant-core/parsers/javascript/test_pipeline.py +++ b/libs/openant-core/parsers/javascript/test_pipeline.py @@ -126,6 +126,11 @@ def __init__( self.analyzer_output_file = None self.dataset_file = None + # Set when the repository contains zero JS-family source files. An empty + # repo is a valid 0-unit result (mirroring the Python/Ruby/Zig parsers), + # not a failure, so the downstream unit-generator stage is skipped. + self.empty_repo = False + # Reachability data (populated if processing_level >= REACHABLE) self.entry_points: Set[str] = set() self.reachable_units: Set[str] = set() @@ -278,8 +283,18 @@ def run_typescript_analyzer(self, files: list = None) -> bool: files = [f['path'] for f in scan_data.get('files', [])] if not files: - print("No files to analyze") - return False + # A repository with zero JS-family source files is a valid 0-unit + # result, not a failure. The Python, Ruby and Zig parsers all handle + # an empty repo gracefully; previously the JS pipeline returned False + # here, which made run_full_pipeline early-return without + # results['success'], so main() did sys.exit(1) and the adapter's + # _parse_javascript raised RuntimeError, aborting the entire scan. + # Mirror the zig graceful-empty pattern: write empty analyzer and + # dataset outputs and report success so the adapter reads + # units_count=0. 
+ print("No files to analyze; treating as empty repository (0 units)") + self._write_empty_outputs() + return True # Write file list to a temporary file to avoid command-line length limits file_list_path = os.path.join(self.output_dir, 'file_list.txt') @@ -326,6 +341,56 @@ def run_typescript_analyzer(self, files: list = None) -> bool: return result.get('success', False) + def _write_empty_outputs(self) -> None: + """Write valid empty analyzer + dataset outputs for a zero-file repo. + + Mirrors the zig parser's graceful-empty pattern so a repository with no + JS-family source files yields a valid 0-unit result instead of a fatal + non-zero exit. Records synthetic successful stage entries and sets + ``self.empty_repo`` so run_full_pipeline skips the unit-generator stage. + """ + self.empty_repo = True + + analyzer_output = { + "repository": self.repo_path, + "functions": {}, + "callGraph": {}, + } + write_json(self.analyzer_output_file, analyzer_output) + + self.dataset_file = os.path.join(self.output_dir, 'dataset.json') + empty_dataset = { + "name": self.dataset_name or os.path.basename(self.repo_path), + "repository": self.repo_path, + "units": [], + "statistics": {"totalUnits": 0, "byType": {}}, + "metadata": {"generator": "test_pipeline.py", "empty_repository": True}, + } + write_json(self.dataset_file, empty_dataset) + + call_graph_file = os.path.join(self.output_dir, 'call_graph.json') + write_json(call_graph_file, {"functions": {}, "call_graph": {}, "reverse_call_graph": {}}) + + empty_stage = { + 'success': True, + 'elapsed_seconds': 0.0, + 'output_file': self.analyzer_output_file, + 'summary': {'total_functions': 0, 'by_unit_type': {}, 'call_graph_entries': 0}, + } + self.results['stages']['typescript_analyzer'] = empty_stage + self.results['stages']['unit_generator'] = { + 'success': True, + 'elapsed_seconds': 0.0, + 'output_file': self.dataset_file, + 'summary': { + 'total_units': 0, + 'by_type': {}, + 'units_with_dependencies': 0, + 'call_graph_edges': 
0, + 'avg_out_degree': 0, + }, + } + def run_stage_with_stdout_capture(self, name: str, command: list, output_file: str) -> dict: """Run a stage that outputs JSON to stdout, capturing to a file.""" print(f"=" * 60) @@ -1085,6 +1150,18 @@ def run_full_pipeline(self): print("Pipeline stopped: TypeScript analyzer failed") return self.results + # Empty repository (zero JS-family files): the analyzer already wrote + # valid empty outputs and recorded synthetic successful stages. There is + # nothing for the unit generator or optional downstream stages to do, so + # report success directly instead of crashing. + if self.empty_repo: + self.results['success'] = True + print("=" * 60) + print("PIPELINE SUMMARY") + print("=" * 60) + print(f"{SYM_OK} Empty repository: 0 units (graceful)") + return self.results + # Stage 3: Unit Generator if not self.run_unit_generator(): print("Pipeline stopped: Unit generator failed") diff --git a/libs/openant-core/tests/parsers/javascript/test_empty_repo.py b/libs/openant-core/tests/parsers/javascript/test_empty_repo.py new file mode 100644 index 00000000..c24d3052 --- /dev/null +++ b/libs/openant-core/tests/parsers/javascript/test_empty_repo.py @@ -0,0 +1,102 @@ +"""Regression test for graceful handling of a zero-JS-file repository. + +A directory with zero JS-family source files must NOT be treated as a parser +failure. repository_scanner.js exits 0 with an empty file list; the JS pipeline +then hit ``if not files: return False`` in ``run_typescript_analyzer``, which +made ``run_full_pipeline`` early-return without ``results['success']``, so +``main`` did ``sys.exit(1)``. ``_parse_javascript`` then raised +``RuntimeError`` on the non-zero exit, aborting the whole scan instead of +yielding a 0-unit result. + +The Python, Ruby and Zig parsers all treat an empty repo gracefully (zig writes +an empty dataset and returns 0). 
This test pins the JS parser to the same +contract: zero files -> success + empty analyzer/dataset output, so the adapter +returns a valid empty ParseResult. + +Loaded via importlib under a UNIQUE module name to avoid colliding with the +many other modules named ``test_pipeline`` across the parser packages. +""" + +import importlib.util +import os + +import pytest + +from utilities.file_io import write_json + +_CORE_ROOT = os.path.abspath( + os.path.join(os.path.dirname(__file__), "..", "..", "..") +) +_JS_PIPELINE = os.path.join( + _CORE_ROOT, "parsers", "javascript", "test_pipeline.py" +) + + +def _load_js_pipeline(): + spec = importlib.util.spec_from_file_location( + "isolated_js_pipeline_under_test", _JS_PIPELINE + ) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +@pytest.fixture +def js_pipeline(): + return _load_js_pipeline() + + +def _make_pipeline(js_pipeline, tmp_path, repo): + repo.mkdir(parents=True, exist_ok=True) + out = tmp_path / "out" + out.mkdir(parents=True, exist_ok=True) + pipeline = js_pipeline.PipelineTest( + repo_path=str(repo), + output_dir=str(out), + processing_level=js_pipeline.ProcessingLevel.ALL, + skip_tests=True, + ) + return pipeline, out + + +def _write_empty_scan(pipeline, out): + """Simulate repository_scanner.js succeeding with zero JS files.""" + scan_path = os.path.join(str(out), "scan_results.json") + write_json( + scan_path, + {"files": [], "statistics": {"totalFiles": 0, "byExtension": {}}}, + ) + pipeline.scan_results_file = scan_path + + +def test_zero_js_files_analyzer_succeeds_gracefully(js_pipeline, tmp_path): + """run_typescript_analyzer must NOT report failure on an empty repo.""" + pipeline, out = _make_pipeline(js_pipeline, tmp_path, tmp_path / "empty_repo") + _write_empty_scan(pipeline, out) + + ok = pipeline.run_typescript_analyzer() + + assert ok is True, ( + "zero JS files must be a graceful empty result, not a stage failure" + ) + + +def 
test_zero_js_files_writes_empty_outputs(js_pipeline, tmp_path): + """An empty repo must leave a valid empty analyzer + dataset on disk so the + adapter reads units_count=0 instead of crashing.""" + pipeline, out = _make_pipeline(js_pipeline, tmp_path, tmp_path / "empty_repo") + _write_empty_scan(pipeline, out) + + assert pipeline.run_typescript_analyzer() is True + + analyzer_path = os.path.join(str(out), "analyzer_output.json") + dataset_path = os.path.join(str(out), "dataset.json") + assert os.path.exists(analyzer_path), "empty repo must still write analyzer_output.json" + assert os.path.exists(dataset_path), "empty repo must still write dataset.json" + + from utilities.file_io import read_json + + analyzer = read_json(analyzer_path) + dataset = read_json(dataset_path) + assert analyzer.get("functions", {}) == {} + assert dataset.get("units", []) == []