From 6cfba555db5fa046812f887942bd120a93e33231 Mon Sep 17 00:00:00 2001
From: Gadi Evron <gadievron@users.noreply.github.com>
Date: Thu, 28 May 2026 12:58:44 -0700
Subject: [PATCH] fix(js-parser): treat a zero-JS-file repo as an empty result,
 not a crash

A directory with zero JS-family source files made the JavaScript parser
abort the entire scan instead of returning a clean empty result.

repository_scanner.js succeeds and emits an empty file list (exit 0), but
run_typescript_analyzer then hit `if not files: return False`. As a
result, run_full_pipeline returned early without setting
results['success'], main() called sys.exit(1), and _parse_javascript
raised RuntimeError on the non-zero exit. Nothing at the scan layer
catches that error, so the whole scan aborted. The Python, Ruby and Zig
parsers all handle an empty repo gracefully; the JS guard was the lone
outlier promoting valid empty input to a fatal exit.

Mirror the Zig parser's graceful-empty pattern entirely within
parsers/javascript/test_pipeline.py: on zero files, write valid empty
analyzer_output.json / dataset.json / call_graph.json, record synthetic
successful stages, set self.empty_repo, and return True. run_full_pipeline
then skips the unit generator and reports success, and _parse_javascript
reads units_count=0 and returns a valid empty ParseResult.
parser_adapter.py and zig/test_pipeline.py are correct as-is and are left
untouched.

Add regression test tests/parsers/javascript/test_empty_repo.py, which
fails before this change and passes after it. Full openant-core suite:
219 passed, 22 skipped.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../parsers/javascript/test_pipeline.py       |  81 +++++++++++++-
 .../parsers/javascript/test_empty_repo.py     | 102 ++++++++++++++++++
 2 files changed, 181 insertions(+), 2 deletions(-)
 create mode 100644 libs/openant-core/tests/parsers/javascript/test_empty_repo.py

diff --git a/libs/openant-core/parsers/javascript/test_pipeline.py b/libs/openant-core/parsers/javascript/test_pipeline.py
index 2eee6bd8..5d0b730a 100644
--- a/libs/openant-core/parsers/javascript/test_pipeline.py
+++ b/libs/openant-core/parsers/javascript/test_pipeline.py
@@ -126,6 +126,11 @@ def __init__(
         self.analyzer_output_file = None
         self.dataset_file = None
 
+        # Set when the repository contains zero JS-family source files. An empty
+        # repo is a valid 0-unit result (mirroring the Python/Ruby/Zig parsers),
+        # not a failure, so the downstream unit-generator stage is skipped.
+        self.empty_repo = False
+
         # Reachability data (populated if processing_level >= REACHABLE)
         self.entry_points: Set[str] = set()
         self.reachable_units: Set[str] = set()
@@ -278,8 +283,18 @@ def run_typescript_analyzer(self, files: list = None) -> bool:
             files = [f['path'] for f in scan_data.get('files', [])]
 
         if not files:
-            print("No files to analyze")
-            return False
+            # A repository with zero JS-family source files is a valid 0-unit
+            # result, not a failure. The Python, Ruby and Zig parsers all handle
+            # an empty repo gracefully; previously the JS pipeline returned False
+            # here, which made run_full_pipeline early-return without
+            # results['success'], so main() did sys.exit(1) and the adapter's
+            # _parse_javascript raised RuntimeError, aborting the entire scan.
+            # Mirror the zig graceful-empty pattern: write empty analyzer and
+            # dataset outputs and report success so the adapter reads
+            # units_count=0.
+            print("No files to analyze; treating as empty repository (0 units)")
+            self._write_empty_outputs()
+            return True
 
         # Write file list to a temporary file to avoid command-line length limits
         file_list_path = os.path.join(self.output_dir, 'file_list.txt')
@@ -326,6 +341,56 @@ def run_typescript_analyzer(self, files: list = None) -> bool:
 
         return result.get('success', False)
 
+    def _write_empty_outputs(self) -> None:
+        """Write valid empty analyzer + dataset outputs for a zero-file repo.
+
+        Mirrors the zig parser's graceful-empty pattern so a repository with no
+        JS-family source files yields a valid 0-unit result instead of a fatal
+        non-zero exit. Records synthetic successful stage entries and sets
+        ``self.empty_repo`` so run_full_pipeline skips the unit-generator stage.
+        """
+        self.empty_repo = True
+
+        analyzer_output = {
+            "repository": self.repo_path,
+            "functions": {},
+            "callGraph": {},
+        }
+        write_json(self.analyzer_output_file, analyzer_output)
+
+        self.dataset_file = os.path.join(self.output_dir, 'dataset.json')
+        empty_dataset = {
+            "name": self.dataset_name or os.path.basename(self.repo_path),
+            "repository": self.repo_path,
+            "units": [],
+            "statistics": {"totalUnits": 0, "byType": {}},
+            "metadata": {"generator": "test_pipeline.py", "empty_repository": True},
+        }
+        write_json(self.dataset_file, empty_dataset)
+
+        call_graph_file = os.path.join(self.output_dir, 'call_graph.json')
+        write_json(call_graph_file, {"functions": {}, "call_graph": {}, "reverse_call_graph": {}})
+
+        empty_stage = {
+            'success': True,
+            'elapsed_seconds': 0.0,
+            'output_file': self.analyzer_output_file,
+            'summary': {'total_functions': 0, 'by_unit_type': {}, 'call_graph_entries': 0},
+        }
+        self.results['stages']['typescript_analyzer'] = empty_stage
+        self.results['stages']['unit_generator'] = {
+            'success': True,
+            'elapsed_seconds': 0.0,
+            'output_file': self.dataset_file,
+            'summary': {
+                'total_units': 0,
+                'by_type': {},
+                'units_with_dependencies': 0,
+                'call_graph_edges': 0,
+                'avg_out_degree': 0,
+            },
+        }
+
     def run_stage_with_stdout_capture(self, name: str, command: list, output_file: str) -> dict:
         """Run a stage that outputs JSON to stdout, capturing to a file."""
         print(f"=" * 60)
@@ -1085,6 +1150,18 @@ def run_full_pipeline(self):
             print("Pipeline stopped: TypeScript analyzer failed")
             return self.results
 
+        # Empty repository (zero JS-family files): the analyzer already wrote
+        # valid empty outputs and recorded synthetic successful stages. There is
+        # nothing for the unit generator or optional downstream stages to do, so
+        # report success directly instead of crashing.
+        if self.empty_repo:
+            self.results['success'] = True
+            print("=" * 60)
+            print("PIPELINE SUMMARY")
+            print("=" * 60)
+            print(f"{SYM_OK} Empty repository: 0 units (graceful)")
+            return self.results
+
         # Stage 3: Unit Generator
         if not self.run_unit_generator():
             print("Pipeline stopped: Unit generator failed")
diff --git a/libs/openant-core/tests/parsers/javascript/test_empty_repo.py b/libs/openant-core/tests/parsers/javascript/test_empty_repo.py
new file mode 100644
index 00000000..c24d3052
--- /dev/null
+++ b/libs/openant-core/tests/parsers/javascript/test_empty_repo.py
@@ -0,0 +1,102 @@
+"""Regression test for graceful handling of a zero-JS-file repository.
+
+A directory with zero JS-family source files must NOT be treated as a parser
+failure. repository_scanner.js exits 0 with an empty file list; the JS pipeline
+then hit ``if not files: return False`` in ``run_typescript_analyzer``, which
+made ``run_full_pipeline`` early-return without ``results['success']``, so
+``main`` did ``sys.exit(1)``. ``_parse_javascript`` then raised
+``RuntimeError`` on the non-zero exit, aborting the whole scan instead of
+yielding a 0-unit result.
+
+The Python, Ruby and Zig parsers all treat an empty repo gracefully (zig writes
+an empty dataset and returns 0). This test pins the JS parser to the same
+contract: zero files -> success + empty analyzer/dataset output, so the adapter
+returns a valid empty ParseResult.
+
+Loaded via importlib under a UNIQUE module name to avoid colliding with the
+many other modules named ``test_pipeline`` across the parser packages.
+"""
+
+import importlib.util
+import os
+
+import pytest
+
+from utilities.file_io import write_json
+
+_CORE_ROOT = os.path.abspath(
+    os.path.join(os.path.dirname(__file__), "..", "..", "..")
+)
+_JS_PIPELINE = os.path.join(
+    _CORE_ROOT, "parsers", "javascript", "test_pipeline.py"
+)
+
+
+def _load_js_pipeline():
+    spec = importlib.util.spec_from_file_location(
+        "isolated_js_pipeline_under_test", _JS_PIPELINE
+    )
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+@pytest.fixture
+def js_pipeline():
+    return _load_js_pipeline()
+
+
+def _make_pipeline(js_pipeline, tmp_path, repo):
+    repo.mkdir(parents=True, exist_ok=True)
+    out = tmp_path / "out"
+    out.mkdir(parents=True, exist_ok=True)
+    pipeline = js_pipeline.PipelineTest(
+        repo_path=str(repo),
+        output_dir=str(out),
+        processing_level=js_pipeline.ProcessingLevel.ALL,
+        skip_tests=True,
+    )
+    return pipeline, out
+
+
+def _write_empty_scan(pipeline, out):
+    """Simulate repository_scanner.js succeeding with zero JS files."""
+    scan_path = os.path.join(str(out), "scan_results.json")
+    write_json(
+        scan_path,
+        {"files": [], "statistics": {"totalFiles": 0, "byExtension": {}}},
+    )
+    pipeline.scan_results_file = scan_path
+
+
+def test_zero_js_files_analyzer_succeeds_gracefully(js_pipeline, tmp_path):
+    """run_typescript_analyzer must NOT report failure on an empty repo."""
+    pipeline, out = _make_pipeline(js_pipeline, tmp_path, tmp_path / "empty_repo")
+    _write_empty_scan(pipeline, out)
+
+    ok = pipeline.run_typescript_analyzer()
+
+    assert ok is True, (
+        "zero JS files must be a graceful empty result, not a stage failure"
+    )
+
+
+def test_zero_js_files_writes_empty_outputs(js_pipeline, tmp_path):
+    """An empty repo must leave a valid empty analyzer + dataset on disk so the
+    adapter reads units_count=0 instead of crashing."""
+    pipeline, out = _make_pipeline(js_pipeline, tmp_path, tmp_path / "empty_repo")
+    _write_empty_scan(pipeline, out)
+
+    assert pipeline.run_typescript_analyzer() is True
+
+    analyzer_path = os.path.join(str(out), "analyzer_output.json")
+    dataset_path = os.path.join(str(out), "dataset.json")
+    assert os.path.exists(analyzer_path), "empty repo must still write analyzer_output.json"
+    assert os.path.exists(dataset_path), "empty repo must still write dataset.json"
+
+    from utilities.file_io import read_json
+
+    analyzer = read_json(analyzer_path)
+    dataset = read_json(dataset_path)
+    assert analyzer.get("functions", {}) == {}
+    assert dataset.get("units", []) == []