Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 79 additions & 2 deletions libs/openant-core/parsers/javascript/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,11 @@ def __init__(
self.analyzer_output_file = None
self.dataset_file = None

# Set when the repository contains zero JS-family source files. An empty
# repo is a valid 0-unit result (mirroring the Python/Ruby/Zig parsers),
# not a failure, so the downstream unit-generator stage is skipped.
self.empty_repo = False

# Reachability data (populated if processing_level >= REACHABLE)
self.entry_points: Set[str] = set()
self.reachable_units: Set[str] = set()
Expand Down Expand Up @@ -278,8 +283,18 @@ def run_typescript_analyzer(self, files: list = None) -> bool:
files = [f['path'] for f in scan_data.get('files', [])]

if not files:
print("No files to analyze")
return False
# A repository with zero JS-family source files is a valid 0-unit
# result, not a failure. The Python, Ruby and Zig parsers all handle
# an empty repo gracefully; previously the JS pipeline returned False
# here, which made run_full_pipeline early-return without
# results['success'], so main() did sys.exit(1) and the adapter's
# _parse_javascript raised RuntimeError, aborting the entire scan.
# Mirror the zig graceful-empty pattern: write empty analyzer and
# dataset outputs and report success so the adapter reads
# units_count=0.
print("No files to analyze; treating as empty repository (0 units)")
self._write_empty_outputs()
return True

# Write file list to a temporary file to avoid command-line length limits
file_list_path = os.path.join(self.output_dir, 'file_list.txt')
Expand Down Expand Up @@ -326,6 +341,56 @@ def run_typescript_analyzer(self, files: list = None) -> bool:

return result.get('success', False)

def _write_empty_outputs(self) -> None:
    """Write valid empty analyzer + dataset outputs for a zero-file repo.

    Mirrors the zig parser's graceful-empty pattern so a repository with no
    JS-family source files yields a valid 0-unit result instead of a fatal
    non-zero exit. Records synthetic successful stage entries and sets
    ``self.empty_repo`` so run_full_pipeline skips the unit-generator stage.

    Side effects:
        * sets ``self.empty_repo`` to True;
        * writes ``analyzer_output.json``, ``dataset.json`` and
          ``call_graph.json`` (all empty-but-valid) under ``self.output_dir``;
        * sets ``self.dataset_file`` and, if it was still unset,
          ``self.analyzer_output_file``;
        * records successful ``typescript_analyzer`` and ``unit_generator``
          entries in ``self.results['stages']``.
    """
    self.empty_repo = True

    # __init__ initialises analyzer_output_file to None. Fall back to the
    # conventional file name so this helper never hands None to write_json,
    # regardless of whether the caller assigned the path beforehand.
    if not self.analyzer_output_file:
        self.analyzer_output_file = os.path.join(self.output_dir, 'analyzer_output.json')

    analyzer_output = {
        "repository": self.repo_path,
        "functions": {},
        "callGraph": {},
    }
    write_json(self.analyzer_output_file, analyzer_output)

    self.dataset_file = os.path.join(self.output_dir, 'dataset.json')
    empty_dataset = {
        "name": self.dataset_name or os.path.basename(self.repo_path),
        "repository": self.repo_path,
        "units": [],
        "statistics": {"totalUnits": 0, "byType": {}},
        "metadata": {"generator": "test_pipeline.py", "empty_repository": True},
    }
    write_json(self.dataset_file, empty_dataset)

    call_graph_file = os.path.join(self.output_dir, 'call_graph.json')
    write_json(call_graph_file, {"functions": {}, "call_graph": {}, "reverse_call_graph": {}})

    # Synthetic stage records: both stages "ran" instantly and produced
    # nothing, so the summaries carry explicit zero counts.
    self.results['stages']['typescript_analyzer'] = {
        'success': True,
        'elapsed_seconds': 0.0,
        'output_file': self.analyzer_output_file,
        'summary': {'total_functions': 0, 'by_unit_type': {}, 'call_graph_entries': 0},
    }
    self.results['stages']['unit_generator'] = {
        'success': True,
        'elapsed_seconds': 0.0,
        'output_file': self.dataset_file,
        'summary': {
            'total_units': 0,
            'by_type': {},
            'units_with_dependencies': 0,
            'call_graph_edges': 0,
            'avg_out_degree': 0,
        },
    }

def run_stage_with_stdout_capture(self, name: str, command: list, output_file: str) -> dict:
"""Run a stage that outputs JSON to stdout, capturing to a file."""
print(f"=" * 60)
Expand Down Expand Up @@ -1085,6 +1150,18 @@ def run_full_pipeline(self):
print("Pipeline stopped: TypeScript analyzer failed")
return self.results

# Empty repository (zero JS-family files): the analyzer already wrote
# valid empty outputs and recorded synthetic successful stages. There is
# nothing for the unit generator or optional downstream stages to do, so
# report success directly instead of crashing.
if self.empty_repo:
self.results['success'] = True
print("=" * 60)
print("PIPELINE SUMMARY")
print("=" * 60)
print(f"{SYM_OK} Empty repository: 0 units (graceful)")
return self.results

# Stage 3: Unit Generator
if not self.run_unit_generator():
print("Pipeline stopped: Unit generator failed")
Expand Down
102 changes: 102 additions & 0 deletions libs/openant-core/tests/parsers/javascript/test_empty_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Regression test for graceful handling of a zero-JS-file repository.

A directory with zero JS-family source files must NOT be treated as a parser
failure. repository_scanner.js exits 0 with an empty file list; the JS pipeline
then hit ``if not files: return False`` in ``run_typescript_analyzer``, which
made ``run_full_pipeline`` early-return without ``results['success']``, so
``main`` did ``sys.exit(1)``. ``_parse_javascript`` then raised
``RuntimeError`` on the non-zero exit, aborting the whole scan instead of
yielding a 0-unit result.

The Python, Ruby and Zig parsers all treat an empty repo gracefully (zig writes
an empty dataset and returns 0). This test pins the JS parser to the same
contract: zero files -> success + empty analyzer/dataset output, so the adapter
returns a valid empty ParseResult.

Loaded via importlib under a UNIQUE module name to avoid colliding with the
many other modules named ``test_pipeline`` across the parser packages.
"""

import importlib.util
import os

import pytest

from utilities.file_io import write_json

# Root of the openant-core package: three directory levels above this file.
_CORE_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), *[".."] * 3)
)
# Absolute path of the JS pipeline script that the tests load by file path.
_JS_PIPELINE = os.path.join(_CORE_ROOT, "parsers", "javascript", "test_pipeline.py")


def _load_js_pipeline():
    """Load the JS pipeline script as an isolated module and return it.

    The script is imported by file path under a unique module name so it does
    not collide with other modules called ``test_pipeline``.

    Returns:
        The freshly executed module object.

    Raises:
        ImportError: if no import spec/loader can be built for the script.
    """
    import sys

    module_name = "isolated_js_pipeline_under_test"
    spec = importlib.util.spec_from_file_location(module_name, _JS_PIPELINE)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot build an import spec for {_JS_PIPELINE}")

    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib recipe for importing a
    # source file directly does; code that resolves its own module through
    # sys.modules during import (e.g. dataclasses, pickle) relies on this.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind on failure.
        sys.modules.pop(module_name, None)
        raise
    return module


@pytest.fixture
def js_pipeline():
    """Provide the JS pipeline module, loaded afresh for each requesting test."""
    pipeline_module = _load_js_pipeline()
    return pipeline_module


def _make_pipeline(js_pipeline, tmp_path, repo):
    """Build a ``PipelineTest`` for ``repo`` that writes into ``tmp_path / "out"``.

    Both the repository directory and the output directory are created if
    they do not exist yet.

    Returns:
        A ``(pipeline, output_dir)`` tuple.
    """
    output_dir = tmp_path / "out"
    for directory in (repo, output_dir):
        directory.mkdir(parents=True, exist_ok=True)

    instance = js_pipeline.PipelineTest(
        repo_path=str(repo),
        output_dir=str(output_dir),
        processing_level=js_pipeline.ProcessingLevel.ALL,
        skip_tests=True,
    )
    return instance, output_dir


def _write_empty_scan(pipeline, out):
    """Simulate repository_scanner.js succeeding with zero JS files.

    Writes a scan result with an empty file list into ``out`` and points
    ``pipeline.scan_results_file`` at it.
    """
    empty_scan = {
        "files": [],
        "statistics": {"totalFiles": 0, "byExtension": {}},
    }
    destination = os.path.join(str(out), "scan_results.json")
    write_json(destination, empty_scan)
    pipeline.scan_results_file = destination


def test_zero_js_files_analyzer_succeeds_gracefully(js_pipeline, tmp_path):
    """The analyzer stage must report success, not failure, for an empty repo."""
    empty_repo = tmp_path / "empty_repo"
    pipeline, out_dir = _make_pipeline(js_pipeline, tmp_path, empty_repo)
    _write_empty_scan(pipeline, out_dir)

    analyzer_ok = pipeline.run_typescript_analyzer()

    failure_note = "zero JS files must be a graceful empty result, not a stage failure"
    assert analyzer_ok is True, failure_note


def test_zero_js_files_writes_empty_outputs(js_pipeline, tmp_path):
    """An empty repo must leave a valid empty analyzer + dataset on disk so the
    adapter reads units_count=0 instead of crashing."""
    # Function-local import, placed first rather than mid-body.
    from utilities.file_io import read_json

    pipeline, out = _make_pipeline(js_pipeline, tmp_path, tmp_path / "empty_repo")
    _write_empty_scan(pipeline, out)

    assert pipeline.run_typescript_analyzer() is True

    analyzer_path = os.path.join(str(out), "analyzer_output.json")
    dataset_path = os.path.join(str(out), "dataset.json")
    assert os.path.exists(analyzer_path), "empty repo must still write analyzer_output.json"
    assert os.path.exists(dataset_path), "empty repo must still write dataset.json"

    analyzer = read_json(analyzer_path)
    dataset = read_json(dataset_path)
    # Require the keys to be present: a .get() with a default would also pass
    # on a file that omits them, which is not a valid empty output.
    assert "functions" in analyzer, "analyzer output must contain a 'functions' key"
    assert analyzer["functions"] == {}
    assert "units" in dataset, "dataset must contain a 'units' key"
    assert dataset["units"] == []
Loading