import csv
import sys
from collections import Counter


def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
    """Read a results CSV and return ``(header, rows)`` in file order.

    Args:
        path: Location of the CSV file to read.

    Returns:
        ``header`` is the list of column names taken from the first record
        (empty for an empty file).  ``rows`` holds one ``(row_number, values)``
        entry per data record, where ``row_number`` is the 1-based index of
        the record (header excluded) and ``values`` is a tuple of the record's
        cells ordered like ``header``.  A cell missing from a short record is
        ``None`` (``csv.DictReader``'s default fill value).
    """
    # utf-8-sig reads plain UTF-8 unchanged but strips a leading BOM, so the
    # first column name is not polluted by an invisible "\ufeff".
    with open(path, newline="", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        header = list(reader.fieldnames or [])
        rows = [
            (i, tuple(row[col] for col in header))
            for i, row in enumerate(reader, start=1)
        ]
    return header, rows


def _filter_unmatched(
    rows: list[tuple[int, tuple]], unmatched: Counter
) -> list[tuple[int, tuple]]:
    """Return the rows (in original order) that belong to the unmatched multiset.

    ``unmatched`` maps row tuples to the number of occurrences that had no
    counterpart; for each row value, only that many occurrences (the earliest
    ones) are kept.
    """
    # Work on a copy so the caller's Counter is left untouched.
    remaining_counts = Counter(unmatched)
    result = []
    for lineno, row in rows:
        if remaining_counts[row] > 0:
            result.append((lineno, row))
            remaining_counts[row] -= 1
    return result


def _similarity(a: tuple, b: tuple) -> float:
    """Return the fraction of positions (0.0 - 1.0) at which ``a`` and ``b`` agree.

    The denominator is the longer length, so cells without a counterpart in
    the shorter row count as mismatches.  Two empty rows are fully similar.
    """
    if not a and not b:
        return 1.0
    n = max(len(a), len(b))
    matches = sum(x == y for x, y in zip(a, b))
    return matches / n


def _pair_closest(
    exp_rows: list[tuple[int, tuple]],
    act_rows: list[tuple[int, tuple]],
) -> tuple[
    list[tuple[tuple[int, tuple], tuple[int, tuple]]],  # paired (exp, act)
    list[tuple[int, tuple]],  # unpaired expected
    list[tuple[int, tuple]],  # unpaired actual
]:
    """Greedily pair each row of the smaller side with its closest remaining match.

    Rows of the smaller side are processed in order; each one claims the most
    similar row still available on the larger side (the earliest such row on
    a tie).  Whatever is left on the larger side is returned as unpaired.

    Returns:
        ``(pairs, unpaired_expected, unpaired_actual)`` where every pair is
        ordered ``(expected, actual)`` regardless of which side was smaller.
    """
    exp_is_smaller = len(exp_rows) <= len(act_rows)
    smaller = exp_rows if exp_is_smaller else act_rows
    # Copy: candidates are popped as they are claimed.
    remaining_larger = list(act_rows if exp_is_smaller else exp_rows)

    pairs: list[tuple[tuple[int, tuple], tuple[int, tuple]]] = []
    unpaired_smaller: list[tuple[int, tuple]] = []

    for item in smaller:
        if not remaining_larger:
            # Defensive: cannot happen while ``smaller`` really is the
            # shorter list, but keeps the function total.
            unpaired_smaller.append(item)
            continue
        best_idx = max(
            range(len(remaining_larger)),
            key=lambda i: _similarity(item[1], remaining_larger[i][1]),
        )
        matched = remaining_larger.pop(best_idx)
        pairs.append((item, matched) if exp_is_smaller else (matched, item))

    unpaired_exp = unpaired_smaller if exp_is_smaller else remaining_larger
    unpaired_act = remaining_larger if exp_is_smaller else unpaired_smaller
    return pairs, unpaired_exp, unpaired_act


def _fit(row: tuple, width: int) -> list:
    """Return ``row`` as a list of exactly ``width`` cells (truncated or ""-padded)."""
    cells = list(row[:width])
    return cells + [""] * (width - len(cells))


def _mark(value, other):
    """Wrap ``value`` in Markdown bold when it differs from ``other``.

    Empty and missing cells are returned unchanged: there is nothing to
    emphasise, and ``****`` / ``**None**`` would only add noise.
    """
    if value != other and value not in ("", None):
        return f"**{value}**"
    return value


def diff(expected_path: str, actual_path: str) -> list[str]:
    """Compare two results CSVs, ignoring row order, and describe the differences.

    Args:
        expected_path: Path to the baseline CSV.
        actual_path: Path to the freshly generated CSV.

    Returns:
        A list of report chunks, empty when the files are equivalent.  It may
        contain a "Header changed" line, a "Row count changed" line, and a
        GitHub-flavoured Markdown table of the rows that have no exact
        counterpart (closest rows are shown as Expected/Actual pairs with the
        differing cells in bold).
    """
    exp_header, exp_rows = load(expected_path)
    act_header, act_rows = load(actual_path)

    diffs: list[str] = []
    # Rows are compared positionally, so a renamed or reordered column would
    # otherwise go unnoticed whenever the cell values still line up.
    if exp_header != act_header:
        diffs.append(
            f"Header changed: {exp_header} expected -> {act_header} actual\n"
        )

    exp_content = [row for _, row in exp_rows]
    act_content = [row for _, row in act_rows]

    # Multiset comparison: duplicates matter, ordering does not.
    exp_counter = Counter(exp_content)
    act_counter = Counter(act_content)
    matched = exp_counter & act_counter

    unmatched_exp = exp_counter - matched
    unmatched_act = act_counter - matched

    if not unmatched_exp and not unmatched_act:
        # NOTE: row order is deliberately not checked; compare exp_content
        # with act_content here if ordering ever needs to be enforced.
        return diffs

    remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
    remaining_act = _filter_unmatched(act_rows, unmatched_act)

    if len(exp_content) != len(act_content):
        diffs.append(
            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual\n"
        )

    pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)

    # Table rows are built positionally (not as dicts keyed by column name) so
    # duplicate column names, or a column that happens to be called
    # "Exp/Act" / "Result Row", cannot overwrite one another.
    width = len(exp_header)
    table_headers = ["Exp/Act", "Result Row", *exp_header]
    table_rows: list[list] = []

    for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
        exp_cells = _fit(exp_row, width)
        act_cells = _fit(act_row, width)
        table_rows.append(
            ["Expected", str(exp_lineno)]
            + [_mark(ev, av) for ev, av in zip(exp_cells, act_cells)]
        )
        table_rows.append(
            ["Actual", str(act_lineno)]
            + [_mark(av, ev) for ev, av in zip(exp_cells, act_cells)]
        )

    for lineno, row in unpaired_exp:
        table_rows.append(["Expected only", str(lineno)] + _fit(row, width))

    for lineno, row in unpaired_act:
        table_rows.append(["Actual only", str(lineno)] + _fit(row, width))

    # Imported here because tabulate is only needed to render a mismatch; the
    # "no differences" path keeps working where the package is not installed.
    from tabulate import tabulate

    table = tabulate(
        table_rows,
        headers=table_headers,
        tablefmt="github",
        # Show cells exactly as they appear in the CSV: with number parsing
        # enabled tabulate reformats float-like text (e.g. "1.10" -> "1.1").
        disable_numparse=True,
    )
    diffs.append(table)

    return diffs
$REPO_ROOT/validation_report.md (detailed markdown, legacy/fallback) # - $REPO_ROOT/case_results.jsonl (one JSON line per test case for the summary table) @@ -136,8 +136,8 @@ for TEST_TYPE in positive negative; do # Back up expected results.csv before the engine run (only if it exists) EXPECTED_RESULTS="" if [ "$MISSING_BASELINE" = false ]; then - cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.expected.csv" - EXPECTED_RESULTS="$RESULTS_DIR/results.expected.csv" + cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/expected.csv" + EXPECTED_RESULTS="$RESULTS_DIR/expected.csv" fi ENGINE_ARGS=( @@ -175,7 +175,7 @@ for TEST_TYPE in positive negative; do emit_result "false" "" "" "false" "" "$ENGINE_LOG" FAILED_CASES=$((FAILED_CASES + 1)) OVERALL_SUCCESS=false - [ "$MISSING_BASELINE" = false ] && mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv" + [ "$MISSING_BASELINE" = false ] && cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv" continue fi @@ -205,6 +205,9 @@ for TEST_TYPE in positive negative; do "$EXPECTED_RESULTS" "$ACTUAL_CSV" "$CASE_LABEL" \ > "$DIFF_LOG" 2>&1 || DIFF_EXIT=$? 
+ # Preserve actual output before restoring the baseline + cp "$ACTUAL_CSV" "$RESULTS_DIR/actual.csv" + if [ $DIFF_EXIT -eq 0 ]; then echo " PASSED — actual results match expected baseline" { @@ -231,7 +234,7 @@ for TEST_TYPE in positive negative; do OVERALL_SUCCESS=false fi - mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv" + cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv" done < <(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z) done diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 000000000..f8767b581 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "diff_results: compare two CSVs", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/.github/scripts/diff_results.py", + "args": [ + "${input:expectedCsv}", + "${input:actualCsv}", + "${input:caseLabel}" + ], + "console": "integratedTerminal" + } + ], + "inputs": [ + { + "id": "expectedCsv", + "type": "promptString", + "description": "Path to expected results CSV", + "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/expected.csv" + }, + { + "id": "actualCsv", + "type": "promptString", + "description": "Path to actual results CSV", + "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/actual.csv" + }, + { + "id": "caseLabel", + "type": "promptString", + "description": "Case label (e.g. negative/01)", + "default": "negative/01" + } + ] +}