Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 122 additions & 21 deletions .github/scripts/diff_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,37 +12,138 @@

import csv
import sys
from itertools import zip_longest
from collections import Counter

from tabulate import tabulate

def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
    """Read a CSV file, preserving the original row order.

    Args:
        path: Path of the CSV file to read.

    Returns:
        ``(header, rows)``. ``header`` is the list of column names (empty for
        an empty file). ``rows`` is a list of ``(row number, values)`` pairs
        where the row number is 1-based and counts the data rows yielded by
        ``csv.DictReader`` (the header line is not counted) and ``values`` is
        a tuple of the row's fields in header order.
    """
    # newline="" lets the csv module handle embedded newlines itself.
    with open(path, newline="") as f:
        reader = csv.DictReader(f)
        # fieldnames is None for an empty file; normalise to an empty list.
        header = list(reader.fieldnames or [])
        rows = [
            (i, tuple(row[col] for col in header))
            for i, row in enumerate(reader, start=1)
        ]
    return header, rows


def _filter_unmatched(
rows: list[tuple[int, tuple]], unmatched: Counter
) -> list[tuple[int, tuple]]:
"""Return rows (in original order) that belong to the unmatched set."""
remaining_counts = Counter(unmatched)
result = []
for lineno, row in rows:
if remaining_counts[row] > 0:
result.append((lineno, row))
remaining_counts[row] -= 1
return result


def _similarity(a: tuple, b: tuple) -> float:
"""Fraction of fields that match between two rows (0.0 – 1.0)."""
if not a and not b:
return 1.0
n = max(len(a), len(b))
matches = sum(x == y for x, y in zip(a, b))
return matches / n


def _pair_closest(
    exp_rows: list[tuple[int, tuple]],
    act_rows: list[tuple[int, tuple]],
) -> tuple[
    list[tuple[tuple[int, tuple], tuple[int, tuple]]],  # paired (exp, act)
    list[tuple[int, tuple]],  # unpaired expected
    list[tuple[int, tuple]],  # unpaired actual
]:
    """Greedily pair each row of the smaller side with its closest match on the larger side.

    Rows of the smaller side are processed in order; each one takes the
    not-yet-paired row of the larger side with the highest ``_similarity``
    (the earliest such row on ties).

    Args:
        exp_rows: Unmatched ``(row number, values)`` pairs from the expected file.
        act_rows: Unmatched ``(row number, values)`` pairs from the actual file.

    Returns:
        ``(pairs, unpaired_expected, unpaired_actual)``. Every pair is ordered
        ``(expected, actual)``. Because the smaller side is always fully
        paired, at most one of the two unpaired lists is non-empty.
    """
    exp_is_smaller = len(exp_rows) <= len(act_rows)
    smaller = exp_rows if exp_is_smaller else act_rows
    # Copy: candidates are popped as they get paired.
    remaining_larger = list(act_rows if exp_is_smaller else exp_rows)

    pairs: list[tuple[tuple[int, tuple], tuple[int, tuple]]] = []
    for item in smaller:
        # len(smaller) <= len(larger) and one candidate is consumed per
        # iteration, so there is always at least one candidate left here.
        scores = [_similarity(item[1], candidate) for _, candidate in remaining_larger]
        best_idx = scores.index(max(scores))  # first best candidate wins ties
        matched = remaining_larger.pop(best_idx)
        pairs.append((item, matched) if exp_is_smaller else (matched, item))

    # Whatever is left on the larger side had no partner.
    if exp_is_smaller:
        return pairs, [], remaining_larger
    return pairs, remaining_larger, []


def diff(expected_path: str, actual_path: str) -> list[str]:
    """Compare two result CSVs, ignoring row order.

    Rows are compared as multisets: a row of the expected file is matched by
    any identical row of the actual file, wherever it appears. The rows left
    over on each side are paired with their closest counterpart and rendered
    as a GitHub-flavoured markdown table in which differing, non-empty values
    are wrapped in ``**``.

    Args:
        expected_path: Path of the baseline CSV.
        actual_path: Path of the CSV produced by the run under test.

    Returns:
        A list of report chunks to print; empty when the files are equivalent.
    """
    exp_header, exp_rows = load(expected_path)
    act_header, act_rows = load(actual_path)

    diffs: list[str] = []

    # Row tuples only carry values, so a renamed column would otherwise go
    # unnoticed when every value still matches.
    if exp_header != act_header:
        diffs.append(
            f"Header changed: {exp_header} expected -> {act_header} actual\n"
        )

    exp_content = [row for _, row in exp_rows]
    act_content = [row for _, row in act_rows]

    # Multiset intersection: identical rows cancel out regardless of position.
    exp_counter = Counter(exp_content)
    act_counter = Counter(act_content)
    matched = exp_counter & act_counter

    unmatched_exp = exp_counter - matched
    unmatched_act = act_counter - matched

    if not unmatched_exp and not unmatched_act:
        # NOTE: row order is deliberately ignored. To treat a pure reordering
        # as a difference, compare exp_content with act_content here.
        return diffs

    remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
    remaining_act = _filter_unmatched(act_rows, unmatched_act)

    if len(exp_content) != len(act_content):
        diffs.append(
            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual\n"
        )

    pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)

    # Table columns follow the expected file's header.
    table_headers = ["Exp/Act", "Result Row"] + exp_header

    records: list[dict] = []
    for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
        exp_record = {"Exp/Act": "Expected", "Result Row": str(exp_lineno)}
        act_record = {"Exp/Act": "Actual", "Result Row": str(act_lineno)}
        for col, ev, av in zip(exp_header, exp_row, act_row):
            # Bold only values that differ; empty cells stay empty so the
            # table does not fill up with bare "****" markers.
            differs = ev != av
            exp_record[col] = f"**{ev}**" if differs and ev != "" else ev
            act_record[col] = f"**{av}**" if differs and av != "" else av
        records.append(exp_record)
        records.append(act_record)

    for lineno, row in unpaired_exp:
        record = {"Exp/Act": "Expected only", "Result Row": str(lineno)}
        record.update(zip(exp_header, row))
        records.append(record)

    for lineno, row in unpaired_act:
        record = {"Exp/Act": "Actual only", "Result Row": str(lineno)}
        record.update(zip(exp_header, row))
        records.append(record)

    table = tabulate(
        [[r.get(h, "") for h in table_headers] for r in records],
        headers=table_headers,
        tablefmt="github",
    )
    diffs.append(table)

    return diffs

Expand All @@ -64,7 +165,7 @@ def main():
sys.exit(2)

if diffs:
print(f"DIFF_FOUND for {case_label}:")
print(f"DIFF_FOUND for {case_label}:\n")
for line in diffs:
print(line)
sys.exit(1)
Expand Down
13 changes: 8 additions & 5 deletions .github/scripts/run_validation.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
# run_validation.sh — iterates all positive/ and negative/ test cases for a rule,
# runs the CORE engine against each, converts JSON output to results.csv,
# runs the CORE engine against each, prints output to results.csv,
# diffs against any expected results.csv, and writes two outputs:
# - $REPO_ROOT/validation_report.md (detailed markdown, legacy/fallback)
# - $REPO_ROOT/case_results.jsonl (one JSON line per test case for the summary table)
Expand Down Expand Up @@ -136,8 +136,8 @@ for TEST_TYPE in positive negative; do
# Back up expected results.csv before the engine run (only if it exists)
EXPECTED_RESULTS=""
if [ "$MISSING_BASELINE" = false ]; then
cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.expected.csv"
EXPECTED_RESULTS="$RESULTS_DIR/results.expected.csv"
cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/expected.csv"
EXPECTED_RESULTS="$RESULTS_DIR/expected.csv"
fi

ENGINE_ARGS=(
Expand Down Expand Up @@ -175,7 +175,7 @@ for TEST_TYPE in positive negative; do
emit_result "false" "" "" "false" "" "$ENGINE_LOG"
FAILED_CASES=$((FAILED_CASES + 1))
OVERALL_SUCCESS=false
[ "$MISSING_BASELINE" = false ] && mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
[ "$MISSING_BASELINE" = false ] && cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
continue
fi

Expand Down Expand Up @@ -205,6 +205,9 @@ for TEST_TYPE in positive negative; do
"$EXPECTED_RESULTS" "$ACTUAL_CSV" "$CASE_LABEL" \
> "$DIFF_LOG" 2>&1 || DIFF_EXIT=$?

# Preserve actual output before restoring the baseline
cp "$ACTUAL_CSV" "$RESULTS_DIR/actual.csv"

if [ $DIFF_EXIT -eq 0 ]; then
echo " PASSED — actual results match expected baseline"
{
Expand All @@ -231,7 +234,7 @@ for TEST_TYPE in positive negative; do
OVERALL_SUCCESS=false
fi

mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"

done < <(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)
done
Expand Down
37 changes: 37 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "diff_results: compare two CSVs",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/.github/scripts/diff_results.py",
"args": [
"${input:expectedCsv}",
"${input:actualCsv}",
"${input:caseLabel}"
],
"console": "integratedTerminal"
}
],
"inputs": [
{
"id": "expectedCsv",
"type": "promptString",
"description": "Path to expected results CSV",
"default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/expected.csv"
},
{
"id": "actualCsv",
"type": "promptString",
"description": "Path to actual results CSV",
"default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/actual.csv"
},
{
"id": "caseLabel",
"type": "promptString",
"description": "Case label (e.g. negative/01)",
"default": "negative/01"
}
]
}