diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index debbf52fb..61fa78500 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -12,37 +12,138 @@
 
 import csv
 import sys
-from itertools import zip_longest
+from collections import Counter
 
+from tabulate import tabulate
 
-def load(path: str) -> list[tuple]:
+
+def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
+    """Return (header, [(1-based data-row index, row tuple)]) in file order; the header row is not counted."""
     with open(path, newline="") as f:
         reader = csv.DictReader(f)
-        header = reader.fieldnames or []
-        rows = [tuple(row[col] for col in header) for row in reader]
-    return sorted(rows)
+        header = list(reader.fieldnames or [])
+        rows = [
+            (i, tuple(row[col] for col in header))
+            for i, row in enumerate(reader, start=1)
+        ]
+    return header, rows
+
+
+def _filter_unmatched(
+    rows: list[tuple[int, tuple]], unmatched: Counter
+) -> list[tuple[int, tuple]]:
+    """Select, in original order, the rows still owed to the unmatched multiset."""
+    budget = Counter(unmatched)  # private copy so the caller's counts survive
+    kept = []
+    for entry in rows:
+        if budget[entry[1]] > 0:
+            budget[entry[1]] -= 1
+            kept.append(entry)
+    return kept
+
+
+def _similarity(a: tuple, b: tuple) -> float:
+    """Score two rows by the share of positions holding equal values (0.0 – 1.0)."""
+    if not (a or b):
+        return 1.0
+    width = max(len(a), len(b))
+    hits = [True for left, right in zip(a, b) if left == right]
+    return len(hits) / width
+
+
+def _pair_closest(
+    exp_rows: list[tuple[int, tuple]],
+    act_rows: list[tuple[int, tuple]],
+) -> tuple[
+    list[tuple[tuple[int, tuple], tuple[int, tuple]]],  # paired (exp, act)
+    list[tuple[int, tuple]],  # unpaired expected
+    list[tuple[int, tuple]],  # unpaired actual
+]:
+    """Greedily pair each smaller-side row with its closest larger-side row.
+
+    Rows of the smaller side are visited in order; each one claims the remaining
+    larger-side row with the highest ``_similarity`` score (the earliest such row
+    wins a tie). The smaller side is never longer than the larger one, so all of
+    its rows get paired and only larger-side rows can be left over.
+
+    Returns ``(pairs, unpaired_expected, unpaired_actual)``; each pair is ordered
+    ``(expected, actual)`` regardless of which side was the smaller one.
+    """
+    exp_is_smaller = len(exp_rows) <= len(act_rows)
+    smaller, larger = (exp_rows, act_rows) if exp_is_smaller else (act_rows, exp_rows)
+    leftover = list(larger)  # copy: claimed rows are popped without touching the input
+
+    pairs: list[tuple[tuple[int, tuple], tuple[int, tuple]]] = []
+    for item in smaller:
+        # leftover cannot be empty here: it starts at least as long as `smaller`.
+        scores = [_similarity(item[1], candidate[1]) for candidate in leftover]
+        matched = leftover.pop(scores.index(max(scores)))  # first best match
+        pairs.append((item, matched) if exp_is_smaller else (matched, item))
+
+    return (pairs, [], leftover) if exp_is_smaller else (pairs, leftover, [])
 
 
 def diff(expected_path: str, actual_path: str) -> list[str]:
-    diffs = []
-    c_rows = load(expected_path)
-    g_rows = load(actual_path)
+    """Compare both CSVs as multisets of rows; return report lines, or [] when they match."""
+    exp_header, exp_rows = load(expected_path)
+    # NOTE(review): the actual file's header is discarded; its columns are assumed to line up with exp_header.
+    _, act_rows = load(actual_path)
+
+    exp_content = [row for _, row in exp_rows]
+    act_content = [row for _, row in act_rows]
 
-    if len(c_rows) != len(g_rows):
+    exp_counter = Counter(exp_content)
+    act_counter = Counter(act_content)
+    matched = exp_counter & act_counter
+
+    unmatched_exp = exp_counter - matched
+    unmatched_act = act_counter - matched
+
+    if not unmatched_exp and not unmatched_act:
+        # Same multiset of rows on both sides: ordering differences are deliberately ignored.
+        # To flag reordering instead, compare exp_content != act_content here and return a
+        # single "Row order changed" message rather than the empty list.
+        return []
+
+    remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
+    remaining_act = _filter_unmatched(act_rows, unmatched_act)
+
+    diffs = []
+    if len(exp_content) != len(act_content):
         diffs.append(
-            f"Row count changed: {len(c_rows)} expected -> {len(g_rows)} actual"
+            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual\n"
         )
 
-    _ABSENT = object()
-    for i, (c_row, g_row) in enumerate(
-        zip_longest(c_rows, g_rows, fillvalue=_ABSENT), start=1
-    ):
-        if c_row is _ABSENT:
-            diffs.append(f"  Row {i}: present in actual only -> {g_row}")
-        elif g_row is _ABSENT:
-            diffs.append(f"  Row {i}: present in expected only -> {c_row}")
-        elif c_row != g_row:
-            diffs.append(f"  Row {i}: expected={c_row} -> actual={g_row}")
+    pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)
+
+    table_headers = ["Exp/Act", "Result Row"] + exp_header
+
+    records = []
+    for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
+        exp_record = {"Exp/Act": "Expected", "Result Row": str(exp_lineno)}
+        act_record = {"Exp/Act": "Actual", "Result Row": str(act_lineno)}
+        for col, ev, av in zip(exp_header, exp_row, act_row):
+            exp_record[col] = f"**{ev}**" if ev != av and ev != "" else ev
+            act_record[col] = f"**{av}**" if ev != av and av != "" else av
+        records.append(exp_record)
+        records.append(act_record)
+
+    for lineno, row in unpaired_exp:
+        record = {"Exp/Act": "Expected only", "Result Row": str(lineno)}
+        record.update(zip(exp_header, row))
+        records.append(record)
+
+    for lineno, row in unpaired_act:
+        record = {"Exp/Act": "Actual only", "Result Row": str(lineno)}
+        record.update(zip(exp_header, row))
+        records.append(record)
+
+    table = tabulate(
+        [[r.get(h, "") for h in table_headers] for r in records],
+        headers=table_headers,
+        tablefmt="github",
+    )
+    diffs.append(table)
 
     return diffs
 
@@ -64,7 +165,7 @@ def main():
         sys.exit(2)
 
     if diffs:
-        print(f"DIFF_FOUND for {case_label}:")
+        print(f"DIFF_FOUND for {case_label}:\n")
         for line in diffs:
             print(line)
         sys.exit(1)
diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
index 74df066be..091fa2d32 100644
--- a/.github/scripts/run_validation.sh
+++ b/.github/scripts/run_validation.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # run_validation.sh — iterates all positive/ and negative/ test cases for a rule,
-# runs the CORE engine against each, converts JSON output to results.csv,
+# runs the CORE engine against each, writes the output to results.csv,
 # diffs against any expected results.csv, and writes two outputs:
 #   - $REPO_ROOT/validation_report.md  (detailed markdown, legacy/fallback)
 #   - $REPO_ROOT/case_results.jsonl    (one JSON line per test case for the summary table)
@@ -136,8 +136,8 @@ for TEST_TYPE in positive negative; do
     # Back up expected results.csv before the engine run (only if it exists)
     EXPECTED_RESULTS=""
     if [ "$MISSING_BASELINE" = false ]; then
-      cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.expected.csv"
-      EXPECTED_RESULTS="$RESULTS_DIR/results.expected.csv"
+      cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/expected.csv"
+      EXPECTED_RESULTS="$RESULTS_DIR/expected.csv"
     fi
 
     ENGINE_ARGS=(
@@ -175,7 +175,7 @@ for TEST_TYPE in positive negative; do
       emit_result "false" "" "" "false" "" "$ENGINE_LOG"
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
-      [ "$MISSING_BASELINE" = false ] && mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
+      [ "$MISSING_BASELINE" = false ] && cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
       continue
     fi
 
@@ -205,6 +205,9 @@ for TEST_TYPE in positive negative; do
       "$EXPECTED_RESULTS" "$ACTUAL_CSV" "$CASE_LABEL" \
       > "$DIFF_LOG" 2>&1 || DIFF_EXIT=$?
 
+    # Preserve actual output before restoring the baseline
+    cp "$ACTUAL_CSV" "$RESULTS_DIR/actual.csv"
+
     if [ $DIFF_EXIT -eq 0 ]; then
       echo "  PASSED — actual results match expected baseline"
       {
@@ -231,7 +234,7 @@ for TEST_TYPE in positive negative; do
       OVERALL_SUCCESS=false
     fi
 
-    mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
+    cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
 
   done < <(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)
 done
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..f8767b581
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,37 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "diff_results: compare two CSVs",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/.github/scripts/diff_results.py",
+      "args": [
+        "${input:expectedCsv}",
+        "${input:actualCsv}",
+        "${input:caseLabel}"
+      ],
+      "console": "integratedTerminal"
+    }
+  ],
+  "inputs": [
+    {
+      "id": "expectedCsv",
+      "type": "promptString",
+      "description": "Path to expected results CSV",
+      "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/expected.csv"
+    },
+    {
+      "id": "actualCsv",
+      "type": "promptString",
+      "description": "Path to actual results CSV",
+      "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/actual.csv"
+    },
+    {
+      "id": "caseLabel",
+      "type": "promptString",
+      "description": "Case label (e.g. negative/01)",
+      "default": "negative/01"
+    }
+  ]
+}