From 247cfb2186c437e270efbd0dbaef488ddffdd49d Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 13:36:32 -0400
Subject: [PATCH 01/15] Updated detailed diff algo

---
 .github/scripts/diff_results.py | 72 ++++++++++++++++++++++++---------
 .vscode/launch.json             | 37 +++++++++++++++++
 2 files changed, 90 insertions(+), 19 deletions(-)
 create mode 100644 .vscode/launch.json

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index debbf52fb..ebb5c1717 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -12,37 +12,71 @@
 
 import csv
 import sys
-from itertools import zip_longest
+from collections import Counter, defaultdict, deque
 
 
-def load(path: str) -> list[tuple]:
+def load(path: str) -> list[tuple[int, tuple]]:
+    """Load CSV rows as (1-based line number, row tuple), preserving original order."""
     with open(path, newline="") as f:
         reader = csv.DictReader(f)
         header = reader.fieldnames or []
-        rows = [tuple(row[col] for col in header) for row in reader]
-    return sorted(rows)
+        return [
+            (i, tuple(row[col] for col in header))
+            for i, row in enumerate(reader, start=1)
+        ]
 
 
 def diff(expected_path: str, actual_path: str) -> list[str]:
-    diffs = []
-    c_rows = load(expected_path)
-    g_rows = load(actual_path)
+    exp_rows = load(expected_path)
+    act_rows = load(actual_path)
+
+    exp_content = [row for _, row in exp_rows]
+    act_content = [row for _, row in act_rows]
+
+    exp_counter = Counter(exp_content)
+    act_counter = Counter(act_content)
+    matched = exp_counter & act_counter  # rows present in both (min count)
+
+    unmatched_exp = exp_counter - matched  # rows only in expected
+    unmatched_act = act_counter - matched  # rows only in actual
+
+    if not unmatched_exp and not unmatched_act:
+        if exp_content != act_content:
+            return [
+                "Row order changed: rows are identical but appear in a different order"
+            ]
+        return []
 
-    if len(c_rows) != len(g_rows):
+    # Build per-content queues of line numbers; consume matched rows first
+    # so remaining entries represent the truly unmatched lines.
+    exp_linenos: dict[tuple, deque[int]] = defaultdict(deque)
+    for lineno, row in exp_rows:
+        exp_linenos[row].append(lineno)
+
+    act_linenos: dict[tuple, deque[int]] = defaultdict(deque)
+    for lineno, row in act_rows:
+        act_linenos[row].append(lineno)
+
+    for row, count in matched.items():
+        for _ in range(count):
+            exp_linenos[row].popleft()
+            act_linenos[row].popleft()
+
+    diffs = []
+    if len(exp_content) != len(act_content):
         diffs.append(
-            f"Row count changed: {len(c_rows)} expected -> {len(g_rows)} actual"
+            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual"
         )
 
-    _ABSENT = object()
-    for i, (c_row, g_row) in enumerate(
-        zip_longest(c_rows, g_rows, fillvalue=_ABSENT), start=1
-    ):
-        if c_row is _ABSENT:
-            diffs.append(f"  Row {i}: present in actual only -> {g_row}")
-        elif g_row is _ABSENT:
-            diffs.append(f"  Row {i}: present in expected only -> {c_row}")
-        elif c_row != g_row:
-            diffs.append(f"  Row {i}: expected={c_row} -> actual={g_row}")
+    for row in sorted(unmatched_exp):
+        for _ in range(unmatched_exp[row]):
+            lineno = exp_linenos[row].popleft()
+            diffs.append(f"  Expected row {lineno} not found in actual: {row}")
+
+    for row in sorted(unmatched_act):
+        for _ in range(unmatched_act[row]):
+            lineno = act_linenos[row].popleft()
+            diffs.append(f"  Actual row {lineno} not found in expected: {row}")
 
     return diffs
 
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 000000000..f8767b581
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,37 @@
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "diff_results: compare two CSVs",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${workspaceFolder}/.github/scripts/diff_results.py",
+      "args": [
+        "${input:expectedCsv}",
+        "${input:actualCsv}",
+        "${input:caseLabel}"
+      ],
+      "console": "integratedTerminal"
+    }
+  ],
+  "inputs": [
+    {
+      "id": "expectedCsv",
+      "type": "promptString",
+      "description": "Path to expected results CSV",
+      "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/expected.csv"
+    },
+    {
+      "id": "actualCsv",
+      "type": "promptString",
+      "description": "Path to actual results CSV",
+      "default": "${workspaceFolder}/Published/CORE-000001/negative/01/results/actual.csv"
+    },
+    {
+      "id": "caseLabel",
+      "type": "promptString",
+      "description": "Case label (e.g. negative/01)",
+      "default": "negative/01"
+    }
+  ]
+}

From 47c07a799083ea8481f6e3c3924e469946e60367 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 14:25:09 -0400
Subject: [PATCH 02/15] better listing of mismatches

---
 .github/scripts/diff_results.py | 54 ++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index ebb5c1717..3aba28c26 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -12,7 +12,7 @@
 
 import csv
 import sys
-from collections import Counter, defaultdict, deque
+from collections import Counter
 
 
 def load(path: str) -> list[tuple[int, tuple]]:
@@ -26,6 +26,19 @@ def load(path: str) -> list[tuple[int, tuple]]:
         ]
 
 
+def _filter_unmatched(
+    rows: list[tuple[int, tuple]], unmatched: Counter
+) -> list[tuple[int, tuple]]:
+    """Return rows (in original order) that belong to the unmatched set."""
+    remaining_counts = Counter(unmatched)
+    result = []
+    for lineno, row in rows:
+        if remaining_counts[row] > 0:
+            result.append((lineno, row))
+            remaining_counts[row] -= 1
+    return result
+
+
 def diff(expected_path: str, actual_path: str) -> list[str]:
     exp_rows = load(expected_path)
     act_rows = load(actual_path)
@@ -47,20 +60,9 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
             ]
         return []
 
-    # Build per-content queues of line numbers; consume matched rows first
-    # so remaining entries represent the truly unmatched lines.
-    exp_linenos: dict[tuple, deque[int]] = defaultdict(deque)
-    for lineno, row in exp_rows:
-        exp_linenos[row].append(lineno)
-
-    act_linenos: dict[tuple, deque[int]] = defaultdict(deque)
-    for lineno, row in act_rows:
-        act_linenos[row].append(lineno)
-
-    for row, count in matched.items():
-        for _ in range(count):
-            exp_linenos[row].popleft()
-            act_linenos[row].popleft()
+    # Rebuild remaining unmatched rows in their original file order.
+    remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
+    remaining_act = _filter_unmatched(act_rows, unmatched_act)
 
     diffs = []
     if len(exp_content) != len(act_content):
@@ -68,15 +70,19 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
             f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual"
         )
 
-    for row in sorted(unmatched_exp):
-        for _ in range(unmatched_exp[row]):
-            lineno = exp_linenos[row].popleft()
-            diffs.append(f"  Expected row {lineno} not found in actual: {row}")
-
-    for row in sorted(unmatched_act):
-        for _ in range(unmatched_act[row]):
-            lineno = act_linenos[row].popleft()
-            diffs.append(f"  Actual row {lineno} not found in expected: {row}")
+    # Merge both unmatched lists by post-filter index; exp before act when tied.
+    # Sort key: (post_filter_row, 0=Expected/1=Actual)
+    entries = [
+        (post_idx, 0, "Expected", src_lineno, row)
+        for post_idx, (src_lineno, row) in enumerate(remaining_exp, start=1)
+    ] + [
+        (post_idx, 1, "Actual", src_lineno, row)
+        for post_idx, (src_lineno, row) in enumerate(remaining_act, start=1)
+    ]
+    entries.sort(key=lambda e: (e[0], e[1]))
+
+    for _, _, label, src_lineno, row in entries:
+        diffs.append(f"  [{label:8}] Row {src_lineno}: {row}")
 
     return diffs
 

From 80313fd3aaaddd34931e688ee60b8e7d5d3cff77 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 16:38:39 -0400
Subject: [PATCH 03/15] better line matching

---
 .github/scripts/diff_results.py | 85 ++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 23 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 3aba28c26..53819900b 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -15,15 +15,16 @@
 from collections import Counter
 
 
-def load(path: str) -> list[tuple[int, tuple]]:
-    """Load CSV rows as (1-based line number, row tuple), preserving original order."""
+def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
+    """Return (header, [(1-based line number, row tuple)]), preserving original order."""
     with open(path, newline="") as f:
         reader = csv.DictReader(f)
-        header = reader.fieldnames or []
-        return [
+        header = list(reader.fieldnames or [])
+        rows = [
             (i, tuple(row[col] for col in header))
             for i, row in enumerate(reader, start=1)
         ]
+    return header, rows
 
 
 def _filter_unmatched(
@@ -39,19 +40,60 @@ def _filter_unmatched(
     return result
 
 
+def _similarity(a: tuple, b: tuple) -> float:
+    """Fraction of fields that match between two rows (0.0 – 1.0)."""
+    if not a and not b:
+        return 1.0
+    n = max(len(a), len(b))
+    matches = sum(x == y for x, y in zip(a, b))
+    return matches / n
+
+
+def _pair_closest(
+    exp_rows: list[tuple[int, tuple]],
+    act_rows: list[tuple[int, tuple]],
+) -> tuple[
+    list[tuple[tuple[int, tuple], tuple[int, tuple]]],  # paired (exp, act)
+    list[tuple[int, tuple]],  # unpaired expected
+    list[tuple[int, tuple]],  # unpaired actual
+]:
+    """Greedily pair rows from the smaller side with the closest match on the larger side."""
+    exp_is_smaller = len(exp_rows) <= len(act_rows)
+    smaller = exp_rows if exp_is_smaller else act_rows
+    remaining_larger = list(act_rows if exp_is_smaller else exp_rows)
+
+    pairs: list[tuple[tuple[int, tuple], tuple[int, tuple]]] = []
+    unpaired_smaller: list[tuple[int, tuple]] = []
+
+    for item in smaller:
+        if not remaining_larger:
+            unpaired_smaller.append(item)
+            continue
+        best_idx = max(
+            range(len(remaining_larger)),
+            key=lambda i: _similarity(item[1], remaining_larger[i][1]),
+        )
+        matched = remaining_larger.pop(best_idx)
+        pairs.append((item, matched) if exp_is_smaller else (matched, item))
+
+    unpaired_exp = unpaired_smaller if exp_is_smaller else remaining_larger
+    unpaired_act = remaining_larger if exp_is_smaller else unpaired_smaller
+    return pairs, unpaired_exp, unpaired_act
+
+
 def diff(expected_path: str, actual_path: str) -> list[str]:
-    exp_rows = load(expected_path)
-    act_rows = load(actual_path)
+    exp_header, exp_rows = load(expected_path)
+    _, act_rows = load(actual_path)
 
     exp_content = [row for _, row in exp_rows]
     act_content = [row for _, row in act_rows]
 
     exp_counter = Counter(exp_content)
     act_counter = Counter(act_content)
-    matched = exp_counter & act_counter  # rows present in both (min count)
+    matched = exp_counter & act_counter
 
-    unmatched_exp = exp_counter - matched  # rows only in expected
-    unmatched_act = act_counter - matched  # rows only in actual
+    unmatched_exp = exp_counter - matched
+    unmatched_act = act_counter - matched
 
     if not unmatched_exp and not unmatched_act:
         if exp_content != act_content:
@@ -60,7 +102,6 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
             ]
         return []
 
-    # Rebuild remaining unmatched rows in their original file order.
     remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
     remaining_act = _filter_unmatched(act_rows, unmatched_act)
 
@@ -70,19 +111,17 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
             f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual"
         )
 
-    # Merge both unmatched lists by post-filter index; exp before act when tied.
-    # Sort key: (post_filter_row, 0=Expected/1=Actual)
-    entries = [
-        (post_idx, 0, "Expected", src_lineno, row)
-        for post_idx, (src_lineno, row) in enumerate(remaining_exp, start=1)
-    ] + [
-        (post_idx, 1, "Actual", src_lineno, row)
-        for post_idx, (src_lineno, row) in enumerate(remaining_act, start=1)
-    ]
-    entries.sort(key=lambda e: (e[0], e[1]))
-
-    for _, _, label, src_lineno, row in entries:
-        diffs.append(f"  [{label:8}] Row {src_lineno}: {row}")
+    pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)
+
+    for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
+        diffs.append(f"  [Expected] Row {exp_lineno}: {exp_row}")
+        diffs.append(f"  [Actual  ] Row {act_lineno}: {act_row}")
+
+    for lineno, row in unpaired_exp:
+        diffs.append(f"  [Expected only] Row {lineno}: {row}")
+
+    for lineno, row in unpaired_act:
+        diffs.append(f"  [Actual only  ] Row {lineno}: {row}")
 
     return diffs
 

From cf4dfd051e99671f242d06d73dbc659d94318d70 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 16:48:35 -0400
Subject: [PATCH 04/15] markdownify

---
 .github/scripts/diff_results.py | 52 ++++++++++++++++++++++++++++++---
 1 file changed, 48 insertions(+), 4 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 53819900b..34ca35613 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -81,6 +81,36 @@ def _pair_closest(
     return pairs, unpaired_exp, unpaired_act
 
 
+def create_md_table(table_name, headers, records, property_getter=None):
+    """
+    Create a Markdown table with the given headers and records.
+
+    Args:
+        table_name: The title of the table
+        headers: List of column headers
+        records: List of records to include in the table
+        property_getter: Optional function to extract properties from records.
+                         If None, assumes records are dictionaries.
+    Returns:
+        String containing the formatted Markdown table
+    """
+    title = f"### {table_name}"
+    header = "| " + " | ".join(headers) + " |"
+    underline = "| " + " | ".join(["---" for _ in headers]) + " |"
+
+    if property_getter is None:
+
+        def property_getter(record, prop):
+            return str(record.get(prop, ""))
+
+    values = "\n".join(
+        "| " + " | ".join([property_getter(record, prop) for prop in headers]) + " |"
+        for record in records
+    )
+
+    return f"{title}\n\n{header}\n{underline}\n{values}"
+
+
 def diff(expected_path: str, actual_path: str) -> list[str]:
     exp_header, exp_rows = load(expected_path)
     _, act_rows = load(actual_path)
@@ -113,15 +143,29 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
 
     pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)
 
+    table_headers = ["Exp/Act", "Result Row"] + exp_header
+
+    records = []
     for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
-        diffs.append(f"  [Expected] Row {exp_lineno}: {exp_row}")
-        diffs.append(f"  [Actual  ] Row {act_lineno}: {act_row}")
+        exp_record = {"Exp/Act": "Expected", "Result Row": str(exp_lineno)}
+        act_record = {"Exp/Act": "Actual",   "Result Row": str(act_lineno)}
+        for col, ev, av in zip(exp_header, exp_row, act_row):
+            exp_record[col] = f"**{ev}**" if ev != av else ev
+            act_record[col] = f"**{av}**" if ev != av else av
+        records.append(exp_record)
+        records.append(act_record)
 
     for lineno, row in unpaired_exp:
-        diffs.append(f"  [Expected only] Row {lineno}: {row}")
+        record = {"Exp/Act": "Expected only", "Result Row": str(lineno)}
+        record.update(zip(exp_header, row))
+        records.append(record)
 
     for lineno, row in unpaired_act:
-        diffs.append(f"  [Actual only  ] Row {lineno}: {row}")
+        record = {"Exp/Act": "Actual only", "Result Row": str(lineno)}
+        record.update(zip(exp_header, row))
+        records.append(record)
+
+    diffs.append(create_md_table("Diff Results", table_headers, records))
 
     return diffs
 

From c20afe08acabde9ba47416d0cda84d48e0ecfd0a Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 17:16:33 -0400
Subject: [PATCH 05/15] format md table

---
 .github/scripts/diff_results.py | 42 ++++++++-------------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 34ca35613..c73a764b0 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -14,6 +14,8 @@
 import sys
 from collections import Counter
 
+from tabulate import tabulate
+
 
 def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
     """Return (header, [(1-based line number, row tuple)]), preserving original order."""
@@ -81,34 +83,10 @@ def _pair_closest(
     return pairs, unpaired_exp, unpaired_act
 
 
-def create_md_table(table_name, headers, records, property_getter=None):
-    """
-    Create a Markdown table with the given headers and records.
-
-    Args:
-        table_name: The title of the table
-        headers: List of column headers
-        records: List of records to include in the table
-        property_getter: Optional function to extract properties from records.
-                         If None, assumes records are dictionaries.
-    Returns:
-        String containing the formatted Markdown table
-    """
-    title = f"### {table_name}"
-    header = "| " + " | ".join(headers) + " |"
-    underline = "| " + " | ".join(["---" for _ in headers]) + " |"
-
-    if property_getter is None:
-
-        def property_getter(record, prop):
-            return str(record.get(prop, ""))
-
-    values = "\n".join(
-        "| " + " | ".join([property_getter(record, prop) for prop in headers]) + " |"
-        for record in records
-    )
-
-    return f"{title}\n\n{header}\n{underline}\n{values}"
+def _render_diff_table(table_name: str, headers: list[str], records: list[dict]) -> str:
+    rows = [[r.get(h, "") for h in headers] for r in records]
+    table = tabulate(rows, headers=headers, tablefmt="github")
+    return f"### {table_name}\n\n{table}"
 
 
 def diff(expected_path: str, actual_path: str) -> list[str]:
@@ -148,10 +126,10 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
     records = []
     for (exp_lineno, exp_row), (act_lineno, act_row) in pairs:
         exp_record = {"Exp/Act": "Expected", "Result Row": str(exp_lineno)}
-        act_record = {"Exp/Act": "Actual",   "Result Row": str(act_lineno)}
+        act_record = {"Exp/Act": "Actual", "Result Row": str(act_lineno)}
         for col, ev, av in zip(exp_header, exp_row, act_row):
-            exp_record[col] = f"**{ev}**" if ev != av else ev
-            act_record[col] = f"**{av}**" if ev != av else av
+            exp_record[col] = f"**{ev}**" if ev != av and ev != "" else ev
+            act_record[col] = f"**{av}**" if ev != av and av != "" else av
         records.append(exp_record)
         records.append(act_record)
 
@@ -165,7 +143,7 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
         record.update(zip(exp_header, row))
         records.append(record)
 
-    diffs.append(create_md_table("Diff Results", table_headers, records))
+    diffs.append(_render_diff_table("Diff Results", table_headers, records))
 
     return diffs
 

From 230e03268585a4c38afc1339b56542c1e8c1ec14 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 17:53:44 -0400
Subject: [PATCH 06/15] newline fixes

---
 .github/scripts/diff_results.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index c73a764b0..9526cbf94 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -83,12 +83,6 @@ def _pair_closest(
     return pairs, unpaired_exp, unpaired_act
 
 
-def _render_diff_table(table_name: str, headers: list[str], records: list[dict]) -> str:
-    rows = [[r.get(h, "") for h in headers] for r in records]
-    table = tabulate(rows, headers=headers, tablefmt="github")
-    return f"### {table_name}\n\n{table}"
-
-
 def diff(expected_path: str, actual_path: str) -> list[str]:
     exp_header, exp_rows = load(expected_path)
     _, act_rows = load(actual_path)
@@ -116,7 +110,7 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
     diffs = []
     if len(exp_content) != len(act_content):
         diffs.append(
-            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual"
+            f"Row count changed: {len(exp_content)} expected -> {len(act_content)} actual\n"
         )
 
     pairs, unpaired_exp, unpaired_act = _pair_closest(remaining_exp, remaining_act)
@@ -143,7 +137,12 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
         record.update(zip(exp_header, row))
         records.append(record)
 
-    diffs.append(_render_diff_table("Diff Results", table_headers, records))
+    # Cells are raw CSV text: disable_numparse stops tabulate from reformatting
+    # float-looking values (e.g. "1.50" -> "1.5"); the table must show file text.
+    cells = [[r.get(h, "") for h in table_headers] for r in records]
+    diffs.append(
+        tabulate(cells, headers=table_headers, tablefmt="github", disable_numparse=True)
+    )
 
     return diffs
 
@@ -165,7 +164,7 @@ def main():
         sys.exit(2)
 
     if diffs:
-        print(f"DIFF_FOUND for {case_label}:")
+        print(f"### DIFF_FOUND for {case_label}:\n")
         for line in diffs:
             print(line)
         sys.exit(1)

From 40bea299f17fc1624aae5b4afc1b9eeb2b25a5d2 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Thu, 18 Jun 2026 18:04:41 -0400
Subject: [PATCH 07/15] header might mess up summary

---
 .github/scripts/diff_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 9526cbf94..31aed64c2 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -164,7 +164,7 @@ def main():
         sys.exit(2)
 
     if diffs:
-        print(f"### DIFF_FOUND for {case_label}:\n")
+        print(f"DIFF_FOUND for {case_label}:\n")
         for line in diffs:
             print(line)
         sys.exit(1)

From 60fa202588abad4e3ac325022a9c95c3107db856 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 11:29:43 -0400
Subject: [PATCH 08/15] fix crlf comparison

---
 .github/scripts/diff_results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 31aed64c2..3a970f472 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -19,11 +19,11 @@
 
 def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
     """Return (header, [(1-based line number, row tuple)]), preserving original order."""
-    with open(path, newline="") as f:
+    with open(path, newline="", encoding="utf-8") as f:
         reader = csv.DictReader(f)
         header = list(reader.fieldnames or [])
         rows = [
-            (i, tuple(row[col] for col in header))
+            (i, tuple(row[col].strip("\r") for col in header))
             for i, row in enumerate(reader, start=1)
         ]
     return header, rows

From b40e56018731e69f4ee133ed558cd31e13b4b9b2 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 11:54:38 -0400
Subject: [PATCH 09/15] issue unrelated to crlf. reverting

---
 .github/scripts/diff_results.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 3a970f472..31aed64c2 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -19,11 +19,11 @@
 
 def load(path: str) -> tuple[list[str], list[tuple[int, tuple]]]:
     """Return (header, [(1-based line number, row tuple)]), preserving original order."""
-    with open(path, newline="", encoding="utf-8") as f:
+    with open(path, newline="") as f:
         reader = csv.DictReader(f)
         header = list(reader.fieldnames or [])
         rows = [
-            (i, tuple(row[col].strip("\r") for col in header))
+            (i, tuple(row[col] for col in header))
             for i, row in enumerate(reader, start=1)
         ]
     return header, rows

From 1c1710ab6e3a37a78f4b2879654592f56c34f3ae Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 12:19:28 -0400
Subject: [PATCH 10/15] update wording

---
 .github/scripts/run_validation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
index 74df066be..a9c85c616 100644
--- a/.github/scripts/run_validation.sh
+++ b/.github/scripts/run_validation.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 # run_validation.sh — iterates all positive/ and negative/ test cases for a rule,
-# runs the CORE engine against each, converts JSON output to results.csv,
+# runs the CORE engine against each, prints output to results.csv,
 # diffs against any expected results.csv, and writes two outputs:
 #   - $REPO_ROOT/validation_report.md  (detailed markdown, legacy/fallback)
 #   - $REPO_ROOT/case_results.jsonl    (one JSON line per test case for the summary table)

From 7e3395e0f8b5f3d751cea02f4a999eb861078374 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 12:42:12 -0400
Subject: [PATCH 11/15] preserve actual.csv

---
 .github/scripts/run_validation.sh | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
index a9c85c616..23def8f2b 100644
--- a/.github/scripts/run_validation.sh
+++ b/.github/scripts/run_validation.sh
@@ -136,8 +136,8 @@ for TEST_TYPE in positive negative; do
     # Back up expected results.csv before the engine run (only if it exists)
     EXPECTED_RESULTS=""
     if [ "$MISSING_BASELINE" = false ]; then
-      cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/results.expected.csv"
-      EXPECTED_RESULTS="$RESULTS_DIR/results.expected.csv"
+      cp "$RESULTS_DIR/results.csv" "$RESULTS_DIR/expected.csv"
+      EXPECTED_RESULTS="$RESULTS_DIR/expected.csv"
     fi
 
     ENGINE_ARGS=(
@@ -205,6 +205,9 @@ for TEST_TYPE in positive negative; do
       "$EXPECTED_RESULTS" "$ACTUAL_CSV" "$CASE_LABEL" \
       > "$DIFF_LOG" 2>&1 || DIFF_EXIT=$?
 
+    # Preserve actual output before restoring the baseline
+    cp "$ACTUAL_CSV" "$RESULTS_DIR/actual.csv"
+
     if [ $DIFF_EXIT -eq 0 ]; then
       echo "  PASSED — actual results match expected baseline"
       {

From 6ccdf4b43a700ff2d826047fe6d614a7e0c7fbc5 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 13:03:38 -0400
Subject: [PATCH 12/15] ignore row order changes

---
 .github/scripts/diff_results.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index 31aed64c2..d984258df 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -97,12 +97,13 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
     unmatched_exp = exp_counter - matched
     unmatched_act = act_counter - matched
 
-    if not unmatched_exp and not unmatched_act:
-        if exp_content != act_content:
-            return [
-                "Row order changed: rows are identical but appear in a different order"
-            ]
-        return []
+    # Uncomment this if we care about row order changes
+    # if not unmatched_exp and not unmatched_act:
+    #     if exp_content != act_content:
+    #         return [
+    #             "Row order changed: rows are identical but appear in a different order"
+    #         ]
+    #     return []
 
     remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
     remaining_act = _filter_unmatched(act_rows, unmatched_act)

From 73b0e868b8030fdca8d385fef44e5b0f06a7b5f8 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 13:11:09 -0400
Subject: [PATCH 13/15] keep expected for saving

---
 .github/scripts/run_validation.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
index 23def8f2b..04c08d2da 100644
--- a/.github/scripts/run_validation.sh
+++ b/.github/scripts/run_validation.sh
@@ -157,7 +157,7 @@ for TEST_TYPE in positive negative; do
     (cd "$ENGINE_DIR" && $PYTHON_CMD core.py validate "${ENGINE_ARGS[@]}") \
       2>&1 | tee "$ENGINE_LOG" || ENGINE_EXIT=${PIPESTATUS[0]}
 
-    ACTUAL_CSV="$RESULTS_DIR/results.csv"
+    ACTUAL_CSV="$RESULTS_DIR/actual.csv"
 
     if [ $ENGINE_EXIT -ne 0 ] || [ ! -f "$ACTUAL_CSV" ]; then
       echo "  ERROR: engine failed or produced no output (exit $ENGINE_EXIT)"
@@ -175,7 +175,7 @@ for TEST_TYPE in positive negative; do
       emit_result "false" "" "" "false" "" "$ENGINE_LOG"
       FAILED_CASES=$((FAILED_CASES + 1))
       OVERALL_SUCCESS=false
-      [ "$MISSING_BASELINE" = false ] && mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
+      [ "$MISSING_BASELINE" = false ] && cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
       continue
     fi
 
@@ -234,7 +234,7 @@ for TEST_TYPE in positive negative; do
       OVERALL_SUCCESS=false
     fi
 
-    mv "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
+    cp "$EXPECTED_RESULTS" "$RESULTS_DIR/results.csv"
 
   done < <(find "$TYPE_DIR" -mindepth 1 -maxdepth 1 -type d -print0 | sort -z)
 done

From 2a0e0d078fb6ee081ac11067dcd887cad8123133 Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 13:21:49 -0400
Subject: [PATCH 14/15] fix actual results starting location

---
 .github/scripts/run_validation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/scripts/run_validation.sh b/.github/scripts/run_validation.sh
index 04c08d2da..091fa2d32 100644
--- a/.github/scripts/run_validation.sh
+++ b/.github/scripts/run_validation.sh
@@ -157,7 +157,7 @@ for TEST_TYPE in positive negative; do
     (cd "$ENGINE_DIR" && $PYTHON_CMD core.py validate "${ENGINE_ARGS[@]}") \
       2>&1 | tee "$ENGINE_LOG" || ENGINE_EXIT=${PIPESTATUS[0]}
 
-    ACTUAL_CSV="$RESULTS_DIR/actual.csv"
+    ACTUAL_CSV="$RESULTS_DIR/results.csv"
 
     if [ $ENGINE_EXIT -ne 0 ] || [ ! -f "$ACTUAL_CSV" ]; then
       echo "  ERROR: engine failed or produced no output (exit $ENGINE_EXIT)"

From 3097582d8dc95ae45728677aa9265953447c7a1b Mon Sep 17 00:00:00 2001
From: Gerry Campion <gcampion@cdisc.org>
Date: Mon, 22 Jun 2026 14:04:32 -0400
Subject: [PATCH 15/15] commented too much

---
 .github/scripts/diff_results.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/scripts/diff_results.py b/.github/scripts/diff_results.py
index d984258df..61fa78500 100644
--- a/.github/scripts/diff_results.py
+++ b/.github/scripts/diff_results.py
@@ -97,13 +97,13 @@ def diff(expected_path: str, actual_path: str) -> list[str]:
     unmatched_exp = exp_counter - matched
     unmatched_act = act_counter - matched
 
-    # Uncomment this if we care about row order changes
-    # if not unmatched_exp and not unmatched_act:
-    #     if exp_content != act_content:
-    #         return [
-    #             "Row order changed: rows are identical but appear in a different order"
-    #         ]
-    #     return []
+    if not unmatched_exp and not unmatched_act:
+        # Uncomment this if we care about row order changes
+        # if exp_content != act_content:
+        #     return [
+        #         "Row order changed: rows are identical but appear in a different order"
+        #     ]
+        return []
 
     remaining_exp = _filter_unmatched(exp_rows, unmatched_exp)
     remaining_act = _filter_unmatched(act_rows, unmatched_act)