From 7d02e04c98c47feca6048aa66900c46d36e59ce5 Mon Sep 17 00:00:00 2001
From: mfwolffe
Date: Thu, 30 Apr 2026 18:24:08 -0400
Subject: [PATCH] Normalize probe markers in replay snapshot instruction rows

---
 src/dlm/replay/store.py         |  5 ++++-
 tests/unit/replay/test_store.py | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/dlm/replay/store.py b/src/dlm/replay/store.py
index 2e6539b7..8676c634 100644
--- a/src/dlm/replay/store.py
+++ b/src/dlm/replay/store.py
@@ -183,8 +183,11 @@ def _snapshot_to_rows(snap: SectionSnapshot) -> list[Row]:
 
     if snap.section_type == "instruction":
         from dlm.data.instruction_parser import parse_instruction_body
+        from dlm.data.sections_to_rows import _normalize_probe_markers
 
-        pairs = parse_instruction_body(snap.content, section_id=snap.section_id)
+        pairs = parse_instruction_body(
+            _normalize_probe_markers(snap.content), section_id=snap.section_id
+        )
         return [
             {
                 "messages": [
diff --git a/tests/unit/replay/test_store.py b/tests/unit/replay/test_store.py
index b6b46706..1335d36c 100644
--- a/tests/unit/replay/test_store.py
+++ b/tests/unit/replay/test_store.py
@@ -70,6 +70,16 @@ def test_instruction_expands_to_messages_rows(self, tmp_path: Path) -> None:
         assert all("messages" in r for r in rows)
         assert rows[0]["messages"][0]["content"] == "q1"
 
+    def test_instruction_probe_marker_normalized(self, tmp_path: Path) -> None:
+        """Replay snapshots with `### Q !probe` headers parse like plain Q/A."""
+        s = _store(tmp_path)
+        body = "### Q !probe\nq1\n### A\na1"
+        s.append(_snap("a" * 16, "instruction", body, added=datetime(2026, 1, 1)))
+        rows = s.sample_rows(k=10, now=datetime(2026, 4, 1), rng=random.Random(0))
+        assert len(rows) == 1
+        assert rows[0]["messages"][0]["content"] == "q1"
+        assert rows[0]["messages"][1]["content"] == "a1"
+
     def test_preference_expands_to_pref_rows(self, tmp_path: Path) -> None:
         s = _store(tmp_path)
         body = "### Prompt\np\n### Chosen\nc\n### Rejected\nr"