From ed8240e44c4e657f73649f30e76dedf0c9813153 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:29:53 -0400
Subject: [PATCH 01/46] Promote cli/commands.py to cli/commands/ package

---
 src/dlm/cli/{commands.py => commands/__init__.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/dlm/cli/{commands.py => commands/__init__.py} (100%)

diff --git a/src/dlm/cli/commands.py b/src/dlm/cli/commands/__init__.py
similarity index 100%
rename from src/dlm/cli/commands.py
rename to src/dlm/cli/commands/__init__.py

From 40eb9133a3640b5eeb94e17ccd727754449b87e2 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:32:10 -0400
Subject: [PATCH 02/46] Extract doctor_cmd into cli/commands/doctor.py

---
 src/dlm/cli/commands/__init__.py | 17 ++---------------
 src/dlm/cli/commands/doctor.py   | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 15 deletions(-)
 create mode 100644 src/dlm/cli/commands/doctor.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 6bac022..fe051ae 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -17,6 +17,8 @@
 
 import typer
 
+from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
+
 if TYPE_CHECKING:
     from datetime import timedelta
 
@@ -2718,21 +2720,6 @@ def _parse_since_arg(since: str, console: object) -> timedelta:
     raise typer.Exit(code=2)
 
 
-def doctor_cmd(
-    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable output.")] = False,
-) -> None:
-    """Inspect hardware and print the resolved training plan."""
-    import json
-
-    from dlm.hardware import doctor, render_text
-
-    result = doctor()
-    if json_out:
-        typer.echo(json.dumps(result.to_dict(), indent=2, default=str))
-    else:
-        typer.echo(render_text(result))
-
-
 def show_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
     json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
diff --git a/src/dlm/cli/commands/doctor.py b/src/dlm/cli/commands/doctor.py
new file mode 100644
index 0000000..5caac88
--- /dev/null
+++ b/src/dlm/cli/commands/doctor.py
@@ -0,0 +1,22 @@
+"""`dlm doctor` — inspect hardware and print the resolved training plan."""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+import typer
+
+
+def doctor_cmd(
+    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable output.")] = False,
+) -> None:
+    """Inspect hardware and print the resolved training plan."""
+    import json
+
+    from dlm.hardware import doctor, render_text
+
+    result = doctor()
+    if json_out:
+        typer.echo(json.dumps(result.to_dict(), indent=2, default=str))
+    else:
+        typer.echo(render_text(result))

From a7181e830d6ef9e3f35b0cba8db05511e488ef07 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:33:15 -0400
Subject: [PATCH 03/46] Extract migrate_cmd into cli/commands/migrate.py

---
 src/dlm/cli/commands/__init__.py | 40 +--------------------------
 src/dlm/cli/commands/migrate.py  | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 39 deletions(-)
 create mode 100644 src/dlm/cli/commands/migrate.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index fe051ae..557a85d 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -18,6 +18,7 @@
 import typer
 
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
+from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 
 if TYPE_CHECKING:
     from datetime import timedelta
@@ -3209,45 +3210,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-def migrate_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to migrate.")],
-    dry_run: Annotated[bool, typer.Option("--dry-run")] = False,
-    no_backup: Annotated[bool, typer.Option("--no-backup")] = False,
-) -> None:
-    """Migrate a .dlm frontmatter to the current schema version."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.migrate import migrate_file
-
-    console = Console(stderr=True)
-
-    try:
-        result = migrate_file(path, dry_run=dry_run, no_backup=no_backup)
-    except DlmParseError as exc:
-        console.print(f"[red]migrate:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if not result.applied:
-        console.print(
-            f"[green]migrate:[/green] {path} already at v{result.target_version} "
-            "(no migrations needed)."
-        )
-        return
-
-    applied_str = " → ".join(f"v{v}" for v in (*result.applied, result.target_version))
-    if dry_run:
-        console.print(
-            f"[yellow]dry-run:[/yellow] {path} would migrate {applied_str} "
-            "(re-run without --dry-run to apply)."
-        )
-        return
-
-    if result.backup_path is not None:
-        console.print(f"[dim]backup:[/dim]  {result.backup_path}")
-    console.print(f"[green]migrated:[/green] {path} {applied_str}")
-
-
 def templates_list_cmd(
     json_out: Annotated[
         bool,
diff --git a/src/dlm/cli/commands/migrate.py b/src/dlm/cli/commands/migrate.py
new file mode 100644
index 0000000..bc1f035
--- /dev/null
+++ b/src/dlm/cli/commands/migrate.py
@@ -0,0 +1,47 @@
+"""`dlm migrate` — migrate a .dlm frontmatter to the current schema version."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def migrate_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to migrate.")],
+    dry_run: Annotated[bool, typer.Option("--dry-run")] = False,
+    no_backup: Annotated[bool, typer.Option("--no-backup")] = False,
+) -> None:
+    """Migrate a .dlm frontmatter to the current schema version."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.migrate import migrate_file
+
+    console = Console(stderr=True)
+
+    try:
+        result = migrate_file(path, dry_run=dry_run, no_backup=no_backup)
+    except DlmParseError as exc:
+        console.print(f"[red]migrate:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if not result.applied:
+        console.print(
+            f"[green]migrate:[/green] {path} already at v{result.target_version} "
+            "(no migrations needed)."
+        )
+        return
+
+    applied_str = " → ".join(f"v{v}" for v in (*result.applied, result.target_version))
+    if dry_run:
+        console.print(
+            f"[yellow]dry-run:[/yellow] {path} would migrate {applied_str} "
+            "(re-run without --dry-run to apply)."
+        )
+        return
+
+    if result.backup_path is not None:
+        console.print(f"[dim]backup:[/dim]  {result.backup_path}")
+    console.print(f"[green]migrated:[/green] {path} {applied_str}")

From 1d3b779d7d9470d73b93517fe44bed919820fdd8 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:34:02 -0400
Subject: [PATCH 04/46] Extract pack_cmd into cli/commands/pack.py

---
 src/dlm/cli/commands/__init__.py | 65 +---------------------------
 src/dlm/cli/commands/pack.py     | 72 ++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 64 deletions(-)
 create mode 100644 src/dlm/cli/commands/pack.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 557a85d..c2399c9 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -19,6 +19,7 @@
 
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
+from dlm.cli.commands.pack import pack_cmd as pack_cmd
 
 if TYPE_CHECKING:
     from datetime import timedelta
@@ -2192,70 +2193,6 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def pack_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to pack.")],
-    out: Annotated[
-        Path | None,
-        typer.Option("--out", help="Output .dlm.pack path."),
-    ] = None,
-    include_exports: Annotated[
-        bool,
-        typer.Option("--include-exports", help="Bundle all GGUF exports into the pack."),
-    ] = False,
-    include_base: Annotated[
-        bool,
-        typer.Option(
-            "--include-base",
-            help="Bundle the base model snapshot (license rules still apply).",
-        ),
-    ] = False,
-    include_logs: Annotated[
-        bool,
-        typer.Option("--include-logs", help="Bundle per-run JSONL logs."),
-    ] = False,
-    licensee: Annotated[
-        str | None,
-        typer.Option(
-            "--i-am-the-licensee",
-            help="URL acknowledging separate acceptance of a non-redistributable base (required for --include-base on gated models).",
-        ),
-    ] = None,
-) -> None:
-    """Produce a portable .dlm.pack bundle."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.pack.errors import BaseLicenseRefusedError, PackError
-    from dlm.pack.packer import pack
-
-    console = Console(stderr=True)
-
-    try:
-        result = pack(
-            path,
-            out=out,
-            include_exports=include_exports,
-            include_base=include_base,
-            include_logs=include_logs,
-            licensee_acceptance_url=licensee,
-        )
-    except BaseLicenseRefusedError as exc:
-        console.print(f"[red]pack:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except PackError as exc:
-        console.print(f"[red]pack:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except DlmParseError as exc:
-        console.print(f"[red]parse:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    size_mb = result.bytes_written / (1024 * 1024)
-    console.print(
-        f"[green]packed:[/green] {result.path} "
-        f"({size_mb:.2f} MB, content_type={result.content_type})"
-    )
-
-
 def unpack_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm.pack to install.")],
     force: Annotated[
diff --git a/src/dlm/cli/commands/pack.py b/src/dlm/cli/commands/pack.py
new file mode 100644
index 0000000..78f033b
--- /dev/null
+++ b/src/dlm/cli/commands/pack.py
@@ -0,0 +1,72 @@
+"""`dlm pack` — produce a portable .dlm.pack bundle."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def pack_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to pack.")],
+    out: Annotated[
+        Path | None,
+        typer.Option("--out", help="Output .dlm.pack path."),
+    ] = None,
+    include_exports: Annotated[
+        bool,
+        typer.Option("--include-exports", help="Bundle all GGUF exports into the pack."),
+    ] = False,
+    include_base: Annotated[
+        bool,
+        typer.Option(
+            "--include-base",
+            help="Bundle the base model snapshot (license rules still apply).",
+        ),
+    ] = False,
+    include_logs: Annotated[
+        bool,
+        typer.Option("--include-logs", help="Bundle per-run JSONL logs."),
+    ] = False,
+    licensee: Annotated[
+        str | None,
+        typer.Option(
+            "--i-am-the-licensee",
+            help="URL acknowledging separate acceptance of a non-redistributable base (required for --include-base on gated models).",
+        ),
+    ] = None,
+) -> None:
+    """Produce a portable .dlm.pack bundle."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.pack.errors import BaseLicenseRefusedError, PackError
+    from dlm.pack.packer import pack
+
+    console = Console(stderr=True)
+
+    try:
+        result = pack(
+            path,
+            out=out,
+            include_exports=include_exports,
+            include_base=include_base,
+            include_logs=include_logs,
+            licensee_acceptance_url=licensee,
+        )
+    except BaseLicenseRefusedError as exc:
+        console.print(f"[red]pack:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except PackError as exc:
+        console.print(f"[red]pack:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except DlmParseError as exc:
+        console.print(f"[red]parse:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    size_mb = result.bytes_written / (1024 * 1024)
+    console.print(
+        f"[green]packed:[/green] {result.path} "
+        f"({size_mb:.2f} MB, content_type={result.content_type})"
+    )

From 7b71500f566ae254e90736942fed4c6c3920e90c Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:34:36 -0400
Subject: [PATCH 05/46] Extract unpack_cmd into cli/commands/unpack.py

---
 src/dlm/cli/commands/__init__.py | 48 +---------------------------
 src/dlm/cli/commands/unpack.py   | 55 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 47 deletions(-)
 create mode 100644 src/dlm/cli/commands/unpack.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index c2399c9..e1ca038 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -20,6 +20,7 @@
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
+from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 
 if TYPE_CHECKING:
     from datetime import timedelta
@@ -2193,53 +2194,6 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def unpack_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm.pack to install.")],
-    force: Annotated[
-        bool,
-        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
-    ] = False,
-    out: Annotated[
-        Path | None,
-        typer.Option(
-            "--out", help="Directory to place the restored .dlm (default: alongside the pack)."
-        ),
-    ] = None,
-) -> None:
-    """Install a .dlm.pack into the local store."""
-    from rich.console import Console
-
-    from dlm.pack.errors import (
-        PackFormatVersionError,
-        PackIntegrityError,
-        PackLayoutError,
-    )
-    from dlm.pack.unpacker import unpack
-
-    console = Console(stderr=True)
-
-    try:
-        result = unpack(path, force=force, out_dir=out)
-    except PackFormatVersionError as exc:
-        console.print(f"[red]unpack:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except PackIntegrityError as exc:
-        console.print(f"[red]unpack:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except PackLayoutError as exc:
-        console.print(f"[red]unpack:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    console.print(f"[green]unpacked:[/green] {result.dlm_path}")
-    console.print(f"  store:  {result.store_path}")
-    console.print(f"  dlm_id: {result.dlm_id}")
-    if result.applied_migrations:
-        steps = " → ".join(
-            f"v{v}" for v in (*result.applied_migrations, result.header.pack_format_version + 1)
-        )
-        console.print(f"  migrated: {steps}")
-
-
 def verify_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm.pack to verify.")],
     trust_on_first_use: Annotated[
diff --git a/src/dlm/cli/commands/unpack.py b/src/dlm/cli/commands/unpack.py
new file mode 100644
index 0000000..df71939
--- /dev/null
+++ b/src/dlm/cli/commands/unpack.py
@@ -0,0 +1,55 @@
+"""`dlm unpack` — install a .dlm.pack into the local store."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def unpack_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm.pack to install.")],
+    force: Annotated[
+        bool,
+        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
+    ] = False,
+    out: Annotated[
+        Path | None,
+        typer.Option(
+            "--out", help="Directory to place the restored .dlm (default: alongside the pack)."
+        ),
+    ] = None,
+) -> None:
+    """Install a .dlm.pack into the local store."""
+    from rich.console import Console
+
+    from dlm.pack.errors import (
+        PackFormatVersionError,
+        PackIntegrityError,
+        PackLayoutError,
+    )
+    from dlm.pack.unpacker import unpack
+
+    console = Console(stderr=True)
+
+    try:
+        result = unpack(path, force=force, out_dir=out)
+    except PackFormatVersionError as exc:
+        console.print(f"[red]unpack:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except PackIntegrityError as exc:
+        console.print(f"[red]unpack:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except PackLayoutError as exc:
+        console.print(f"[red]unpack:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    console.print(f"[green]unpacked:[/green] {result.dlm_path}")
+    console.print(f"  store:  {result.store_path}")
+    console.print(f"  dlm_id: {result.dlm_id}")
+    if result.applied_migrations:
+        steps = " → ".join(
+            f"v{v}" for v in (*result.applied_migrations, result.header.pack_format_version + 1)
+        )
+        console.print(f"  migrated: {steps}")

From 3a21000f76ce07a96bb5ed36effa9bd9833d998a Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:35:23 -0400
Subject: [PATCH 06/46] Extract verify_cmd into cli/commands/verify.py

---
 src/dlm/cli/commands/__init__.py | 106 +----------------------------
 src/dlm/cli/commands/verify.py   | 113 +++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+), 105 deletions(-)
 create mode 100644 src/dlm/cli/commands/verify.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index e1ca038..a1e6aa0 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -21,6 +21,7 @@
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
+from dlm.cli.commands.verify import verify_cmd as verify_cmd
 
 if TYPE_CHECKING:
     from datetime import timedelta
@@ -2194,111 +2195,6 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def verify_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm.pack to verify.")],
-    trust_on_first_use: Annotated[
-        bool,
-        typer.Option(
-            "--trust-on-first-use",
-            help=(
-                "Record the signer's public key under ~/.dlm/trusted-keys/ "
-                "on first verify. Without this flag an unknown signer is "
-                "rejected with exit code 2."
-            ),
-        ),
-    ] = False,
-    trusted_keys_dir: Annotated[
-        Path | None,
-        typer.Option(
-            "--trusted-keys-dir",
-            help="Override ~/.dlm/trusted-keys/ (useful for scripted verify).",
-            hidden=True,
-        ),
-    ] = None,
-) -> None:
-    """Verify a .dlm.pack's provenance chain.
-
-    Exit codes: 0 verified, 1 broken chain (or missing provenance),
-    2 untrusted signer, 3 signature rejected.
-    """
-    from rich.console import Console
-
-    from dlm.pack.errors import PackLayoutError
-    from dlm.pack.layout import PROVENANCE_FILENAME
-    from dlm.pack.unpacker import read_pack_member_bytes
-    from dlm.share.errors import ShareError
-    from dlm.share.provenance import (
-        ProvenanceChainBroken,
-        ProvenanceSchemaError,
-        UnknownSignerError,
-        load_provenance_json,
-        verify_provenance,
-    )
-
-    console = Console(stderr=True)
-    keys_dir = trusted_keys_dir or (Path.home() / ".dlm" / "trusted-keys")
-
-    try:
-        payload = read_pack_member_bytes(path, PROVENANCE_FILENAME)
-    except PackLayoutError as exc:
-        console.print(f"[red]verify:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except OSError as exc:
-        console.print(f"[red]verify:[/red] cannot read {path}: {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if payload is None:
-        console.print(f"[red]verify:[/red] {path} is unsigned — no {PROVENANCE_FILENAME} inside.")
-        raise typer.Exit(code=1)
-
-    # Write the in-pack JSON to a temp file so `load_provenance_json`
-    # can use its normal filesystem path. Keeps the parser single-
-    # sourced and the error messages consistent with the filesystem
-    # call-site.
-    import tempfile
-
-    with tempfile.NamedTemporaryFile("wb", suffix=".json", delete=False) as fh:
-        fh.write(payload)
-        tmp_path = Path(fh.name)
-    try:
-        provenance = load_provenance_json(tmp_path)
-    except ProvenanceSchemaError as exc:
-        console.print(f"[red]verify:[/red] malformed provenance.json: {exc}")
-        raise typer.Exit(code=1) from exc
-    finally:
-        tmp_path.unlink(missing_ok=True)
-
-    try:
-        result = verify_provenance(
-            provenance,
-            trusted_keys_dir=keys_dir,
-            tofu=trust_on_first_use,
-        )
-    except UnknownSignerError as exc:
-        console.print(f"[red]verify:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    except ProvenanceChainBroken as exc:
-        console.print(f"[red]verify:[/red] chain broken: {exc}")
-        raise typer.Exit(code=1) from exc
-    except ShareError as exc:
-        console.print(f"[red]verify:[/red] signature rejected: {exc}")
-        raise typer.Exit(code=3) from exc
-
-    out = Console()
-    out.print(f"[green]verified:[/green] {path.name}")
-    out.print(f"  signer:          {result.signer_fingerprint}")
-    out.print(f"  trusted-key:     {result.trusted_key_path}")
-    out.print(f"  adapter_sha256:  {provenance.adapter_sha256[:12]}...")
-    out.print(f"  base_revision:   {provenance.base_revision}")
-    out.print(f"  corpus_root:     {provenance.corpus_root_sha256[:12]}...")
-    out.print(f"  signed_at:       {provenance.signed_at}")
-    if result.tofu_recorded:
-        out.print(
-            f"[yellow]note:[/yellow] recorded new trust entry "
-            f"at {result.trusted_key_path}; subsequent verifies use strict mode."
-        )
-
-
 def repl_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file to start a REPL against.")],
     adapter: Annotated[
diff --git a/src/dlm/cli/commands/verify.py b/src/dlm/cli/commands/verify.py
new file mode 100644
index 0000000..67074aa
--- /dev/null
+++ b/src/dlm/cli/commands/verify.py
@@ -0,0 +1,113 @@
+"""`dlm verify` — verify a .dlm.pack's provenance chain."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def verify_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm.pack to verify.")],
+    trust_on_first_use: Annotated[
+        bool,
+        typer.Option(
+            "--trust-on-first-use",
+            help=(
+                "Record the signer's public key under ~/.dlm/trusted-keys/ "
+                "on first verify. Without this flag an unknown signer is "
+                "rejected with exit code 2."
+            ),
+        ),
+    ] = False,
+    trusted_keys_dir: Annotated[
+        Path | None,
+        typer.Option(
+            "--trusted-keys-dir",
+            help="Override ~/.dlm/trusted-keys/ (useful for scripted verify).",
+            hidden=True,
+        ),
+    ] = None,
+) -> None:
+    """Verify a .dlm.pack's provenance chain.
+
+    Exit codes: 0 verified, 1 broken chain (or missing provenance),
+    2 untrusted signer, 3 signature rejected.
+    """
+    from rich.console import Console
+
+    from dlm.pack.errors import PackLayoutError
+    from dlm.pack.layout import PROVENANCE_FILENAME
+    from dlm.pack.unpacker import read_pack_member_bytes
+    from dlm.share.errors import ShareError
+    from dlm.share.provenance import (
+        ProvenanceChainBroken,
+        ProvenanceSchemaError,
+        UnknownSignerError,
+        load_provenance_json,
+        verify_provenance,
+    )
+
+    console = Console(stderr=True)
+    keys_dir = trusted_keys_dir or (Path.home() / ".dlm" / "trusted-keys")
+
+    try:
+        payload = read_pack_member_bytes(path, PROVENANCE_FILENAME)
+    except PackLayoutError as exc:
+        console.print(f"[red]verify:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except OSError as exc:
+        console.print(f"[red]verify:[/red] cannot read {path}: {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if payload is None:
+        console.print(f"[red]verify:[/red] {path} is unsigned — no {PROVENANCE_FILENAME} inside.")
+        raise typer.Exit(code=1)
+
+    # Write the in-pack JSON to a temp file so `load_provenance_json`
+    # can use its normal filesystem path. Keeps the parser single-
+    # sourced and the error messages consistent with the filesystem
+    # call-site.
+    import tempfile
+
+    with tempfile.NamedTemporaryFile("wb", suffix=".json", delete=False) as fh:
+        fh.write(payload)
+        tmp_path = Path(fh.name)
+    try:
+        provenance = load_provenance_json(tmp_path)
+    except ProvenanceSchemaError as exc:
+        console.print(f"[red]verify:[/red] malformed provenance.json: {exc}")
+        raise typer.Exit(code=1) from exc
+    finally:
+        tmp_path.unlink(missing_ok=True)
+
+    try:
+        result = verify_provenance(
+            provenance,
+            trusted_keys_dir=keys_dir,
+            tofu=trust_on_first_use,
+        )
+    except UnknownSignerError as exc:
+        console.print(f"[red]verify:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    except ProvenanceChainBroken as exc:
+        console.print(f"[red]verify:[/red] chain broken: {exc}")
+        raise typer.Exit(code=1) from exc
+    except ShareError as exc:
+        console.print(f"[red]verify:[/red] signature rejected: {exc}")
+        raise typer.Exit(code=3) from exc
+
+    out = Console()
+    out.print(f"[green]verified:[/green] {path.name}")
+    out.print(f"  signer:          {result.signer_fingerprint}")
+    out.print(f"  trusted-key:     {result.trusted_key_path}")
+    out.print(f"  adapter_sha256:  {provenance.adapter_sha256[:12]}...")
+    out.print(f"  base_revision:   {provenance.base_revision}")
+    out.print(f"  corpus_root:     {provenance.corpus_root_sha256[:12]}...")
+    out.print(f"  signed_at:       {provenance.signed_at}")
+    if result.tofu_recorded:
+        out.print(
+            f"[yellow]note:[/yellow] recorded new trust entry "
+            f"at {result.trusted_key_path}; subsequent verifies use strict mode."
+        )

From c88e02bf2eea63cee5a27618eaa675e027cae49d Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:41:21 -0400
Subject: [PATCH 07/46] Extract templates_list_cmd into
 cli/commands/templates.py

---
 src/dlm/cli/commands/__init__.py  | 82 +---------------------------
 src/dlm/cli/commands/templates.py | 88 +++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+), 81 deletions(-)
 create mode 100644 src/dlm/cli/commands/templates.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index a1e6aa0..f7bef33 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -21,6 +21,7 @@
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
+from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
 
 if TYPE_CHECKING:
@@ -2997,87 +2998,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-def templates_list_cmd(
-    json_out: Annotated[
-        bool,
-        typer.Option("--json", help="Emit a JSON array of template metadata."),
-    ] = False,
-    refresh: Annotated[
-        bool,
-        typer.Option(
-            "--refresh",
-            help=(
-                "Refresh from the upstream template gallery. Currently a no-op — "
-                "upstream repo + signing key are deferred."
-            ),
-        ),
-    ] = False,
-    accept_unsigned: Annotated[
-        bool,
-        typer.Option(
-            "--accept-unsigned",
-            help=(
-                "Bypass signed-tag verification on --refresh. Reserved; takes effect "
-                "once the upstream gallery signs its releases."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """List the bundled (and, one day, remote) template gallery."""
-
-    import json as _json
-
-    from rich.console import Console
-
-    from dlm.templates import list_bundled
-
-    console_out = Console()
-    console_err = Console(stderr=True)
-
-    if refresh:
-        from dlm.templates.fetcher import RemoteFetchUnavailable, cache_dir, fetch_all
-
-        try:
-            fetch_all(cache_dir(), remote="")
-        except RemoteFetchUnavailable as exc:
-            console_err.print(
-                f"[yellow]templates:[/yellow] {exc} Falling back to the bundled gallery."
-            )
-        # --accept-unsigned is reserved for when the live fetcher lands;
-        # touching it here silences ARG001 without ceremony.
-        _ = accept_unsigned
-
-    templates = list_bundled()
-
-    if json_out:
-        payload = [
-            {
-                "name": t.name,
-                "title": t.meta.title,
-                "domain_tags": list(t.meta.domain_tags),
-                "recommended_base": t.meta.recommended_base,
-                "expected_steps": t.meta.expected_steps,
-                "expected_duration": dict(t.meta.expected_duration),
-                "summary": t.meta.summary,
-                "sample_prompts": list(t.meta.sample_prompts),
-            }
-            for t in templates
-        ]
-        console_out.print_json(_json.dumps(payload))
-        return
-
-    if not templates:
-        console_err.print("[yellow]templates:[/yellow] no bundled templates found.")
-        raise typer.Exit(code=1)
-
-    name_width = max(len(t.name) for t in templates)
-    for t in templates:
-        console_out.print(
-            f"[bold]{t.name:<{name_width}}[/bold]  {t.meta.title}  "
-            f"[dim]({t.meta.recommended_base})[/dim]"
-        )
-
-
 def push_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm or .dlm.pack to push.")],
     to: Annotated[
diff --git a/src/dlm/cli/commands/templates.py b/src/dlm/cli/commands/templates.py
new file mode 100644
index 0000000..b57b2ae
--- /dev/null
+++ b/src/dlm/cli/commands/templates.py
@@ -0,0 +1,88 @@
+"""`dlm templates list` — list the bundled template gallery."""
+
+from __future__ import annotations
+
+from typing import Annotated
+
+import typer
+
+
+def templates_list_cmd(
+    json_out: Annotated[
+        bool,
+        typer.Option("--json", help="Emit a JSON array of template metadata."),
+    ] = False,
+    refresh: Annotated[
+        bool,
+        typer.Option(
+            "--refresh",
+            help=(
+                "Refresh from the upstream template gallery. Currently a no-op — "
+                "upstream repo + signing key are deferred."
+            ),
+        ),
+    ] = False,
+    accept_unsigned: Annotated[
+        bool,
+        typer.Option(
+            "--accept-unsigned",
+            help=(
+                "Bypass signed-tag verification on --refresh. Reserved; takes effect "
+                "once the upstream gallery signs its releases."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """List the bundled (and, one day, remote) template gallery."""
+
+    import json as _json
+
+    from rich.console import Console
+
+    from dlm.templates import list_bundled
+
+    console_out = Console()
+    console_err = Console(stderr=True)
+
+    if refresh:
+        from dlm.templates.fetcher import RemoteFetchUnavailable, cache_dir, fetch_all
+
+        try:
+            fetch_all(cache_dir(), remote="")
+        except RemoteFetchUnavailable as exc:
+            console_err.print(
+                f"[yellow]templates:[/yellow] {exc} Falling back to the bundled gallery."
+            )
+        # --accept-unsigned is reserved for when the live fetcher lands;
+        # touching it here silences ARG001 without ceremony.
+        _ = accept_unsigned
+
+    templates = list_bundled()
+
+    if json_out:
+        payload = [
+            {
+                "name": t.name,
+                "title": t.meta.title,
+                "domain_tags": list(t.meta.domain_tags),
+                "recommended_base": t.meta.recommended_base,
+                "expected_steps": t.meta.expected_steps,
+                "expected_duration": dict(t.meta.expected_duration),
+                "summary": t.meta.summary,
+                "sample_prompts": list(t.meta.sample_prompts),
+            }
+            for t in templates
+        ]
+        console_out.print_json(_json.dumps(payload))
+        return
+
+    if not templates:
+        console_err.print("[yellow]templates:[/yellow] no bundled templates found.")
+        raise typer.Exit(code=1)
+
+    name_width = max(len(t.name) for t in templates)
+    for t in templates:
+        console_out.print(
+            f"[bold]{t.name:<{name_width}}[/bold]  {t.meta.title}  "
+            f"[dim]({t.meta.recommended_base})[/dim]"
+        )

From c2ca4733afbd01f98fe3e7ba1d6992541422f038 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:41:36 -0400
Subject: [PATCH 08/46] Sort cli/commands/__init__.py re-export imports

---
 src/dlm/cli/commands/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index f7bef33..78969f0 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -20,8 +20,8 @@
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
-from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
+from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
 
 if TYPE_CHECKING:

From 9d4f38595b9f9bee7b1fbf75b826d0e4dbdf78f6 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:43:24 -0400
Subject: [PATCH 09/46] Move _previously_accepted + _human_size to
 cli/commands/_shared.py

---
 src/dlm/cli/commands/__init__.py | 34 ++---------------------
 src/dlm/cli/commands/_shared.py  | 47 ++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 32 deletions(-)
 create mode 100644 src/dlm/cli/commands/_shared.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 78969f0..d1c5635 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -17,6 +17,8 @@
 
 import typer
 
+from dlm.cli.commands._shared import _human_size as _human_size
+from dlm.cli.commands._shared import _previously_accepted as _previously_accepted
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
@@ -283,30 +285,6 @@ def init_cmd(
         console.print(f"[green]init:[/green] wrote {path}")
 
 
-def _previously_accepted(store_manifest_path: Path) -> bool:
-    """Return True iff the store manifest already holds a LicenseAcceptance.
-
-    `dlm prompt` and `dlm export` operate on an already-trained
-    adapter; the gated-base license was accepted during training and
-    persisted into `manifest.license_acceptance`. Replaying that
-    acceptance here is correct; silently hardcoding
-    `accept_license=True` is not — it would let a never-accepted
-    gated base slip through.
-    """
-    if not store_manifest_path.exists():
-        return False
-    from dlm.store.errors import ManifestCorruptError
-    from dlm.store.manifest import load_manifest
-
-    try:
-        manifest = load_manifest(store_manifest_path)
-    except (ManifestCorruptError, OSError):
-        # Narrow from bare `Exception` so programmer bugs propagate
-        # instead of being silently treated as "no acceptance."
-        return False
-    return manifest.license_acceptance is not None
-
-
 def _prompt_accept_license(console: object, base: str, license_url: str | None) -> bool:
     """Interactive y/N prompt for gated base-model license acceptance.
 
@@ -2694,14 +2672,6 @@ def _render_inspection_text(console: object, path: Path, inspection: object) ->
         console.print("  [yellow]orphaned:[/yellow]     source .dlm is missing or mismatched")
 
 
-def _human_size(n: int) -> str:
-    for unit in ("B", "KB", "MB", "GB", "TB"):
-        if n < 1024:
-            return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
-        n //= 1024
-    return f"{n} PB"
-
-
 def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
     """Best-effort resolution of `training.sources` for `dlm show`.
 
diff --git a/src/dlm/cli/commands/_shared.py b/src/dlm/cli/commands/_shared.py
new file mode 100644
index 0000000..acdf907
--- /dev/null
+++ b/src/dlm/cli/commands/_shared.py
@@ -0,0 +1,47 @@
+"""Helpers shared across more than one cli/commands submodule.
+
+Anything used by exactly one command lives in that command's own
+submodule. This file is the explicit destination for true cross-command
+helpers — keeping it small means the package's public surface (the
+`__init__.py` re-exports) stays focused on commands themselves.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def _previously_accepted(store_manifest_path: Path) -> bool:
+    """Return True iff the store manifest already holds a LicenseAcceptance.
+
+    `dlm prompt`, `dlm export`, and `dlm repl` operate on an
+    already-trained adapter; the gated-base license was accepted during
+    training and persisted into `manifest.license_acceptance`. Replaying
+    that acceptance here is correct; silently hardcoding
+    `accept_license=True` is not — it would let a never-accepted gated
+    base slip through.
+    """
+    if not store_manifest_path.exists():
+        return False
+    from dlm.store.errors import ManifestCorruptError
+    from dlm.store.manifest import load_manifest
+
+    try:
+        manifest = load_manifest(store_manifest_path)
+    except (ManifestCorruptError, OSError):
+        # Narrow from bare `Exception` so programmer bugs propagate
+        # instead of being silently treated as "no acceptance."
+        return False
+    return manifest.license_acceptance is not None
+
+
+def _human_size(n: int) -> str:
+    """Render a byte count as a 1-decimal human string (B / KB / MB / …)."""
+    for unit in ("B", "KB", "MB", "GB", "TB"):
+        if n < 1024:
+            return f"{n:.1f} {unit}" if unit != "B" else f"{n} B"
+        n //= 1024
+    return f"{n} PB"

From 7a4ff3de28d8e951eff21bd37c29201f4e8db867 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:44:22 -0400
Subject: [PATCH 10/46] Extract repl_cmd into cli/commands/repl.py

---
 src/dlm/cli/commands/__init__.py | 101 +---------------------------
 src/dlm/cli/commands/repl.py     | 110 +++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+), 100 deletions(-)
 create mode 100644 src/dlm/cli/commands/repl.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index d1c5635..b08a66c 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -22,6 +22,7 @@
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
+from dlm.cli.commands.repl import repl_cmd as repl_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
@@ -2174,106 +2175,6 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def repl_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to start a REPL against.")],
-    adapter: Annotated[
-        str | None,
-        typer.Option(
-            "--adapter",
-            help=(
-                "Named adapter to load. Required on multi-adapter "
-                "documents; rejected on single-adapter documents."
-            ),
-        ),
-    ] = None,
-    backend: Annotated[
-        str,
-        typer.Option(
-            "--backend",
-            help="Inference backend: `auto`, `pytorch`, or `mlx`.",
-        ),
-    ] = "auto",
-) -> None:
-    """Interactive REPL against the trained adapter."""
-    from rich.console import Console
-
-    from dlm.base_models import GatedModelError
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.doc.parser import parse_file
-    from dlm.hardware import doctor
-    from dlm.inference import AdapterNotFoundError
-    from dlm.inference.backends import (
-        UnsupportedBackendError,
-        build_backend,
-        select_backend,
-    )
-    from dlm.repl.session import ReplSession
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    if backend not in ("auto", "pytorch", "mlx"):
-        console.print(
-            f"[red]repl:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
-        )
-        raise typer.Exit(code=2)
-
-    parsed = parse_file(path)
-    declared = parsed.frontmatter.training.adapters
-    if adapter is not None:
-        if declared is None:
-            console.print(
-                "[red]repl:[/red] --adapter is only valid on multi-adapter "
-                "documents (this doc does not declare `training.adapters`)."
-            )
-            raise typer.Exit(code=2)
-        if adapter not in declared:
-            console.print(
-                f"[red]repl:[/red] --adapter {adapter!r} is not declared "
-                f"(declared: {sorted(declared)!r})."
-            )
-            raise typer.Exit(code=2)
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    already_accepted = _previously_accepted(store.manifest)
-    try:
-        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
-    except GatedModelError as exc:
-        console.print(
-            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
-            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
-        )
-        raise typer.Exit(code=1) from exc
-    caps = doctor().capabilities
-
-    try:
-        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
-    except UnsupportedBackendError as exc:
-        console.print(f"[red]repl:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    backend_obj = build_backend(backend_name, caps)
-
-    try:
-        backend_obj.load(spec, store, adapter_name=adapter)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]repl:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    tokenizer = getattr(backend_obj, "_loaded", None)
-    tokenizer = tokenizer.tokenizer if tokenizer is not None else None
-
-    session = ReplSession(
-        backend=backend_obj,
-        tokenizer=tokenizer,
-        active_adapter=adapter,
-        declared_adapters=tuple(sorted(declared)) if declared else (),
-    )
-
-    from dlm.repl.app import run_repl
-
-    raise typer.Exit(code=run_repl(session, console=console))
-
-
 def metrics_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file whose store we query.")],
     json_out: Annotated[bool, typer.Option("--json", help="Emit JSON.")] = False,
diff --git a/src/dlm/cli/commands/repl.py b/src/dlm/cli/commands/repl.py
new file mode 100644
index 0000000..07fe157
--- /dev/null
+++ b/src/dlm/cli/commands/repl.py
@@ -0,0 +1,110 @@
+"""`dlm repl` — interactive REPL against the trained adapter."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from dlm.cli.commands._shared import _previously_accepted
+
+
+def repl_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to start a REPL against.")],
+    adapter: Annotated[
+        str | None,
+        typer.Option(
+            "--adapter",
+            help=(
+                "Named adapter to load. Required on multi-adapter "
+                "documents; rejected on single-adapter documents."
+            ),
+        ),
+    ] = None,
+    backend: Annotated[
+        str,
+        typer.Option(
+            "--backend",
+            help="Inference backend: `auto`, `pytorch`, or `mlx`.",
+        ),
+    ] = "auto",
+) -> None:
+    """Interactive REPL against the trained adapter."""
+    from rich.console import Console
+
+    from dlm.base_models import GatedModelError
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.doc.parser import parse_file
+    from dlm.hardware import doctor
+    from dlm.inference import AdapterNotFoundError
+    from dlm.inference.backends import (
+        UnsupportedBackendError,
+        build_backend,
+        select_backend,
+    )
+    from dlm.repl.session import ReplSession
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    if backend not in ("auto", "pytorch", "mlx"):
+        console.print(
+            f"[red]repl:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
+        )
+        raise typer.Exit(code=2)
+
+    parsed = parse_file(path)
+    declared = parsed.frontmatter.training.adapters
+    if adapter is not None:
+        if declared is None:
+            console.print(
+                "[red]repl:[/red] --adapter is only valid on multi-adapter "
+                "documents (this doc does not declare `training.adapters`)."
+            )
+            raise typer.Exit(code=2)
+        if adapter not in declared:
+            console.print(
+                f"[red]repl:[/red] --adapter {adapter!r} is not declared "
+                f"(declared: {sorted(declared)!r})."
+            )
+            raise typer.Exit(code=2)
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    already_accepted = _previously_accepted(store.manifest)
+    try:
+        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
+    except GatedModelError as exc:
+        console.print(
+            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
+            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
+        )
+        raise typer.Exit(code=1) from exc
+    caps = doctor().capabilities
+
+    try:
+        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
+    except UnsupportedBackendError as exc:
+        console.print(f"[red]repl:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    backend_obj = build_backend(backend_name, caps)
+
+    try:
+        backend_obj.load(spec, store, adapter_name=adapter)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]repl:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    tokenizer = getattr(backend_obj, "_loaded", None)
+    tokenizer = tokenizer.tokenizer if tokenizer is not None else None
+
+    session = ReplSession(
+        backend=backend_obj,
+        tokenizer=tokenizer,
+        active_adapter=adapter,
+        declared_adapters=tuple(sorted(declared)) if declared else (),
+    )
+
+    from dlm.repl.app import run_repl
+
+    raise typer.Exit(code=run_repl(session, console=console))

From 3b251e9bdb1e46502918ab875cfea714a9dd74eb Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:45:06 -0400
Subject: [PATCH 11/46] Extract push_cmd into cli/commands/push.py

---
 src/dlm/cli/commands/__init__.py | 61 +---------------------------
 src/dlm/cli/commands/push.py     | 68 ++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 60 deletions(-)
 create mode 100644 src/dlm/cli/commands/push.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index b08a66c..63af34f 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -22,6 +22,7 @@
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
+from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
@@ -2869,66 +2870,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-def push_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm or .dlm.pack to push.")],
-    to: Annotated[
-        str,
-        typer.Option(
-            "--to",
-            help=(
-                "Destination. `hf:<org>/<repo>` for HuggingFace Hub, "
-                "`https://...` for a generic HTTPS endpoint, or a local path."
-            ),
-        ),
-    ],
-    sign: Annotated[
-        bool,
-        typer.Option("--sign", help="Sign the pack with minisign before upload."),
-    ] = False,
-    include_exports: Annotated[bool, typer.Option("--include-exports")] = False,
-    include_base: Annotated[bool, typer.Option("--include-base")] = False,
-    include_logs: Annotated[bool, typer.Option("--include-logs")] = False,
-    licensee: Annotated[
-        str | None,
-        typer.Option(
-            "--i-am-the-licensee",
-            help="URL ack for --include-base on non-redistributable bases.",
-        ),
-    ] = None,
-) -> None:
-    """Upload a .dlm or .dlm.pack to an HF repo, URL endpoint, or local path."""
-    from rich.console import Console
-
-    from dlm.share import ShareError, push
-    from dlm.share.signing import MinisignNotAvailableError
-
-    console = Console(stderr=True)
-
-    try:
-        result = push(
-            path,
-            to,
-            sign=sign,
-            include_exports=include_exports,
-            include_base=include_base,
-            include_logs=include_logs,
-            licensee_acceptance_url=licensee,
-        )
-    except MinisignNotAvailableError as exc:
-        console.print(f"[red]push:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ShareError as exc:
-        console.print(f"[red]push:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    size_mb = result.bytes_sent / (1024 * 1024)
-    console.print(f"[green]pushed:[/green] {result.destination} ({size_mb:.2f} MB)")
-    if result.sink_kind.value == "hf":
-        console.print(f"[dim]install:[/dim] dlm pull {result.destination}")
-    if result.detail:
-        console.print(f"[dim]{result.detail}[/dim]")
-
-
 def pull_cmd(
     source: Annotated[
         str,
diff --git a/src/dlm/cli/commands/push.py b/src/dlm/cli/commands/push.py
new file mode 100644
index 0000000..0f2e6ac
--- /dev/null
+++ b/src/dlm/cli/commands/push.py
@@ -0,0 +1,68 @@
+"""`dlm push` — upload a .dlm or .dlm.pack to an HF repo, URL, or local path."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def push_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm or .dlm.pack to push.")],
+    to: Annotated[
+        str,
+        typer.Option(
+            "--to",
+            help=(
+                "Destination. `hf:<org>/<repo>` for HuggingFace Hub, "
+                "`https://...` for a generic HTTPS endpoint, or a local path."
+            ),
+        ),
+    ],
+    sign: Annotated[
+        bool,
+        typer.Option("--sign", help="Sign the pack with minisign before upload."),
+    ] = False,
+    include_exports: Annotated[bool, typer.Option("--include-exports")] = False,
+    include_base: Annotated[bool, typer.Option("--include-base")] = False,
+    include_logs: Annotated[bool, typer.Option("--include-logs")] = False,
+    licensee: Annotated[
+        str | None,
+        typer.Option(
+            "--i-am-the-licensee",
+            help="URL ack for --include-base on non-redistributable bases.",
+        ),
+    ] = None,
+) -> None:
+    """Upload a .dlm or .dlm.pack to an HF repo, URL endpoint, or local path."""
+    from rich.console import Console
+
+    from dlm.share import ShareError, push
+    from dlm.share.signing import MinisignNotAvailableError
+
+    console = Console(stderr=True)
+
+    try:
+        result = push(
+            path,
+            to,
+            sign=sign,
+            include_exports=include_exports,
+            include_base=include_base,
+            include_logs=include_logs,
+            licensee_acceptance_url=licensee,
+        )
+    except MinisignNotAvailableError as exc:
+        console.print(f"[red]push:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ShareError as exc:
+        console.print(f"[red]push:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    size_mb = result.bytes_sent / (1024 * 1024)
+    console.print(f"[green]pushed:[/green] {result.destination} ({size_mb:.2f} MB)")
+    if result.sink_kind.value == "hf":
+        console.print(f"[dim]install:[/dim] dlm pull {result.destination}")
+    if result.detail:
+        console.print(f"[dim]{result.detail}[/dim]")

From cf75e7629f79193fbfd5ecddb58816bc360bfb38 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:45:47 -0400
Subject: [PATCH 12/46] Extract pull_cmd into cli/commands/pull.py

---
 src/dlm/cli/commands/__init__.py | 56 +---------------------------
 src/dlm/cli/commands/pull.py     | 63 ++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 55 deletions(-)
 create mode 100644 src/dlm/cli/commands/pull.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 63af34f..39464e4 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -22,6 +22,7 @@
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
+from dlm.cli.commands.pull import pull_cmd as pull_cmd
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
@@ -2870,61 +2871,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-def pull_cmd(
-    source: Annotated[
-        str,
-        typer.Argument(
-            help=(
-                "Source: `hf:<org>/<repo>`, `https://...`, "
-                "`peer://host:port/<id>?token=...`, or a local path."
-            )
-        ),
-    ],
-    out: Annotated[
-        Path | None,
-        typer.Option("--out", help="Directory for the restored .dlm (default: CWD)."),
-    ] = None,
-    force: Annotated[
-        bool,
-        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
-    ] = False,
-) -> None:
-    """Download + verify + unpack a .dlm.pack from a remote source."""
-    from rich.console import Console
-
-    from dlm.pack.errors import PackError
-    from dlm.share import ShareError, pull
-    from dlm.share.signing import VerifyStatus
-
-    console = Console(stderr=True)
-
-    try:
-        result = pull(source, out_dir=out, force=force)
-    except ShareError as exc:
-        console.print(f"[red]pull:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except PackError as exc:
-        console.print(f"[red]pull:[/red] pack integrity: {exc}")
-        raise typer.Exit(code=1) from exc
-
-    size_mb = result.bytes_received / (1024 * 1024)
-    console.print(f"[green]pulled:[/green] {result.source} → {result.dlm_path} ({size_mb:.2f} MB)")
-
-    status = result.verification.status
-    if status == VerifyStatus.VERIFIED:
-        console.print(
-            f"[green]verified:[/green] signature matches "
-            f"[bold]{result.verification.key_path}[/bold]"
-        )
-    elif status == VerifyStatus.UNVERIFIED:
-        console.print(
-            f"[yellow]unverified:[/yellow] signature present but "
-            f"not matched ({result.verification.detail}); sha256 still validated"
-        )
-    else:
-        console.print("[dim]unsigned[/dim] (sha256 integrity still validated)")
-
-
 def serve_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file to serve.")],
     port: Annotated[int, typer.Option("--port")] = 7337,
diff --git a/src/dlm/cli/commands/pull.py b/src/dlm/cli/commands/pull.py
new file mode 100644
index 0000000..62ccee8
--- /dev/null
+++ b/src/dlm/cli/commands/pull.py
@@ -0,0 +1,63 @@
+"""`dlm pull` — download + verify + unpack a .dlm.pack from a remote source."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def pull_cmd(
+    source: Annotated[
+        str,
+        typer.Argument(
+            help=(
+                "Source: `hf:<org>/<repo>`, `https://...`, "
+                "`peer://host:port/<id>?token=...`, or a local path."
+            )
+        ),
+    ],
+    out: Annotated[
+        Path | None,
+        typer.Option("--out", help="Directory for the restored .dlm (default: CWD)."),
+    ] = None,
+    force: Annotated[
+        bool,
+        typer.Option("--force", help="Overwrite an existing store with the same dlm_id."),
+    ] = False,
+) -> None:
+    """Download + verify + unpack a .dlm.pack from a remote source."""
+    from rich.console import Console
+
+    from dlm.pack.errors import PackError
+    from dlm.share import ShareError, pull
+    from dlm.share.signing import VerifyStatus
+
+    console = Console(stderr=True)
+
+    try:
+        result = pull(source, out_dir=out, force=force)
+    except ShareError as exc:
+        console.print(f"[red]pull:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except PackError as exc:
+        console.print(f"[red]pull:[/red] pack integrity: {exc}")
+        raise typer.Exit(code=1) from exc
+
+    size_mb = result.bytes_received / (1024 * 1024)
+    console.print(f"[green]pulled:[/green] {result.source} → {result.dlm_path} ({size_mb:.2f} MB)")
+
+    status = result.verification.status
+    if status == VerifyStatus.VERIFIED:
+        console.print(
+            f"[green]verified:[/green] signature matches "
+            f"[bold]{result.verification.key_path}[/bold]"
+        )
+    elif status == VerifyStatus.UNVERIFIED:
+        console.print(
+            f"[yellow]unverified:[/yellow] signature present but "
+            f"not matched ({result.verification.detail}); sha256 still validated"
+        )
+    else:
+        console.print("[dim]unsigned[/dim] (sha256 integrity still validated)")

From 95ad10e61ba87c2c485ed27546d9592eb0dae5c2 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 18:46:35 -0400
Subject: [PATCH 13/46] Extract serve_cmd into cli/commands/serve.py

---
 src/dlm/cli/commands/__init__.py | 87 +----------------------------
 src/dlm/cli/commands/serve.py    | 94 ++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 86 deletions(-)
 create mode 100644 src/dlm/cli/commands/serve.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 39464e4..4016ae7 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -25,6 +25,7 @@
 from dlm.cli.commands.pull import pull_cmd as pull_cmd
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
+from dlm.cli.commands.serve import serve_cmd as serve_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
@@ -2871,92 +2872,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-def serve_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to serve.")],
-    port: Annotated[int, typer.Option("--port")] = 7337,
-    public: Annotated[
-        bool,
-        typer.Option(
-            "--public",
-            help="Bind 0.0.0.0 (requires --i-know-this-is-public); otherwise 127.0.0.1.",
-        ),
-    ] = False,
-    i_know_public: Annotated[
-        bool,
-        typer.Option(
-            "--i-know-this-is-public",
-            help="Confirm binding 0.0.0.0 is safe on this network.",
-        ),
-    ] = False,
-    max_concurrency: Annotated[
-        int,
-        typer.Option("--max-concurrency", help="Max concurrent connections per token."),
-    ] = 4,
-    rate_limit: Annotated[
-        int,
-        typer.Option("--rate-limit", help="Max requests per minute per token."),
-    ] = 30,
-    token_ttl_minutes: Annotated[
-        int, typer.Option("--token-ttl-minutes", help="Token lifetime in minutes.")
-    ] = 15,
-) -> None:
-    """Serve a .dlm's pack over LAN for peers to pull."""
-    from rich.console import Console
-
-    from dlm.doc.parser import parse_file
-    from dlm.pack.packer import pack as pack_fn
-    from dlm.share import ServeOptions, serve
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    parsed = parse_file(path)
-    dlm_id = parsed.frontmatter.dlm_id
-
-    # pack() calls load_manifest(), which crashes with an unhelpful
-    # "store manifest corrupt" error on a .dlm that's never been
-    # trained. Surface the true cause instead.
-    store = for_dlm(dlm_id)
-    if not store.manifest.exists():
-        console.print(
-            f"[red]serve:[/red] no training state for {dlm_id} — run [bold]dlm train[/bold] first."
-        )
-        raise typer.Exit(code=1)
-
-    # Pack into a temp file that lives as long as the server does.
-    import tempfile
-
-    tmp_dir = Path(tempfile.mkdtemp(prefix="dlm-serve-"))
-    tmp_pack = tmp_dir / f"{path.stem}.dlm.pack"
-    pack_fn(path, out=tmp_pack)
-    console.print(f"[dim]packed:[/dim] {tmp_pack} ({tmp_pack.stat().st_size} bytes)")
-
-    opts = ServeOptions(
-        port=port,
-        public=public,
-        i_know_this_is_public=i_know_public,
-        max_concurrency=max_concurrency,
-        rate_limit_per_min=rate_limit,
-        token_ttl_seconds=token_ttl_minutes * 60,
-    )
-    handle = serve(dlm_id, tmp_pack, opts)
-
-    console.print(
-        f"[green]serving:[/green] {path.name} (dlm_id {dlm_id}) on "
-        f"[bold]http://{handle.bind_host}:{handle.port}/{dlm_id}[/bold]"
-    )
-    console.print(f"[bold]peer URL:[/bold] {handle.peer_url}")
-    console.print(f"[dim]token valid for {token_ttl_minutes} min. Ctrl-C to stop.[/dim]")
-
-    try:
-        handle.wait_shutdown()
-    finally:
-        import shutil
-
-        shutil.rmtree(tmp_dir, ignore_errors=True)
-    console.print("[dim]stopped.[/dim]")
-
-
 # ---- Cache Commands --------------------------------------------------
 
 
diff --git a/src/dlm/cli/commands/serve.py b/src/dlm/cli/commands/serve.py
new file mode 100644
index 0000000..7ffb60e
--- /dev/null
+++ b/src/dlm/cli/commands/serve.py
@@ -0,0 +1,94 @@
+"""`dlm serve` — serve a .dlm's pack over LAN for peers to pull."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def serve_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to serve.")],
+    port: Annotated[int, typer.Option("--port")] = 7337,
+    public: Annotated[
+        bool,
+        typer.Option(
+            "--public",
+            help="Bind 0.0.0.0 (requires --i-know-this-is-public); otherwise 127.0.0.1.",
+        ),
+    ] = False,
+    i_know_public: Annotated[
+        bool,
+        typer.Option(
+            "--i-know-this-is-public",
+            help="Confirm binding 0.0.0.0 is safe on this network.",
+        ),
+    ] = False,
+    max_concurrency: Annotated[
+        int,
+        typer.Option("--max-concurrency", help="Max concurrent connections per token."),
+    ] = 4,
+    rate_limit: Annotated[
+        int,
+        typer.Option("--rate-limit", help="Max requests per minute per token."),
+    ] = 30,
+    token_ttl_minutes: Annotated[
+        int, typer.Option("--token-ttl-minutes", help="Token lifetime in minutes.")
+    ] = 15,
+) -> None:
+    """Serve a .dlm's pack over LAN for peers to pull."""
+    from rich.console import Console
+
+    from dlm.doc.parser import parse_file
+    from dlm.pack.packer import pack as pack_fn
+    from dlm.share import ServeOptions, serve
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    parsed = parse_file(path)
+    dlm_id = parsed.frontmatter.dlm_id
+
+    # pack() calls load_manifest(), which crashes with an unhelpful
+    # "store manifest corrupt" error on a .dlm that's never been
+    # trained. Surface the true cause instead.
+    store = for_dlm(dlm_id)
+    if not store.manifest.exists():
+        console.print(
+            f"[red]serve:[/red] no training state for {dlm_id} — run [bold]dlm train[/bold] first."
+        )
+        raise typer.Exit(code=1)
+
+    # Pack into a temp file that lives as long as the server does.
+    import tempfile
+
+    tmp_dir = Path(tempfile.mkdtemp(prefix="dlm-serve-"))
+    tmp_pack = tmp_dir / f"{path.stem}.dlm.pack"
+    pack_fn(path, out=tmp_pack)
+    console.print(f"[dim]packed:[/dim] {tmp_pack} ({tmp_pack.stat().st_size} bytes)")
+
+    opts = ServeOptions(
+        port=port,
+        public=public,
+        i_know_this_is_public=i_know_public,
+        max_concurrency=max_concurrency,
+        rate_limit_per_min=rate_limit,
+        token_ttl_seconds=token_ttl_minutes * 60,
+    )
+    handle = serve(dlm_id, tmp_pack, opts)
+
+    console.print(
+        f"[green]serving:[/green] {path.name} (dlm_id {dlm_id}) on "
+        f"[bold]http://{handle.bind_host}:{handle.port}/{dlm_id}[/bold]"
+    )
+    console.print(f"[bold]peer URL:[/bold] {handle.peer_url}")
+    console.print(f"[dim]token valid for {token_ttl_minutes} min. Ctrl-C to stop.[/dim]")
+
+    try:
+        handle.wait_shutdown()
+    finally:
+        import shutil
+
+        shutil.rmtree(tmp_dir, ignore_errors=True)
+    console.print("[dim]stopped.[/dim]")

From fd7b5e6e569a469cab3f2128a6f7c742b1186cb4 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:00:38 -0400
Subject: [PATCH 14/46] Extract cache_show/prune/clear into
 cli/commands/cache.py

---
 src/dlm/cli/commands/__init__.py | 174 +-----------------------------
 src/dlm/cli/commands/cache.py    | 178 +++++++++++++++++++++++++++++++
 2 files changed, 181 insertions(+), 171 deletions(-)
 create mode 100644 src/dlm/cli/commands/cache.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 4016ae7..0a6a4ce 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -19,6 +19,9 @@
 
 from dlm.cli.commands._shared import _human_size as _human_size
 from dlm.cli.commands._shared import _previously_accepted as _previously_accepted
+from dlm.cli.commands.cache import cache_clear_cmd as cache_clear_cmd
+from dlm.cli.commands.cache import cache_prune_cmd as cache_prune_cmd
+from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
@@ -2872,177 +2875,6 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-# ---- Cache Commands --------------------------------------------------
-
-
-def cache_show_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to inspect the cache for.")],
-    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
-) -> None:
-    """Show tokenized-section cache size, entry count, last-run hit rate."""
-    import json as _json
-    import sys as _sys
-
-    from rich.console import Console
-
-    from dlm.directives.cache import TokenizedCache
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.metrics import queries as _queries
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]cache:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    cache = TokenizedCache.open(store.tokenized_cache_dir)
-    last = _queries.latest_tokenization(store.root)
-
-    payload: dict[str, object] = {
-        "dlm_id": parsed.frontmatter.dlm_id,
-        "cache_path": str(store.tokenized_cache_dir),
-        "entry_count": cache.entry_count,
-        "bytes": cache.total_bytes,
-        "last_run_hit_rate": last.hit_rate if last else None,
-        "last_run_id": last.run_id if last else None,
-    }
-    if json_out:
-        _sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
-        return
-
-    out_console.print(f"[bold]Cache for {parsed.frontmatter.dlm_id}[/bold]")
-    out_console.print(f"  path:              {store.tokenized_cache_dir}")
-    out_console.print(f"  entries:           {cache.entry_count}")
-    out_console.print(f"  size:              {_human_size(cache.total_bytes)}")
-    if last is not None:
-        out_console.print(
-            f"  last-run hit rate: {last.hit_rate:.1%} "
-            f"({last.cache_hits}/{last.cache_hits + last.cache_misses})"
-        )
-    else:
-        out_console.print("  last-run hit rate: [dim]no tokenization runs yet[/dim]")
-
-
-def cache_prune_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to prune the cache for.")],
-    older_than: Annotated[
-        str | None,
-        typer.Option(
-            "--older-than",
-            help=(
-                "Drop entries not accessed in this duration. "
-                "Format: `30d`, `12h`, `45m`. When omitted, defaults to "
-                "the document's `training.cache.prune_older_than_days` "
-                "(90d pre-v9 docs inherit)."
-            ),
-        ),
-    ] = None,
-) -> None:
-    """Remove tokenized-cache entries not accessed within a cutoff."""
-    from rich.console import Console
-
-    from dlm.directives.cache import TokenizedCache
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    # Parse the doc first — we need it either way (for dlm_id) AND
-    # for the frontmatter default when --older-than is absent.
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]cache:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if older_than is not None:
-        seconds = _parse_duration(older_than)
-        if seconds is None:
-            console.print(
-                f"[red]cache:[/red] invalid --older-than {older_than!r} "
-                "(expected e.g. 30d, 12h, 45m)"
-            )
-            raise typer.Exit(code=2)
-        cutoff_label = older_than
-    else:
-        # Fall back to the frontmatter's per-doc default. Pre-v9 docs
-        # get the CacheConfig default of 90 days via the Pydantic
-        # factory on parse.
-        days = parsed.frontmatter.training.cache.prune_older_than_days
-        seconds = float(days) * 86400.0
-        cutoff_label = f"{days}d"
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    cache = TokenizedCache.open(store.tokenized_cache_dir)
-    removed = cache.prune(older_than_seconds=seconds)
-    cache.save_manifest()
-    console.print(f"[green]cache:[/green] pruned {removed} entr(y/ies) older than {cutoff_label}")
-
-
-def cache_clear_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to wipe the cache for.")],
-    force: Annotated[
-        bool,
-        typer.Option("--force", help="Skip the confirmation prompt."),
-    ] = False,
-) -> None:
-    """Wipe every entry in the tokenized-section cache for this store."""
-    from rich.console import Console
-
-    from dlm.directives.cache import TokenizedCache
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]cache:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    cache = TokenizedCache.open(store.tokenized_cache_dir)
-
-    if not force and cache.entry_count > 0:
-        confirmed = typer.confirm(
-            f"wipe {cache.entry_count} entries ({_human_size(cache.total_bytes)})?"
-        )
-        if not confirmed:
-            console.print("[yellow]cache:[/yellow] clear cancelled")
-            raise typer.Exit(code=0)
-
-    removed = cache.clear()
-    cache.save_manifest()
-    console.print(f"[green]cache:[/green] cleared {removed} entr(y/ies)")
-
-
-def _parse_duration(spec: str) -> float | None:
-    """Parse a duration like `30d`, `12h`, `45m` → seconds. None on
-    malformed input."""
-    if not spec or not spec[:-1].isdigit():
-        return None
-    n = int(spec[:-1])
-    unit = spec[-1].lower()
-    if unit == "s":
-        return float(n)
-    if unit == "m":
-        return float(n) * 60
-    if unit == "h":
-        return float(n) * 3600
-    if unit == "d":
-        return float(n) * 86400
-    return None
-
-
 # --- preference -----------------------------------------------------------
 
 
diff --git a/src/dlm/cli/commands/cache.py b/src/dlm/cli/commands/cache.py
new file mode 100644
index 0000000..7dc0296
--- /dev/null
+++ b/src/dlm/cli/commands/cache.py
@@ -0,0 +1,178 @@
+"""`dlm cache` — show / prune / clear the tokenized-section cache."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from dlm.cli.commands._shared import _human_size
+
+
+def cache_show_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to inspect the cache for.")],
+    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
+) -> None:
+    """Show tokenized-section cache size, entry count, last-run hit rate."""
+    import json as _json
+    import sys as _sys
+
+    from rich.console import Console
+
+    from dlm.directives.cache import TokenizedCache
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.metrics import queries as _queries
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]cache:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    cache = TokenizedCache.open(store.tokenized_cache_dir)
+    last = _queries.latest_tokenization(store.root)
+
+    payload: dict[str, object] = {
+        "dlm_id": parsed.frontmatter.dlm_id,
+        "cache_path": str(store.tokenized_cache_dir),
+        "entry_count": cache.entry_count,
+        "bytes": cache.total_bytes,
+        "last_run_hit_rate": last.hit_rate if last else None,
+        "last_run_id": last.run_id if last else None,
+    }
+    if json_out:
+        _sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
+        return
+
+    out_console.print(f"[bold]Cache for {parsed.frontmatter.dlm_id}[/bold]")
+    out_console.print(f"  path:              {store.tokenized_cache_dir}")
+    out_console.print(f"  entries:           {cache.entry_count}")
+    out_console.print(f"  size:              {_human_size(cache.total_bytes)}")
+    if last is not None:
+        out_console.print(
+            f"  last-run hit rate: {last.hit_rate:.1%} "
+            f"({last.cache_hits}/{last.cache_hits + last.cache_misses})"
+        )
+    else:
+        out_console.print("  last-run hit rate: [dim]no tokenization runs yet[/dim]")
+
+
+def cache_prune_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to prune the cache for.")],
+    older_than: Annotated[
+        str | None,
+        typer.Option(
+            "--older-than",
+            help=(
+                "Drop entries not accessed in this duration. "
+                "Format: `30d`, `12h`, `45m`. When omitted, defaults to "
+                "the document's `training.cache.prune_older_than_days` "
+                "(90d pre-v9 docs inherit)."
+            ),
+        ),
+    ] = None,
+) -> None:
+    """Remove tokenized-cache entries not accessed within a cutoff."""
+    from rich.console import Console
+
+    from dlm.directives.cache import TokenizedCache
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    # Parse the doc first — we need it either way (for dlm_id) AND
+    # for the frontmatter default when --older-than is absent.
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]cache:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if older_than is not None:
+        seconds = _parse_duration(older_than)
+        if seconds is None:
+            console.print(
+                f"[red]cache:[/red] invalid --older-than {older_than!r} "
+                "(expected e.g. 30d, 12h, 45m)"
+            )
+            raise typer.Exit(code=2)
+        cutoff_label = older_than
+    else:
+        # Fall back to the frontmatter's per-doc default. Pre-v9 docs
+        # get the CacheConfig default of 90 days via the Pydantic
+        # factory on parse.
+        days = parsed.frontmatter.training.cache.prune_older_than_days
+        seconds = float(days) * 86400.0
+        cutoff_label = f"{days}d"
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    cache = TokenizedCache.open(store.tokenized_cache_dir)
+    removed = cache.prune(older_than_seconds=seconds)
+    cache.save_manifest()
+    console.print(f"[green]cache:[/green] pruned {removed} entr(y/ies) older than {cutoff_label}")
+
+
+def cache_clear_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to wipe the cache for.")],
+    force: Annotated[
+        bool,
+        typer.Option("--force", help="Skip the confirmation prompt."),
+    ] = False,
+) -> None:
+    """Wipe every entry in the tokenized-section cache for this store."""
+    from rich.console import Console
+
+    from dlm.directives.cache import TokenizedCache
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]cache:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    cache = TokenizedCache.open(store.tokenized_cache_dir)
+
+    if not force and cache.entry_count > 0:
+        confirmed = typer.confirm(
+            f"wipe {cache.entry_count} entries ({_human_size(cache.total_bytes)})?"
+        )
+        if not confirmed:
+            console.print("[yellow]cache:[/yellow] clear cancelled")
+            raise typer.Exit(code=0)
+
+    removed = cache.clear()
+    cache.save_manifest()
+    console.print(f"[green]cache:[/green] cleared {removed} entr(y/ies)")
+
+
+def _parse_duration(spec: str) -> float | None:
+    """Parse a duration like `30d`, `12h`, `45m` → seconds. None on
+    malformed input."""
+    if not spec or not spec[:-1].isdigit():
+        return None
+    n = int(spec[:-1])
+    unit = spec[-1].lower()
+    if unit == "s":
+        return float(n)
+    if unit == "m":
+        return float(n) * 60
+    if unit == "h":
+        return float(n) * 3600
+    if unit == "d":
+        return float(n) * 86400
+    return None

From 340df56fd3a145bd35e7b9cd5f6f1064cfb8fed2 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:01:07 -0400
Subject: [PATCH 15/46] Re-export _parse_duration for test compatibility

---
 src/dlm/cli/commands/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 0a6a4ce..0466b64 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -19,6 +19,7 @@
 
 from dlm.cli.commands._shared import _human_size as _human_size
 from dlm.cli.commands._shared import _previously_accepted as _previously_accepted
+from dlm.cli.commands.cache import _parse_duration as _parse_duration
 from dlm.cli.commands.cache import cache_clear_cmd as cache_clear_cmd
 from dlm.cli.commands.cache import cache_prune_cmd as cache_prune_cmd
 from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd

From 7598f931eb1b5081842a4fcdba0f577f08fdb024 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:02:45 -0400
Subject: [PATCH 16/46] Extract harvest_cmd into cli/commands/harvest.py

---
 src/dlm/cli/commands/__init__.py | 140 +----------------------------
 src/dlm/cli/commands/harvest.py  | 149 +++++++++++++++++++++++++++++++
 2 files changed, 150 insertions(+), 139 deletions(-)
 create mode 100644 src/dlm/cli/commands/harvest.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 0466b64..5ed2edf 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -24,6 +24,7 @@
 from dlm.cli.commands.cache import cache_prune_cmd as cache_prune_cmd
 from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
+from dlm.cli.commands.harvest import harvest_cmd as harvest_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
 from dlm.cli.commands.pull import pull_cmd as pull_cmd
@@ -3698,142 +3699,3 @@ def _synth_prompt_summary(content: str, *, section_id: str) -> str:
 # --- harvest --------------------------------------------------------------
 
 
-def harvest_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to harvest into.")],
-    sway_json: Annotated[
-        Path | None,
-        typer.Option(
-            "--sway-json",
-            help="Path to a sway JSON report. Required unless --revert is set.",
-        ),
-    ] = None,
-    apply: Annotated[
-        bool,
-        typer.Option(
-            "--apply",
-            help="Write harvested sections to the .dlm. Default is dry-run (review only).",
-        ),
-    ] = False,
-    tag: Annotated[
-        str,
-        typer.Option(
-            "--tag",
-            help="Prefix for the synthesized section's harvest_source metadata.",
-        ),
-    ] = "auto-harvest",
-    min_confidence: Annotated[
-        float,
-        typer.Option(
-            "--min-confidence",
-            help="Drop candidates whose sway evidence.confidence is below this.",
-            min=0.0,
-            max=1.0,
-        ),
-    ] = 0.0,
-    strict: Annotated[
-        bool,
-        typer.Option(
-            "--strict/--lax",
-            help=(
-                "Strict (default): refuse if any failing probe lacks a "
-                "reference. Lax: log a warning and skip those probes."
-            ),
-        ),
-    ] = True,
-    revert: Annotated[
-        bool,
-        typer.Option(
-            "--revert",
-            help=(
-                "Strip every auto-harvested section from the document. "
-                "Mutually exclusive with --sway-json / --apply."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """Adversarial replay: harvest failing sway probes back into the .dlm.
-
-    Default mode is `--dry-run`-style preview; pass `--apply` to write.
-    """
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.harvest import (
-        HarvestError,
-        MalformedSwayReportError,
-        NoReferenceError,
-        apply_plan,
-        build_plan,
-        read_sway_report,
-        render_plan,
-        revert_all_auto_harvests,
-    )
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    if revert and (sway_json is not None or apply):
-        console.print(
-            "[red]harvest:[/red] --revert is mutually exclusive with --sway-json / --apply"
-        )
-        raise typer.Exit(code=1)
-    if not revert and sway_json is None:
-        console.print(
-            "[red]harvest:[/red] --sway-json is required (or pass --revert "
-            "to strip auto-harvested sections)"
-        )
-        raise typer.Exit(code=1)
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]harvest:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if revert:
-        summary = revert_all_auto_harvests(parsed, target=path)
-        out_console.print(
-            f"[green]harvest:[/green] stripped {len(summary.added_section_ids)} "
-            f"auto-harvested section(s) from {path} (all harvest runs, not just last)"
-        )
-        return
-
-    assert sway_json is not None  # narrowed by the check above
-    try:
-        candidates = read_sway_report(
-            sway_json,
-            strict=strict,
-            min_confidence=min_confidence,
-        )
-    except MalformedSwayReportError as exc:
-        console.print(f"[red]harvest:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except NoReferenceError as exc:
-        console.print(f"[red]harvest:[/red] {exc}")
-        console.print("  Pass [bold]--lax[/bold] to skip probes without references instead.")
-        raise typer.Exit(code=1) from exc
-    except HarvestError as exc:
-        console.print(f"[red]harvest:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    plan = build_plan(parsed, candidates, tag=tag)
-    out_console.print(render_plan(plan))
-
-    if not plan.additions:
-        out_console.print(
-            "\n[yellow]no candidates to harvest[/yellow] — either the sway "
-            "report had no failing probes with references, or all matched "
-            "sections already exist in the document."
-        )
-        raise typer.Exit(code=2)
-
-    if not apply:
-        out_console.print("\n[dim]dry-run — re-run with [bold]--apply[/bold] to write.[/dim]")
-        return
-
-    summary = apply_plan(parsed, plan, target=path)
-    out_console.print(
-        f"\n[green]harvest:[/green] wrote {summary.added} section(s) to {path} "
-        f"({summary.skipped} skipped)"
-    )
diff --git a/src/dlm/cli/commands/harvest.py b/src/dlm/cli/commands/harvest.py
new file mode 100644
index 0000000..d284423
--- /dev/null
+++ b/src/dlm/cli/commands/harvest.py
@@ -0,0 +1,149 @@
+"""`dlm harvest` — adversarial replay: harvest failing sway probes into a .dlm."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def harvest_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to harvest into.")],
+    sway_json: Annotated[
+        Path | None,
+        typer.Option(
+            "--sway-json",
+            help="Path to a sway JSON report. Required unless --revert is set.",
+        ),
+    ] = None,
+    apply: Annotated[
+        bool,
+        typer.Option(
+            "--apply",
+            help="Write harvested sections to the .dlm. Default is dry-run (review only).",
+        ),
+    ] = False,
+    tag: Annotated[
+        str,
+        typer.Option(
+            "--tag",
+            help="Prefix for the synthesized section's harvest_source metadata.",
+        ),
+    ] = "auto-harvest",
+    min_confidence: Annotated[
+        float,
+        typer.Option(
+            "--min-confidence",
+            help="Drop candidates whose sway evidence.confidence is below this.",
+            min=0.0,
+            max=1.0,
+        ),
+    ] = 0.0,
+    strict: Annotated[
+        bool,
+        typer.Option(
+            "--strict/--lax",
+            help=(
+                "Strict (default): refuse if any failing probe lacks a "
+                "reference. Lax: log a warning and skip those probes."
+            ),
+        ),
+    ] = True,
+    revert: Annotated[
+        bool,
+        typer.Option(
+            "--revert",
+            help=(
+                "Strip every auto-harvested section from the document. "
+                "Mutually exclusive with --sway-json / --apply."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """Adversarial replay: harvest failing sway probes back into the .dlm.
+
+    Default mode is `--dry-run`-style preview; pass `--apply` to write.
+    """
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.harvest import (
+        HarvestError,
+        MalformedSwayReportError,
+        NoReferenceError,
+        apply_plan,
+        build_plan,
+        read_sway_report,
+        render_plan,
+        revert_all_auto_harvests,
+    )
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    if revert and (sway_json is not None or apply):
+        console.print(
+            "[red]harvest:[/red] --revert is mutually exclusive with --sway-json / --apply"
+        )
+        raise typer.Exit(code=1)
+    if not revert and sway_json is None:
+        console.print(
+            "[red]harvest:[/red] --sway-json is required (or pass --revert "
+            "to strip auto-harvested sections)"
+        )
+        raise typer.Exit(code=1)
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]harvest:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if revert:
+        summary = revert_all_auto_harvests(parsed, target=path)
+        out_console.print(
+            f"[green]harvest:[/green] stripped {len(summary.added_section_ids)} "
+            f"auto-harvested section(s) from {path} (all harvest runs, not just last)"
+        )
+        return
+
+    assert sway_json is not None  # narrowed by the check above
+    try:
+        candidates = read_sway_report(
+            sway_json,
+            strict=strict,
+            min_confidence=min_confidence,
+        )
+    except MalformedSwayReportError as exc:
+        console.print(f"[red]harvest:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except NoReferenceError as exc:
+        console.print(f"[red]harvest:[/red] {exc}")
+        console.print("  Pass [bold]--lax[/bold] to skip probes without references instead.")
+        raise typer.Exit(code=1) from exc
+    except HarvestError as exc:
+        console.print(f"[red]harvest:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    plan = build_plan(parsed, candidates, tag=tag)
+    out_console.print(render_plan(plan))
+
+    if not plan.additions:
+        out_console.print(
+            "\n[yellow]no candidates to harvest[/yellow] — either the sway "
+            "report had no failing probes with references, or all matched "
+            "sections already exist in the document."
+        )
+        raise typer.Exit(code=2)
+
+    if not apply:
+        out_console.print("\n[dim]dry-run — re-run with [bold]--apply[/bold] to write.[/dim]")
+        return
+
+    summary = apply_plan(parsed, plan, target=path)
+    out_console.print(
+        f"\n[green]harvest:[/green] wrote {summary.added} section(s) to {path} "
+        f"({summary.skipped} skipped)"
+    )

From 125f56eb17e0ee15ef7644d6b76793b3c1209007 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:05:15 -0400
Subject: [PATCH 17/46] Extract synth_instructions/revert/list into
 cli/commands/synth.py

---
 src/dlm/cli/commands/__init__.py | 379 +------------------------------
 src/dlm/cli/commands/synth.py    | 377 ++++++++++++++++++++++++++++++
 2 files changed, 381 insertions(+), 375 deletions(-)
 create mode 100644 src/dlm/cli/commands/synth.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 5ed2edf..0b2d9d3 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -13,7 +13,7 @@
 import os
 from collections.abc import Sequence
 from pathlib import Path
-from typing import TYPE_CHECKING, Annotated, Any, Literal, cast
+from typing import TYPE_CHECKING, Annotated, Any, Literal
 
 import typer
 
@@ -31,6 +31,9 @@
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
 from dlm.cli.commands.serve import serve_cmd as serve_cmd
+from dlm.cli.commands.synth import synth_instructions_cmd as synth_instructions_cmd
+from dlm.cli.commands.synth import synth_list_cmd as synth_list_cmd
+from dlm.cli.commands.synth import synth_revert_cmd as synth_revert_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
@@ -3325,377 +3328,3 @@ def _preference_prompt_summary(content: str, *, section_id: str) -> str:
     return prompt or "<blank>"
 
 
-# --- synth -----------------------------------------------------------------
-
-
-def synth_instructions_cmd(
-    path: Annotated[
-        Path, typer.Argument(help=".dlm file to synthesize instruction sections from.")
-    ],
-    teacher: Annotated[
-        str,
-        typer.Option(
-            "--teacher",
-            help=(
-                "Teacher selector: self, hf:<model>, openai:<model>, "
-                "anthropic:<model>, or vllm-server:<url>."
-            ),
-        ),
-    ] = "self",
-    per_section: Annotated[
-        int,
-        typer.Option(
-            "--per-section",
-            help="Instruction pairs to generate per prose section.",
-            min=1,
-        ),
-    ] = 3,
-    strategy: Annotated[
-        str,
-        typer.Option(
-            "--strategy",
-            help="Synthesis strategy: extraction, expansion, or both.",
-        ),
-    ] = "extraction",
-    filter_kind: Annotated[
-        str,
-        typer.Option(
-            "--filter",
-            help="Filter pipeline: sway, none, or dedup-only.",
-        ),
-    ] = "sway",
-    threshold: Annotated[
-        float | None,
-        typer.Option(
-            "--threshold",
-            help="Optional minimum sway-judge margin when --filter=sway.",
-            min=0.0,
-        ),
-    ] = None,
-    max_pairs: Annotated[
-        int | None,
-        typer.Option(
-            "--max-pairs",
-            help="Maximum accepted synth pairs to keep from this run.",
-            min=1,
-        ),
-    ] = None,
-    max_new_tokens: Annotated[
-        int,
-        typer.Option(
-            "--max-new-tokens",
-            help="Maximum new tokens the teacher may emit per prompt.",
-            min=1,
-        ),
-    ] = 512,
-    temp: Annotated[
-        float,
-        typer.Option("--temp", help="Teacher sampling temperature.", min=0.0),
-    ] = 0.0,
-    top_p: Annotated[
-        float | None,
-        typer.Option(
-            "--top-p",
-            help="Optional top-p cutoff for teacher sampling.",
-            min=0.0,
-            max=1.0,
-        ),
-    ] = None,
-    seed: Annotated[
-        int | None,
-        typer.Option("--seed", help="Optional teacher sampling seed."),
-    ] = None,
-    apply: Annotated[
-        bool,
-        typer.Option(
-            "--apply",
-            help="Write accepted auto-synth sections directly to the .dlm.",
-        ),
-    ] = False,
-    dry_run: Annotated[
-        bool,
-        typer.Option(
-            "--dry-run",
-            help="Preview the synth plan without staging or writing anything.",
-        ),
-    ] = False,
-) -> None:
-    """Generate, stage, or apply auto-synth instruction sections."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.preference import JudgeUnavailableError, build_judge
-    from dlm.store.paths import for_dlm
-    from dlm.synth import (
-        InvalidTeacherSpecError,
-        TeacherInvocationError,
-        TeacherUnavailableError,
-        build_synth_plan,
-        build_teacher,
-        clear_pending_plan,
-        filter_synth_plan,
-        render_filter_report,
-        render_synth_plan,
-        save_pending_plan,
-    )
-    from dlm.synth import (
-        apply_plan as apply_synth_plan,
-    )
-    from dlm.synth import (
-        build_apply_plan as build_synth_apply_plan,
-    )
-    from dlm.synth import (
-        render_apply_plan as render_synth_apply_plan,
-    )
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    if strategy not in ("extraction", "expansion", "both"):
-        console.print(
-            "[red]synth:[/red] --strategy must be one of extraction|expansion|both "
-            f"(got {strategy!r})."
-        )
-        raise typer.Exit(code=2)
-    if filter_kind not in ("sway", "none", "dedup-only"):
-        console.print(
-            f"[red]synth:[/red] --filter must be one of sway|none|dedup-only (got {filter_kind!r})."
-        )
-        raise typer.Exit(code=2)
-    if apply and dry_run:
-        console.print("[red]synth:[/red] --apply and --dry-run are mutually exclusive.")
-        raise typer.Exit(code=2)
-    if threshold is not None and filter_kind != "sway":
-        console.print("[red]synth:[/red] --threshold is only valid when --filter is `sway`.")
-        raise typer.Exit(code=2)
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-
-    try:
-        strategy_value = cast(Literal["extraction", "expansion", "both"], strategy)
-        teacher_obj = build_teacher(teacher, dlm_path=path)
-        plan = build_synth_plan(
-            parsed,
-            teacher_obj,
-            per_section=per_section,
-            strategy=strategy_value,
-            max_pairs=max_pairs,
-            max_new_tokens=max_new_tokens,
-            temperature=temp,
-            top_p=top_p,
-            seed=seed,
-        )
-    except InvalidTeacherSpecError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    except TeacherUnavailableError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except TeacherInvocationError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ValueError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    judge_obj = None
-    if filter_kind == "sway":
-        try:
-            judge_obj = build_judge("sway", dlm_path=path)
-        except JudgeUnavailableError as exc:
-            console.print(f"[red]synth:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
-    try:
-        filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind)
-        filtered = filter_synth_plan(
-            plan,
-            filter_kind=filter_value,
-            judge=judge_obj,
-            threshold=threshold,
-        )
-    except ValueError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    out_console.print(render_synth_plan(plan))
-    out_console.print("")
-    out_console.print(render_filter_report(filtered))
-
-    if not filtered.additions:
-        if not dry_run:
-            clear_pending_plan(store)
-        out_console.print(
-            "\n[yellow]no synth additions accepted[/yellow] — either generation "
-            "yielded no valid pairs, dedup removed them, or the filter rejected them."
-        )
-        raise typer.Exit(code=2)
-
-    sections = [addition.addition.section for addition in filtered.additions]
-
-    if apply:
-        apply_plan = build_synth_apply_plan(parsed, sections)
-        out_console.print("")
-        out_console.print(render_synth_apply_plan(apply_plan))
-        summary = apply_synth_plan(parsed, apply_plan, target=path)
-        clear_pending_plan(store)
-        out_console.print(
-            f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} "
-            f"({summary.skipped} skipped)"
-        )
-        return
-
-    if dry_run:
-        out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.")
-        return
-
-    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
-    out_console.print(
-        f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction "
-        f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them."
-    )
-
-
-def synth_revert_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-synth instructions from.")],
-) -> None:
-    """Remove every `auto_synth: true` instruction section from the `.dlm`."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.synth import revert_all_auto_synth
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    summary = revert_all_auto_synth(parsed, target=path)
-    out_console.print(
-        f"[green]synth:[/green] stripped {len(summary.added_section_ids)} "
-        f"auto-synth instruction section(s) from {path}"
-    )
-
-
-def synth_list_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-synth instructions we list.")],
-) -> None:
-    """List applied + staged auto-synth instruction sections."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.doc.sections import SectionType
-    from dlm.store.paths import for_dlm
-    from dlm.synth import PendingSynthPlanError, load_pending_plan
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    try:
-        pending = load_pending_plan(store)
-    except PendingSynthPlanError as exc:
-        console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    applied = [
-        section
-        for section in parsed.sections
-        if section.type is SectionType.INSTRUCTION and section.auto_synth
-    ]
-
-    out_console.print(f"[bold]{path}[/bold]")
-    out_console.print(f"  applied auto-synth: {len(applied)}")
-    out_console.print(f"  staged pending:     {len(pending.sections) if pending else 0}")
-
-    if not applied and pending is None:
-        out_console.print("  [dim]no auto-synth instruction sections yet[/dim]")
-        return
-
-    if applied:
-        _render_synth_listing(out_console, "Applied", applied)
-    if pending is not None:
-        _render_synth_listing(out_console, "Pending", pending.sections)
-
-
-def _render_synth_listing(
-    out_console: object,
-    heading: str,
-    sections: Sequence[object],
-) -> None:
-    from collections import Counter
-
-    from rich.console import Console
-
-    from dlm.doc.sections import Section
-
-    assert isinstance(out_console, Console)
-    typed_sections = [section for section in sections if isinstance(section, Section)]
-
-    out_console.print(f"\n[bold]{heading}[/bold]")
-
-    teacher_counts = Counter(section.synth_teacher or "unknown" for section in typed_sections)
-    strategy_counts = Counter(section.synth_strategy or "unknown" for section in typed_sections)
-    source_counts = Counter(section.source_section_id or "unknown" for section in typed_sections)
-
-    out_console.print("  by teacher:")
-    for teacher_name in sorted(teacher_counts):
-        out_console.print(f"    - {teacher_name}: {teacher_counts[teacher_name]}")
-
-    out_console.print("  by strategy:")
-    for strategy_name in sorted(strategy_counts):
-        out_console.print(f"    - {strategy_name}: {strategy_counts[strategy_name]}")
-
-    out_console.print("  by source section:")
-    for source_id in sorted(source_counts):
-        out_console.print(f"    - {source_id}: {source_counts[source_id]}")
-
-    out_console.print("  sections:")
-    for section in typed_sections:
-        prompt = _synth_prompt_summary(section.content, section_id=section.section_id)
-        out_console.print(
-            "    - "
-            f"{section.section_id}  teacher={section.synth_teacher or 'unknown'}  "
-            f"strategy={section.synth_strategy or 'unknown'}  "
-            f"source={section.source_section_id or 'unknown'}  "
-            f"prompt={prompt}"
-        )
-
-
-def _synth_prompt_summary(content: str, *, section_id: str) -> str:
-    """Best-effort prompt summary for `synth list`."""
-    from dlm.data.errors import InstructionParseError
-    from dlm.data.instruction_parser import parse_instruction_body
-
-    try:
-        pairs = parse_instruction_body(content, section_id=section_id)
-    except InstructionParseError:
-        return "<unparseable>"
-    if not pairs:
-        return "<empty>"
-    prompt = pairs[0].question.splitlines()[0].strip()
-    return prompt or "<blank>"
-
-
-# --- harvest --------------------------------------------------------------
-
-
diff --git a/src/dlm/cli/commands/synth.py b/src/dlm/cli/commands/synth.py
new file mode 100644
index 0000000..a00deee
--- /dev/null
+++ b/src/dlm/cli/commands/synth.py
@@ -0,0 +1,377 @@
+"""`dlm synth` — generate / list / revert auto-synth instruction sections."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Annotated, Literal, cast
+
+import typer
+
+
+def synth_instructions_cmd(
+    path: Annotated[
+        Path, typer.Argument(help=".dlm file to synthesize instruction sections from.")
+    ],
+    teacher: Annotated[
+        str,
+        typer.Option(
+            "--teacher",
+            help=(
+                "Teacher selector: self, hf:<model>, openai:<model>, "
+                "anthropic:<model>, or vllm-server:<url>."
+            ),
+        ),
+    ] = "self",
+    per_section: Annotated[
+        int,
+        typer.Option(
+            "--per-section",
+            help="Instruction pairs to generate per prose section.",
+            min=1,
+        ),
+    ] = 3,
+    strategy: Annotated[
+        str,
+        typer.Option(
+            "--strategy",
+            help="Synthesis strategy: extraction, expansion, or both.",
+        ),
+    ] = "extraction",
+    filter_kind: Annotated[
+        str,
+        typer.Option(
+            "--filter",
+            help="Filter pipeline: sway, none, or dedup-only.",
+        ),
+    ] = "sway",
+    threshold: Annotated[
+        float | None,
+        typer.Option(
+            "--threshold",
+            help="Optional minimum sway-judge margin when --filter=sway.",
+            min=0.0,
+        ),
+    ] = None,
+    max_pairs: Annotated[
+        int | None,
+        typer.Option(
+            "--max-pairs",
+            help="Maximum accepted synth pairs to keep from this run.",
+            min=1,
+        ),
+    ] = None,
+    max_new_tokens: Annotated[
+        int,
+        typer.Option(
+            "--max-new-tokens",
+            help="Maximum new tokens the teacher may emit per prompt.",
+            min=1,
+        ),
+    ] = 512,
+    temp: Annotated[
+        float,
+        typer.Option("--temp", help="Teacher sampling temperature.", min=0.0),
+    ] = 0.0,
+    top_p: Annotated[
+        float | None,
+        typer.Option(
+            "--top-p",
+            help="Optional top-p cutoff for teacher sampling.",
+            min=0.0,
+            max=1.0,
+        ),
+    ] = None,
+    seed: Annotated[
+        int | None,
+        typer.Option("--seed", help="Optional teacher sampling seed."),
+    ] = None,
+    apply: Annotated[
+        bool,
+        typer.Option(
+            "--apply",
+            help="Write accepted auto-synth sections directly to the .dlm.",
+        ),
+    ] = False,
+    dry_run: Annotated[
+        bool,
+        typer.Option(
+            "--dry-run",
+            help="Preview the synth plan without staging or writing anything.",
+        ),
+    ] = False,
+) -> None:
+    """Generate, stage, or apply auto-synth instruction sections."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.preference import JudgeUnavailableError, build_judge
+    from dlm.store.paths import for_dlm
+    from dlm.synth import (
+        InvalidTeacherSpecError,
+        TeacherInvocationError,
+        TeacherUnavailableError,
+        build_synth_plan,
+        build_teacher,
+        clear_pending_plan,
+        filter_synth_plan,
+        render_filter_report,
+        render_synth_plan,
+        save_pending_plan,
+    )
+    from dlm.synth import (
+        apply_plan as apply_synth_plan,
+    )
+    from dlm.synth import (
+        build_apply_plan as build_synth_apply_plan,
+    )
+    from dlm.synth import (
+        render_apply_plan as render_synth_apply_plan,
+    )
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    if strategy not in ("extraction", "expansion", "both"):
+        console.print(
+            "[red]synth:[/red] --strategy must be one of extraction|expansion|both "
+            f"(got {strategy!r})."
+        )
+        raise typer.Exit(code=2)
+    if filter_kind not in ("sway", "none", "dedup-only"):
+        console.print(
+            f"[red]synth:[/red] --filter must be one of sway|none|dedup-only (got {filter_kind!r})."
+        )
+        raise typer.Exit(code=2)
+    if apply and dry_run:
+        console.print("[red]synth:[/red] --apply and --dry-run are mutually exclusive.")
+        raise typer.Exit(code=2)
+    if threshold is not None and filter_kind != "sway":
+        console.print("[red]synth:[/red] --threshold is only valid when --filter is `sway`.")
+        raise typer.Exit(code=2)
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+
+    try:
+        strategy_value = cast(Literal["extraction", "expansion", "both"], strategy)
+        teacher_obj = build_teacher(teacher, dlm_path=path)
+        plan = build_synth_plan(
+            parsed,
+            teacher_obj,
+            per_section=per_section,
+            strategy=strategy_value,
+            max_pairs=max_pairs,
+            max_new_tokens=max_new_tokens,
+            temperature=temp,
+            top_p=top_p,
+            seed=seed,
+        )
+    except InvalidTeacherSpecError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    except TeacherUnavailableError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except TeacherInvocationError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ValueError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+
+    judge_obj = None
+    if filter_kind == "sway":
+        try:
+            judge_obj = build_judge("sway", dlm_path=path)
+        except JudgeUnavailableError as exc:
+            console.print(f"[red]synth:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+
+    try:
+        filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind)
+        filtered = filter_synth_plan(
+            plan,
+            filter_kind=filter_value,
+            judge=judge_obj,
+            threshold=threshold,
+        )
+    except ValueError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+
+    out_console.print(render_synth_plan(plan))
+    out_console.print("")
+    out_console.print(render_filter_report(filtered))
+
+    if not filtered.additions:
+        if not dry_run:
+            clear_pending_plan(store)
+        out_console.print(
+            "\n[yellow]no synth additions accepted[/yellow] — either generation "
+            "yielded no valid pairs, dedup removed them, or the filter rejected them."
+        )
+        raise typer.Exit(code=2)
+
+    sections = [addition.addition.section for addition in filtered.additions]
+
+    if apply:
+        apply_plan = build_synth_apply_plan(parsed, sections)
+        out_console.print("")
+        out_console.print(render_synth_apply_plan(apply_plan))
+        summary = apply_synth_plan(parsed, apply_plan, target=path)
+        clear_pending_plan(store)
+        out_console.print(
+            f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} "
+            f"({summary.skipped} skipped)"
+        )
+        return
+
+    if dry_run:
+        out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.")
+        return
+
+    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
+    out_console.print(
+        f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction "
+        f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them."
+    )
+
+
+def synth_revert_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-synth instructions from.")],
+) -> None:
+    """Remove every `auto_synth: true` instruction section from the `.dlm`."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.synth import revert_all_auto_synth
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    summary = revert_all_auto_synth(parsed, target=path)
+    out_console.print(
+        f"[green]synth:[/green] stripped {len(summary.added_section_ids)} "
+        f"auto-synth instruction section(s) from {path}"
+    )
+
+
+def synth_list_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-synth instructions we list.")],
+) -> None:
+    """List applied + staged auto-synth instruction sections."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.doc.sections import SectionType
+    from dlm.store.paths import for_dlm
+    from dlm.synth import PendingSynthPlanError, load_pending_plan
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    try:
+        pending = load_pending_plan(store)
+    except PendingSynthPlanError as exc:
+        console.print(f"[red]synth:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    applied = [
+        section
+        for section in parsed.sections
+        if section.type is SectionType.INSTRUCTION and section.auto_synth
+    ]
+
+    out_console.print(f"[bold]{path}[/bold]")
+    out_console.print(f"  applied auto-synth: {len(applied)}")
+    out_console.print(f"  staged pending:     {len(pending.sections) if pending else 0}")
+
+    if not applied and pending is None:
+        out_console.print("  [dim]no auto-synth instruction sections yet[/dim]")
+        return
+
+    if applied:
+        _render_synth_listing(out_console, "Applied", applied)
+    if pending is not None:
+        _render_synth_listing(out_console, "Pending", pending.sections)
+
+
+def _render_synth_listing(
+    out_console: object,
+    heading: str,
+    sections: Sequence[object],
+) -> None:
+    from collections import Counter
+
+    from rich.console import Console
+
+    from dlm.doc.sections import Section
+
+    assert isinstance(out_console, Console)
+    typed_sections = [section for section in sections if isinstance(section, Section)]
+
+    out_console.print(f"\n[bold]{heading}[/bold]")
+
+    teacher_counts = Counter(section.synth_teacher or "unknown" for section in typed_sections)
+    strategy_counts = Counter(section.synth_strategy or "unknown" for section in typed_sections)
+    source_counts = Counter(section.source_section_id or "unknown" for section in typed_sections)
+
+    out_console.print("  by teacher:")
+    for teacher_name in sorted(teacher_counts):
+        out_console.print(f"    - {teacher_name}: {teacher_counts[teacher_name]}")
+
+    out_console.print("  by strategy:")
+    for strategy_name in sorted(strategy_counts):
+        out_console.print(f"    - {strategy_name}: {strategy_counts[strategy_name]}")
+
+    out_console.print("  by source section:")
+    for source_id in sorted(source_counts):
+        out_console.print(f"    - {source_id}: {source_counts[source_id]}")
+
+    out_console.print("  sections:")
+    for section in typed_sections:
+        prompt = _synth_prompt_summary(section.content, section_id=section.section_id)
+        out_console.print(
+            "    - "
+            f"{section.section_id}  teacher={section.synth_teacher or 'unknown'}  "
+            f"strategy={section.synth_strategy or 'unknown'}  "
+            f"source={section.source_section_id or 'unknown'}  "
+            f"prompt={prompt}"
+        )
+
+
+def _synth_prompt_summary(content: str, *, section_id: str) -> str:
+    """Best-effort prompt summary for `synth list`."""
+    from dlm.data.errors import InstructionParseError
+    from dlm.data.instruction_parser import parse_instruction_body
+
+    try:
+        pairs = parse_instruction_body(content, section_id=section_id)
+    except InstructionParseError:
+        return "<unparseable>"
+    if not pairs:
+        return "<empty>"
+    prompt = pairs[0].question.splitlines()[0].strip()
+    return prompt or "<blank>"

From 29a8b6e236dcc3612c15162f0dfccd2a1c58ab1e Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:07:02 -0400
Subject: [PATCH 18/46] Extract preference_mine/apply/revert/list into
 cli/commands/preference.py

---
 src/dlm/cli/commands/__init__.py   | 451 +---------------------------
 src/dlm/cli/commands/preference.py | 455 +++++++++++++++++++++++++++++
 2 files changed, 459 insertions(+), 447 deletions(-)
 create mode 100644 src/dlm/cli/commands/preference.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 0b2d9d3..b918b7a 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -27,6 +27,10 @@
 from dlm.cli.commands.harvest import harvest_cmd as harvest_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
+from dlm.cli.commands.preference import preference_apply_cmd as preference_apply_cmd
+from dlm.cli.commands.preference import preference_list_cmd as preference_list_cmd
+from dlm.cli.commands.preference import preference_mine_cmd as preference_mine_cmd
+from dlm.cli.commands.preference import preference_revert_cmd as preference_revert_cmd
 from dlm.cli.commands.pull import pull_cmd as pull_cmd
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
@@ -2880,451 +2884,4 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
 
 
-# --- preference -----------------------------------------------------------
-
-
-def preference_mine_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to mine preferences from.")],
-    samples: Annotated[
-        int,
-        typer.Option("--samples", help="Candidate responses to sample per prompt.", min=2),
-    ] = 4,
-    judge: Annotated[
-        str,
-        typer.Option(
-            "--judge",
-            help="Judge selector: sway, hf:<model>, or cli:<cmd>.",
-        ),
-    ] = "sway",
-    threshold: Annotated[
-        float | None,
-        typer.Option(
-            "--threshold",
-            help="Minimum chosen-vs-rejected score margin. Defaults to the judge's native threshold.",
-            min=0.0,
-        ),
-    ] = None,
-    max_pairs: Annotated[
-        int | None,
-        typer.Option(
-            "--max-pairs",
-            help="Maximum mined preference pairs to keep from this run.",
-            min=1,
-        ),
-    ] = None,
-    temp: Annotated[
-        float,
-        typer.Option("--temp", help="Sampling temperature for candidate generation.", min=0.0),
-    ] = 0.7,
-    top_p: Annotated[
-        float | None,
-        typer.Option(
-            "--top-p",
-            help="Optional nucleus-sampling cutoff for candidate generation.",
-            min=0.0,
-            max=1.0,
-        ),
-    ] = None,
-    backend: Annotated[
-        str,
-        typer.Option(
-            "--backend",
-            help="Generation backend: auto, pytorch, or mlx.",
-        ),
-    ] = "auto",
-    adapter: Annotated[
-        str | None,
-        typer.Option(
-            "--adapter",
-            help=(
-                "Named adapter to mine from on multi-adapter documents. "
-                "Required there; invalid on single-adapter documents."
-            ),
-        ),
-    ] = None,
-    apply: Annotated[
-        bool,
-        typer.Option(
-            "--apply",
-            help=(
-                "Write mined preference sections directly to the .dlm. "
-                "Default stages them for `dlm preference apply`."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """Sample + stage auto-mined preference sections from the current adapter."""
-    from rich.console import Console
-
-    from dlm.base_models import GatedModelError
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.hardware import doctor
-    from dlm.inference import AdapterNotFoundError
-    from dlm.inference.backends import (
-        UnsupportedBackendError,
-        build_backend,
-        select_backend,
-    )
-    from dlm.metrics import MetricsRecorder, PreferenceMineEvent
-    from dlm.metrics.events import PreferenceMineWriteMode
-    from dlm.modality import modality_for
-    from dlm.preference import (
-        InvalidJudgeSpecError,
-        JudgeUnavailableError,
-        build_apply_plan,
-        build_judge,
-        build_mine_plan,
-        render_apply_plan,
-        render_mine_plan,
-    )
-    from dlm.preference.apply import apply_plan as apply_preference_plan
-    from dlm.preference.pending import clear_pending_plan, save_pending_plan
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    if backend not in ("auto", "pytorch", "mlx"):
-        console.print(
-            f"[red]preference:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
-        )
-        raise typer.Exit(code=2)
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    adapters_declared = parsed.frontmatter.training.adapters
-    if adapter is not None:
-        if adapters_declared is None:
-            console.print(
-                "[red]preference:[/red] --adapter is only valid on multi-adapter "
-                "documents (this doc does not declare `training.adapters`)."
-            )
-            raise typer.Exit(code=2)
-        if adapter not in adapters_declared:
-            declared = sorted(adapters_declared)
-            console.print(
-                f"[red]preference:[/red] --adapter {adapter!r} is not declared "
-                f"(declared: {declared})."
-            )
-            raise typer.Exit(code=2)
-    elif adapters_declared is not None:
-        console.print(
-            "[red]preference:[/red] multi-adapter documents require --adapter "
-            "so mining knows which adapter to sample."
-        )
-        raise typer.Exit(code=2)
-
-    judge_kind = judge.split(":", 1)[0].strip()
-    if adapter is not None and judge_kind == "sway":
-        console.print(
-            "[red]preference:[/red] --judge sway is not yet wired for named adapters; "
-            "use `hf:<model>` or `cli:<cmd>` for multi-adapter mining."
-        )
-        raise typer.Exit(code=2)
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    run_id = _latest_training_run_id(store)
-    if run_id is None:
-        console.print(
-            "[red]preference:[/red] mining requires a prior training run (run `dlm train` first)."
-        )
-        raise typer.Exit(code=1)
-
-    already_accepted = _previously_accepted(store.manifest)
-    try:
-        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
-    except GatedModelError as exc:
-        console.print(
-            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
-            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
-        )
-        raise typer.Exit(code=1) from exc
-
-    dispatch = modality_for(spec)
-    if dispatch.accepts_images or dispatch.accepts_audio:
-        console.print(
-            f"[red]preference:[/red] preference mining currently supports text bases only; "
-            f"base {spec.key!r} is modality='{spec.modality}'."
-        )
-        raise typer.Exit(code=2)
-
-    caps = doctor().capabilities
-    try:
-        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
-    except UnsupportedBackendError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    backend_obj = build_backend(backend_name, caps)
-
-    try:
-        backend_obj.load(spec, store, adapter_name=adapter)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    try:
-        judge_obj = build_judge(judge, dlm_path=path)
-        plan = build_mine_plan(
-            parsed,
-            backend_obj,
-            judge_obj,
-            mined_run_id=run_id,
-            samples=samples,
-            max_pairs=max_pairs,
-            threshold=threshold,
-            temperature=temp,
-            top_p=top_p,
-        )
-    except InvalidJudgeSpecError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    except JudgeUnavailableError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ValueError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    finally:
-        backend_obj.unload()
-
-    recorder = MetricsRecorder(store.root)
-
-    def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
-        recorder.record_preference_mine(
-            PreferenceMineEvent(
-                run_id=run_id,
-                judge_name=judge_obj.name,
-                sample_count=samples,
-                mined_pairs=len(plan.additions),
-                skipped_prompts=len(plan.skipped),
-                write_mode=write_mode,
-            )
-        )
-
-    out_console.print(render_mine_plan(plan))
-
-    if not plan.additions:
-        clear_pending_plan(store)
-        _record_preference_mine("empty")
-        out_console.print(
-            "\n[yellow]no candidates to mine[/yellow] — either instruction prompts "
-            "did not yield a confident pair, or the matching preference sections "
-            "already exist in the document."
-        )
-        raise typer.Exit(code=2)
-
-    sections = [addition.section for addition in plan.additions]
-
-    if apply:
-        apply_plan = build_apply_plan(parsed, sections)
-        out_console.print("")
-        out_console.print(render_apply_plan(apply_plan))
-        summary = apply_preference_plan(parsed, apply_plan, target=path)
-        clear_pending_plan(store)
-        _record_preference_mine("applied")
-        out_console.print(
-            f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
-            f"({summary.skipped} skipped)"
-        )
-        return
-
-    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
-    _record_preference_mine("staged")
-    out_console.print(
-        f"\n[green]preference:[/green] staged {len(pending.sections)} mined preference "
-        f"section(s). Run [bold]dlm preference apply {path}[/bold] to write them."
-    )
-
-
-def preference_apply_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to apply staged preferences into.")],
-) -> None:
-    """Write the staged preference-mine plan into the `.dlm`."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.preference import build_apply_plan, render_apply_plan
-    from dlm.preference.apply import apply_plan as apply_preference_plan
-    from dlm.preference.pending import (
-        PendingPreferencePlanError,
-        clear_pending_plan,
-        load_pending_plan,
-    )
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    try:
-        pending = load_pending_plan(store)
-    except PendingPreferencePlanError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if pending is None:
-        console.print(
-            "[red]preference:[/red] no staged mined preferences found; "
-            "run `dlm preference mine` first."
-        )
-        raise typer.Exit(code=1)
-
-    plan = build_apply_plan(parsed, list(pending.sections))
-    out_console.print(render_apply_plan(plan))
-
-    if not plan.additions:
-        clear_pending_plan(store)
-        out_console.print(
-            "\n[yellow]no staged preferences to write[/yellow] — the pending plan was "
-            "already present in the document."
-        )
-        raise typer.Exit(code=2)
-
-    summary = apply_preference_plan(parsed, plan, target=path)
-    clear_pending_plan(store)
-    out_console.print(
-        f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
-        f"({summary.skipped} skipped)"
-    )
-
-
-def preference_revert_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-mined preferences from.")],
-) -> None:
-    """Remove every `auto_mined: true` preference section from the `.dlm`."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.preference import revert_all_auto_mined
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    summary = revert_all_auto_mined(parsed, target=path)
-    out_console.print(
-        f"[green]preference:[/green] stripped {len(summary.added_section_ids)} "
-        f"auto-mined preference section(s) from {path}"
-    )
-
-
-def preference_list_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-mined preferences we list.")],
-) -> None:
-    """List applied + staged auto-mined preference sections."""
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.doc.sections import SectionType
-    from dlm.preference.pending import PendingPreferencePlanError, load_pending_plan
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    try:
-        pending = load_pending_plan(store)
-    except PendingPreferencePlanError as exc:
-        console.print(f"[red]preference:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    applied = [
-        section
-        for section in parsed.sections
-        if section.type is SectionType.PREFERENCE and section.auto_mined
-    ]
-
-    out_console.print(f"[bold]{path}[/bold]")
-    out_console.print(f"  applied auto-mined: {len(applied)}")
-    out_console.print(f"  staged pending:     {len(pending.sections) if pending else 0}")
-
-    if not applied and pending is None:
-        out_console.print("  [dim]no auto-mined preference sections yet[/dim]")
-        return
-
-    if applied:
-        out_console.print("\n[bold]Applied[/bold]")
-        for section in applied:
-            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
-            judge_name = section.judge_name or "unknown"
-            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
-            out_console.print(
-                f"  - {section.section_id}  judge={judge_name}  run={run_id}  prompt={prompt}"
-            )
-
-    if pending is not None:
-        out_console.print("\n[bold]Pending[/bold]")
-        for section in pending.sections:
-            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
-            judge_name = section.judge_name or "unknown"
-            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
-            out_console.print(
-                f"  - {section.section_id}  judge={judge_name}  run={run_id}  prompt={prompt}"
-            )
-
-
-def _latest_training_run_id(store: object) -> int | None:
-    """Most recent run id from metrics DB or manifest."""
-    from dlm.metrics.queries import latest_run_id
-    from dlm.store.errors import ManifestCorruptError
-    from dlm.store.manifest import load_manifest
-    from dlm.store.paths import StorePath
-
-    assert isinstance(store, StorePath)
-
-    metrics_run_id = latest_run_id(store.root)
-    if metrics_run_id is not None:
-        return metrics_run_id
-    if not store.manifest.exists():
-        return None
-    try:
-        manifest = load_manifest(store.manifest)
-    except (ManifestCorruptError, OSError):
-        return None
-    if not manifest.training_runs:
-        return None
-    return max(run.run_id for run in manifest.training_runs)
-
-
-def _preference_prompt_summary(content: str, *, section_id: str) -> str:
-    """Best-effort prompt summary for `preference list`."""
-    from dlm.data.errors import PreferenceParseError
-    from dlm.data.preference_parser import parse_preference_body
-
-    try:
-        triples = parse_preference_body(content, section_id=section_id)
-    except PreferenceParseError:
-        return "<unparseable>"
-    if not triples:
-        return "<empty>"
-    prompt = triples[0].prompt.splitlines()[0].strip()
-    return prompt or "<blank>"
-
 
diff --git a/src/dlm/cli/commands/preference.py b/src/dlm/cli/commands/preference.py
new file mode 100644
index 0000000..21f3fce
--- /dev/null
+++ b/src/dlm/cli/commands/preference.py
@@ -0,0 +1,455 @@
+"""`dlm preference` — mine / apply / revert / list preference sections."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from dlm.cli.commands._shared import _previously_accepted
+
+
+def preference_mine_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to mine preferences from.")],
+    samples: Annotated[
+        int,
+        typer.Option("--samples", help="Candidate responses to sample per prompt.", min=2),
+    ] = 4,
+    judge: Annotated[
+        str,
+        typer.Option(
+            "--judge",
+            help="Judge selector: sway, hf:<model>, or cli:<cmd>.",
+        ),
+    ] = "sway",
+    threshold: Annotated[
+        float | None,
+        typer.Option(
+            "--threshold",
+            help="Minimum chosen-vs-rejected score margin. Defaults to the judge's native threshold.",
+            min=0.0,
+        ),
+    ] = None,
+    max_pairs: Annotated[
+        int | None,
+        typer.Option(
+            "--max-pairs",
+            help="Maximum mined preference pairs to keep from this run.",
+            min=1,
+        ),
+    ] = None,
+    temp: Annotated[
+        float,
+        typer.Option("--temp", help="Sampling temperature for candidate generation.", min=0.0),
+    ] = 0.7,
+    top_p: Annotated[
+        float | None,
+        typer.Option(
+            "--top-p",
+            help="Optional nucleus-sampling cutoff for candidate generation.",
+            min=0.0,
+            max=1.0,
+        ),
+    ] = None,
+    backend: Annotated[
+        str,
+        typer.Option(
+            "--backend",
+            help="Generation backend: auto, pytorch, or mlx.",
+        ),
+    ] = "auto",
+    adapter: Annotated[
+        str | None,
+        typer.Option(
+            "--adapter",
+            help=(
+                "Named adapter to mine from on multi-adapter documents. "
+                "Required there; invalid on single-adapter documents."
+            ),
+        ),
+    ] = None,
+    apply: Annotated[
+        bool,
+        typer.Option(
+            "--apply",
+            help=(
+                "Write mined preference sections directly to the .dlm. "
+                "Default stages them for `dlm preference apply`."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """Sample + stage auto-mined preference sections from the current adapter."""
+    from rich.console import Console
+
+    from dlm.base_models import GatedModelError
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.hardware import doctor
+    from dlm.inference import AdapterNotFoundError
+    from dlm.inference.backends import (
+        UnsupportedBackendError,
+        build_backend,
+        select_backend,
+    )
+    from dlm.metrics import MetricsRecorder, PreferenceMineEvent
+    from dlm.metrics.events import PreferenceMineWriteMode
+    from dlm.modality import modality_for
+    from dlm.preference import (
+        InvalidJudgeSpecError,
+        JudgeUnavailableError,
+        build_apply_plan,
+        build_judge,
+        build_mine_plan,
+        render_apply_plan,
+        render_mine_plan,
+    )
+    from dlm.preference.apply import apply_plan as apply_preference_plan
+    from dlm.preference.pending import clear_pending_plan, save_pending_plan
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    if backend not in ("auto", "pytorch", "mlx"):
+        console.print(
+            f"[red]preference:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
+        )
+        raise typer.Exit(code=2)
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    adapters_declared = parsed.frontmatter.training.adapters
+    if adapter is not None:
+        if adapters_declared is None:
+            console.print(
+                "[red]preference:[/red] --adapter is only valid on multi-adapter "
+                "documents (this doc does not declare `training.adapters`)."
+            )
+            raise typer.Exit(code=2)
+        if adapter not in adapters_declared:
+            declared = sorted(adapters_declared)
+            console.print(
+                f"[red]preference:[/red] --adapter {adapter!r} is not declared "
+                f"(declared: {declared})."
+            )
+            raise typer.Exit(code=2)
+    elif adapters_declared is not None:
+        console.print(
+            "[red]preference:[/red] multi-adapter documents require --adapter "
+            "so mining knows which adapter to sample."
+        )
+        raise typer.Exit(code=2)
+
+    judge_kind = judge.split(":", 1)[0].strip()
+    if adapter is not None and judge_kind == "sway":
+        console.print(
+            "[red]preference:[/red] --judge sway is not yet wired for named adapters; "
+            "use `hf:<model>` or `cli:<cmd>` for multi-adapter mining."
+        )
+        raise typer.Exit(code=2)
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    run_id = _latest_training_run_id(store)
+    if run_id is None:
+        console.print(
+            "[red]preference:[/red] mining requires a prior training run (run `dlm train` first)."
+        )
+        raise typer.Exit(code=1)
+
+    already_accepted = _previously_accepted(store.manifest)
+    try:
+        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
+    except GatedModelError as exc:
+        console.print(
+            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
+            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
+        )
+        raise typer.Exit(code=1) from exc
+
+    dispatch = modality_for(spec)
+    if dispatch.accepts_images or dispatch.accepts_audio:
+        console.print(
+            f"[red]preference:[/red] preference mining currently supports text bases only; "
+            f"base {spec.key!r} is modality='{spec.modality}'."
+        )
+        raise typer.Exit(code=2)
+
+    caps = doctor().capabilities
+    try:
+        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
+    except UnsupportedBackendError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    backend_obj = build_backend(backend_name, caps)
+
+    try:
+        backend_obj.load(spec, store, adapter_name=adapter)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    try:
+        judge_obj = build_judge(judge, dlm_path=path)
+        plan = build_mine_plan(
+            parsed,
+            backend_obj,
+            judge_obj,
+            mined_run_id=run_id,
+            samples=samples,
+            max_pairs=max_pairs,
+            threshold=threshold,
+            temperature=temp,
+            top_p=top_p,
+        )
+    except InvalidJudgeSpecError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    except JudgeUnavailableError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ValueError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    finally:
+        backend_obj.unload()
+
+    recorder = MetricsRecorder(store.root)
+
+    def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
+        recorder.record_preference_mine(
+            PreferenceMineEvent(
+                run_id=run_id,
+                judge_name=judge_obj.name,
+                sample_count=samples,
+                mined_pairs=len(plan.additions),
+                skipped_prompts=len(plan.skipped),
+                write_mode=write_mode,
+            )
+        )
+
+    out_console.print(render_mine_plan(plan))
+
+    if not plan.additions:
+        clear_pending_plan(store)
+        _record_preference_mine("empty")
+        out_console.print(
+            "\n[yellow]no candidates to mine[/yellow] — either instruction prompts "
+            "did not yield a confident pair, or the matching preference sections "
+            "already exist in the document."
+        )
+        raise typer.Exit(code=2)
+
+    sections = [addition.section for addition in plan.additions]
+
+    if apply:
+        apply_plan = build_apply_plan(parsed, sections)
+        out_console.print("")
+        out_console.print(render_apply_plan(apply_plan))
+        summary = apply_preference_plan(parsed, apply_plan, target=path)
+        clear_pending_plan(store)
+        _record_preference_mine("applied")
+        out_console.print(
+            f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
+            f"({summary.skipped} skipped)"
+        )
+        return
+
+    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
+    _record_preference_mine("staged")
+    out_console.print(
+        f"\n[green]preference:[/green] staged {len(pending.sections)} mined preference "
+        f"section(s). Run [bold]dlm preference apply {path}[/bold] to write them."
+    )
+
+
+def preference_apply_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to apply staged preferences into.")],
+) -> None:
+    """Write the staged preference-mine plan into the `.dlm`."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.preference import build_apply_plan, render_apply_plan
+    from dlm.preference.apply import apply_plan as apply_preference_plan
+    from dlm.preference.pending import (
+        PendingPreferencePlanError,
+        clear_pending_plan,
+        load_pending_plan,
+    )
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    try:
+        pending = load_pending_plan(store)
+    except PendingPreferencePlanError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if pending is None:
+        console.print(
+            "[red]preference:[/red] no staged mined preferences found; "
+            "run `dlm preference mine` first."
+        )
+        raise typer.Exit(code=1)
+
+    plan = build_apply_plan(parsed, list(pending.sections))
+    out_console.print(render_apply_plan(plan))
+
+    if not plan.additions:
+        clear_pending_plan(store)
+        out_console.print(
+            "\n[yellow]no staged preferences to write[/yellow] — the pending plan was "
+            "already present in the document."
+        )
+        raise typer.Exit(code=2)
+
+    summary = apply_preference_plan(parsed, plan, target=path)
+    clear_pending_plan(store)
+    out_console.print(
+        f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
+        f"({summary.skipped} skipped)"
+    )
+
+
+def preference_revert_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to strip auto-mined preferences from.")],
+) -> None:
+    """Remove every `auto_mined: true` preference section from the `.dlm`."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.preference import revert_all_auto_mined
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    summary = revert_all_auto_mined(parsed, target=path)
+    out_console.print(
+        f"[green]preference:[/green] stripped {len(summary.added_section_ids)} "
+        f"auto-mined preference section(s) from {path}"
+    )
+
+
+def preference_list_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file whose auto-mined preferences we list.")],
+) -> None:
+    """List applied + staged auto-mined preference sections."""
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.doc.sections import SectionType
+    from dlm.preference.pending import PendingPreferencePlanError, load_pending_plan
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    try:
+        pending = load_pending_plan(store)
+    except PendingPreferencePlanError as exc:
+        console.print(f"[red]preference:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    applied = [
+        section
+        for section in parsed.sections
+        if section.type is SectionType.PREFERENCE and section.auto_mined
+    ]
+
+    out_console.print(f"[bold]{path}[/bold]")
+    out_console.print(f"  applied auto-mined: {len(applied)}")
+    out_console.print(f"  staged pending:     {len(pending.sections) if pending else 0}")
+
+    if not applied and pending is None:
+        out_console.print("  [dim]no auto-mined preference sections yet[/dim]")
+        return
+
+    if applied:
+        out_console.print("\n[bold]Applied[/bold]")
+        for section in applied:
+            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
+            judge_name = section.judge_name or "unknown"
+            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
+            out_console.print(
+                f"  - {section.section_id}  judge={judge_name}  run={run_id}  prompt={prompt}"
+            )
+
+    if pending is not None:
+        out_console.print("\n[bold]Pending[/bold]")
+        for section in pending.sections:
+            prompt = _preference_prompt_summary(section.content, section_id=section.section_id)
+            judge_name = section.judge_name or "unknown"
+            run_id = section.mined_run_id if section.mined_run_id is not None else "?"
+            out_console.print(
+                f"  - {section.section_id}  judge={judge_name}  run={run_id}  prompt={prompt}"
+            )
+
+
+def _latest_training_run_id(store: object) -> int | None:
+    """Most recent run id from metrics DB or manifest."""
+    from dlm.metrics.queries import latest_run_id
+    from dlm.store.errors import ManifestCorruptError
+    from dlm.store.manifest import load_manifest
+    from dlm.store.paths import StorePath
+
+    assert isinstance(store, StorePath)
+
+    metrics_run_id = latest_run_id(store.root)
+    if metrics_run_id is not None:
+        return metrics_run_id
+    if not store.manifest.exists():
+        return None
+    try:
+        manifest = load_manifest(store.manifest)
+    except (ManifestCorruptError, OSError):
+        return None
+    if not manifest.training_runs:
+        return None
+    return max(run.run_id for run in manifest.training_runs)
+
+
+def _preference_prompt_summary(content: str, *, section_id: str) -> str:
+    """Best-effort prompt summary for `preference list`."""
+    from dlm.data.errors import PreferenceParseError
+    from dlm.data.preference_parser import parse_preference_body
+
+    try:
+        triples = parse_preference_body(content, section_id=section_id)
+    except PreferenceParseError:
+        return "<unparseable>"
+    if not triples:
+        return "<empty>"
+    prompt = triples[0].prompt.splitlines()[0].strip()
+    return prompt or "<blank>"

From c4cc7afd995d47bc8298e1082b0c89677c7b51b8 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:08:40 -0400
Subject: [PATCH 19/46] Extract metrics_cmd + metrics_watch_cmd into
 cli/commands/metrics.py

---
 src/dlm/cli/commands/__init__.py | 223 +------------------------------
 src/dlm/cli/commands/metrics.py  | 220 ++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+), 220 deletions(-)
 create mode 100644 src/dlm/cli/commands/metrics.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index b918b7a..1c1af6b 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -13,7 +13,7 @@
 import os
 from collections.abc import Sequence
 from pathlib import Path
-from typing import TYPE_CHECKING, Annotated, Any, Literal
+from typing import Annotated, Any, Literal
 
 import typer
 
@@ -25,6 +25,8 @@
 from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.harvest import harvest_cmd as harvest_cmd
+from dlm.cli.commands.metrics import metrics_cmd as metrics_cmd
+from dlm.cli.commands.metrics import metrics_watch_cmd as metrics_watch_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
 from dlm.cli.commands.pack import pack_cmd as pack_cmd
 from dlm.cli.commands.preference import preference_apply_cmd as preference_apply_cmd
@@ -42,9 +44,6 @@
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
 
-if TYPE_CHECKING:
-    from datetime import timedelta
-
 
 def _stub(sprint: str, subject: str) -> None:
     """Raise a clear unimplemented error pointing to the owning sprint."""
@@ -2190,219 +2189,6 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def metrics_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file whose store we query.")],
-    json_out: Annotated[bool, typer.Option("--json", help="Emit JSON.")] = False,
-    csv_out: Annotated[bool, typer.Option("--csv", help="Emit CSV.")] = False,
-    run_id: Annotated[
-        int | None,
-        typer.Option("--run-id", help="Only show this run (drill-down)."),
-    ] = None,
-    phase: Annotated[
-        str | None,
-        typer.Option("--phase", help="Filter by phase: sft|dpo|orpo|cpt."),
-    ] = None,
-    since: Annotated[
-        str | None,
-        typer.Option(
-            "--since",
-            help="Time window (e.g. `24h`, `7d`, `30m`). Filters `started_at`.",
-        ),
-    ] = None,
-    limit: Annotated[int, typer.Option("--limit")] = 20,
-) -> None:
-    """Query the per-store metrics database."""
-    import csv
-    import json
-    import sys
-
-    from rich.console import Console
-
-    from dlm.doc.parser import parse_file
-    from dlm.metrics.queries import (
-        evals_for_run,
-        evals_to_dict,
-        preference_mining_for_run,
-        preference_mining_to_dict,
-        recent_runs,
-        runs_to_dict,
-        steps_for_run,
-        steps_to_dict,
-    )
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    if json_out and csv_out:
-        console.print("[red]metrics:[/red] --json and --csv are mutually exclusive")
-        raise typer.Exit(code=2)
-
-    since_delta = _parse_since_arg(since, console) if since else None
-
-    parsed = parse_file(path)
-    store = for_dlm(parsed.frontmatter.dlm_id)
-
-    runs = recent_runs(store.root, limit=limit, phase=phase, since=since_delta, run_id=run_id)
-
-    if run_id is not None:
-        # Drill-down: show this run's steps + evals.
-        if not runs:
-            console.print(f"[red]metrics:[/red] no run with run_id={run_id}")
-            raise typer.Exit(code=1)
-        run = runs[0]
-        steps = steps_for_run(store.root, run_id)
-        evals = evals_for_run(store.root, run_id)
-        preference_rows = preference_mining_for_run(store.root, run_id)
-
-        if json_out:
-            payload = {
-                "run": runs_to_dict([run])[0],
-                "steps": steps_to_dict(steps),
-                "evals": evals_to_dict(evals),
-                "preference_mining": preference_mining_to_dict(preference_rows),
-            }
-            sys.stdout.write(json.dumps(payload, indent=2) + "\n")
-            return
-        if csv_out:
-            writer = csv.writer(sys.stdout)
-            writer.writerow(["step", "loss", "lr", "grad_norm", "val_loss"])
-            eval_by_step = {e.step: e.val_loss for e in evals}
-            for s in steps:
-                writer.writerow([s.step, s.loss, s.lr, s.grad_norm, eval_by_step.get(s.step)])
-            return
-        console.print(
-            f"[green]run_id={run.run_id}[/green]  phase={run.phase}  "
-            f"seed={run.seed}  status={run.status}  steps={len(steps)}  "
-            f"evals={len(evals)}"
-        )
-        if evals:
-            last = evals[-1]
-            console.print(
-                f"  last eval: step={last.step}  val_loss={last.val_loss}  "
-                f"perplexity={last.perplexity}"
-            )
-        if preference_rows:
-            last_pref = preference_rows[-1]
-            console.print(
-                "  preference mining: "
-                f"events={len(preference_rows)}  "
-                f"mined_pairs={sum(row.mined_pairs for row in preference_rows)}  "
-                f"skipped_prompts={sum(row.skipped_prompts for row in preference_rows)}  "
-                f"last_mode={last_pref.write_mode}  "
-                f"judge={last_pref.judge_name}"
-            )
-        return
-
-    # Top-level: list runs.
-    if json_out:
-        sys.stdout.write(json.dumps({"runs": runs_to_dict(runs)}, indent=2) + "\n")
-        return
-    if csv_out:
-        writer = csv.writer(sys.stdout)
-        writer.writerow(["run_id", "phase", "seed", "status", "started_at", "ended_at"])
-        for r in runs:
-            writer.writerow([r.run_id, r.phase, r.seed, r.status, r.started_at, r.ended_at])
-        return
-
-    if not runs:
-        console.print("[dim]metrics:[/dim] no runs found (hint: train first, or adjust filters)")
-        return
-    console.print(f"[bold]Runs: {len(runs)}[/bold]")
-    for r in runs:
-        console.print(
-            f"  run_id={r.run_id}  phase={r.phase}  seed={r.seed}  "
-            f"status={r.status}  started={r.started_at}"
-        )
-
-
-def metrics_watch_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file whose store we tail.")],
-    poll_seconds: Annotated[
-        float,
-        typer.Option("--poll-seconds", help="How often to re-read the metrics DB."),
-    ] = 1.0,
-) -> None:
-    """Tail the metrics DB: print new steps/evals as they land."""
-    import time
-
-    from rich.console import Console
-
-    from dlm.doc.parser import parse_file
-    from dlm.metrics.queries import evals_for_run, latest_run_id, steps_for_run
-    from dlm.store.paths import for_dlm
-
-    console = Console()
-
-    parsed = parse_file(path)
-    store = for_dlm(parsed.frontmatter.dlm_id)
-
-    console.print(
-        f"[dim]metrics watch:[/dim] polling {store.root} every {poll_seconds}s (Ctrl-C to exit)"
-    )
-
-    current_run: int | None = None
-    last_step_seen = 0
-    last_eval_step_seen = 0
-    try:
-        while True:
-            run_id = latest_run_id(store.root)
-            if run_id is None:
-                time.sleep(poll_seconds)
-                continue
-            if run_id != current_run:
-                current_run = run_id
-                last_step_seen = 0
-                last_eval_step_seen = 0
-                console.print(f"[green]→ following run_id={run_id}[/green]")
-
-            new_steps = steps_for_run(store.root, run_id, since_step=last_step_seen)
-            for s in new_steps:
-                console.print(
-                    f"  step {s.step:>5}  loss={s.loss}  lr={s.lr}  grad_norm={s.grad_norm}"
-                )
-                last_step_seen = s.step
-
-            new_evals = evals_for_run(store.root, run_id, since_step=last_eval_step_seen)
-            for e in new_evals:
-                console.print(
-                    f"  [yellow]eval @ step {e.step}[/yellow]  "
-                    f"val_loss={e.val_loss}  perplexity={e.perplexity}"
-                )
-                last_eval_step_seen = e.step
-
-            time.sleep(poll_seconds)
-    except KeyboardInterrupt:
-        console.print("[dim]metrics watch:[/dim] bye")
-
-
-def _parse_since_arg(since: str, console: object) -> timedelta:
-    """Parse `24h` / `7d` / `30m` / `10s` into a timedelta."""
-    from datetime import timedelta
-
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-
-    if not since:
-        raise typer.Exit(code=2)
-    unit = since[-1].lower()
-    try:
-        value = int(since[:-1])
-    except ValueError:
-        console.print(f"[red]metrics:[/red] --since {since!r} not an integer+unit")
-        raise typer.Exit(code=2) from None
-    if unit == "s":
-        return timedelta(seconds=value)
-    if unit == "m":
-        return timedelta(minutes=value)
-    if unit == "h":
-        return timedelta(hours=value)
-    if unit == "d":
-        return timedelta(days=value)
-    console.print(f"[red]metrics:[/red] --since {since!r} unit must be s/m/h/d")
-    raise typer.Exit(code=2)
-
-
 def show_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
     json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
@@ -2882,6 +2668,3 @@ def _render_training_sources_text(console: object, records: list[dict[str, objec
         else:
             size = int(tb) if isinstance(tb, int) else 0
             console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
-
-
-
diff --git a/src/dlm/cli/commands/metrics.py b/src/dlm/cli/commands/metrics.py
new file mode 100644
index 0000000..ebbbd73
--- /dev/null
+++ b/src/dlm/cli/commands/metrics.py
@@ -0,0 +1,220 @@
+"""`dlm metrics` — query / watch the per-store metrics database."""
+
+from __future__ import annotations
+
+from datetime import timedelta
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+
+def metrics_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file whose store we query.")],
+    json_out: Annotated[bool, typer.Option("--json", help="Emit JSON.")] = False,
+    csv_out: Annotated[bool, typer.Option("--csv", help="Emit CSV.")] = False,
+    run_id: Annotated[
+        int | None,
+        typer.Option("--run-id", help="Only show this run (drill-down)."),
+    ] = None,
+    phase: Annotated[
+        str | None,
+        typer.Option("--phase", help="Filter by phase: sft|dpo|orpo|cpt."),
+    ] = None,
+    since: Annotated[
+        str | None,
+        typer.Option(
+            "--since",
+            help="Time window (e.g. `24h`, `7d`, `30m`). Filters `started_at`.",
+        ),
+    ] = None,
+    limit: Annotated[int, typer.Option("--limit")] = 20,
+) -> None:
+    """Query the per-store metrics database."""
+    import csv
+    import json
+    import sys
+
+    from rich.console import Console
+
+    from dlm.doc.parser import parse_file
+    from dlm.metrics.queries import (
+        evals_for_run,
+        evals_to_dict,
+        preference_mining_for_run,
+        preference_mining_to_dict,
+        recent_runs,
+        runs_to_dict,
+        steps_for_run,
+        steps_to_dict,
+    )
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    if json_out and csv_out:
+        console.print("[red]metrics:[/red] --json and --csv are mutually exclusive")
+        raise typer.Exit(code=2)
+
+    since_delta = _parse_since_arg(since, console) if since else None
+
+    parsed = parse_file(path)
+    store = for_dlm(parsed.frontmatter.dlm_id)
+
+    runs = recent_runs(store.root, limit=limit, phase=phase, since=since_delta, run_id=run_id)
+
+    if run_id is not None:
+        # Drill-down: show this run's steps + evals.
+        if not runs:
+            console.print(f"[red]metrics:[/red] no run with run_id={run_id}")
+            raise typer.Exit(code=1)
+        run = runs[0]
+        steps = steps_for_run(store.root, run_id)
+        evals = evals_for_run(store.root, run_id)
+        preference_rows = preference_mining_for_run(store.root, run_id)
+
+        if json_out:
+            payload = {
+                "run": runs_to_dict([run])[0],
+                "steps": steps_to_dict(steps),
+                "evals": evals_to_dict(evals),
+                "preference_mining": preference_mining_to_dict(preference_rows),
+            }
+            sys.stdout.write(json.dumps(payload, indent=2) + "\n")
+            return
+        if csv_out:
+            writer = csv.writer(sys.stdout)
+            writer.writerow(["step", "loss", "lr", "grad_norm", "val_loss"])
+            eval_by_step = {e.step: e.val_loss for e in evals}
+            for s in steps:
+                writer.writerow([s.step, s.loss, s.lr, s.grad_norm, eval_by_step.get(s.step)])
+            return
+        console.print(
+            f"[green]run_id={run.run_id}[/green]  phase={run.phase}  "
+            f"seed={run.seed}  status={run.status}  steps={len(steps)}  "
+            f"evals={len(evals)}"
+        )
+        if evals:
+            last = evals[-1]
+            console.print(
+                f"  last eval: step={last.step}  val_loss={last.val_loss}  "
+                f"perplexity={last.perplexity}"
+            )
+        if preference_rows:
+            last_pref = preference_rows[-1]
+            console.print(
+                "  preference mining: "
+                f"events={len(preference_rows)}  "
+                f"mined_pairs={sum(row.mined_pairs for row in preference_rows)}  "
+                f"skipped_prompts={sum(row.skipped_prompts for row in preference_rows)}  "
+                f"last_mode={last_pref.write_mode}  "
+                f"judge={last_pref.judge_name}"
+            )
+        return
+
+    # Top-level: list runs.
+    if json_out:
+        sys.stdout.write(json.dumps({"runs": runs_to_dict(runs)}, indent=2) + "\n")
+        return
+    if csv_out:
+        writer = csv.writer(sys.stdout)
+        writer.writerow(["run_id", "phase", "seed", "status", "started_at", "ended_at"])
+        for r in runs:
+            writer.writerow([r.run_id, r.phase, r.seed, r.status, r.started_at, r.ended_at])
+        return
+
+    if not runs:
+        console.print("[dim]metrics:[/dim] no runs found (hint: train first, or adjust filters)")
+        return
+    console.print(f"[bold]Runs: {len(runs)}[/bold]")
+    for r in runs:
+        console.print(
+            f"  run_id={r.run_id}  phase={r.phase}  seed={r.seed}  "
+            f"status={r.status}  started={r.started_at}"
+        )
+
+
+def metrics_watch_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file whose store we tail.")],
+    poll_seconds: Annotated[
+        float,
+        typer.Option("--poll-seconds", help="How often to re-read the metrics DB."),
+    ] = 1.0,
+) -> None:
+    """Tail the metrics DB: print new steps/evals as they land."""
+    import time
+
+    from rich.console import Console
+
+    from dlm.doc.parser import parse_file
+    from dlm.metrics.queries import evals_for_run, latest_run_id, steps_for_run
+    from dlm.store.paths import for_dlm
+
+    console = Console()
+
+    parsed = parse_file(path)
+    store = for_dlm(parsed.frontmatter.dlm_id)
+
+    console.print(
+        f"[dim]metrics watch:[/dim] polling {store.root} every {poll_seconds}s (Ctrl-C to exit)"
+    )
+
+    current_run: int | None = None
+    last_step_seen = 0
+    last_eval_step_seen = 0
+    try:
+        while True:
+            run_id = latest_run_id(store.root)
+            if run_id is None:
+                time.sleep(poll_seconds)
+                continue
+            if run_id != current_run:
+                current_run = run_id
+                last_step_seen = 0
+                last_eval_step_seen = 0
+                console.print(f"[green]→ following run_id={run_id}[/green]")
+
+            new_steps = steps_for_run(store.root, run_id, since_step=last_step_seen)
+            for s in new_steps:
+                console.print(
+                    f"  step {s.step:>5}  loss={s.loss}  lr={s.lr}  grad_norm={s.grad_norm}"
+                )
+                last_step_seen = s.step
+
+            new_evals = evals_for_run(store.root, run_id, since_step=last_eval_step_seen)
+            for e in new_evals:
+                console.print(
+                    f"  [yellow]eval @ step {e.step}[/yellow]  "
+                    f"val_loss={e.val_loss}  perplexity={e.perplexity}"
+                )
+                last_eval_step_seen = e.step
+
+            time.sleep(poll_seconds)
+    except KeyboardInterrupt:
+        console.print("[dim]metrics watch:[/dim] bye")
+
+
+def _parse_since_arg(since: str, console: object) -> timedelta:
+    """Parse `24h` / `7d` / `30m` / `10s` into a timedelta."""
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+
+    if not since:
+        raise typer.Exit(code=2)
+    unit = since[-1].lower()
+    try:
+        value = int(since[:-1])
+    except ValueError:
+        console.print(f"[red]metrics:[/red] --since {since!r} not an integer+unit")
+        raise typer.Exit(code=2) from None
+    if unit == "s":
+        return timedelta(seconds=value)
+    if unit == "m":
+        return timedelta(minutes=value)
+    if unit == "h":
+        return timedelta(hours=value)
+    if unit == "d":
+        return timedelta(days=value)
+    console.print(f"[red]metrics:[/red] --since {since!r} unit must be s/m/h/d")
+    raise typer.Exit(code=2)

From a84adeae392130dcd545042541847a956af84852 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:11:16 -0400
Subject: [PATCH 20/46] Extract show_cmd + 9 show helpers into
 cli/commands/show.py

---
 src/dlm/cli/commands/__init__.py | 480 +-----------------------------
 src/dlm/cli/commands/show.py     | 491 +++++++++++++++++++++++++++++++
 2 files changed, 492 insertions(+), 479 deletions(-)
 create mode 100644 src/dlm/cli/commands/show.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 1c1af6b..8348f5e 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -37,6 +37,7 @@
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
 from dlm.cli.commands.serve import serve_cmd as serve_cmd
+from dlm.cli.commands.show import show_cmd as show_cmd
 from dlm.cli.commands.synth import synth_instructions_cmd as synth_instructions_cmd
 from dlm.cli.commands.synth import synth_list_cmd as synth_list_cmd
 from dlm.cli.commands.synth import synth_revert_cmd as synth_revert_cmd
@@ -2189,482 +2190,3 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         console.print(f"smoke:   {result.smoke_output_first_line}")
 
 
-def show_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
-    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
-) -> None:
-    """Show training history, exports, and adapter state."""
-    import json as _json
-    import sys
-
-    from rich.console import Console
-
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.store.errors import ManifestCorruptError
-    from dlm.store.inspect import inspect_store
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-    out_console = Console()
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]show:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
-        parsed, path.resolve().parent
-    )
-    # The per-document cache config comes from frontmatter, not on-disk
-    # state — report it on both the pre-train and initialized-store paths
-    # so authors can sanity-check the knobs before `dlm train` runs.
-    cache_cfg = parsed.frontmatter.training.cache
-    training_cache_config: dict[str, object] = {
-        "enabled": cache_cfg.enabled,
-        "max_bytes": cache_cfg.max_bytes,
-        "prune_older_than_days": cache_cfg.prune_older_than_days,
-    }
-
-    # Store may not exist yet (no `dlm train` run). Treat that as an
-    # informational state rather than an error — useful after `dlm init`.
-    if not store.manifest.exists():
-        if json_out:
-            payload: dict[str, object] = {
-                "dlm_id": parsed.frontmatter.dlm_id,
-                "base_model": parsed.frontmatter.base_model,
-                "store_initialized": False,
-                "source_path": str(path.resolve()),
-                "training_cache_config": training_cache_config,
-            }
-            if training_sources is not None:
-                payload["training_sources"] = training_sources
-            if discovered_configs:
-                payload["discovered_training_configs"] = discovered_configs
-            sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
-        else:
-            out_console.print(f"[bold]{path}[/bold]")
-            out_console.print(f"  dlm_id:       {parsed.frontmatter.dlm_id}")
-            out_console.print(f"  base_model:   {parsed.frontmatter.base_model}")
-            out_console.print("  store:        [dim]not yet initialized (run `dlm train`)[/dim]")
-            if training_sources:
-                _render_training_sources_text(out_console, training_sources)
-        return
-
-    try:
-        inspection = inspect_store(store, source_path=path.resolve())
-    except ManifestCorruptError as exc:
-        console.print(f"[red]show:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    training_cache = _summarize_training_cache(store.tokenized_cache_dir, store.root)
-    gate = _summarize_gate(store)
-    preference_mining = _summarize_preference_mining(store.root)
-    base_security = _summarize_base_security(parsed.frontmatter.base_model)
-
-    if json_out:
-        payload_full = _inspection_to_dict(inspection)
-        if training_sources is not None:
-            payload_full["training_sources"] = training_sources
-        if discovered_configs:
-            payload_full["discovered_training_configs"] = discovered_configs
-        if training_cache is not None:
-            payload_full["training_cache"] = training_cache
-        payload_full["training_cache_config"] = training_cache_config
-        if gate is not None:
-            payload_full["gate"] = gate
-        if preference_mining is not None:
-            payload_full["preference_mining"] = preference_mining
-            payload_full["preference_mining_runs"] = preference_mining["run_count"]
-            payload_full["total_auto_mined_pairs"] = preference_mining["total_mined_pairs"]
-        if base_security is not None:
-            payload_full["base_security"] = base_security
-        # Write JSON to raw stdout — Rich's Console wraps lines at the
-        # terminal width and would corrupt the JSON.
-        sys.stdout.write(_json.dumps(payload_full, indent=2, default=str) + "\n")
-        return
-
-    _render_inspection_text(out_console, path, inspection)
-    if training_sources:
-        _render_training_sources_text(out_console, training_sources)
-    if training_cache is not None and training_cache.get("entry_count", 0):
-        _render_training_cache_text(out_console, training_cache)
-    if gate is not None:
-        _render_gate_text(out_console, gate)
-    if base_security is not None and base_security.get("trust_remote_code"):
-        _render_base_security_text(out_console, base_security)
-
-
-def _inspection_to_dict(inspection: object) -> dict[str, object]:
-    """Flatten a StoreInspection into a JSON-safe dict.
-
-    Schema is the v1 contract for `dlm show --json`; any reshape is a
-    version bump (recorded in tests/golden/cli-json/).
-    """
-    from dlm.store.inspect import StoreInspection
-
-    assert isinstance(inspection, StoreInspection)
-    return {
-        "dlm_id": inspection.dlm_id,
-        "path": str(inspection.path),
-        "base_model": inspection.base_model,
-        "base_model_revision": inspection.base_model_revision,
-        "adapter_version": inspection.adapter_version,
-        "training_runs": inspection.training_runs,
-        "last_trained_at": inspection.last_trained_at,
-        "has_adapter_current": inspection.has_adapter_current,
-        "replay_size_bytes": inspection.replay_size_bytes,
-        "total_size_bytes": inspection.total_size_bytes,
-        "source_path": str(inspection.source_path) if inspection.source_path else None,
-        "orphaned": inspection.orphaned,
-        "exports": [e.model_dump(mode="json") for e in inspection.exports],
-        "content_hashes": dict(inspection.content_hashes),
-        "pinned_versions": dict(inspection.pinned_versions),
-        "named_adapters": [
-            {
-                "name": a.name,
-                "has_current": a.has_current,
-                "latest_version": a.latest_version,
-            }
-            for a in inspection.named_adapters
-        ],
-    }
-
-
-def _render_inspection_text(console: object, path: Path, inspection: object) -> None:
-    """Human-readable `dlm show` output."""
-    from rich.console import Console
-
-    from dlm.store.inspect import StoreInspection
-
-    assert isinstance(console, Console)
-    assert isinstance(inspection, StoreInspection)
-
-    console.print(f"[bold]{path}[/bold]")
-    console.print(f"  dlm_id:         {inspection.dlm_id}")
-    rev = inspection.base_model_revision
-    rev_str = f" (revision {rev[:7]})" if rev else ""
-    console.print(f"  base_model:     {inspection.base_model}{rev_str}")
-    console.print(
-        f"  store:          {inspection.path}  ({_human_size(inspection.total_size_bytes)})"
-    )
-    if inspection.named_adapters:
-        # Multi-adapter store: render the per-adapter pointers rather
-        # than the flat field (which stays 0 on multi-adapter docs).
-        console.print("  adapters:")
-        for adapter in inspection.named_adapters:
-            if adapter.has_current:
-                console.print(f"    {adapter.name:16}v{adapter.latest_version:04d}")
-            else:
-                console.print(f"    {adapter.name:16}[dim]no current pointer[/dim]")
-    elif inspection.has_adapter_current:
-        console.print(f"  adapter:        v{inspection.adapter_version:04d}")
-    else:
-        console.print("  adapter:        [dim]none (no `dlm train` yet)[/dim]")
-    last = inspection.last_trained_at
-    last_str = f" — last {last.isoformat(timespec='seconds')}" if last else ""
-    console.print(f"  training runs:  {inspection.training_runs}{last_str}")
-    console.print(f"  exports:        {len(inspection.exports)}")
-    for exp in inspection.exports:
-        tag = f" — {exp.ollama_name}" if exp.ollama_name else ""
-        console.print(f"                  {exp.quant}{tag}")
-    if inspection.orphaned:
-        console.print("  [yellow]orphaned:[/yellow]     source .dlm is missing or mismatched")
-
-
-def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
-    """Best-effort resolution of `training.sources` for `dlm show`.
-
-    Returns None when the frontmatter declares no directives; returns
-    a list of per-source dicts otherwise. Failures to expand (missing
-    paths, policy escapes) fall back to declared-only records so the
-    show output stays useful for debugging a misconfigured directive.
-    """
-    records, _ = _summarize_training_sources_and_discovered(parsed, base_path)
-    return records
-
-
-def _summarize_training_sources_and_discovered(
-    parsed: object, base_path: Path
-) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
-    """Like `_summarize_training_sources` but also returns the per-anchor
-    `.dlm/training.yaml` + `.dlm/ignore` discovery records.
-
-    Returns `(training_sources, discovered_configs)`. `discovered_configs`
-    is always a list (empty when nothing was found or the expansion
-    failed); `training_sources` matches the single-value helper's
-    contract.
-    """
-    from dlm.directives import DirectiveError, expand_sources
-    from dlm.doc.parser import ParsedDlm
-
-    assert isinstance(parsed, ParsedDlm)
-    directives = parsed.frontmatter.training.sources
-    if not directives:
-        return None, []
-
-    declared: list[dict[str, object]] = [
-        {
-            "path": d.path,
-            "include": list(d.include),
-            "exclude": list(d.exclude),
-            "max_files": d.max_files,
-            "max_bytes_per_file": d.max_bytes_per_file,
-        }
-        for d in directives
-    ]
-
-    try:
-        result = expand_sources(parsed, base_path=base_path)
-    except (DirectiveError, OSError):
-        return declared, []
-
-    records: list[dict[str, object]] = []
-    for decl, prov in zip(declared, result.provenance, strict=False):
-        records.append(
-            {
-                **decl,
-                "file_count": prov.file_count,
-                "total_bytes": prov.total_bytes,
-                "skipped_binary": prov.skipped_binary,
-                "skipped_encoding": prov.skipped_encoding,
-                "skipped_over_size": prov.skipped_over_size,
-            }
-        )
-    # If the expander returned fewer entries than declared (shouldn't
-    # happen on success but defensive), pad with declared-only.
-    if len(records) < len(declared):
-        records.extend(declared[len(records) :])
-
-    discovered_records: list[dict[str, object]] = []
-    for dc in result.discovered:
-        discovered_records.append(
-            {
-                "anchor": str(dc.anchor),
-                "has_training_yaml": dc.config is not None,
-                "has_ignore": bool(dc.ignore_rules),
-                "include": list(dc.config.include) if dc.config else [],
-                "exclude": list(dc.config.exclude) if dc.config else [],
-                "exclude_defaults": (dc.config.exclude_defaults if dc.config else True),
-                "metadata": dict(dc.config.metadata) if dc.config else {},
-                "ignore_rules": len(dc.ignore_rules),
-            }
-        )
-    return records, discovered_records
-
-
-def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
-    """Return a JSON-friendly snapshot of the tokenized-section cache.
-
-    None when the cache dir doesn't exist (store never trained with
-    the cache, or pre-Sprint-31 layout). Cheap — reads the manifest
-    only, not the entry files.
-    """
-    if not cache_dir.is_dir():
-        return None
-    from dlm.directives.cache import TokenizedCache
-    from dlm.metrics import queries as _queries
-
-    cache = TokenizedCache.open(cache_dir)
-    last = _queries.latest_tokenization(store_root)
-    return {
-        "path": str(cache_dir),
-        "entry_count": cache.entry_count,
-        "bytes": cache.total_bytes,
-        "last_run_hit_rate": last.hit_rate if last else None,
-        "last_run_id": last.run_id if last else None,
-    }
-
-
-def _summarize_gate(store: object) -> dict[str, object] | None:
-    """Return a JSON-friendly snapshot of the learned adapter gate.
-
-    None when the store has no gate config (pre-Sprint-34 runs, or
-    `training.gate.enabled` was false). Reads two sources: the
-    on-disk `gate_config.json` for mode + adapter order, and the
-    metrics `gate_events` table for per-adapter mean weight from the
-    most recent run that recorded a gate.
-    """
-    import json as _json
-
-    from dlm.store.paths import StorePath
-    from dlm.train.gate.paths import gate_config_path
-
-    assert isinstance(store, StorePath)
-    cfg_path = gate_config_path(store)
-
-    from dlm.metrics import queries as _queries
-    from dlm.train.gate.module import GateMetadata
-
-    events = _queries.latest_gate_events(store.root)
-    # Divergence path: training raised before writing a config, but we
-    # still emit one GateEvent per adapter with mode="diverged" so
-    # operators can see the failure. Surface it even when the config
-    # file is absent.
-    if not cfg_path.exists():
-        if events and events[0].mode == "diverged":
-            return {
-                "mode": "diverged",
-                "adapter_names": [e.adapter_name for e in events],
-                "input_dim": None,
-                "hidden_proj_dim": None,
-                "last_run_id": events[0].run_id,
-                "per_adapter": [
-                    {
-                        "adapter_name": e.adapter_name,
-                        "mean_weight": e.mean_weight,
-                        "sample_count": e.sample_count,
-                        "mode": e.mode,
-                    }
-                    for e in events
-                ],
-            }
-        return None
-
-    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
-    meta = GateMetadata.from_json(raw)
-    per_adapter: list[dict[str, object]] = []
-    run_id: int | None = None
-    if events:
-        run_id = events[0].run_id
-        per_adapter = [
-            {
-                "adapter_name": e.adapter_name,
-                "mean_weight": e.mean_weight,
-                "sample_count": e.sample_count,
-                "mode": e.mode,
-            }
-            for e in events
-        ]
-    else:
-        # No recorded events yet; fall back to the config so `dlm show`
-        # still reports that a gate exists and in which mode.
-        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
-    return {
-        "mode": meta.mode,
-        "adapter_names": list(meta.adapter_names),
-        "input_dim": meta.input_dim,
-        "hidden_proj_dim": meta.hidden_proj_dim,
-        "last_run_id": run_id,
-        "per_adapter": per_adapter,
-    }
-
-
-def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
-    """Return the latest preference-mine summary for `dlm show --json`."""
-    from dlm.metrics import queries as _queries
-
-    totals = _queries.preference_mining_totals(store_root)
-    if totals is None:
-        return None
-    last = _queries.latest_preference_mining(store_root)
-    assert last is not None
-    rows = _queries.preference_mining_for_run(store_root, last.run_id)
-    return {
-        "run_count": totals.run_count,
-        "event_count": totals.event_count,
-        "total_mined_pairs": totals.total_mined_pairs,
-        "total_skipped_prompts": totals.total_skipped_prompts,
-        "last_run_id": last.run_id,
-        "last_run_event_count": len(rows),
-        "last_event": _queries.preference_mining_to_dict([last])[0],
-    }
-
-
-def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
-    """Surface security-sensitive base-model flags for `dlm show`.
-
-    Today that's just `trust_remote_code` — a flag that causes the HF
-    loader to execute Python from the model repo. We resolve the spec
-    out of the in-process registry (no network: the resolver reads a
-    frozen Python dict) so users can see which bases opt in without
-    grepping source. Returns None when the key doesn't resolve (an
-    `hf:...` escape hatch that isn't in the registry); the caller
-    silently skips in that case.
-    """
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.base_models.errors import BaseModelError
-
-    try:
-        spec = resolve_base_model(base_model_key, accept_license=True)
-    except BaseModelError:
-        return None
-    return {
-        "base_model": spec.key,
-        "architecture": spec.architecture,
-        "trust_remote_code": bool(spec.trust_remote_code),
-    }
-
-
-def _render_base_security_text(console: object, snap: dict[str, object]) -> None:
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-    arch = snap.get("architecture", "?")
-    console.print(
-        f"  [yellow]security:[/yellow] base uses [red]trust_remote_code=True[/red] "
-        f"(arch={arch}) — HF loader will execute Python from the model repo"
-    )
-
-
-def _render_gate_text(console: object, snap: dict[str, object]) -> None:
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-    mode = snap.get("mode", "?")
-    if mode == "diverged":
-        console.print("  adapter gate ([red]diverged[/red]):")
-        console.print(
-            "    [yellow]gate training produced a non-finite loss; "
-            "store fell back to gate-less routing[/yellow]"
-        )
-    else:
-        console.print(f"  adapter gate ({mode}):")
-    per_adapter = snap.get("per_adapter", [])
-    if isinstance(per_adapter, list):
-        for entry in per_adapter:
-            if not isinstance(entry, dict):
-                continue
-            name = entry.get("adapter_name", "?")
-            weight = entry.get("mean_weight")
-            count = entry.get("sample_count")
-            if weight is None:
-                console.print(f"    {name}  [dim](no recorded events)[/dim]")
-            else:
-                w = float(weight) if isinstance(weight, (int, float)) else 0.0
-                c = count if isinstance(count, int) else 0
-                console.print(f"    {name:<16}  weight={w:.3f}  samples={c}")
-
-
-def _render_training_cache_text(console: object, snap: dict[str, object]) -> None:
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-    ec_raw = snap.get("entry_count", 0)
-    by_raw = snap.get("bytes", 0)
-    entry_count = ec_raw if isinstance(ec_raw, int) else 0
-    byte_count = by_raw if isinstance(by_raw, int) else 0
-    console.print("  tokenized cache:")
-    console.print(f"    entries:        {entry_count}")
-    console.print(f"    size:           {_human_size(byte_count)}")
-    rate = snap.get("last_run_hit_rate")
-    if isinstance(rate, (int, float)):
-        console.print(f"    last hit rate:  {float(rate):.1%}")
-
-
-def _render_training_sources_text(console: object, records: list[dict[str, object]]) -> None:
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-    console.print("  training sources:")
-    for rec in records:
-        path = rec["path"]
-        fc = rec.get("file_count")
-        tb = rec.get("total_bytes")
-        if fc is None:
-            console.print(f"    {path}  [dim](not expanded)[/dim]")
-        else:
-            size = int(tb) if isinstance(tb, int) else 0
-            console.print(f"    {path}  {fc} file(s), {_human_size(size)}")
diff --git a/src/dlm/cli/commands/show.py b/src/dlm/cli/commands/show.py
new file mode 100644
index 0000000..639c9ce
--- /dev/null
+++ b/src/dlm/cli/commands/show.py
@@ -0,0 +1,491 @@
+"""`dlm show` — show training history, exports, and adapter state."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from dlm.cli.commands._shared import _human_size
+
+
+def show_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to inspect.")],
+    json_out: Annotated[bool, typer.Option("--json", help="Emit machine-readable JSON.")] = False,
+) -> None:
+    """Show training history, exports, and adapter state."""
+    import json as _json
+    import sys
+
+    from rich.console import Console
+
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.store.errors import ManifestCorruptError
+    from dlm.store.inspect import inspect_store
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+    out_console = Console()
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]show:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
+        parsed, path.resolve().parent
+    )
+    # The per-document cache config comes from frontmatter, not on-disk
+    # state — report it on both the pre-train and initialized-store paths
+    # so authors can sanity-check the knobs before `dlm train` runs.
+    cache_cfg = parsed.frontmatter.training.cache
+    training_cache_config: dict[str, object] = {
+        "enabled": cache_cfg.enabled,
+        "max_bytes": cache_cfg.max_bytes,
+        "prune_older_than_days": cache_cfg.prune_older_than_days,
+    }
+
+    # Store may not exist yet (no `dlm train` run). Treat that as an
+    # informational state rather than an error — useful after `dlm init`.
+    if not store.manifest.exists():
+        if json_out:
+            payload: dict[str, object] = {
+                "dlm_id": parsed.frontmatter.dlm_id,
+                "base_model": parsed.frontmatter.base_model,
+                "store_initialized": False,
+                "source_path": str(path.resolve()),
+                "training_cache_config": training_cache_config,
+            }
+            if training_sources is not None:
+                payload["training_sources"] = training_sources
+            if discovered_configs:
+                payload["discovered_training_configs"] = discovered_configs
+            sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
+        else:
+            out_console.print(f"[bold]{path}[/bold]")
+            out_console.print(f"  dlm_id:       {parsed.frontmatter.dlm_id}")
+            out_console.print(f"  base_model:   {parsed.frontmatter.base_model}")
+            out_console.print("  store:        [dim]not yet initialized (run `dlm train`)[/dim]")
+            if training_sources:
+                _render_training_sources_text(out_console, training_sources)
+        return
+
+    try:
+        inspection = inspect_store(store, source_path=path.resolve())
+    except ManifestCorruptError as exc:
+        console.print(f"[red]show:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    training_cache = _summarize_training_cache(store.tokenized_cache_dir, store.root)
+    gate = _summarize_gate(store)
+    preference_mining = _summarize_preference_mining(store.root)
+    base_security = _summarize_base_security(parsed.frontmatter.base_model)
+
+    if json_out:
+        payload_full = _inspection_to_dict(inspection)
+        if training_sources is not None:
+            payload_full["training_sources"] = training_sources
+        if discovered_configs:
+            payload_full["discovered_training_configs"] = discovered_configs
+        if training_cache is not None:
+            payload_full["training_cache"] = training_cache
+        payload_full["training_cache_config"] = training_cache_config
+        if gate is not None:
+            payload_full["gate"] = gate
+        if preference_mining is not None:
+            payload_full["preference_mining"] = preference_mining
+            payload_full["preference_mining_runs"] = preference_mining["run_count"]
+            payload_full["total_auto_mined_pairs"] = preference_mining["total_mined_pairs"]
+        if base_security is not None:
+            payload_full["base_security"] = base_security
+        # Write JSON to raw stdout — Rich's Console wraps lines at the
+        # terminal width and would corrupt the JSON.
+        sys.stdout.write(_json.dumps(payload_full, indent=2, default=str) + "\n")
+        return
+
+    _render_inspection_text(out_console, path, inspection)
+    if training_sources:
+        _render_training_sources_text(out_console, training_sources)
+    if training_cache is not None and training_cache.get("entry_count", 0):
+        _render_training_cache_text(out_console, training_cache)
+    if gate is not None:
+        _render_gate_text(out_console, gate)
+    if base_security is not None and base_security.get("trust_remote_code"):
+        _render_base_security_text(out_console, base_security)
+
+
+def _inspection_to_dict(inspection: object) -> dict[str, object]:
+    """Flatten a StoreInspection into a JSON-safe dict.
+
+    Schema is the v1 contract for `dlm show --json`; any reshape is a
+    version bump (recorded in tests/golden/cli-json/).
+    """
+    from dlm.store.inspect import StoreInspection
+
+    assert isinstance(inspection, StoreInspection)
+    return {
+        "dlm_id": inspection.dlm_id,
+        "path": str(inspection.path),
+        "base_model": inspection.base_model,
+        "base_model_revision": inspection.base_model_revision,
+        "adapter_version": inspection.adapter_version,
+        "training_runs": inspection.training_runs,
+        "last_trained_at": inspection.last_trained_at,
+        "has_adapter_current": inspection.has_adapter_current,
+        "replay_size_bytes": inspection.replay_size_bytes,
+        "total_size_bytes": inspection.total_size_bytes,
+        "source_path": str(inspection.source_path) if inspection.source_path else None,
+        "orphaned": inspection.orphaned,
+        "exports": [e.model_dump(mode="json") for e in inspection.exports],
+        "content_hashes": dict(inspection.content_hashes),
+        "pinned_versions": dict(inspection.pinned_versions),
+        "named_adapters": [
+            {
+                "name": a.name,
+                "has_current": a.has_current,
+                "latest_version": a.latest_version,
+            }
+            for a in inspection.named_adapters
+        ],
+    }
+
+
+def _render_inspection_text(console: object, path: Path, inspection: object) -> None:
+    """Human-readable `dlm show` output."""
+    from rich.console import Console
+
+    from dlm.store.inspect import StoreInspection
+
+    assert isinstance(console, Console)
+    assert isinstance(inspection, StoreInspection)
+
+    console.print(f"[bold]{path}[/bold]")
+    console.print(f"  dlm_id:         {inspection.dlm_id}")
+    rev = inspection.base_model_revision
+    rev_str = f" (revision {rev[:7]})" if rev else ""
+    console.print(f"  base_model:     {inspection.base_model}{rev_str}")
+    console.print(
+        f"  store:          {inspection.path}  ({_human_size(inspection.total_size_bytes)})"
+    )
+    if inspection.named_adapters:
+        # Multi-adapter store: render the per-adapter pointers rather
+        # than the flat field (which stays 0 on multi-adapter docs).
+        console.print("  adapters:")
+        for adapter in inspection.named_adapters:
+            if adapter.has_current:
+                console.print(f"    {adapter.name:16}v{adapter.latest_version:04d}")
+            else:
+                console.print(f"    {adapter.name:16}[dim]no current pointer[/dim]")
+    elif inspection.has_adapter_current:
+        console.print(f"  adapter:        v{inspection.adapter_version:04d}")
+    else:
+        console.print("  adapter:        [dim]none (no `dlm train` yet)[/dim]")
+    last = inspection.last_trained_at
+    last_str = f" — last {last.isoformat(timespec='seconds')}" if last else ""
+    console.print(f"  training runs:  {inspection.training_runs}{last_str}")
+    console.print(f"  exports:        {len(inspection.exports)}")
+    for exp in inspection.exports:
+        tag = f" — {exp.ollama_name}" if exp.ollama_name else ""
+        console.print(f"                  {exp.quant}{tag}")
+    if inspection.orphaned:
+        console.print("  [yellow]orphaned:[/yellow]     source .dlm is missing or mismatched")
+
+
+def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
+    """Best-effort resolution of `training.sources` for `dlm show`.
+
+    Returns None when the frontmatter declares no directives; returns
+    a list of per-source dicts otherwise. Failures to expand (missing
+    paths, policy escapes) fall back to declared-only records so the
+    show output stays useful for debugging a misconfigured directive.
+    """
+    records, _ = _summarize_training_sources_and_discovered(parsed, base_path)
+    return records
+
+
+def _summarize_training_sources_and_discovered(
+    parsed: object, base_path: Path
+) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
+    """Like `_summarize_training_sources` but also returns the per-anchor
+    `.dlm/training.yaml` + `.dlm/ignore` discovery records.
+
+    Returns `(training_sources, discovered_configs)`. `discovered_configs`
+    is always a list (empty when nothing was found or the expansion
+    failed); `training_sources` matches the single-value helper's
+    contract.
+    """
+    from dlm.directives import DirectiveError, expand_sources
+    from dlm.doc.parser import ParsedDlm
+
+    assert isinstance(parsed, ParsedDlm)
+    directives = parsed.frontmatter.training.sources
+    if not directives:
+        return None, []
+
+    declared: list[dict[str, object]] = [
+        {
+            "path": d.path,
+            "include": list(d.include),
+            "exclude": list(d.exclude),
+            "max_files": d.max_files,
+            "max_bytes_per_file": d.max_bytes_per_file,
+        }
+        for d in directives
+    ]
+
+    try:
+        result = expand_sources(parsed, base_path=base_path)
+    except (DirectiveError, OSError):
+        return declared, []
+
+    records: list[dict[str, object]] = []
+    for decl, prov in zip(declared, result.provenance, strict=False):
+        records.append(
+            {
+                **decl,
+                "file_count": prov.file_count,
+                "total_bytes": prov.total_bytes,
+                "skipped_binary": prov.skipped_binary,
+                "skipped_encoding": prov.skipped_encoding,
+                "skipped_over_size": prov.skipped_over_size,
+            }
+        )
+    # If the expander returned fewer entries than declared (shouldn't
+    # happen on success but defensive), pad with declared-only.
+    if len(records) < len(declared):
+        records.extend(declared[len(records) :])
+
+    discovered_records: list[dict[str, object]] = []
+    for dc in result.discovered:
+        discovered_records.append(
+            {
+                "anchor": str(dc.anchor),
+                "has_training_yaml": dc.config is not None,
+                "has_ignore": bool(dc.ignore_rules),
+                "include": list(dc.config.include) if dc.config else [],
+                "exclude": list(dc.config.exclude) if dc.config else [],
+                "exclude_defaults": (dc.config.exclude_defaults if dc.config else True),
+                "metadata": dict(dc.config.metadata) if dc.config else {},
+                "ignore_rules": len(dc.ignore_rules),
+            }
+        )
+    return records, discovered_records
+
+
+def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
+    """Return a JSON-friendly snapshot of the tokenized-section cache.
+
+    None when the cache dir doesn't exist (store never trained with
+    the cache, or pre-Sprint-31 layout). Cheap — reads the manifest
+    only, not the entry files.
+    """
+    if not cache_dir.is_dir():
+        return None
+    from dlm.directives.cache import TokenizedCache
+    from dlm.metrics import queries as _queries
+
+    cache = TokenizedCache.open(cache_dir)
+    last = _queries.latest_tokenization(store_root)
+    return {
+        "path": str(cache_dir),
+        "entry_count": cache.entry_count,
+        "bytes": cache.total_bytes,
+        "last_run_hit_rate": last.hit_rate if last else None,
+        "last_run_id": last.run_id if last else None,
+    }
+
+
+def _summarize_gate(store: object) -> dict[str, object] | None:
+    """Return a JSON-friendly snapshot of the learned adapter gate.
+
+    None when the store has no gate config (pre-Sprint-34 runs, or
+    `training.gate.enabled` was false). Reads two sources: the
+    on-disk `gate_config.json` for mode + adapter order, and the
+    metrics `gate_events` table for per-adapter mean weight from the
+    most recent run that recorded a gate.
+    """
+    import json as _json
+
+    from dlm.store.paths import StorePath
+    from dlm.train.gate.paths import gate_config_path
+
+    assert isinstance(store, StorePath)
+    cfg_path = gate_config_path(store)
+
+    from dlm.metrics import queries as _queries
+    from dlm.train.gate.module import GateMetadata
+
+    events = _queries.latest_gate_events(store.root)
+    # Divergence path: training raised before writing a config, but we
+    # still emit one GateEvent per adapter with mode="diverged" so
+    # operators can see the failure. Surface it even when the config
+    # file is absent.
+    if not cfg_path.exists():
+        if events and events[0].mode == "diverged":
+            return {
+                "mode": "diverged",
+                "adapter_names": [e.adapter_name for e in events],
+                "input_dim": None,
+                "hidden_proj_dim": None,
+                "last_run_id": events[0].run_id,
+                "per_adapter": [
+                    {
+                        "adapter_name": e.adapter_name,
+                        "mean_weight": e.mean_weight,
+                        "sample_count": e.sample_count,
+                        "mode": e.mode,
+                    }
+                    for e in events
+                ],
+            }
+        return None
+
+    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
+    meta = GateMetadata.from_json(raw)
+    per_adapter: list[dict[str, object]] = []
+    run_id: int | None = None
+    if events:
+        run_id = events[0].run_id
+        per_adapter = [
+            {
+                "adapter_name": e.adapter_name,
+                "mean_weight": e.mean_weight,
+                "sample_count": e.sample_count,
+                "mode": e.mode,
+            }
+            for e in events
+        ]
+    else:
+        # No recorded events yet; fall back to the config so `dlm show`
+        # still reports that a gate exists and in which mode.
+        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
+    return {
+        "mode": meta.mode,
+        "adapter_names": list(meta.adapter_names),
+        "input_dim": meta.input_dim,
+        "hidden_proj_dim": meta.hidden_proj_dim,
+        "last_run_id": run_id,
+        "per_adapter": per_adapter,
+    }
+
+
+def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
+    """Return the latest preference-mine summary for `dlm show --json`."""
+    from dlm.metrics import queries as _queries
+
+    totals = _queries.preference_mining_totals(store_root)
+    if totals is None:
+        return None
+    last = _queries.latest_preference_mining(store_root)
+    assert last is not None
+    rows = _queries.preference_mining_for_run(store_root, last.run_id)
+    return {
+        "run_count": totals.run_count,
+        "event_count": totals.event_count,
+        "total_mined_pairs": totals.total_mined_pairs,
+        "total_skipped_prompts": totals.total_skipped_prompts,
+        "last_run_id": last.run_id,
+        "last_run_event_count": len(rows),
+        "last_event": _queries.preference_mining_to_dict([last])[0],
+    }
+
+
+def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
+    """Surface security-sensitive base-model flags for `dlm show`.
+
+    Today that's just `trust_remote_code` — a flag that causes the HF
+    loader to execute Python from the model repo. We resolve the spec
+    out of the in-process registry (no network: the resolver reads a
+    frozen Python dict) so users can see which bases opt in without
+    grepping source. Returns None when the key doesn't resolve (an
+    `hf:...` escape hatch that isn't in the registry); the caller
+    silently skips in that case.
+    """
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.base_models.errors import BaseModelError
+
+    try:
+        spec = resolve_base_model(base_model_key, accept_license=True)
+    except BaseModelError:
+        return None
+    return {
+        "base_model": spec.key,
+        "architecture": spec.architecture,
+        "trust_remote_code": bool(spec.trust_remote_code),
+    }
+
+
+def _render_base_security_text(console: object, snap: dict[str, object]) -> None:
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+    arch = snap.get("architecture", "?")
+    console.print(
+        f"  [yellow]security:[/yellow] base uses [red]trust_remote_code=True[/red] "
+        f"(arch={arch}) — HF loader will execute Python from the model repo"
+    )
+
+
+def _render_gate_text(console: object, snap: dict[str, object]) -> None:
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+    mode = snap.get("mode", "?")
+    if mode == "diverged":
+        console.print("  adapter gate ([red]diverged[/red]):")
+        console.print(
+            "    [yellow]gate training produced a non-finite loss; "
+            "store fell back to gate-less routing[/yellow]"
+        )
+    else:
+        console.print(f"  adapter gate ({mode}):")
+    per_adapter = snap.get("per_adapter", [])
+    if isinstance(per_adapter, list):
+        for entry in per_adapter:
+            if not isinstance(entry, dict):
+                continue
+            name = entry.get("adapter_name", "?")
+            weight = entry.get("mean_weight")
+            count = entry.get("sample_count")
+            if weight is None:
+                console.print(f"    {name}  [dim](no recorded events)[/dim]")
+            else:
+                w = float(weight) if isinstance(weight, (int, float)) else 0.0
+                c = count if isinstance(count, int) else 0
+                console.print(f"    {name:<16}  weight={w:.3f}  samples={c}")
+
+
+def _render_training_cache_text(console: object, snap: dict[str, object]) -> None:
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+    ec_raw = snap.get("entry_count", 0)
+    by_raw = snap.get("bytes", 0)
+    entry_count = ec_raw if isinstance(ec_raw, int) else 0
+    byte_count = by_raw if isinstance(by_raw, int) else 0
+    console.print("  tokenized cache:")
+    console.print(f"    entries:        {entry_count}")
+    console.print(f"    size:           {_human_size(byte_count)}")
+    rate = snap.get("last_run_hit_rate")
+    if isinstance(rate, (int, float)):
+        console.print(f"    last hit rate:  {float(rate):.1%}")
+
+
+def _render_training_sources_text(console: object, records: list[dict[str, object]]) -> None:
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+    console.print("  training sources:")
+    for rec in records:
+        path = rec["path"]
+        fc = rec.get("file_count")
+        tb = rec.get("total_bytes")
+        if fc is None:
+            console.print(f"    {path}  [dim](not expanded)[/dim]")
+        else:
+            size = int(tb) if isinstance(tb, int) else 0
+            console.print(f"    {path}  {fc} file(s), {_human_size(size)}")

From dc211cfc644d52018f0ca2830bc7d96b8013a0a4 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:13:46 -0400
Subject: [PATCH 21/46] Extract init_cmd + 4 init helpers into
 cli/commands/init.py

---
 src/dlm/cli/commands/__init__.py  | 376 +----------------------------
 src/dlm/cli/commands/init.py      | 383 ++++++++++++++++++++++++++++++
 tests/unit/cli/test_init_edges.py |   5 +-
 3 files changed, 389 insertions(+), 375 deletions(-)
 create mode 100644 src/dlm/cli/commands/init.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 8348f5e..616786a 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -25,6 +25,8 @@
 from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
 from dlm.cli.commands.harvest import harvest_cmd as harvest_cmd
+from dlm.cli.commands.init import _prompt_accept_license as _prompt_accept_license
+from dlm.cli.commands.init import init_cmd as init_cmd
 from dlm.cli.commands.metrics import metrics_cmd as metrics_cmd
 from dlm.cli.commands.metrics import metrics_watch_cmd as metrics_watch_cmd
 from dlm.cli.commands.migrate import migrate_cmd as migrate_cmd
@@ -53,380 +55,6 @@ def _stub(sprint: str, subject: str) -> None:
     )
 
 
-def init_cmd(
-    path: Annotated[Path, typer.Argument(help="Target .dlm path to create.")],
-    base: Annotated[
-        str, typer.Option("--base", help="Base model key or hf:org/name.")
-    ] = "qwen2.5-1.5b",
-    template: Annotated[
-        str | None,
-        typer.Option(
-            "--template",
-            help="Start from a named gallery template (see `dlm templates list`).",
-        ),
-    ] = None,
-    i_accept_license: Annotated[
-        bool,
-        typer.Option("--i-accept-license", help="Accept gated base-model license."),
-    ] = False,
-    force: Annotated[
-        bool,
-        typer.Option("--force", help="Overwrite an existing .dlm at path."),
-    ] = False,
-    skip_export_probes: Annotated[
-        bool,
-        typer.Option(
-            "--skip-export-probes",
-            help=(
-                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
-                "architectures (not yet in our vendored llama.cpp) can still "
-                "be used for training + HF inference. Forfeits `dlm export` "
-                "to Ollama until the vendored copy catches up."
-            ),
-        ),
-    ] = False,
-    multimodal: Annotated[
-        bool,
-        typer.Option(
-            "--multimodal",
-            help=(
-                "Scaffold a vision-language .dlm with an `::image::` section. "
-                "Defaults --base to paligemma-3b-mix-224 and skips GGUF "
-                "export probes because current GGUF export does not "
-                "support vision-language bases."
-            ),
-        ),
-    ] = False,
-    audio: Annotated[
-        bool,
-        typer.Option(
-            "--audio",
-            help=(
-                "Scaffold an audio-language .dlm with an `::audio::` section. "
-                "Defaults --base to qwen2-audio-7b-instruct and skips GGUF "
-                "export probes (audio archs are not on llama.cpp's roadmap)."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """Bootstrap a new .dlm file with sensible defaults."""
-
-    from rich.console import Console
-
-    from dlm.base_models import (
-        GatedModelError,
-        UnknownBaseModelError,
-        is_gated,
-        require_acceptance,
-    )
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.io.ulid import mint_ulid
-
-    console = Console(stderr=True)
-
-    if path.exists() and not force:
-        console.print(
-            f"[red]init:[/red] {path} already exists. "
-            "Re-run with [bold]--force[/bold] to overwrite."
-        )
-        raise typer.Exit(code=1)
-
-    # --multimodal / --audio are mutually exclusive with each other and
-    # with --template (templates pin their own base + body shape; v1
-    # doesn't ship media templates yet).
-    if multimodal and audio:
-        console.print(
-            "[red]init:[/red] --multimodal and --audio are mutually exclusive "
-            "(each targets a different modality)."
-        )
-        raise typer.Exit(code=2)
-    if multimodal and template is not None:
-        console.print(
-            "[red]init:[/red] --multimodal and --template are mutually exclusive; "
-            "v1 doesn't ship a VL template (see `dlm templates list`)."
-        )
-        raise typer.Exit(code=2)
-    if audio and template is not None:
-        console.print(
-            "[red]init:[/red] --audio and --template are mutually exclusive; "
-            "v1 doesn't ship an audio template (see `dlm templates list`)."
-        )
-        raise typer.Exit(code=2)
-
-    # --multimodal / --audio override the text-first --base default. A
-    # user who wants a different media base passes --base explicitly;
-    # we verify the pick is the right modality below.
-    if multimodal and base == "qwen2.5-1.5b":
-        base = "paligemma-3b-mix-224"
-    if audio and base == "qwen2.5-1.5b":
-        base = "qwen2-audio-7b-instruct"
-
-    # --template resolves the base from the template's meta.yaml; the
-    # --base default is kept for the no-template path only. Users who
-    # pass both a template and an explicit --base get a warning but the
-    # template still wins (the template body was authored against its
-    # recommended base).
-    if template is not None:
-        from dlm.templates import load_template
-
-        # Peek at the template's recommended base WITHOUT writing
-        # anything yet, so we can handle the license prompt against the
-        # right base (the template's, not `--base`) before committing.
-        try:
-            resolved_base = load_template(template).meta.recommended_base
-        except Exception as exc:
-            console.print(f"[red]init:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-        if base != "qwen2.5-1.5b" and base != resolved_base:
-            console.print(
-                f"[yellow]init:[/yellow] --base {base} ignored; template "
-                f"{template!r} uses {resolved_base}."
-            )
-    else:
-        resolved_base = base
-
-    # Media bases can't clear the GGUF-conversion probes. Force-skip
-    # them so the probe suite doesn't false-fail the init.
-    if multimodal or audio:
-        skip_export_probes = True
-
-    try:
-        spec = resolve_base_model(
-            resolved_base,
-            accept_license=i_accept_license,
-            skip_export_probes=skip_export_probes,
-        )
-    except UnknownBaseModelError as exc:
-        console.print(f"[red]init:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except GatedModelError as exc:
-        # Gated + user didn't pass --i-accept-license up-front. Prompt
-        # interactively if we have a TTY; otherwise refuse with the flag
-        # pointer (audit F22 non-interactive path).
-        if not _prompt_accept_license(console, resolved_base, exc.license_url):
-            console.print(
-                "[red]license:[/red] refused. Re-run with "
-                "[bold]--i-accept-license[/bold] to accept non-interactively."
-            )
-            raise typer.Exit(code=1) from exc
-        spec = resolve_base_model(
-            resolved_base,
-            accept_license=True,
-            skip_export_probes=skip_export_probes,
-        )
-
-    # NOW apply the template — license has already been accepted
-    # (either by --i-accept-license or interactive prompt), so pass
-    # the acceptance through. apply_template enforces the license
-    # contract at its boundary.
-    applied_result = None
-    if template is not None:
-        from dlm.templates import TemplateError, apply_template
-
-        try:
-            applied_result = apply_template(template, path, force=force, accept_license=True)
-        except TemplateError as exc:
-            console.print(f"[red]init:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
-    # Record the license acceptance (or None for non-gated specs). We
-    # know `resolve_base_model` already validated the flag/prompt chain
-    # — `accept_license=True` means either the user passed the flag or
-    # answered the interactive prompt. Either path is a real
-    # acceptance; persist the record now so subsequent `dlm train` /
-    # `dlm export` don't re-prompt.
-    acceptance_via: Literal["cli_flag", "interactive"] = (
-        "cli_flag" if i_accept_license else "interactive"
-    )
-    acceptance = (
-        require_acceptance(spec, accept_license=True, via=acceptance_via)
-        if is_gated(spec)
-        else None
-    )
-
-    # Media flags require a matching-modality base. Check after resolve
-    # so users pointing at an unknown or wrong-modality hf:org/name get
-    # a clear explanation rather than a schema error deep in parse time.
-    if multimodal and spec.modality != "vision-language":
-        console.print(
-            f"[red]init:[/red] --multimodal requires a vision-language base; "
-            f"{spec.key!r} is modality='{spec.modality}'. "
-            "Pick --base paligemma-3b-mix-224 or drop --multimodal."
-        )
-        raise typer.Exit(code=2)
-    if audio and spec.modality != "audio-language":
-        console.print(
-            f"[red]init:[/red] --audio requires an audio-language base; "
-            f"{spec.key!r} is modality='{spec.modality}'. "
-            "Pick --base qwen2-audio-7b-instruct or drop --audio."
-        )
-        raise typer.Exit(code=2)
-
-    if applied_result is not None:
-        dlm_id = applied_result.dlm_id
-    else:
-        dlm_id = mint_ulid()
-        if multimodal:
-            _write_init_scaffold_multimodal(path, spec.key, dlm_id)
-        elif audio:
-            _write_init_scaffold_audio(path, spec.key, dlm_id)
-        else:
-            _write_init_scaffold(path, spec.key, dlm_id)
-
-    # Create the store + write the initial manifest so `dlm show` sees
-    # the license record and `dlm train` has a prior manifest to diff
-    # against.
-    from dlm.store.manifest import Manifest, save_manifest
-    from dlm.store.paths import for_dlm
-
-    store = for_dlm(dlm_id)
-    store.ensure_layout()
-    save_manifest(
-        store.manifest,
-        Manifest(
-            dlm_id=dlm_id,
-            base_model=spec.key,
-            base_model_revision=spec.revision,
-            source_path=path.resolve(),
-            license_acceptance=acceptance,
-        ),
-    )
-    if applied_result is not None:
-        meta = applied_result.template.meta
-        console.print(
-            f"[green]init:[/green] wrote {path} from template "
-            f"[bold]{meta.name}[/bold] ({meta.title}) — base {spec.key}."
-        )
-    else:
-        console.print(f"[green]init:[/green] wrote {path}")
-
-
-def _prompt_accept_license(console: object, base: str, license_url: str | None) -> bool:
-    """Interactive y/N prompt for gated base-model license acceptance.
-
-    Non-interactive runs (no TTY) return False; the caller surfaces the
-    `--i-accept-license` flag pointer in that case.
-    """
-    import sys
-
-    from rich.console import Console
-
-    assert isinstance(console, Console)
-
-    if not sys.stdin.isatty():
-        return False
-
-    console.print(
-        f"[yellow]This base model ({base}) requires accepting the upstream license.[/yellow]"
-    )
-    if license_url:
-        console.print(f"  Review the license at: {license_url}")
-    console.print("Accept and continue? [y/N]: ", end="")
-    try:
-        answer = input().strip().lower()
-    except EOFError:
-        return False
-    return answer in ("y", "yes")
-
-
-def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write a minimal-but-valid .dlm file at `path`.
-
-    Body has one PROSE paragraph + a commented instruction section so
-    users see both section shapes on first open.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 1
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::instruction::
-
-### Q
-Your example question.
-
-### A
-Your example answer.
-"""
-    path.write_text(scaffold, encoding="utf-8")
-
-
-def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write a VL-shaped .dlm file at `path`.
-
-    Body shows the `::image::` attribute fence + a caption so users
-    see the v10 grammar on first open. The placeholder path
-    `figures/your-image.png` is deliberately non-existent — first
-    `dlm train` will refuse with a clear file-missing error, prompting
-    the user to drop a real image in. This is friendlier than
-    committing an inert sample that users might not notice isn't theirs.
-
-    `dlm_version: 10` because IMAGE sections require schema v10.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 10
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::image path="figures/your-image.png" alt="short description"::
-Caption text describing the image. Training rows bundle the image
-with this caption as `<image>\\n<caption>`.
-
-::instruction::
-
-### Q
-What is in this image?
-
-### A
-Describe what the image shows.
-"""
-    path.write_text(scaffold, encoding="utf-8")
-
-
-def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write an audio-shaped .dlm file at `path`.
-
-    Body shows the `::audio::` attribute fence with the sibling-
-    transcript-friendly `transcript="..."` form so users see the v11
-    grammar on first open. The placeholder path `clips/your-clip.wav`
-    is deliberately non-existent — first `dlm train` refuses with a
-    clear file-missing error rather than silently training on an inert
-    sample.
-
-    `dlm_version: 11` because AUDIO sections require schema v11.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 11
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::
-
-::instruction::
-
-### Q
-What was said in this recording?
-
-### A
-Describe what you hear in the audio.
-"""
-    path.write_text(scaffold, encoding="utf-8")
-
 
 def train_cmd(
     path: Annotated[
diff --git a/src/dlm/cli/commands/init.py b/src/dlm/cli/commands/init.py
new file mode 100644
index 0000000..27c3db9
--- /dev/null
+++ b/src/dlm/cli/commands/init.py
@@ -0,0 +1,383 @@
+"""`dlm init` — bootstrap a new .dlm file with sensible defaults."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated, Literal
+
+import typer
+
+
+def init_cmd(
+    path: Annotated[Path, typer.Argument(help="Target .dlm path to create.")],
+    base: Annotated[
+        str, typer.Option("--base", help="Base model key or hf:org/name.")
+    ] = "qwen2.5-1.5b",
+    template: Annotated[
+        str | None,
+        typer.Option(
+            "--template",
+            help="Start from a named gallery template (see `dlm templates list`).",
+        ),
+    ] = None,
+    i_accept_license: Annotated[
+        bool,
+        typer.Option("--i-accept-license", help="Accept gated base-model license."),
+    ] = False,
+    force: Annotated[
+        bool,
+        typer.Option("--force", help="Overwrite an existing .dlm at path."),
+    ] = False,
+    skip_export_probes: Annotated[
+        bool,
+        typer.Option(
+            "--skip-export-probes",
+            help=(
+                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
+                "architectures (not yet in our vendored llama.cpp) can still "
+                "be used for training + HF inference. Forfeits `dlm export` "
+                "to Ollama until the vendored copy catches up."
+            ),
+        ),
+    ] = False,
+    multimodal: Annotated[
+        bool,
+        typer.Option(
+            "--multimodal",
+            help=(
+                "Scaffold a vision-language .dlm with an `::image::` section. "
+                "Defaults --base to paligemma-3b-mix-224 and skips GGUF "
+                "export probes because current GGUF export does not "
+                "support vision-language bases."
+            ),
+        ),
+    ] = False,
+    audio: Annotated[
+        bool,
+        typer.Option(
+            "--audio",
+            help=(
+                "Scaffold an audio-language .dlm with an `::audio::` section. "
+                "Defaults --base to qwen2-audio-7b-instruct and skips GGUF "
+                "export probes (audio archs are not on llama.cpp's roadmap)."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """Bootstrap a new .dlm file with sensible defaults."""
+
+    from rich.console import Console
+
+    from dlm.base_models import (
+        GatedModelError,
+        UnknownBaseModelError,
+        is_gated,
+        require_acceptance,
+    )
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.io.ulid import mint_ulid
+
+    console = Console(stderr=True)
+
+    if path.exists() and not force:
+        console.print(
+            f"[red]init:[/red] {path} already exists. "
+            "Re-run with [bold]--force[/bold] to overwrite."
+        )
+        raise typer.Exit(code=1)
+
+    # --multimodal / --audio are mutually exclusive with each other and
+    # with --template (templates pin their own base + body shape; v1
+    # doesn't ship media templates yet).
+    if multimodal and audio:
+        console.print(
+            "[red]init:[/red] --multimodal and --audio are mutually exclusive "
+            "(each targets a different modality)."
+        )
+        raise typer.Exit(code=2)
+    if multimodal and template is not None:
+        console.print(
+            "[red]init:[/red] --multimodal and --template are mutually exclusive; "
+            "v1 doesn't ship a VL template (see `dlm templates list`)."
+        )
+        raise typer.Exit(code=2)
+    if audio and template is not None:
+        console.print(
+            "[red]init:[/red] --audio and --template are mutually exclusive; "
+            "v1 doesn't ship an audio template (see `dlm templates list`)."
+        )
+        raise typer.Exit(code=2)
+
+    # --multimodal / --audio override the text-first --base default. A
+    # user who wants a different media base passes --base explicitly;
+    # we verify the pick is the right modality below.
+    if multimodal and base == "qwen2.5-1.5b":
+        base = "paligemma-3b-mix-224"
+    if audio and base == "qwen2.5-1.5b":
+        base = "qwen2-audio-7b-instruct"
+
+    # --template resolves the base from the template's meta.yaml; the
+    # --base default is kept for the no-template path only. Users who
+    # pass both a template and an explicit --base get a warning but the
+    # template still wins (the template body was authored against its
+    # recommended base).
+    if template is not None:
+        from dlm.templates import load_template
+
+        # Peek at the template's recommended base WITHOUT writing
+        # anything yet, so we can handle the license prompt against the
+        # right base (the template's, not `--base`) before committing.
+        try:
+            resolved_base = load_template(template).meta.recommended_base
+        except Exception as exc:
+            console.print(f"[red]init:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+        if base != "qwen2.5-1.5b" and base != resolved_base:
+            console.print(
+                f"[yellow]init:[/yellow] --base {base} ignored; template "
+                f"{template!r} uses {resolved_base}."
+            )
+    else:
+        resolved_base = base
+
+    # Media bases can't clear the GGUF-conversion probes. Force-skip
+    # them so the probe suite doesn't false-fail the init.
+    if multimodal or audio:
+        skip_export_probes = True
+
+    try:
+        spec = resolve_base_model(
+            resolved_base,
+            accept_license=i_accept_license,
+            skip_export_probes=skip_export_probes,
+        )
+    except UnknownBaseModelError as exc:
+        console.print(f"[red]init:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except GatedModelError as exc:
+        # Gated + user didn't pass --i-accept-license up-front. Prompt
+        # interactively if we have a TTY; otherwise refuse with the flag
+        # pointer.
+        if not _prompt_accept_license(console, resolved_base, exc.license_url):
+            console.print(
+                "[red]license:[/red] refused. Re-run with "
+                "[bold]--i-accept-license[/bold] to accept non-interactively."
+            )
+            raise typer.Exit(code=1) from exc
+        spec = resolve_base_model(
+            resolved_base,
+            accept_license=True,
+            skip_export_probes=skip_export_probes,
+        )
+
+    # NOW apply the template — license has already been accepted
+    # (either by --i-accept-license or interactive prompt), so pass
+    # the acceptance through. apply_template enforces the license
+    # contract at its boundary.
+    applied_result = None
+    if template is not None:
+        from dlm.templates import TemplateError, apply_template
+
+        try:
+            applied_result = apply_template(template, path, force=force, accept_license=True)
+        except TemplateError as exc:
+            console.print(f"[red]init:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+
+    # Record the license acceptance (or None for non-gated specs). We
+    # know `resolve_base_model` already validated the flag/prompt chain
+    # — `accept_license=True` means either the user passed the flag or
+    # answered the interactive prompt. Either path is a real
+    # acceptance; persist the record now so subsequent `dlm train` /
+    # `dlm export` don't re-prompt.
+    acceptance_via: Literal["cli_flag", "interactive"] = (
+        "cli_flag" if i_accept_license else "interactive"
+    )
+    acceptance = (
+        require_acceptance(spec, accept_license=True, via=acceptance_via)
+        if is_gated(spec)
+        else None
+    )
+
+    # Media flags require a matching-modality base. Check after resolve
+    # so users pointing at an unknown or wrong-modality hf:org/name get
+    # a clear explanation rather than a schema error deep in parse time.
+    if multimodal and spec.modality != "vision-language":
+        console.print(
+            f"[red]init:[/red] --multimodal requires a vision-language base; "
+            f"{spec.key!r} is modality='{spec.modality}'. "
+            "Pick --base paligemma-3b-mix-224 or drop --multimodal."
+        )
+        raise typer.Exit(code=2)
+    if audio and spec.modality != "audio-language":
+        console.print(
+            f"[red]init:[/red] --audio requires an audio-language base; "
+            f"{spec.key!r} is modality='{spec.modality}'. "
+            "Pick --base qwen2-audio-7b-instruct or drop --audio."
+        )
+        raise typer.Exit(code=2)
+
+    if applied_result is not None:
+        dlm_id = applied_result.dlm_id
+    else:
+        dlm_id = mint_ulid()
+        if multimodal:
+            _write_init_scaffold_multimodal(path, spec.key, dlm_id)
+        elif audio:
+            _write_init_scaffold_audio(path, spec.key, dlm_id)
+        else:
+            _write_init_scaffold(path, spec.key, dlm_id)
+
+    # Create the store + write the initial manifest so `dlm show` sees
+    # the license record and `dlm train` has a prior manifest to diff
+    # against.
+    from dlm.store.manifest import Manifest, save_manifest
+    from dlm.store.paths import for_dlm
+
+    store = for_dlm(dlm_id)
+    store.ensure_layout()
+    save_manifest(
+        store.manifest,
+        Manifest(
+            dlm_id=dlm_id,
+            base_model=spec.key,
+            base_model_revision=spec.revision,
+            source_path=path.resolve(),
+            license_acceptance=acceptance,
+        ),
+    )
+    if applied_result is not None:
+        meta = applied_result.template.meta
+        console.print(
+            f"[green]init:[/green] wrote {path} from template "
+            f"[bold]{meta.name}[/bold] ({meta.title}) — base {spec.key}."
+        )
+    else:
+        console.print(f"[green]init:[/green] wrote {path}")
+
+
+def _prompt_accept_license(console: object, base: str, license_url: str | None) -> bool:
+    """Interactive y/N prompt for gated base-model license acceptance.
+
+    Non-interactive runs (no TTY) return False; the caller surfaces the
+    `--i-accept-license` flag pointer in that case.
+    """
+    import sys
+
+    from rich.console import Console
+
+    assert isinstance(console, Console)
+
+    if not sys.stdin.isatty():
+        return False
+
+    console.print(
+        f"[yellow]This base model ({base}) requires accepting the upstream license.[/yellow]"
+    )
+    if license_url:
+        console.print(f"  Review the license at: {license_url}")
+    console.print("Accept and continue? [y/N]: ", end="")
+    try:
+        answer = input().strip().lower()
+    except EOFError:
+        return False
+    return answer in ("y", "yes")
+
+
+def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write a minimal-but-valid .dlm file at `path`.
+
+    Body has one PROSE paragraph + a commented instruction section so
+    users see both section shapes on first open.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 1
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::instruction::
+
+### Q
+Your example question.
+
+### A
+Your example answer.
+"""
+    path.write_text(scaffold, encoding="utf-8")
+
+
+def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write a VL-shaped .dlm file at `path`.
+
+    Body shows the `::image::` attribute fence + a caption so users
+    see the v10 grammar on first open. The placeholder path
+    `figures/your-image.png` is deliberately non-existent — first
+    `dlm train` will refuse with a clear file-missing error, prompting
+    the user to drop a real image in. This is friendlier than
+    committing an inert sample that users might not notice isn't theirs.
+
+    `dlm_version: 10` because IMAGE sections require schema v10.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 10
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::image path="figures/your-image.png" alt="short description"::
+Caption text describing the image. Training rows bundle the image
+with this caption as `<image>\\n<caption>`.
+
+::instruction::
+
+### Q
+What is in this image?
+
+### A
+Describe what the image shows.
+"""
+    path.write_text(scaffold, encoding="utf-8")
+
+
+def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write an audio-shaped .dlm file at `path`.
+
+    Body shows the `::audio::` attribute fence with the sibling-
+    transcript-friendly `transcript="..."` form so users see the v11
+    grammar on first open. The placeholder path `clips/your-clip.wav`
+    is deliberately non-existent — first `dlm train` refuses with a
+    clear file-missing error rather than silently training on an inert
+    sample.
+
+    `dlm_version: 11` because AUDIO sections require schema v11.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 11
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::
+
+::instruction::
+
+### Q
+What was said in this recording?
+
+### A
+Describe what you hear in the audio.
+"""
+    path.write_text(scaffold, encoding="utf-8")
diff --git a/tests/unit/cli/test_init_edges.py b/tests/unit/cli/test_init_edges.py
index d6e2e7c..c80a673 100644
--- a/tests/unit/cli/test_init_edges.py
+++ b/tests/unit/cli/test_init_edges.py
@@ -107,7 +107,10 @@ def _fake_resolve(
 
         monkeypatch.setattr(base_models, "resolve", _fake_resolve)
         monkeypatch.setattr(base_models, "is_gated", lambda spec: False)
-        monkeypatch.setattr(commands, "_prompt_accept_license", lambda console, base, url: True)
+        monkeypatch.setattr(
+            "dlm.cli.commands.init._prompt_accept_license",
+            lambda console, base, url: True,
+        )
 
         result = runner.invoke(
             app,

From 41cac0a1da1ca1a0869632871ebfe9033bafeb7e Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:17:44 -0400
Subject: [PATCH 22/46] Extract train_cmd + multi-GPU helpers into
 cli/commands/train.py

---
 src/dlm/cli/commands/__init__.py              | 727 +----------------
 src/dlm/cli/commands/train.py                 | 731 ++++++++++++++++++
 tests/unit/cli/test_train_validation_edges.py |   2 +-
 3 files changed, 736 insertions(+), 724 deletions(-)
 create mode 100644 src/dlm/cli/commands/train.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 616786a..4cf0620 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -10,10 +10,9 @@
 
 from __future__ import annotations
 
-import os
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Annotated, Any, Literal
+from typing import Annotated, Any
 
 import typer
 
@@ -44,6 +43,9 @@
 from dlm.cli.commands.synth import synth_list_cmd as synth_list_cmd
 from dlm.cli.commands.synth import synth_revert_cmd as synth_revert_cmd
 from dlm.cli.commands.templates import templates_list_cmd as templates_list_cmd
+from dlm.cli.commands.train import _maybe_dispatch_multi_gpu as _maybe_dispatch_multi_gpu
+from dlm.cli.commands.train import _strip_gpus_from_argv as _strip_gpus_from_argv
+from dlm.cli.commands.train import train_cmd as train_cmd
 from dlm.cli.commands.unpack import unpack_cmd as unpack_cmd
 from dlm.cli.commands.verify import verify_cmd as verify_cmd
 
@@ -56,727 +58,6 @@ def _stub(sprint: str, subject: str) -> None:
 
 
 
-def train_cmd(
-    path: Annotated[
-        Path,
-        typer.Argument(
-            help=(
-                ".dlm file to train. Or a directory — when passed a directory, "
-                "`dlm train` auto-scaffolds `<dir>/.dlm/corpus.dlm` on first run "
-                "(with --base) and reuses it on subsequent runs."
-            ),
-        ),
-    ],
-    resume: Annotated[bool, typer.Option("--resume", help="Resume from last checkpoint.")] = False,
-    fresh: Annotated[bool, typer.Option("--fresh", help="Discard prior adapter state.")] = False,
-    seed: Annotated[int | None, typer.Option("--seed", help="Override training seed.")] = None,
-    max_steps: Annotated[int | None, typer.Option("--max-steps", help="Cap step count.")] = None,
-    phase: Annotated[
-        str,
-        typer.Option(
-            "--phase",
-            help=(
-                "Which training phases to run: 'sft' (supervised only), "
-                "'preference' (DPO/ORPO only — requires a prior SFT "
-                "adapter), or 'all' (SFT then preference when enabled). "
-                "The preference method (dpo / orpo) comes from "
-                "training.preference.method in the frontmatter."
-            ),
-        ),
-    ] = "all",
-    i_accept_license: Annotated[
-        bool,
-        typer.Option(
-            "--i-accept-license",
-            help="Accept the base model's license (required for gated bases like llama-3.2).",
-        ),
-    ] = False,
-    strict_lock: Annotated[
-        bool,
-        typer.Option(
-            "--strict-lock",
-            help="Fail on any dlm.lock drift, including version warns.",
-        ),
-    ] = False,
-    update_lock: Annotated[
-        bool,
-        typer.Option(
-            "--update-lock",
-            help="Overwrite dlm.lock without validating prior entries.",
-        ),
-    ] = False,
-    ignore_lock: Annotated[
-        bool,
-        typer.Option(
-            "--ignore-lock",
-            help="Skip dlm.lock validation and don't write a new lock.",
-        ),
-    ] = False,
-    strict_metrics: Annotated[
-        bool,
-        typer.Option(
-            "--strict-metrics",
-            help="Promote metrics SQLite write failures to hard errors.",
-        ),
-    ] = False,
-    no_mined: Annotated[
-        bool,
-        typer.Option(
-            "--no-mined",
-            help=(
-                "Exclude auto-mined preference sections from the preference "
-                "phase, including replay-sampled mined pairs. Hand-authored "
-                "`::preference::` sections still train normally."
-            ),
-        ),
-    ] = False,
-    gpus: Annotated[
-        str | None,
-        typer.Option(
-            "--gpus",
-            help=(
-                "Multi-GPU training. `all` uses every visible CUDA device; "
-                "`N` uses the first N; `0,1` selects exact device ids. "
-                "Dispatches to `accelerate launch` when >1 device is "
-                "selected. Omit for single-process training."
-            ),
-        ),
-    ] = None,
-    watch: Annotated[
-        bool,
-        typer.Option(
-            "--watch",
-            help=(
-                "Save-to-train mode. After an initial train, block on "
-                "filesystem events and run incremental retrains "
-                "(mode=resume, step-capped) on each settled save. Ctrl-C "
-                "exits cleanly between cycles."
-            ),
-        ),
-    ] = False,
-    watch_max_steps: Annotated[
-        int,
-        typer.Option(
-            "--watch-max-steps",
-            help="Per-cycle step cap for --watch. Default 100 keeps cycles responsive.",
-        ),
-    ] = 100,
-    watch_debounce_ms: Annotated[
-        int,
-        typer.Option(
-            "--watch-debounce-ms",
-            help="Quiet interval (ms) before a burst of saves triggers a retrain.",
-        ),
-    ] = 400,
-    watch_repl: Annotated[
-        bool,
-        typer.Option(
-            "--repl",
-            help=(
-                "With --watch: also open the REPL so prompts reflect the "
-                "latest adapter. **Scaffolded** — threading integration "
-                "is untestable without a two-process harness; emit a "
-                "not-yet-implemented refusal and exit 2."
-            ),
-        ),
-    ] = False,
-    base: Annotated[
-        str | None,
-        typer.Option(
-            "--base",
-            help=(
-                "Base model key for auto-scaffold. Required on first run when "
-                "`path` is a directory without an existing .dlm/ config. "
-                "Accepts registry keys (smollm2-135m, qwen2.5-coder-1.5b, ...) "
-                "or `hf:<org>/<name>` for off-registry models."
-            ),
-        ),
-    ] = None,
-    include: Annotated[
-        list[str] | None,
-        typer.Option(
-            "--include",
-            help=(
-                "Glob pattern for files to train on (auto-scaffold only). "
-                "Repeatable. Default: '**/*' with --recursive, '*' without. "
-                "Examples: '**/*.py', '**/*.f90', '**/*.{md,rst}'."
-            ),
-        ),
-    ] = None,
-    exclude: Annotated[
-        list[str] | None,
-        typer.Option(
-            "--exclude",
-            help=(
-                "Glob pattern for files to skip (auto-scaffold only). "
-                "Repeatable. Defaults (secrets, VCS, lockfiles, binaries) "
-                "apply on top via the descent protocol."
-            ),
-        ),
-    ] = None,
-    recursive: Annotated[
-        bool,
-        typer.Option(
-            "--recursive/--no-recursive",
-            "-r/-R",
-            help=(
-                "Auto-scaffold include patterns descend into subdirectories. "
-                "Default True. --no-recursive limits the default include to "
-                "top-level files only."
-            ),
-        ),
-    ] = True,
-    name: Annotated[
-        str,
-        typer.Option(
-            "--name",
-            help=(
-                "Adapter name for auto-scaffold → `<dir>/.dlm/<name>.dlm`. "
-                "Default 'corpus'. Lets a single tree host multiple adapters."
-            ),
-        ),
-    ] = "corpus",
-    policy: Annotated[
-        str,
-        typer.Option(
-            "--policy",
-            help=(
-                "Auto-scaffold sources_policy: 'strict' (default; confines "
-                "training to the target directory) or 'permissive' (allows "
-                "absolute paths anywhere)."
-            ),
-        ),
-    ] = "strict",
-    rescaffold: Annotated[
-        bool,
-        typer.Option(
-            "--rescaffold",
-            help=(
-                "Rewrite an existing scaffolded .dlm in place with the new "
-                "--base/--include/--exclude/--policy flags. Keeps the same "
-                "dlm_id (store stays intact). Without it, re-running with "
-                "frontmatter-editing flags refuses to shadow-edit."
-            ),
-        ),
-    ] = False,
-    listen_rpc: Annotated[
-        str | None,
-        typer.Option(
-            "--listen-rpc",
-            help=(
-                "Open a JSON-RPC endpoint at <host:port> (e.g. `127.0.0.1:7429`) "
-                "that accepts `inject_probe` pushes from sway-style eval "
-                "harnesses. Probes enter the queue and drain at the next "
-                "training-cycle boundary. Requires --watch or --max-cycles. "
-                "Bearer token from DLM_PROBE_TOKEN."
-            ),
-        ),
-    ] = None,
-    max_cycles: Annotated[
-        int,
-        typer.Option(
-            "--max-cycles",
-            help=(
-                "Convergence stop for --listen-rpc without --watch: cap the "
-                "probe-driven retrain loop at N cycles. Ignored without "
-                "--listen-rpc."
-            ),
-        ),
-    ] = 0,
-    no_cache: Annotated[
-        bool,
-        typer.Option(
-            "--no-cache",
-            help=(
-                "Opt out of the tokenized-section cache for this run. By "
-                "default, `dlm train` pre-tokenizes directive-sourced rows "
-                "via ~/.dlm/store/<id>/tokenized-cache/ so subsequent runs "
-                "on the same corpus skip re-tokenization. Use this to "
-                "bypass the cache for debugging or to compare cached vs "
-                "uncached training determinism."
-            ),
-        ),
-    ] = False,
-    skip_export_probes: Annotated[
-        bool,
-        typer.Option(
-            "--skip-export-probes",
-            help=(
-                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
-                "architectures (not yet in our vendored llama.cpp) can still "
-                "be used for training + HF inference. Forfeits `dlm export` "
-                "to Ollama until the vendored copy catches up. Mirrors the "
-                "flag of the same name on `dlm init`."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """Train / retrain a .dlm against its base model."""
-    import sqlite3
-    import sys
-
-    from rich.console import Console
-
-    from dlm.base_models import GatedModelError
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.doc.errors import DlmParseError
-    from dlm.doc.parser import parse_file
-    from dlm.hardware import doctor
-    from dlm.lock import LockMode, LockValidationError
-    from dlm.store.paths import for_dlm
-    from dlm.train import (
-        DiskSpaceError,
-        OOMError,
-        ResumeIntegrityError,
-        TrainingError,
-    )
-    from dlm.train.preference import (
-        DpoPhaseError,
-        NoPreferenceContentError,
-        PriorAdapterRequiredError,
-    )
-    from dlm.train.preference.phase_orchestrator import Phase, run_phases
-
-    console = Console(stderr=True)
-
-    if phase not in ("sft", "preference", "all"):
-        console.print(f"[red]error:[/red] --phase must be one of sft|preference|all, got {phase!r}")
-        raise typer.Exit(code=2)
-    phase_literal: Phase = phase  # type: ignore[assignment]
-
-    if resume and fresh:
-        console.print("[red]error:[/red] --resume and --fresh are mutually exclusive")
-        raise typer.Exit(code=2)
-    mode: Literal["fresh", "resume"] = "resume" if resume else "fresh"
-
-    # --gpus dispatches to accelerate launch when >1 device is
-    # selected. The single-GPU path falls through to the existing
-    # in-process trainer; a bare `--gpus 1` is a no-op (users can use
-    # it to lock the visible device set via CUDA_VISIBLE_DEVICES
-    # without spawning a subprocess).
-    if gpus is not None:
-        # Resolve mixed_precision from capabilities so bf16-incapable
-        # CUDA GPUs (SM<8.0) don't trip the `accelerate launch`
-        # default. `probe()` is cheap and runs in the launcher-side
-        # process only; each rank re-probes via `doctor()` later.
-        from dlm.hardware.capabilities import probe as _probe_caps
-
-        _caps = _probe_caps()
-        _mp = "bf16" if _caps.supports_bf16 else "fp16"
-        exit_code = _maybe_dispatch_multi_gpu(gpus, sys.argv, console, mixed_precision=_mp)
-        if exit_code is not None:
-            raise typer.Exit(code=exit_code)
-
-    # Mutual-exclusion gate for the three lock flags. Exactly one (or
-    # zero) may be set — silently ignoring a conflicting pair would
-    # mask operator intent.
-    lock_flag_count = sum((strict_lock, update_lock, ignore_lock))
-    if lock_flag_count > 1:
-        console.print(
-            "[red]error:[/red] --strict-lock / --update-lock / --ignore-lock "
-            "are mutually exclusive",
-        )
-        raise typer.Exit(code=2)
-    lock_mode: LockMode = "default"
-    if strict_lock:
-        lock_mode = "strict"
-    elif update_lock:
-        lock_mode = "update"
-    elif ignore_lock:
-        lock_mode = "ignore"
-
-    # `--no-cache` bypasses the tokenized-section cache for this run.
-    # Plumbed as an env var because the trainer's pre-tokenize helper
-    # already reads one — the CLI flag is a discoverable surface over
-    # the same switch. Rolling the flag into `TrainingPlan` is a
-    # deferred refactor; the env var is sufficient for the user-facing
-    # contract and survives `accelerate launch` re-invocations.
-    if no_cache:
-        from dlm.train.cache import set_disable_flag
-
-        set_disable_flag("--no-cache")
-
-    if policy not in ("permissive", "strict"):
-        console.print(
-            f"[red]error:[/red] --policy must be 'permissive' or 'strict', got {policy!r}"
-        )
-        raise typer.Exit(code=2)
-    policy_literal: Literal["permissive", "strict"] = policy  # type: ignore[assignment]
-
-    # --listen-rpc requires a loop to drain the queue — either --watch
-    # (file-change cycles) or --max-cycles N (bounded retrain loop).
-    # Without one, the server would accept probes that never train. We
-    # also need the bearer token up front so the user sees the refusal
-    # before we spend time downloading weights.
-    rpc_config: tuple[str, int, str] | None = None
-    if listen_rpc is not None:
-        if not watch and max_cycles <= 0:
-            console.print(
-                "[red]error:[/red] --listen-rpc requires --watch or --max-cycles N "
-                "(the probe queue needs a drain cadence)"
-            )
-            raise typer.Exit(code=2)
-        token = os.environ.get("DLM_PROBE_TOKEN", "").strip()
-        if not token:
-            console.print(
-                "[red]error:[/red] --listen-rpc needs a bearer token; "
-                "export DLM_PROBE_TOKEN=<secret>"
-            )
-            raise typer.Exit(code=2)
-        host, _, port_s = listen_rpc.rpartition(":")
-        if not host or not port_s:
-            console.print(f"[red]error:[/red] --listen-rpc expects host:port, got {listen_rpc!r}")
-            raise typer.Exit(code=2)
-        try:
-            port = int(port_s)
-        except ValueError:
-            console.print(f"[red]error:[/red] --listen-rpc port must be an integer, got {port_s!r}")
-            raise typer.Exit(code=2) from None
-        rpc_config = (host, port, token)
-
-    # Directory targets auto-scaffold `<dir>/.dlm/corpus.dlm` (or
-    # reuse an existing one). After this block, `path` always points
-    # at an actual `.dlm` file that the rest of the flow can parse.
-    if path.is_dir():
-        from dlm.cli.scaffold import ScaffoldError, scaffold_train_target
-
-        try:
-            scaffold_result = scaffold_train_target(
-                path,
-                base=base,
-                include=tuple(include or ()),
-                exclude=tuple(exclude or ()),
-                recursive=recursive,
-                name=name,
-                policy=policy_literal,
-                rescaffold=rescaffold,
-            )
-        except ScaffoldError as exc:
-            console.print(f"[red]scaffold:[/red] {exc.message}")
-            raise typer.Exit(code=1) from exc
-
-        if scaffold_result.scaffolded:
-            console.print(
-                f"[cyan]scaffolded:[/cyan] {scaffold_result.dlm_path} "
-                f"(dlm_id={scaffold_result.dlm_id})"
-            )
-        path = scaffold_result.dlm_path
-
-    try:
-        parsed = parse_file(path)
-    except (DlmParseError, OSError) as exc:
-        console.print(f"[red]error:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    try:
-        spec = resolve_base_model(
-            parsed.frontmatter.base_model,
-            accept_license=i_accept_license,
-            skip_export_probes=skip_export_probes,
-        )
-    except GatedModelError as exc:
-        console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
-        if exc.license_url:
-            console.print(f"  review the license at: {exc.license_url}")
-        console.print(
-            "  re-run with [bold]--i-accept-license[/bold] once you have accepted. "
-            "Acceptance will be persisted in the store manifest."
-        )
-        raise typer.Exit(code=1) from exc
-    # Detect the DDP world_size set by `accelerate launch`
-    # (WORLD_SIZE env var) and thread it into the doctor so the plan's
-    # effective_batch_size reflects the rank count. Single-process
-    # runs read 1 and the plan math is unchanged.
-    from dlm.train.distributed import detect_world_size
-
-    ws = detect_world_size()
-    doctor_result = doctor(
-        training_config=parsed.frontmatter.training,
-        base_params=spec.params,
-        seq_len=min(parsed.frontmatter.training.sequence_len, spec.effective_context_length),
-        world_size=ws,
-    )
-    plan = doctor_result.plan
-    if plan is None:
-        console.print(
-            "[red]doctor:[/red] no viable training plan for this host. "
-            "Run `dlm doctor` for details."
-        )
-        raise typer.Exit(code=1)
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    store.ensure_layout()
-
-    # `dlm init` writes a manifest as part of store provisioning. Mirror
-    # that manifest write here when the store layout exists but has no
-    # manifest yet — covers two flows:
-    #   - auto-scaffold via `dlm train <dir>` on a fresh directory
-    #   - hand-authored .dlm with a fresh ULID that never went through
-    #     `dlm init` (e.g. authored via the LSP / VSCode extension)
-    # License acceptance has already been validated upstream by this
-    # point, so we just record it.
-    if not store.manifest.exists():
-        from dlm.base_models import is_gated
-        from dlm.base_models.license import require_acceptance
-        from dlm.store.manifest import Manifest, save_manifest
-
-        acceptance = (
-            require_acceptance(spec, accept_license=True, via="cli_flag")
-            if is_gated(spec)
-            else None
-        )
-        save_manifest(
-            store.manifest,
-            Manifest(
-                dlm_id=parsed.frontmatter.dlm_id,
-                base_model=spec.key,
-                base_model_revision=spec.revision,
-                source_path=path.resolve(),
-                license_acceptance=acceptance,
-            ),
-        )
-
-    from dlm.modality import ModalityError
-
-    try:
-        phase_results = run_phases(
-            store,
-            parsed,
-            spec,
-            plan,
-            phase=phase_literal,
-            mode=mode,
-            seed=seed,
-            max_steps=max_steps,
-            lock_mode=lock_mode,
-            capabilities=doctor_result.capabilities,
-            world_size=ws,
-            strict_metrics=strict_metrics,
-            include_auto_mined=not no_mined,
-        )
-    except sqlite3.Error as exc:
-        console.print(f"[red]metrics:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except LockValidationError as exc:
-        console.print(f"[red]lock:[/red] {exc}")
-        console.print(
-            "  Re-run with [bold]--update-lock[/bold] to accept the drift or "
-            "[bold]--ignore-lock[/bold] to continue without persisting a new lock."
-        )
-        raise typer.Exit(code=1) from exc
-    except DiskSpaceError as exc:
-        console.print(f"[red]disk:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except OOMError as exc:
-        from dlm.train import format_oom_message
-
-        console.print(
-            format_oom_message(
-                step=exc.step,
-                peak_bytes=exc.peak_bytes,
-                free_at_start_bytes=exc.free_at_start_bytes,
-                current_grad_accum=exc.current_grad_accum,
-                recommended_grad_accum=exc.recommended_grad_accum,
-            )
-        )
-        raise typer.Exit(code=1) from exc
-    except ResumeIntegrityError as exc:
-        console.print(f"[red]resume:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except (NoPreferenceContentError, PriorAdapterRequiredError) as exc:
-        console.print(f"[red]dpo:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except DpoPhaseError as exc:
-        console.print(f"[red]dpo:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except TrainingError as exc:
-        console.print(f"[red]training:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ModalityError as exc:
-        console.print(f"[red]training:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if not phase_results:
-        console.print(
-            "[yellow]no-op:[/yellow] nothing to train for the requested phase. "
-            "Check that the document has the section types the phase consumes "
-            "(prose/instruction for SFT, preference for DPO)."
-        )
-        raise typer.Exit(code=0)
-
-    for pr in phase_results:
-        result = pr.result
-        console.print(
-            f"[green]{pr.phase}:[/green] v{result.adapter_version:04d} "
-            f"({result.steps} steps, seed={result.seed}, "
-            f"determinism={result.determinism.class_})"
-        )
-        console.print(f"adapter: {result.adapter_path}")
-        console.print(f"log:     {result.log_path}")
-    # Final-train-loss stdout line mirrors the last phase so existing
-    # downstream scripts keep working.
-    result = phase_results[-1].result
-    if result.final_train_loss is not None:
-        sys.stdout.write(f"{result.final_train_loss}\n")
-
-    # --watch keeps the training context alive and re-runs incremental
-    # cycles on file change. Entered AFTER the initial train so the
-    # loop resumes from a real committed adapter.
-    if watch:
-        if watch_repl:
-            console.print(
-                "[red]train:[/red] --watch --repl is scaffolded but not yet "
-                "implemented. The threaded REPL bridge needs a test "
-                "harness we don't have in CI today."
-            )
-            raise typer.Exit(code=2)
-
-        from dlm.watch.loop import run_watch
-        from dlm.watch.status import WatchStatus, render_status
-
-        status = WatchStatus(doc_path=str(path), sections=len(parsed.sections))
-
-        # Start the probe-RPC server if --listen-rpc was requested. The
-        # queue is exposed; end-to-end flow into `build_dataset` at the
-        # next cycle boundary is the follow-up consumer task — for now
-        # the server accepts and buffers probes so sway sinks can be
-        # wired + tested against a live endpoint.
-        rpc_server = None
-        probe_queue = None
-        if rpc_config is not None:
-            from dlm.train.inject import InjectedProbeQueue
-            from dlm.train.rpc import ProbeRpcServer
-
-            rpc_host, rpc_port, rpc_token = rpc_config
-            probe_queue = InjectedProbeQueue()
-            rpc_server = ProbeRpcServer(
-                host=rpc_host, port=rpc_port, token=rpc_token, queue=probe_queue
-            )
-            rpc_server.start()
-            bound_host, bound_port = rpc_server.address
-            console.print(
-                f"[dim]rpc:[/dim] listening on {bound_host}:{bound_port} "
-                f"(queue capacity {probe_queue.capacity})"
-            )
-
-        console.print(
-            f"[dim]watch:[/dim] {render_status(status)}; "
-            f"max_steps={watch_max_steps}, debounce_ms={watch_debounce_ms}"
-        )
-
-        def _log_cycle(result_: object) -> None:
-            from dlm.watch.loop import CycleResult
-
-            assert isinstance(result_, CycleResult)
-            if result_.ran and result_.run_result is not None:
-                status.mark_cycle_done(
-                    train_loss=result_.run_result.final_train_loss,
-                    val_loss=result_.run_result.final_val_loss,
-                    steps=result_.run_result.steps,
-                    coalesced=1,
-                )
-                console.print(f"[dim]watch:[/dim] {render_status(status)}")
-            else:
-                console.print("[dim]watch:[/dim] no new content, skipping retrain")
-
-        try:
-            exit_code = run_watch(
-                doc_path=path,
-                store=store,
-                spec=spec,
-                plan=plan,
-                max_steps=watch_max_steps,
-                debounce_ms=watch_debounce_ms,
-                on_cycle=_log_cycle,
-                drain_probes=probe_queue.drain if probe_queue is not None else None,
-            )
-        except KeyboardInterrupt:
-            if rpc_server is not None:
-                rpc_server.stop()
-            console.print("[dim]watch:[/dim] Ctrl-C received, exiting")
-            raise typer.Exit(code=0)  # noqa: B904
-        finally:
-            if rpc_server is not None:
-                rpc_server.stop()
-        raise typer.Exit(code=exit_code)
-
-    # --max-cycles without --watch: the bounded-loop cycle driver is
-    # the next consumer-side integration step. Accept the flags, refuse
-    # execution until the loop lands.
-    if rpc_config is not None and not watch:
-        console.print(
-            "[red]train:[/red] --listen-rpc --max-cycles (without --watch) is "
-            "scaffolded; the bounded cycle loop is the follow-up. Use "
-            "--watch for now."
-        )
-        raise typer.Exit(code=2)
-
-
-def _maybe_dispatch_multi_gpu(
-    gpus_flag: str,
-    argv: list[str],
-    console: object,
-    *,
-    mixed_precision: str = "bf16",
-) -> int | None:
-    """Resolve `--gpus`; if multi-GPU, spawn accelerate launch and return its exit code.
-
-    Returns None when the resolved world_size is 1 — caller falls
-    through to the in-process trainer. Returns an int exit code when
-    the launcher ran, so the caller can `raise typer.Exit(code=...)`.
-    """
-    from rich.console import Console
-
-    from dlm.train.distributed import UnsupportedGpuSpecError, launch_multi_gpu, parse_gpus
-
-    assert isinstance(console, Console)
-
-    try:
-        spec = parse_gpus(gpus_flag)
-    except UnsupportedGpuSpecError as exc:
-        console.print(f"[red]train:[/red] {exc}")
-        return 2
-
-    try:
-        import torch
-
-        device_count = int(torch.cuda.device_count())
-    except Exception:  # pragma: no cover - torch probing has many failure modes
-        device_count = 0
-
-    try:
-        device_ids = spec.resolve(device_count)
-    except UnsupportedGpuSpecError as exc:
-        console.print(f"[red]train:[/red] {exc}")
-        return 2
-
-    if len(device_ids) < 2:
-        # Single-GPU (or --gpus 1) — no subprocess needed. Caller
-        # continues with the in-process path.
-        return None
-
-    # Forward the original argv minus `--gpus` / `--gpus=...`; the
-    # worker entry strips it defensively too, but we drop it here so
-    # the launched accelerate cmd carries exactly the intended args.
-    cli_args = _strip_gpus_from_argv(argv)
-    console.print(
-        f"[dim]train:[/dim] dispatching to accelerate launch on devices {list(device_ids)} "
-        f"(mixed_precision={mixed_precision})"
-    )
-    return launch_multi_gpu(device_ids, cli_args, mixed_precision=mixed_precision)
-
-
-def _strip_gpus_from_argv(argv: list[str]) -> list[str]:
-    """Drop `--gpus <v>` / `--gpus=<v>` from raw sys.argv (launcher side).
-
-    Skips argv[0] (script path) — `accelerate launch -m <entry>`
-    provides the rank entrypoint separately, so the launcher forwards
-    argv[1:] minus the multi-GPU flag. Delegates to the shared
-    `strip_gpus_flag` helper.
-    """
-    from dlm.train.distributed.gpus import strip_gpus_flag
-
-    return strip_gpus_flag(argv, skip_argv0=True)
-
 
 def prompt_cmd(
     ctx: typer.Context,
diff --git a/src/dlm/cli/commands/train.py b/src/dlm/cli/commands/train.py
new file mode 100644
index 0000000..3d73676
--- /dev/null
+++ b/src/dlm/cli/commands/train.py
@@ -0,0 +1,731 @@
+"""`dlm train` — train / retrain a .dlm against its base model."""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Annotated, Literal
+
+import typer
+
+
+def train_cmd(
+    path: Annotated[
+        Path,
+        typer.Argument(
+            help=(
+                ".dlm file to train. Or a directory — when passed a directory, "
+                "`dlm train` auto-scaffolds `<dir>/.dlm/corpus.dlm` on first run "
+                "(with --base) and reuses it on subsequent runs."
+            ),
+        ),
+    ],
+    resume: Annotated[bool, typer.Option("--resume", help="Resume from last checkpoint.")] = False,
+    fresh: Annotated[bool, typer.Option("--fresh", help="Discard prior adapter state.")] = False,
+    seed: Annotated[int | None, typer.Option("--seed", help="Override training seed.")] = None,
+    max_steps: Annotated[int | None, typer.Option("--max-steps", help="Cap step count.")] = None,
+    phase: Annotated[
+        str,
+        typer.Option(
+            "--phase",
+            help=(
+                "Which training phases to run: 'sft' (supervised only), "
+                "'preference' (DPO/ORPO only — requires a prior SFT "
+                "adapter), or 'all' (SFT then preference when enabled). "
+                "The preference method (dpo / orpo) comes from "
+                "training.preference.method in the frontmatter."
+            ),
+        ),
+    ] = "all",
+    i_accept_license: Annotated[
+        bool,
+        typer.Option(
+            "--i-accept-license",
+            help="Accept the base model's license (required for gated bases like llama-3.2).",
+        ),
+    ] = False,
+    strict_lock: Annotated[
+        bool,
+        typer.Option(
+            "--strict-lock",
+            help="Fail on any dlm.lock drift, including version warns.",
+        ),
+    ] = False,
+    update_lock: Annotated[
+        bool,
+        typer.Option(
+            "--update-lock",
+            help="Overwrite dlm.lock without validating prior entries.",
+        ),
+    ] = False,
+    ignore_lock: Annotated[
+        bool,
+        typer.Option(
+            "--ignore-lock",
+            help="Skip dlm.lock validation and don't write a new lock.",
+        ),
+    ] = False,
+    strict_metrics: Annotated[
+        bool,
+        typer.Option(
+            "--strict-metrics",
+            help="Promote metrics SQLite write failures to hard errors.",
+        ),
+    ] = False,
+    no_mined: Annotated[
+        bool,
+        typer.Option(
+            "--no-mined",
+            help=(
+                "Exclude auto-mined preference sections from the preference "
+                "phase, including replay-sampled mined pairs. Hand-authored "
+                "`::preference::` sections still train normally."
+            ),
+        ),
+    ] = False,
+    gpus: Annotated[
+        str | None,
+        typer.Option(
+            "--gpus",
+            help=(
+                "Multi-GPU training. `all` uses every visible CUDA device; "
+                "`N` uses the first N; `0,1` selects exact device ids. "
+                "Dispatches to `accelerate launch` when >1 device is "
+                "selected. Omit for single-process training."
+            ),
+        ),
+    ] = None,
+    watch: Annotated[
+        bool,
+        typer.Option(
+            "--watch",
+            help=(
+                "Save-to-train mode. After an initial train, block on "
+                "filesystem events and run incremental retrains "
+                "(mode=resume, step-capped) on each settled save. Ctrl-C "
+                "exits cleanly between cycles."
+            ),
+        ),
+    ] = False,
+    watch_max_steps: Annotated[
+        int,
+        typer.Option(
+            "--watch-max-steps",
+            help="Per-cycle step cap for --watch. Default 100 keeps cycles responsive.",
+        ),
+    ] = 100,
+    watch_debounce_ms: Annotated[
+        int,
+        typer.Option(
+            "--watch-debounce-ms",
+            help="Quiet interval (ms) before a burst of saves triggers a retrain.",
+        ),
+    ] = 400,
+    watch_repl: Annotated[
+        bool,
+        typer.Option(
+            "--repl",
+            help=(
+                "With --watch: also open the REPL so prompts reflect the "
+                "latest adapter. **Scaffolded** — threading integration "
+                "is untestable without a two-process harness; emit a "
+                "not-yet-implemented refusal and exit 2."
+            ),
+        ),
+    ] = False,
+    base: Annotated[
+        str | None,
+        typer.Option(
+            "--base",
+            help=(
+                "Base model key for auto-scaffold. Required on first run when "
+                "`path` is a directory without an existing .dlm/ config. "
+                "Accepts registry keys (smollm2-135m, qwen2.5-coder-1.5b, ...) "
+                "or `hf:<org>/<name>` for off-registry models."
+            ),
+        ),
+    ] = None,
+    include: Annotated[
+        list[str] | None,
+        typer.Option(
+            "--include",
+            help=(
+                "Glob pattern for files to train on (auto-scaffold only). "
+                "Repeatable. Default: '**/*' with --recursive, '*' without. "
+                "Examples: '**/*.py', '**/*.f90', '**/*.{md,rst}'."
+            ),
+        ),
+    ] = None,
+    exclude: Annotated[
+        list[str] | None,
+        typer.Option(
+            "--exclude",
+            help=(
+                "Glob pattern for files to skip (auto-scaffold only). "
+                "Repeatable. Defaults (secrets, VCS, lockfiles, binaries) "
+                "apply on top via the descent protocol."
+            ),
+        ),
+    ] = None,
+    recursive: Annotated[
+        bool,
+        typer.Option(
+            "--recursive/--no-recursive",
+            "-r/-R",
+            help=(
+                "Auto-scaffold include patterns descend into subdirectories. "
+                "Default True. --no-recursive limits the default include to "
+                "top-level files only."
+            ),
+        ),
+    ] = True,
+    name: Annotated[
+        str,
+        typer.Option(
+            "--name",
+            help=(
+                "Adapter name for auto-scaffold → `<dir>/.dlm/<name>.dlm`. "
+                "Default 'corpus'. Lets a single tree host multiple adapters."
+            ),
+        ),
+    ] = "corpus",
+    policy: Annotated[
+        str,
+        typer.Option(
+            "--policy",
+            help=(
+                "Auto-scaffold sources_policy: 'strict' (default; confines "
+                "training to the target directory) or 'permissive' (allows "
+                "absolute paths anywhere)."
+            ),
+        ),
+    ] = "strict",
+    rescaffold: Annotated[
+        bool,
+        typer.Option(
+            "--rescaffold",
+            help=(
+                "Rewrite an existing scaffolded .dlm in place with the new "
+                "--base/--include/--exclude/--policy flags. Keeps the same "
+                "dlm_id (store stays intact). Without it, re-running with "
+                "frontmatter-editing flags refuses to shadow-edit."
+            ),
+        ),
+    ] = False,
+    listen_rpc: Annotated[
+        str | None,
+        typer.Option(
+            "--listen-rpc",
+            help=(
+                "Open a JSON-RPC endpoint at <host:port> (e.g. `127.0.0.1:7429`) "
+                "that accepts `inject_probe` pushes from sway-style eval "
+                "harnesses. Probes enter the queue and drain at the next "
+                "training-cycle boundary. Requires --watch or --max-cycles. "
+                "Bearer token from DLM_PROBE_TOKEN."
+            ),
+        ),
+    ] = None,
+    max_cycles: Annotated[
+        int,
+        typer.Option(
+            "--max-cycles",
+            help=(
+                "Convergence stop for --listen-rpc without --watch: cap the "
+                "probe-driven retrain loop at N cycles. Ignored without "
+                "--listen-rpc."
+            ),
+        ),
+    ] = 0,
+    no_cache: Annotated[
+        bool,
+        typer.Option(
+            "--no-cache",
+            help=(
+                "Opt out of the tokenized-section cache for this run. By "
+                "default, `dlm train` pre-tokenizes directive-sourced rows "
+                "via ~/.dlm/store/<id>/tokenized-cache/ so subsequent runs "
+                "on the same corpus skip re-tokenization. Use this to "
+                "bypass the cache for debugging or to compare cached vs "
+                "uncached training determinism."
+            ),
+        ),
+    ] = False,
+    skip_export_probes: Annotated[
+        bool,
+        typer.Option(
+            "--skip-export-probes",
+            help=(
+                "Skip the llama.cpp / GGUF-conversion probes so brand-new "
+                "architectures (not yet in our vendored llama.cpp) can still "
+                "be used for training + HF inference. Forfeits `dlm export` "
+                "to Ollama until the vendored copy catches up. Mirrors the "
+                "flag of the same name on `dlm init`."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """Train / retrain a .dlm against its base model."""
+    import sqlite3
+    import sys
+
+    from rich.console import Console
+
+    from dlm.base_models import GatedModelError
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.doc.errors import DlmParseError
+    from dlm.doc.parser import parse_file
+    from dlm.hardware import doctor
+    from dlm.lock import LockMode, LockValidationError
+    from dlm.store.paths import for_dlm
+    from dlm.train import (
+        DiskSpaceError,
+        OOMError,
+        ResumeIntegrityError,
+        TrainingError,
+    )
+    from dlm.train.preference import (
+        DpoPhaseError,
+        NoPreferenceContentError,
+        PriorAdapterRequiredError,
+    )
+    from dlm.train.preference.phase_orchestrator import Phase, run_phases
+
+    console = Console(stderr=True)
+
+    if phase not in ("sft", "preference", "all"):
+        console.print(f"[red]error:[/red] --phase must be one of sft|preference|all, got {phase!r}")
+        raise typer.Exit(code=2)
+    phase_literal: Phase = phase  # type: ignore[assignment]
+
+    if resume and fresh:
+        console.print("[red]error:[/red] --resume and --fresh are mutually exclusive")
+        raise typer.Exit(code=2)
+    mode: Literal["fresh", "resume"] = "resume" if resume else "fresh"
+
+    # --gpus dispatches to accelerate launch when >1 device is
+    # selected. The single-GPU path falls through to the existing
+    # in-process trainer; a bare `--gpus 1` is a no-op (users can use
+    # it to lock the visible device set via CUDA_VISIBLE_DEVICES
+    # without spawning a subprocess).
+    if gpus is not None:
+        # Resolve mixed_precision from capabilities so bf16-incapable
+        # CUDA GPUs (SM<8.0) don't trip the `accelerate launch`
+        # default. `probe()` is cheap and runs in the launcher-side
+        # process only; each rank re-probes via `doctor()` later.
+        from dlm.hardware.capabilities import probe as _probe_caps
+
+        _caps = _probe_caps()
+        _mp = "bf16" if _caps.supports_bf16 else "fp16"
+        exit_code = _maybe_dispatch_multi_gpu(gpus, sys.argv, console, mixed_precision=_mp)
+        if exit_code is not None:
+            raise typer.Exit(code=exit_code)
+
+    # Mutual-exclusion gate for the three lock flags. Exactly one (or
+    # zero) may be set — silently ignoring a conflicting pair would
+    # mask operator intent.
+    lock_flag_count = sum((strict_lock, update_lock, ignore_lock))
+    if lock_flag_count > 1:
+        console.print(
+            "[red]error:[/red] --strict-lock / --update-lock / --ignore-lock "
+            "are mutually exclusive",
+        )
+        raise typer.Exit(code=2)
+    lock_mode: LockMode = "default"
+    if strict_lock:
+        lock_mode = "strict"
+    elif update_lock:
+        lock_mode = "update"
+    elif ignore_lock:
+        lock_mode = "ignore"
+
+    # `--no-cache` bypasses the tokenized-section cache for this run.
+    # Plumbed as an env var because the trainer's pre-tokenize helper
+    # already reads one — the CLI flag is a discoverable surface over
+    # the same switch. Rolling the flag into `TrainingPlan` is a
+    # deferred refactor; the env var is sufficient for the user-facing
+    # contract and survives `accelerate launch` re-invocations.
+    if no_cache:
+        from dlm.train.cache import set_disable_flag
+
+        set_disable_flag("--no-cache")
+
+    if policy not in ("permissive", "strict"):
+        console.print(
+            f"[red]error:[/red] --policy must be 'permissive' or 'strict', got {policy!r}"
+        )
+        raise typer.Exit(code=2)
+    policy_literal: Literal["permissive", "strict"] = policy  # type: ignore[assignment]
+
+    # --listen-rpc requires a loop to drain the queue — either --watch
+    # (file-change cycles) or --max-cycles N (bounded retrain loop).
+    # Without one, the server would accept probes that never train. We
+    # also need the bearer token up front so the user sees the refusal
+    # before we spend time downloading weights.
+    rpc_config: tuple[str, int, str] | None = None
+    if listen_rpc is not None:
+        if not watch and max_cycles <= 0:
+            console.print(
+                "[red]error:[/red] --listen-rpc requires --watch or --max-cycles N "
+                "(the probe queue needs a drain cadence)"
+            )
+            raise typer.Exit(code=2)
+        token = os.environ.get("DLM_PROBE_TOKEN", "").strip()
+        if not token:
+            console.print(
+                "[red]error:[/red] --listen-rpc needs a bearer token; "
+                "export DLM_PROBE_TOKEN=<secret>"
+            )
+            raise typer.Exit(code=2)
+        host, _, port_s = listen_rpc.rpartition(":")
+        if not host or not port_s:
+            console.print(f"[red]error:[/red] --listen-rpc expects host:port, got {listen_rpc!r}")
+            raise typer.Exit(code=2)
+        try:
+            port = int(port_s)
+        except ValueError:
+            console.print(f"[red]error:[/red] --listen-rpc port must be an integer, got {port_s!r}")
+            raise typer.Exit(code=2) from None
+        rpc_config = (host, port, token)
+
+    # Directory targets auto-scaffold `<dir>/.dlm/corpus.dlm` (or
+    # reuse an existing one). After this block, `path` always points
+    # at an actual `.dlm` file that the rest of the flow can parse.
+    if path.is_dir():
+        from dlm.cli.scaffold import ScaffoldError, scaffold_train_target
+
+        try:
+            scaffold_result = scaffold_train_target(
+                path,
+                base=base,
+                include=tuple(include or ()),
+                exclude=tuple(exclude or ()),
+                recursive=recursive,
+                name=name,
+                policy=policy_literal,
+                rescaffold=rescaffold,
+            )
+        except ScaffoldError as exc:
+            console.print(f"[red]scaffold:[/red] {exc.message}")
+            raise typer.Exit(code=1) from exc
+
+        if scaffold_result.scaffolded:
+            console.print(
+                f"[cyan]scaffolded:[/cyan] {scaffold_result.dlm_path} "
+                f"(dlm_id={scaffold_result.dlm_id})"
+            )
+        path = scaffold_result.dlm_path
+
+    try:
+        parsed = parse_file(path)
+    except (DlmParseError, OSError) as exc:
+        console.print(f"[red]error:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    try:
+        spec = resolve_base_model(
+            parsed.frontmatter.base_model,
+            accept_license=i_accept_license,
+            skip_export_probes=skip_export_probes,
+        )
+    except GatedModelError as exc:
+        console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
+        if exc.license_url:
+            console.print(f"  review the license at: {exc.license_url}")
+        console.print(
+            "  re-run with [bold]--i-accept-license[/bold] once you have accepted. "
+            "Acceptance will be persisted in the store manifest."
+        )
+        raise typer.Exit(code=1) from exc
+    # Detect the DDP world_size set by `accelerate launch`
+    # (WORLD_SIZE env var) and thread it into the doctor so the plan's
+    # effective_batch_size reflects the rank count. Single-process
+    # runs read 1 and the plan math is unchanged.
+    from dlm.train.distributed import detect_world_size
+
+    ws = detect_world_size()
+    doctor_result = doctor(
+        training_config=parsed.frontmatter.training,
+        base_params=spec.params,
+        seq_len=min(parsed.frontmatter.training.sequence_len, spec.effective_context_length),
+        world_size=ws,
+    )
+    plan = doctor_result.plan
+    if plan is None:
+        console.print(
+            "[red]doctor:[/red] no viable training plan for this host. "
+            "Run `dlm doctor` for details."
+        )
+        raise typer.Exit(code=1)
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    store.ensure_layout()
+
+    # `dlm init` writes a manifest as part of store provisioning. Mirror
+    # that manifest write here when the store layout exists but has no
+    # manifest yet — covers two flows:
+    #   - auto-scaffold via `dlm train <dir>` on a fresh directory
+    #   - hand-authored .dlm with a fresh ULID that never went through
+    #     `dlm init` (e.g. authored via the LSP / VSCode extension)
+    # License acceptance has already been validated upstream by this
+    # point, so we just record it.
+    if not store.manifest.exists():
+        from dlm.base_models import is_gated
+        from dlm.base_models.license import require_acceptance
+        from dlm.store.manifest import Manifest, save_manifest
+
+        acceptance = (
+            require_acceptance(spec, accept_license=True, via="cli_flag")
+            if is_gated(spec)
+            else None
+        )
+        save_manifest(
+            store.manifest,
+            Manifest(
+                dlm_id=parsed.frontmatter.dlm_id,
+                base_model=spec.key,
+                base_model_revision=spec.revision,
+                source_path=path.resolve(),
+                license_acceptance=acceptance,
+            ),
+        )
+
+    from dlm.modality import ModalityError
+
+    try:
+        phase_results = run_phases(
+            store,
+            parsed,
+            spec,
+            plan,
+            phase=phase_literal,
+            mode=mode,
+            seed=seed,
+            max_steps=max_steps,
+            lock_mode=lock_mode,
+            capabilities=doctor_result.capabilities,
+            world_size=ws,
+            strict_metrics=strict_metrics,
+            include_auto_mined=not no_mined,
+        )
+    except sqlite3.Error as exc:
+        console.print(f"[red]metrics:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except LockValidationError as exc:
+        console.print(f"[red]lock:[/red] {exc}")
+        console.print(
+            "  Re-run with [bold]--update-lock[/bold] to accept the drift or "
+            "[bold]--ignore-lock[/bold] to continue without persisting a new lock."
+        )
+        raise typer.Exit(code=1) from exc
+    except DiskSpaceError as exc:
+        console.print(f"[red]disk:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except OOMError as exc:
+        from dlm.train import format_oom_message
+
+        console.print(
+            format_oom_message(
+                step=exc.step,
+                peak_bytes=exc.peak_bytes,
+                free_at_start_bytes=exc.free_at_start_bytes,
+                current_grad_accum=exc.current_grad_accum,
+                recommended_grad_accum=exc.recommended_grad_accum,
+            )
+        )
+        raise typer.Exit(code=1) from exc
+    except ResumeIntegrityError as exc:
+        console.print(f"[red]resume:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except (NoPreferenceContentError, PriorAdapterRequiredError) as exc:
+        console.print(f"[red]dpo:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except DpoPhaseError as exc:
+        console.print(f"[red]dpo:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except TrainingError as exc:
+        console.print(f"[red]training:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ModalityError as exc:
+        console.print(f"[red]training:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if not phase_results:
+        console.print(
+            "[yellow]no-op:[/yellow] nothing to train for the requested phase. "
+            "Check that the document has the section types the phase consumes "
+            "(prose/instruction for SFT, preference for DPO)."
+        )
+        raise typer.Exit(code=0)
+
+    for pr in phase_results:
+        result = pr.result
+        console.print(
+            f"[green]{pr.phase}:[/green] v{result.adapter_version:04d} "
+            f"({result.steps} steps, seed={result.seed}, "
+            f"determinism={result.determinism.class_})"
+        )
+        console.print(f"adapter: {result.adapter_path}")
+        console.print(f"log:     {result.log_path}")
+    # Final-train-loss stdout line mirrors the last phase so existing
+    # downstream scripts keep working.
+    result = phase_results[-1].result
+    if result.final_train_loss is not None:
+        sys.stdout.write(f"{result.final_train_loss}\n")
+
+    # --watch keeps the training context alive and re-runs incremental
+    # cycles on file change. Entered AFTER the initial train so the
+    # loop resumes from a real committed adapter.
+    if watch:
+        if watch_repl:
+            console.print(
+                "[red]train:[/red] --watch --repl is scaffolded but not yet "
+                "implemented. The threaded REPL bridge needs a test "
+                "harness we don't have in CI today."
+            )
+            raise typer.Exit(code=2)
+
+        from dlm.watch.loop import run_watch
+        from dlm.watch.status import WatchStatus, render_status
+
+        status = WatchStatus(doc_path=str(path), sections=len(parsed.sections))
+
+        # Start the probe-RPC server if --listen-rpc was requested. The
+        # queue is exposed; end-to-end flow into `build_dataset` at the
+        # next cycle boundary is the follow-up consumer task — for now
+        # the server accepts and buffers probes so sway sinks can be
+        # wired + tested against a live endpoint.
+        rpc_server = None
+        probe_queue = None
+        if rpc_config is not None:
+            from dlm.train.inject import InjectedProbeQueue
+            from dlm.train.rpc import ProbeRpcServer
+
+            rpc_host, rpc_port, rpc_token = rpc_config
+            probe_queue = InjectedProbeQueue()
+            rpc_server = ProbeRpcServer(
+                host=rpc_host, port=rpc_port, token=rpc_token, queue=probe_queue
+            )
+            rpc_server.start()
+            bound_host, bound_port = rpc_server.address
+            console.print(
+                f"[dim]rpc:[/dim] listening on {bound_host}:{bound_port} "
+                f"(queue capacity {probe_queue.capacity})"
+            )
+
+        console.print(
+            f"[dim]watch:[/dim] {render_status(status)}; "
+            f"max_steps={watch_max_steps}, debounce_ms={watch_debounce_ms}"
+        )
+
+        def _log_cycle(result_: object) -> None:
+            from dlm.watch.loop import CycleResult
+
+            assert isinstance(result_, CycleResult)
+            if result_.ran and result_.run_result is not None:
+                status.mark_cycle_done(
+                    train_loss=result_.run_result.final_train_loss,
+                    val_loss=result_.run_result.final_val_loss,
+                    steps=result_.run_result.steps,
+                    coalesced=1,
+                )
+                console.print(f"[dim]watch:[/dim] {render_status(status)}")
+            else:
+                console.print("[dim]watch:[/dim] no new content, skipping retrain")
+
+        try:
+            exit_code = run_watch(
+                doc_path=path,
+                store=store,
+                spec=spec,
+                plan=plan,
+                max_steps=watch_max_steps,
+                debounce_ms=watch_debounce_ms,
+                on_cycle=_log_cycle,
+                drain_probes=probe_queue.drain if probe_queue is not None else None,
+            )
+        except KeyboardInterrupt:
+            if rpc_server is not None:
+                rpc_server.stop()
+            console.print("[dim]watch:[/dim] Ctrl-C received, exiting")
+            raise typer.Exit(code=0)  # noqa: B904
+        finally:
+            if rpc_server is not None:
+                rpc_server.stop()
+        raise typer.Exit(code=exit_code)
+
+    # --max-cycles without --watch: the bounded-loop cycle driver is
+    # the next consumer-side integration step. Accept the flags, refuse
+    # execution until the loop lands.
+    if rpc_config is not None and not watch:
+        console.print(
+            "[red]train:[/red] --listen-rpc --max-cycles (without --watch) is "
+            "scaffolded; the bounded cycle loop is the follow-up. Use "
+            "--watch for now."
+        )
+        raise typer.Exit(code=2)
+
+
+def _maybe_dispatch_multi_gpu(
+    gpus_flag: str,
+    argv: list[str],
+    console: object,
+    *,
+    mixed_precision: str = "bf16",
+) -> int | None:
+    """Resolve `--gpus`; if multi-GPU, spawn accelerate launch and return its exit code.
+
+    Returns None when the resolved world_size is 1 — caller falls
+    through to the in-process trainer. Returns an int exit code when
+    the launcher ran, so the caller can `raise typer.Exit(code=...)`.
+    """
+    from rich.console import Console
+
+    from dlm.train.distributed import UnsupportedGpuSpecError, launch_multi_gpu, parse_gpus
+
+    assert isinstance(console, Console)
+
+    try:
+        spec = parse_gpus(gpus_flag)
+    except UnsupportedGpuSpecError as exc:
+        console.print(f"[red]train:[/red] {exc}")
+        return 2
+
+    try:
+        import torch
+
+        device_count = int(torch.cuda.device_count())
+    except Exception:  # pragma: no cover - torch probing has many failure modes
+        device_count = 0
+
+    try:
+        device_ids = spec.resolve(device_count)
+    except UnsupportedGpuSpecError as exc:
+        console.print(f"[red]train:[/red] {exc}")
+        return 2
+
+    if len(device_ids) < 2:
+        # Single-GPU (or --gpus 1) — no subprocess needed. Caller
+        # continues with the in-process path.
+        return None
+
+    # Forward the original argv minus `--gpus` / `--gpus=...`; the
+    # worker entry strips it defensively too, but we drop it here so
+    # the launched accelerate cmd carries exactly the intended args.
+    cli_args = _strip_gpus_from_argv(argv)
+    console.print(
+        f"[dim]train:[/dim] dispatching to accelerate launch on devices {list(device_ids)} "
+        f"(mixed_precision={mixed_precision})"
+    )
+    return launch_multi_gpu(device_ids, cli_args, mixed_precision=mixed_precision)
+
+
+def _strip_gpus_from_argv(argv: list[str]) -> list[str]:
+    """Drop `--gpus <v>` / `--gpus=<v>` from raw sys.argv (launcher side).
+
+    Skips argv[0] (script path) — `accelerate launch -m <entry>`
+    provides the rank entrypoint separately, so the launcher forwards
+    argv[1:] minus the multi-GPU flag. Delegates to the shared
+    `strip_gpus_flag` helper.
+    """
+    from dlm.train.distributed.gpus import strip_gpus_flag
+
+    return strip_gpus_flag(argv, skip_argv0=True)
diff --git a/tests/unit/cli/test_train_validation_edges.py b/tests/unit/cli/test_train_validation_edges.py
index e43e818..15cb0cb 100644
--- a/tests/unit/cli/test_train_validation_edges.py
+++ b/tests/unit/cli/test_train_validation_edges.py
@@ -115,7 +115,7 @@ def _fake_dispatch(
             seen["mixed_precision"] = mixed_precision
             return 17
 
-        monkeypatch.setattr("dlm.cli.commands._maybe_dispatch_multi_gpu", _fake_dispatch)
+        monkeypatch.setattr("dlm.cli.commands.train._maybe_dispatch_multi_gpu", _fake_dispatch)
 
         runner = CliRunner()
         result = runner.invoke(

From f6d4999f88427389332407af4bea79c11a8cae2d Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:20:11 -0400
Subject: [PATCH 23/46] Extract prompt_cmd + VL/audio dispatch helpers into
 cli/commands/prompt.py

---
 src/dlm/cli/commands/__init__.py    | 415 +--------------------------
 src/dlm/cli/commands/prompt.py      | 421 ++++++++++++++++++++++++++++
 tests/unit/cli/test_prompt_edges.py |   4 +-
 3 files changed, 427 insertions(+), 413 deletions(-)
 create mode 100644 src/dlm/cli/commands/prompt.py

diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 4cf0620..5ab699f 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -12,7 +12,7 @@
 
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Annotated, Any
+from typing import Annotated
 
 import typer
 
@@ -34,6 +34,9 @@
 from dlm.cli.commands.preference import preference_list_cmd as preference_list_cmd
 from dlm.cli.commands.preference import preference_mine_cmd as preference_mine_cmd
 from dlm.cli.commands.preference import preference_revert_cmd as preference_revert_cmd
+from dlm.cli.commands.prompt import _dispatch_audio_prompt as _dispatch_audio_prompt
+from dlm.cli.commands.prompt import _dispatch_vl_prompt as _dispatch_vl_prompt
+from dlm.cli.commands.prompt import prompt_cmd as prompt_cmd
 from dlm.cli.commands.pull import pull_cmd as pull_cmd
 from dlm.cli.commands.push import push_cmd as push_cmd
 from dlm.cli.commands.repl import repl_cmd as repl_cmd
@@ -59,416 +62,6 @@ def _stub(sprint: str, subject: str) -> None:
 
 
 
-def prompt_cmd(
-    ctx: typer.Context,
-    path: Annotated[Path, typer.Argument(help=".dlm file to query.")],
-    query: Annotated[str | None, typer.Argument(help="One-shot prompt (omit for stdin).")] = None,
-    max_tokens: Annotated[
-        int,
-        typer.Option("--max-tokens", help="Max new tokens to generate."),
-    ] = 256,
-    temp: Annotated[
-        float,
-        typer.Option("--temp", help="Sampling temperature. `0.0` = greedy decoding."),
-    ] = 0.7,
-    top_p: Annotated[
-        float | None,
-        typer.Option(
-            "--top-p",
-            help="Top-p sampling cutoff. Omit to disable nucleus sampling.",
-        ),
-    ] = None,
-    verbose: Annotated[bool, typer.Option("--verbose", help="Log resolved InferencePlan.")] = False,
-    adapter: Annotated[
-        str | None,
-        typer.Option(
-            "--adapter",
-            help=(
-                "Named adapter to prompt against. Required on multi-adapter "
-                "documents; rejected on single-adapter documents."
-            ),
-        ),
-    ] = None,
-    gate: Annotated[
-        str,
-        typer.Option(
-            "--gate",
-            help=(
-                "Learned adapter gate. `auto` (default) uses the "
-                "gate when one exists in the store; `off` forces uniform "
-                "weights across declared adapters. Ignored when --adapter "
-                "explicitly pins a single adapter."
-            ),
-        ),
-    ] = "auto",
-    backend: Annotated[
-        str,
-        typer.Option(
-            "--backend",
-            help=(
-                "Inference backend: `auto` (default) picks MLX on Apple "
-                "Silicon, else PyTorch. Force with `pytorch` or `mlx`. "
-                "MLX requires `uv sync --extra mlx` on darwin-arm64."
-            ),
-        ),
-    ] = "auto",
-    image: Annotated[
-        list[Path] | None,
-        typer.Option(
-            "--image",
-            help=(
-                "Attach an image file to the prompt. Repeat for multiple "
-                "images; each expands to the base's image-token placeholder. "
-                "Requires a vision-language base."
-            ),
-        ),
-    ] = None,
-    audio: Annotated[
-        list[Path] | None,
-        typer.Option(
-            "--audio",
-            help=(
-                "Attach an audio file (.wav/.flac/.ogg) to the prompt. "
-                "Repeat for multiple clips; each expands to the base's "
-                "audio-token placeholder. Requires an audio-language base "
-                "(for example Qwen2-Audio-7B-Instruct)."
-            ),
-        ),
-    ] = None,
-) -> None:
-    """Run inference against the trained adapter."""
-    import sys
-
-    from rich.console import Console
-
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.doc.parser import parse_file
-    from dlm.hardware import doctor
-    from dlm.inference import AdapterNotFoundError
-    from dlm.inference.backends import (
-        UnsupportedBackendError,
-        build_backend,
-        select_backend,
-    )
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    if backend not in ("auto", "pytorch", "mlx"):
-        console.print(
-            f"[red]prompt:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
-        )
-        raise typer.Exit(code=2)
-
-    # Typer passes None when the option was never given; normalize early so
-    # downstream branching can just check truthiness + len().
-    image_paths: list[Path] = list(image or [])
-    audio_paths: list[Path] = list(audio or [])
-    if image_paths and audio_paths:
-        console.print(
-            "[red]prompt:[/red] --image and --audio cannot be combined "
-            "(each targets a different modality)."
-        )
-        raise typer.Exit(code=2)
-
-    from dlm.base_models import GatedModelError
-
-    parsed = parse_file(path)
-    adapters_declared = parsed.frontmatter.training.adapters
-    if adapter is not None:
-        if adapters_declared is None:
-            console.print(
-                "[red]prompt:[/red] --adapter is only valid on multi-adapter "
-                "documents (this doc does not declare `training.adapters`)."
-            )
-            raise typer.Exit(code=2)
-        if adapter not in adapters_declared:
-            declared = sorted(adapters_declared)
-            console.print(
-                f"[red]prompt:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
-            )
-            raise typer.Exit(code=2)
-
-    if gate not in ("auto", "off"):
-        console.print(f"[red]prompt:[/red] --gate must be `auto` or `off`, got {gate!r}.")
-        raise typer.Exit(code=2)
-    # --adapter explicitly pins a single adapter — gate routing is moot.
-    # We silently ignore --gate in that case (the flag has a non-default
-    # value only when the user cares, and pairing it with --adapter is
-    # not an error, just a no-op).
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-    already_accepted = _previously_accepted(store.manifest)
-    try:
-        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
-    except GatedModelError as exc:
-        console.print(
-            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
-            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
-        )
-        raise typer.Exit(code=1) from exc
-    caps = doctor().capabilities
-
-    # --- VL path -------------------------------------------------------
-    # The VL branch has its own model / processor / adapter loader and
-    # its own generate function. `--image` and vision-language bases
-    # must appear together; each alone is a usage error.
-    from dlm.modality import modality_for
-
-    dispatch = modality_for(spec)
-    from click.core import ParameterSource
-
-    if ctx.get_parameter_source("temp") == ParameterSource.DEFAULT:
-        temp = spec.suggested_prompt_temperature
-    if image_paths and not dispatch.accepts_images:
-        console.print(
-            f"[red]prompt:[/red] --image is only valid with vision-language bases; "
-            f"base {spec.key!r} is modality='{spec.modality}'."
-        )
-        raise typer.Exit(code=2)
-    if dispatch.accepts_images and not image_paths:
-        console.print(
-            f"[red]prompt:[/red] base {spec.key!r} is vision-language; "
-            "pass at least one --image PATH to prompt it."
-        )
-        raise typer.Exit(code=2)
-    if dispatch.accepts_images:
-        _dispatch_vl_prompt(
-            console=console,
-            spec=spec,
-            store=store,
-            caps=caps,
-            adapter_name=adapter,
-            image_paths=image_paths,
-            query=query,
-            max_tokens=max_tokens,
-            temp=temp,
-            top_p=top_p,
-            verbose=verbose,
-        )
-        return
-
-    # --- Audio path ----------------------------------------------------
-    if audio_paths and not dispatch.accepts_audio:
-        console.print(
-            f"[red]prompt:[/red] --audio is only valid with audio-language bases; "
-            f"base {spec.key!r} is modality='{spec.modality}'."
-        )
-        raise typer.Exit(code=2)
-    if dispatch.accepts_audio and not audio_paths:
-        console.print(
-            f"[red]prompt:[/red] base {spec.key!r} is audio-language; "
-            "pass at least one --audio PATH to prompt it."
-        )
-        raise typer.Exit(code=2)
-    if dispatch.accepts_audio:
-        _dispatch_audio_prompt(
-            console=console,
-            spec=spec,
-            store=store,
-            caps=caps,
-            adapter_name=adapter,
-            audio_paths=audio_paths,
-            query=query,
-            max_tokens=max_tokens,
-            temp=temp,
-            top_p=top_p,
-            verbose=verbose,
-            auto_resample=parsed.frontmatter.training.audio.auto_resample,
-        )
-        return
-
-    try:
-        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
-    except UnsupportedBackendError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    backend_obj = build_backend(backend_name, caps)
-
-    if verbose:
-        console.print(f"[dim]backend:[/dim] {backend_name}")
-
-    try:
-        backend_obj.load(spec, store, adapter_name=adapter)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if query is None:
-        query = sys.stdin.read().strip()
-    if not query:
-        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
-        raise typer.Exit(code=2)
-
-    response = backend_obj.generate(
-        query,
-        max_new_tokens=max_tokens,
-        temperature=temp,
-        top_p=top_p,
-    )
-    sys.stdout.write(response + "\n")
-
-
-def _dispatch_vl_prompt(  # pragma: no cover
-    *,
-    console: Any,
-    spec: Any,
-    store: Any,
-    caps: Any,
-    adapter_name: str | None,
-    image_paths: list[Path],
-    query: str | None,
-    max_tokens: int,
-    temp: float,
-    top_p: float | None,
-    verbose: bool,
-) -> None:
-    """Run the VL generate path. Keeps `prompt_cmd` readable.
-
-    Pragma'd from unit coverage because it calls the VL HF stack.
-    Covered by the slow-marked vision-language integration test (T12).
-    """
-    import sys
-
-    import typer
-
-    from dlm.inference import (
-        AdapterNotFoundError,
-        generate_vl,
-        load_for_vl_inference,
-        load_images,
-    )
-    from dlm.modality import ProcessorContractError
-
-    if verbose:
-        console.print("[dim]vl-backend:[/dim] pytorch (AutoModelForImageTextToText)")
-
-    try:
-        loaded = load_for_vl_inference(store, spec, caps, adapter_name=adapter_name)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ProcessorContractError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    try:
-        images = load_images(image_paths)
-    except FileNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    if query is None:
-        query = sys.stdin.read().strip()
-    if not query:
-        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
-        raise typer.Exit(code=2)
-
-    # Every VL spec in the registry must declare a preprocessor plan
-    # (schema validator); the fallback is defensive for the hf: escape
-    # hatch, which could in principle skip one.
-    image_token = "<image>"
-    if spec.vl_preprocessor_plan is not None:
-        image_token = spec.vl_preprocessor_plan.image_token
-
-    response = generate_vl(
-        loaded.model,
-        loaded.processor,
-        query,
-        images,
-        image_token=image_token,
-        max_new_tokens=max_tokens,
-        temperature=temp,
-        top_p=top_p,
-    )
-    sys.stdout.write(response + "\n")
-
-
-def _dispatch_audio_prompt(  # pragma: no cover
-    *,
-    console: Any,
-    spec: Any,
-    store: Any,
-    caps: Any,
-    adapter_name: str | None,
-    audio_paths: list[Path],
-    query: str | None,
-    max_tokens: int,
-    temp: float,
-    top_p: float | None,
-    verbose: bool,
-    auto_resample: bool = False,
-) -> None:
-    """Run the audio-LM generate path. Keeps `prompt_cmd` readable.
-
-    Pragma'd from unit coverage because it calls the audio HF stack.
-    Covered by the slow-marked audio integration test (T12).
-    """
-    import sys
-
-    import typer
-
-    from dlm.inference import (
-        AdapterNotFoundError,
-        generate_audio,
-        load_audios,
-        load_for_audio_inference,
-    )
-
-    if verbose:
-        console.print(f"[dim]audio-backend:[/dim] pytorch ({spec.architecture})")
-
-    try:
-        loaded = load_for_audio_inference(store, spec, caps, adapter_name=adapter_name)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if spec.audio_preprocessor_plan is None:
-        # Defensive — every registry audio spec carries the plan, but
-        # the hf: escape hatch could skip it.
-        console.print(
-            f"[red]prompt:[/red] base {spec.key!r} is audio-language "
-            "but has no audio_preprocessor_plan; cannot resolve sample rate."
-        )
-        raise typer.Exit(code=2)
-
-    target_sr = spec.audio_preprocessor_plan.sample_rate
-    try:
-        waveforms = load_audios(
-            audio_paths,
-            target_sample_rate=target_sr,
-            auto_resample=auto_resample,
-        )
-    except FileNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    except ValueError as exc:
-        # Sample-rate mismatch — surface the actionable ffmpeg hint.
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    if query is None:
-        query = sys.stdin.read().strip()
-    if not query:
-        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
-        raise typer.Exit(code=2)
-
-    audio_token = spec.audio_preprocessor_plan.audio_token
-
-    response = generate_audio(
-        loaded.model,
-        loaded.processor,
-        query,
-        waveforms,
-        audio_token=audio_token,
-        sample_rate=target_sr,
-        max_new_tokens=max_tokens,
-        temperature=temp,
-        top_p=top_p,
-    )
-    sys.stdout.write(response + "\n")
-
 
 def export_cmd(
     path: Annotated[Path, typer.Argument(help=".dlm file to export.")],
diff --git a/src/dlm/cli/commands/prompt.py b/src/dlm/cli/commands/prompt.py
new file mode 100644
index 0000000..1f7501c
--- /dev/null
+++ b/src/dlm/cli/commands/prompt.py
@@ -0,0 +1,421 @@
+"""`dlm prompt` — run inference against the trained adapter."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Annotated, Any
+
+import typer
+
+from dlm.cli.commands._shared import _previously_accepted
+
+
+def prompt_cmd(
+    ctx: typer.Context,
+    path: Annotated[Path, typer.Argument(help=".dlm file to query.")],
+    query: Annotated[str | None, typer.Argument(help="One-shot prompt (omit for stdin).")] = None,
+    max_tokens: Annotated[
+        int,
+        typer.Option("--max-tokens", help="Max new tokens to generate."),
+    ] = 256,
+    temp: Annotated[
+        float,
+        typer.Option("--temp", help="Sampling temperature. `0.0` = greedy decoding."),
+    ] = 0.7,
+    top_p: Annotated[
+        float | None,
+        typer.Option(
+            "--top-p",
+            help="Top-p sampling cutoff. Omit to disable nucleus sampling.",
+        ),
+    ] = None,
+    verbose: Annotated[bool, typer.Option("--verbose", help="Log resolved InferencePlan.")] = False,
+    adapter: Annotated[
+        str | None,
+        typer.Option(
+            "--adapter",
+            help=(
+                "Named adapter to prompt against. Required on multi-adapter "
+                "documents; rejected on single-adapter documents."
+            ),
+        ),
+    ] = None,
+    gate: Annotated[
+        str,
+        typer.Option(
+            "--gate",
+            help=(
+                "Learned adapter gate. `auto` (default) uses the "
+                "gate when one exists in the store; `off` forces uniform "
+                "weights across declared adapters. Ignored when --adapter "
+                "explicitly pins a single adapter."
+            ),
+        ),
+    ] = "auto",
+    backend: Annotated[
+        str,
+        typer.Option(
+            "--backend",
+            help=(
+                "Inference backend: `auto` (default) picks MLX on Apple "
+                "Silicon, else PyTorch. Force with `pytorch` or `mlx`. "
+                "MLX requires `uv sync --extra mlx` on darwin-arm64."
+            ),
+        ),
+    ] = "auto",
+    image: Annotated[
+        list[Path] | None,
+        typer.Option(
+            "--image",
+            help=(
+                "Attach an image file to the prompt. Repeat for multiple "
+                "images; each expands to the base's image-token placeholder. "
+                "Requires a vision-language base."
+            ),
+        ),
+    ] = None,
+    audio: Annotated[
+        list[Path] | None,
+        typer.Option(
+            "--audio",
+            help=(
+                "Attach an audio file (.wav/.flac/.ogg) to the prompt. "
+                "Repeat for multiple clips; each expands to the base's "
+                "audio-token placeholder. Requires an audio-language base "
+                "(for example Qwen2-Audio-7B-Instruct)."
+            ),
+        ),
+    ] = None,
+) -> None:
+    """Run inference against the trained adapter."""
+    import sys
+
+    from rich.console import Console
+
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.doc.parser import parse_file
+    from dlm.hardware import doctor
+    from dlm.inference import AdapterNotFoundError
+    from dlm.inference.backends import (
+        UnsupportedBackendError,
+        build_backend,
+        select_backend,
+    )
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    if backend not in ("auto", "pytorch", "mlx"):
+        console.print(
+            f"[red]prompt:[/red] --backend must be `auto`, `pytorch`, or `mlx` (got {backend!r})."
+        )
+        raise typer.Exit(code=2)
+
+    # Typer passes None when the option was never given; normalize early so
+    # downstream branching can just check truthiness + len().
+    image_paths: list[Path] = list(image or [])
+    audio_paths: list[Path] = list(audio or [])
+    if image_paths and audio_paths:
+        console.print(
+            "[red]prompt:[/red] --image and --audio cannot be combined "
+            "(each targets a different modality)."
+        )
+        raise typer.Exit(code=2)
+
+    from dlm.base_models import GatedModelError
+
+    parsed = parse_file(path)
+    adapters_declared = parsed.frontmatter.training.adapters
+    if adapter is not None:
+        if adapters_declared is None:
+            console.print(
+                "[red]prompt:[/red] --adapter is only valid on multi-adapter "
+                "documents (this doc does not declare `training.adapters`)."
+            )
+            raise typer.Exit(code=2)
+        if adapter not in adapters_declared:
+            declared = sorted(adapters_declared)
+            console.print(
+                f"[red]prompt:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
+            )
+            raise typer.Exit(code=2)
+
+    if gate not in ("auto", "off"):
+        console.print(f"[red]prompt:[/red] --gate must be `auto` or `off`, got {gate!r}.")
+        raise typer.Exit(code=2)
+    # --adapter explicitly pins a single adapter — gate routing is moot.
+    # We silently ignore --gate in that case (the flag has a non-default
+    # value only when the user cares, and pairing it with --adapter is
+    # not an error, just a no-op).
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    already_accepted = _previously_accepted(store.manifest)
+    try:
+        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
+    except GatedModelError as exc:
+        console.print(
+            f"[red]license:[/red] base {parsed.frontmatter.base_model!r} is gated and has "
+            "no recorded acceptance in this store; run `dlm train --i-accept-license` first."
+        )
+        raise typer.Exit(code=1) from exc
+    caps = doctor().capabilities
+
+    # --- VL path -------------------------------------------------------
+    # The VL branch has its own model / processor / adapter loader and
+    # its own generate function. `--image` and vision-language bases
+    # must appear together; each alone is a usage error.
+    from dlm.modality import modality_for
+
+    dispatch = modality_for(spec)
+    from click.core import ParameterSource
+
+    if ctx.get_parameter_source("temp") == ParameterSource.DEFAULT:
+        temp = spec.suggested_prompt_temperature
+    if image_paths and not dispatch.accepts_images:
+        console.print(
+            f"[red]prompt:[/red] --image is only valid with vision-language bases; "
+            f"base {spec.key!r} is modality='{spec.modality}'."
+        )
+        raise typer.Exit(code=2)
+    if dispatch.accepts_images and not image_paths:
+        console.print(
+            f"[red]prompt:[/red] base {spec.key!r} is vision-language; "
+            "pass at least one --image PATH to prompt it."
+        )
+        raise typer.Exit(code=2)
+    if dispatch.accepts_images:
+        _dispatch_vl_prompt(
+            console=console,
+            spec=spec,
+            store=store,
+            caps=caps,
+            adapter_name=adapter,
+            image_paths=image_paths,
+            query=query,
+            max_tokens=max_tokens,
+            temp=temp,
+            top_p=top_p,
+            verbose=verbose,
+        )
+        return
+
+    # --- Audio path ----------------------------------------------------
+    if audio_paths and not dispatch.accepts_audio:
+        console.print(
+            f"[red]prompt:[/red] --audio is only valid with audio-language bases; "
+            f"base {spec.key!r} is modality='{spec.modality}'."
+        )
+        raise typer.Exit(code=2)
+    if dispatch.accepts_audio and not audio_paths:
+        console.print(
+            f"[red]prompt:[/red] base {spec.key!r} is audio-language; "
+            "pass at least one --audio PATH to prompt it."
+        )
+        raise typer.Exit(code=2)
+    if dispatch.accepts_audio:
+        _dispatch_audio_prompt(
+            console=console,
+            spec=spec,
+            store=store,
+            caps=caps,
+            adapter_name=adapter,
+            audio_paths=audio_paths,
+            query=query,
+            max_tokens=max_tokens,
+            temp=temp,
+            top_p=top_p,
+            verbose=verbose,
+            auto_resample=parsed.frontmatter.training.audio.auto_resample,
+        )
+        return
+
+    try:
+        backend_name = select_backend(backend, caps)  # type: ignore[arg-type]
+    except UnsupportedBackendError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    backend_obj = build_backend(backend_name, caps)
+
+    if verbose:
+        console.print(f"[dim]backend:[/dim] {backend_name}")
+
+    try:
+        backend_obj.load(spec, store, adapter_name=adapter)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if query is None:
+        query = sys.stdin.read().strip()
+    if not query:
+        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
+        raise typer.Exit(code=2)
+
+    response = backend_obj.generate(
+        query,
+        max_new_tokens=max_tokens,
+        temperature=temp,
+        top_p=top_p,
+    )
+    sys.stdout.write(response + "\n")
+
+
+def _dispatch_vl_prompt(  # pragma: no cover
+    *,
+    console: Any,
+    spec: Any,
+    store: Any,
+    caps: Any,
+    adapter_name: str | None,
+    image_paths: list[Path],
+    query: str | None,
+    max_tokens: int,
+    temp: float,
+    top_p: float | None,
+    verbose: bool,
+) -> None:
+    """Run the VL generate path. Keeps `prompt_cmd` readable.
+
+    Pragma'd from unit coverage because it calls the VL HF stack.
+    Covered by the slow-marked vision-language integration test.
+    """
+    import sys
+
+    import typer
+
+    from dlm.inference import (
+        AdapterNotFoundError,
+        generate_vl,
+        load_for_vl_inference,
+        load_images,
+    )
+    from dlm.modality import ProcessorContractError
+
+    if verbose:
+        console.print("[dim]vl-backend:[/dim] pytorch (AutoModelForImageTextToText)")
+
+    try:
+        loaded = load_for_vl_inference(store, spec, caps, adapter_name=adapter_name)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ProcessorContractError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    try:
+        images = load_images(image_paths)
+    except FileNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+
+    if query is None:
+        query = sys.stdin.read().strip()
+    if not query:
+        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
+        raise typer.Exit(code=2)
+
+    # Every VL spec in the registry must declare a preprocessor plan
+    # (schema validator); the fallback is defensive for the hf: escape
+    # hatch, which could in principle skip one.
+    image_token = "<image>"
+    if spec.vl_preprocessor_plan is not None:
+        image_token = spec.vl_preprocessor_plan.image_token
+
+    response = generate_vl(
+        loaded.model,
+        loaded.processor,
+        query,
+        images,
+        image_token=image_token,
+        max_new_tokens=max_tokens,
+        temperature=temp,
+        top_p=top_p,
+    )
+    sys.stdout.write(response + "\n")
+
+
+def _dispatch_audio_prompt(  # pragma: no cover
+    *,
+    console: Any,
+    spec: Any,
+    store: Any,
+    caps: Any,
+    adapter_name: str | None,
+    audio_paths: list[Path],
+    query: str | None,
+    max_tokens: int,
+    temp: float,
+    top_p: float | None,
+    verbose: bool,
+    auto_resample: bool = False,
+) -> None:
+    """Run the audio-LM generate path. Keeps `prompt_cmd` readable.
+
+    Pragma'd from unit coverage because it calls the audio HF stack.
+    Covered by the slow-marked audio integration test.
+    """
+    import sys
+
+    import typer
+
+    from dlm.inference import (
+        AdapterNotFoundError,
+        generate_audio,
+        load_audios,
+        load_for_audio_inference,
+    )
+
+    if verbose:
+        console.print(f"[dim]audio-backend:[/dim] pytorch ({spec.architecture})")
+
+    try:
+        loaded = load_for_audio_inference(store, spec, caps, adapter_name=adapter_name)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if spec.audio_preprocessor_plan is None:
+        # Defensive — every registry audio spec carries the plan, but
+        # the hf: escape hatch could skip it.
+        console.print(
+            f"[red]prompt:[/red] base {spec.key!r} is audio-language "
+            "but has no audio_preprocessor_plan; cannot resolve sample rate."
+        )
+        raise typer.Exit(code=2)
+
+    target_sr = spec.audio_preprocessor_plan.sample_rate
+    try:
+        waveforms = load_audios(
+            audio_paths,
+            target_sample_rate=target_sr,
+            auto_resample=auto_resample,
+        )
+    except FileNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    except ValueError as exc:
+        # Sample-rate mismatch — surface the actionable ffmpeg hint.
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+
+    if query is None:
+        query = sys.stdin.read().strip()
+    if not query:
+        console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
+        raise typer.Exit(code=2)
+
+    audio_token = spec.audio_preprocessor_plan.audio_token
+
+    response = generate_audio(
+        loaded.model,
+        loaded.processor,
+        query,
+        waveforms,
+        audio_token=audio_token,
+        sample_rate=target_sr,
+        max_new_tokens=max_tokens,
+        temperature=temp,
+        top_p=top_p,
+    )
+    sys.stdout.write(response + "\n")
diff --git a/tests/unit/cli/test_prompt_edges.py b/tests/unit/cli/test_prompt_edges.py
index 295c0b3..f85678f 100644
--- a/tests/unit/cli/test_prompt_edges.py
+++ b/tests/unit/cli/test_prompt_edges.py
@@ -245,7 +245,7 @@ def test_vision_language_dispatch_branch_invokes_helper(
             spec=_spec(key="vl-demo", modality="vision-language"),
         )
         monkeypatch.setattr(
-            "dlm.cli.commands._dispatch_vl_prompt",
+            "dlm.cli.commands.prompt._dispatch_vl_prompt",
             lambda **kwargs: captured.update(kwargs),
         )
 
@@ -276,7 +276,7 @@ def test_audio_dispatch_branch_invokes_helper(
             spec=_spec(key="audio-demo", modality="audio-language"),
         )
         monkeypatch.setattr(
-            "dlm.cli.commands._dispatch_audio_prompt",
+            "dlm.cli.commands.prompt._dispatch_audio_prompt",
             lambda **kwargs: captured.update(kwargs),
         )
 

From 2b94a4315497c5272f1bcb95eab9bb86d30be4b7 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:24:05 -0400
Subject: [PATCH 24/46] Extract export_cmd into cli/commands/export.py + drop
 stale ARG001 ignore

---
 pyproject.toml                   |   4 -
 src/dlm/cli/commands/__init__.py | 668 +------------------------------
 src/dlm/cli/commands/export.py   | 640 +++++++++++++++++++++++++++++
 3 files changed, 659 insertions(+), 653 deletions(-)
 create mode 100644 src/dlm/cli/commands/export.py

diff --git a/pyproject.toml b/pyproject.toml
index 755e04c..15f3c9a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -163,10 +163,6 @@ ignore = [
 # into a sequence of readers; the nested form is clearer than the
 # comma-separated one.
 "tests/**/*.py" = ["ARG", "PT011", "SIM117"]
-# Typer stub subcommands accept every CLI arg the real implementation
-# will take so `--help` reflects the shipping surface — even though the
-# stub body discards them.
-"src/dlm/cli/commands.py" = ["ARG001"]
 # HuggingFace Trainer callbacks MUST accept `args`/`state`/`control`
 # positionally even when the implementation only reads some of them —
 # HF dispatches them by position. ARG002 for these wrappers is noise.
diff --git a/src/dlm/cli/commands/__init__.py b/src/dlm/cli/commands/__init__.py
index 5ab699f..a288711 100644
--- a/src/dlm/cli/commands/__init__.py
+++ b/src/dlm/cli/commands/__init__.py
@@ -1,21 +1,19 @@
-"""Subcommand stubs for the v1.0 CLI surface.
-
-Every stub raises `NotImplementedError` with the sprint number that will
-implement it. This makes `dlm --help` self-documenting about project
-progress. Arguments are accepted so `--help` renders the real eventual
-surface; they're unused until each subcommand's owning sprint lands,
-which is why `src/dlm/cli/commands.py` has a ruff per-file-ignore for
-`ARG001` in `pyproject.toml`.
+"""Re-exports for the `dlm.cli.commands` package.
+
+Each `*_cmd` function lives in its own submodule
+(`dlm.cli.commands.<name>`); this `__init__.py` re-exports them so
+`from dlm.cli.commands import <foo>_cmd` and `commands.<foo>_cmd`
+keep working from `dlm.cli.app` and any test fixtures that bind
+helpers by their pre-package name.
+
+Private helpers (`_*`) are also re-exported when test fixtures
+monkeypatch them through `dlm.cli.commands.<name>` — keeping the
+import path stable here lets callers stay agnostic about which
+submodule owns the helper.
 """
 
 from __future__ import annotations
 
-from collections.abc import Sequence
-from pathlib import Path
-from typing import Annotated
-
-import typer
-
 from dlm.cli.commands._shared import _human_size as _human_size
 from dlm.cli.commands._shared import _previously_accepted as _previously_accepted
 from dlm.cli.commands.cache import _parse_duration as _parse_duration
@@ -23,6 +21,7 @@
 from dlm.cli.commands.cache import cache_prune_cmd as cache_prune_cmd
 from dlm.cli.commands.cache import cache_show_cmd as cache_show_cmd
 from dlm.cli.commands.doctor import doctor_cmd as doctor_cmd
+from dlm.cli.commands.export import export_cmd as export_cmd
 from dlm.cli.commands.harvest import harvest_cmd as harvest_cmd
 from dlm.cli.commands.init import _prompt_accept_license as _prompt_accept_license
 from dlm.cli.commands.init import init_cmd as init_cmd
@@ -54,641 +53,12 @@
 
 
 def _stub(sprint: str, subject: str) -> None:
-    """Raise a clear unimplemented error pointing to the owning sprint."""
+    """Raise a clear unimplemented error for any v1 subcommand still pending.
+
+    Kept around because a unit test asserts the error message shape, and
+    because future v2/v3 subcommand wiring may want a stable stub
+    helper to register a not-yet-implemented surface in `--help`.
+    """
     raise NotImplementedError(
         f"`{subject}` is not implemented yet (owned by Sprint {sprint}).",
     )
-
-
-
-
-
-def export_cmd(
-    path: Annotated[Path, typer.Argument(help=".dlm file to export.")],
-    target: Annotated[
-        str,
-        typer.Option(
-            "--target",
-            help="Export destination. Currently supported: ollama, llama-server, vllm, mlx-serve.",
-        ),
-    ] = "ollama",
-    quant: Annotated[
-        str | None,
-        typer.Option("--quant", help="GGUF quant level (defaults to frontmatter)."),
-    ] = None,
-    merged: Annotated[
-        bool,
-        typer.Option("--merged", help="Merge the adapter into the base before export."),
-    ] = False,
-    dequantize: Annotated[
-        bool,
-        typer.Option(
-            "--dequantize",
-            help="Dequantize a QLoRA base to fp16 before merging.",
-        ),
-    ] = False,
-    name: Annotated[str | None, typer.Option("--name", help="Ollama model name.")] = None,
-    no_template: Annotated[
-        bool,
-        typer.Option("--no-template", help="Skip writing TEMPLATE into the Modelfile."),
-    ] = False,
-    no_smoke: Annotated[
-        bool,
-        typer.Option("--no-smoke", help="Register the export but skip the smoke prompt."),
-    ] = False,
-    no_imatrix: Annotated[
-        bool,
-        typer.Option(
-            "--no-imatrix",
-            help=(
-                "Skip importance-matrix calibration. Default uses the "
-                "replay corpus to calibrate k-quant quantization."
-            ),
-        ),
-    ] = False,
-    draft: Annotated[
-        str | None,
-        typer.Option(
-            "--draft",
-            help=(
-                "Speculative-decoding draft model Ollama tag "
-                "(e.g. qwen2.5:0.5b). Default uses the registered pair "
-                "for this base; override here to pick a custom draft."
-            ),
-        ),
-    ] = None,
-    no_draft: Annotated[
-        bool,
-        typer.Option(
-            "--no-draft",
-            help="Suppress PARAMETER draft_model emission even when a pair is registered.",
-        ),
-    ] = False,
-    skip_ollama: Annotated[
-        bool,
-        typer.Option(
-            "--skip-ollama",
-            help="Emit GGUFs + manifest only; do not touch the Ollama binary.",
-        ),
-    ] = False,
-    adapter: Annotated[
-        str | None,
-        typer.Option(
-            "--adapter",
-            help=(
-                "Named adapter to export. Required on multi-adapter "
-                "documents; rejected on single-adapter documents."
-            ),
-        ),
-    ] = None,
-    adapter_mix: Annotated[
-        str | None,
-        typer.Option(
-            "--adapter-mix",
-            help=(
-                "Weighted composition of named adapters, e.g. "
-                "`knowledge:1.0,tone:0.5`. Mutually exclusive with --adapter. "
-                "Multi-adapter docs only. LoRA-only; QLoRA requires "
-                "--dequantize."
-            ),
-        ),
-    ] = None,
-    adapter_mix_method: Annotated[
-        str,
-        typer.Option(
-            "--adapter-mix-method",
-            help=(
-                "PEFT combination strategy for --adapter-mix. `linear` "
-                "(default) sums LoRA deltas; `svd` recomposes via SVD "
-                "(higher fidelity, heavier compute). Only meaningful "
-                "with --adapter-mix."
-            ),
-        ),
-    ] = "linear",
-    verbose: Annotated[
-        bool,
-        typer.Option("--verbose", help="Log each subprocess command as it launches."),
-    ] = False,
-    emit_sway_json: Annotated[
-        bool,
-        typer.Option(
-            "--emit-sway-json",
-            help=(
-                "After the export, also write a ready-to-run sway.yaml "
-                "(via dlm-sway autogen) into the export dir. Requires the "
-                "[sway] extra: pip install 'dlm[sway]'."
-            ),
-        ),
-    ] = False,
-) -> None:
-    """Export the adapter to a runtime target."""
-
-    from rich.console import Console
-
-    from dlm.base_models import GatedModelError, download_spec
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.doc.parser import parse_file
-    from dlm.export import (
-        ExportError,
-        PreflightError,
-        SubprocessError,
-        UnknownExportTargetError,
-        UnsafeMergeError,
-        VendoringError,
-        resolve_export_plan,
-        run_export,
-    )
-    from dlm.export.ollama import (
-        OllamaBinaryNotFoundError,
-        OllamaCreateError,
-        OllamaError,
-        OllamaSmokeError,
-        OllamaVersionError,
-    )
-    from dlm.export.quantize import run_checked
-    from dlm.export.targets import (
-        finalize_mlx_serve_export,
-        finalize_vllm_export,
-        prepare_llama_server_export,
-        prepare_mlx_serve_export,
-        prepare_vllm_export,
-        resolve_target,
-    )
-    from dlm.store.paths import for_dlm
-
-    console = Console(stderr=True)
-
-    if draft is not None and no_draft:
-        console.print("[red]error:[/red] --draft and --no-draft are mutually exclusive; pick one.")
-        raise typer.Exit(code=2)
-    if adapter is not None and adapter_mix is not None:
-        console.print(
-            "[red]export:[/red] --adapter and --adapter-mix are mutually exclusive; pick one."
-        )
-        raise typer.Exit(code=2)
-    try:
-        resolved_target = resolve_target(target)
-    except UnknownExportTargetError as exc:
-        console.print(f"[red]export:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-    parsed = parse_file(path)
-    adapters_declared = parsed.frontmatter.training.adapters
-    if adapter is not None:
-        if adapters_declared is None:
-            console.print(
-                "[red]export:[/red] --adapter is only valid on multi-adapter "
-                "documents (this doc does not declare `training.adapters`)."
-            )
-            raise typer.Exit(code=2)
-        if adapter not in adapters_declared:
-            declared = sorted(adapters_declared)
-            console.print(
-                f"[red]export:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
-            )
-            raise typer.Exit(code=2)
-
-    mix_entries: list[tuple[str, float]] | None = None
-    if adapter_mix is not None:
-        from dlm.export.weighted_merge import (
-            InvalidMixSpecError,
-            parse_mix_spec,
-            validate_mix_against_declared,
-        )
-
-        if adapters_declared is None:
-            console.print(
-                "[red]export:[/red] --adapter-mix is only valid on multi-adapter "
-                "documents (this doc does not declare `training.adapters`)."
-            )
-            raise typer.Exit(code=2)
-        if adapter_mix_method not in ("linear", "svd"):
-            console.print(
-                f"[red]export:[/red] --adapter-mix-method must be "
-                f"`linear` or `svd`, got {adapter_mix_method!r}."
-            )
-            raise typer.Exit(code=2)
-        try:
-            entries = parse_mix_spec(adapter_mix)
-            validate_mix_against_declared(entries, set(adapters_declared))
-        except InvalidMixSpecError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=2) from exc
-        mix_entries = [(e.name, e.weight) for e in entries]
-
-    store = for_dlm(parsed.frontmatter.dlm_id)
-
-    # Gate-driven static mix: when the doc has an enabled gate and the
-    # user didn't pass --adapter-mix / --adapter, freeze the learned
-    # gate to per-adapter weights for the GGUF export path. Dynamic
-    # routing only lives in the `dlm prompt` flow; the runtime can't
-    # evaluate the torch gate, so we substitute the prior here. A CLI
-    # --adapter-mix wins — users who know what they want get full
-    # control.
-    if mix_entries is None and adapter is None:
-        from dlm.export.gate_fallback import resolve_and_announce
-
-        resolution = resolve_and_announce(store, parsed)
-        if resolution.entries is not None:
-            mix_entries = resolution.entries
-            for line in resolution.banner_lines:
-                console.print(line)
-
-    already_accepted = _previously_accepted(store.manifest)
-    try:
-        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
-    except GatedModelError as exc:
-        console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
-        if exc.license_url:
-            console.print(f"  review the license at: {exc.license_url}")
-        console.print("  accept via `dlm train --i-accept-license` before exporting.")
-        raise typer.Exit(code=1) from exc
-
-    # Audio bases take HF-snapshot unconditionally — llama.cpp has no
-    # audio-arch roadmap at our pinned tag — so branch early without
-    # resolving a GGUF plan.
-    from dlm.modality import modality_for
-
-    export_dispatch = modality_for(spec)
-    if resolved_target.name == "vllm" and export_dispatch.accepts_audio:
-        console.print(
-            "[red]export:[/red] --target vllm is not wired for audio-language "
-            "documents yet; the current vllm export path only supports text bases."
-        )
-        raise typer.Exit(code=2)
-    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_audio:
-        console.print(
-            "[red]export:[/red] --target mlx-serve is not wired for audio-language "
-            "documents yet; the current mlx-serve export path only supports text bases."
-        )
-        raise typer.Exit(code=2)
-    if export_dispatch.accepts_audio:
-        try:
-            dispatch_result = export_dispatch.dispatch_export(
-                store=store,
-                spec=spec,
-                adapter_name=adapter,
-                quant=quant,
-                merged=merged,
-                adapter_mix_raw=adapter_mix,
-            )
-        except ExportError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-        assert dispatch_result is not None  # audio modality always returns a result
-        for line in dispatch_result.banner_lines:
-            console.print(line)
-        return
-
-    try:
-        plan = resolve_export_plan(
-            cli_quant=quant,
-            cli_merged=merged,
-            cli_dequantize=dequantize,
-            cli_no_template=no_template,
-            cli_ollama_name=name,
-            cli_no_imatrix=no_imatrix,
-            frontmatter_default_quant=parsed.frontmatter.export.default_quant,
-        )
-    except ValueError as exc:
-        console.print(f"[red]export:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    store.ensure_layout()
-
-    # VL bases: arch-probe + try single-file GGUF on SUPPORTED (with
-    # fallback to HF-snapshot on refusal or subprocess failure). A
-    # missing local base snapshot should not hard-fail the whole
-    # export — the dispatcher can still emit the HF-snapshot path
-    # without GGUF context.
-    if resolved_target.name == "vllm" and export_dispatch.accepts_images:
-        console.print(
-            "[red]export:[/red] --target vllm is not wired for vision-language "
-            "documents yet; the current vllm export path only supports text bases."
-        )
-        raise typer.Exit(code=2)
-    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_images:
-        console.print(
-            "[red]export:[/red] --target mlx-serve is not wired for vision-language "
-            "documents yet; the current mlx-serve export path only supports text bases."
-        )
-        raise typer.Exit(code=2)
-    if export_dispatch.accepts_images:
-        gguf_emission_context = None
-        try:
-            cached_vl = download_spec(spec, local_files_only=True)
-        except RuntimeError as exc:
-            _ = exc
-        else:
-            gguf_emission_context = {
-                "plan": plan,
-                "cached_base_dir": cached_vl.path,
-                "source_dlm_path": path.resolve(),
-                "training_sequence_len": parsed.frontmatter.training.sequence_len,
-                "dlm_version": f"v{parsed.frontmatter.dlm_version}",
-            }
-        try:
-            dispatch_result = export_dispatch.dispatch_export(
-                store=store,
-                spec=spec,
-                adapter_name=adapter,
-                quant=quant,
-                merged=merged,
-                adapter_mix_raw=adapter_mix,
-                gguf_emission_context=gguf_emission_context,
-            )
-        except ExportError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-        assert dispatch_result is not None  # VL modality always returns a result
-        for line in dispatch_result.banner_lines:
-            console.print(line)
-        return
-
-    try:
-        cached = download_spec(spec, local_files_only=True)
-    except RuntimeError as exc:
-        console.print(
-            f"[red]export:[/red] base model not in local cache — run `dlm train` first.\n  {exc}"
-        )
-        raise typer.Exit(code=1) from exc
-
-    def _verbose_runner(cmd: Sequence[str]) -> object:
-        console.print(f"[dim]$ {' '.join(cmd)}[/dim]")
-        return run_checked(cmd)
-
-    adapter_path_override = None
-    if mix_entries is not None:  # pragma: no cover - heavy path
-        # Build the weighted-merged adapter into an ephemeral dir,
-        # then feed the path to run_export as an override. The tmp
-        # dir lives under the store's cache/ so it cleans up with
-        # the rest of the store on `dlm pack`.
-        from dlm.export.weighted_merge import MixEntry, build_and_stage
-
-        entries_typed = [MixEntry(name=n, weight=w) for (n, w) in mix_entries]
-        adapter_path_override = build_and_stage(
-            store=store,
-            spec=spec,
-            cached_base_dir=cached.path,
-            entries=entries_typed,
-            combination_type=adapter_mix_method,  # type: ignore[arg-type]
-        )
-
-    if resolved_target.name == "vllm":
-        ignored_flags: list[str] = []
-        if quant is not None:
-            ignored_flags.append("--quant")
-        if merged:
-            ignored_flags.append("--merged")
-        if dequantize:
-            ignored_flags.append("--dequantize")
-        if no_template:
-            ignored_flags.append("--no-template")
-        if skip_ollama:
-            ignored_flags.append("--skip-ollama")
-        if no_imatrix:
-            ignored_flags.append("--no-imatrix")
-        if draft is not None:
-            ignored_flags.append("--draft")
-        if no_draft:
-            ignored_flags.append("--no-draft")
-        if ignored_flags:
-            console.print(
-                "[yellow]export:[/yellow] ignoring flags not applicable to "
-                f"`--target vllm`: {', '.join(ignored_flags)}"
-            )
-
-        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
-        try:
-            vllm_result = prepare_vllm_export(
-                store=store,
-                spec=spec,
-                served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
-                training_sequence_len=parsed.frontmatter.training.sequence_len,
-                adapter_name=adapter,
-                adapter_path_override=adapter_path_override,
-                declared_adapter_names=declared_adapter_names,
-            )
-        except ExportError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
-        vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result)
-        if vllm_smoke is not None and not vllm_smoke.ok:
-            console.print(
-                f"[red]smoke:[/red] {vllm_smoke.detail}\n"
-                "  re-run with `--no-smoke` to skip the smoke test."
-            )
-            raise typer.Exit(code=1)
-
-        manifest_path = finalize_vllm_export(
-            store=store,
-            spec=spec,
-            prepared=vllm_result,
-            smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail,
-            adapter_name=adapter,
-            adapter_mix=mix_entries,
-        )
-        console.print(f"[green]exported:[/green] {vllm_result.export_dir}")
-        console.print("target:  vllm")
-        assert vllm_result.launch_script_path is not None
-        assert vllm_result.config_path is not None
-        console.print(f"launch:  {vllm_result.launch_script_path.name}")
-        console.print(f"config:  {vllm_result.config_path.name}")
-        console.print(f"manifest: {manifest_path.name}")
-        if vllm_smoke is not None and vllm_smoke.detail:
-            console.print(f"smoke:   {vllm_smoke.detail}")
-        return
-
-    if resolved_target.name == "mlx-serve":
-        mlx_ignored_flags: list[str] = []
-        if quant is not None:
-            mlx_ignored_flags.append("--quant")
-        if merged:
-            mlx_ignored_flags.append("--merged")
-        if dequantize:
-            mlx_ignored_flags.append("--dequantize")
-        if name is not None:
-            mlx_ignored_flags.append("--name")
-        if no_template:
-            mlx_ignored_flags.append("--no-template")
-        if skip_ollama:
-            mlx_ignored_flags.append("--skip-ollama")
-        if no_imatrix:
-            mlx_ignored_flags.append("--no-imatrix")
-        if draft is not None:
-            mlx_ignored_flags.append("--draft")
-        if no_draft:
-            mlx_ignored_flags.append("--no-draft")
-        if mlx_ignored_flags:
-            console.print(
-                "[yellow]export:[/yellow] ignoring flags not applicable to "
-                f"`--target mlx-serve`: {', '.join(mlx_ignored_flags)}"
-            )
-
-        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
-        try:
-            mlx_serve_result = prepare_mlx_serve_export(
-                store=store,
-                spec=spec,
-                adapter_name=adapter,
-                adapter_path_override=adapter_path_override,
-                declared_adapter_names=declared_adapter_names,
-            )
-        except ExportError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
-        mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result)
-        if mlx_serve_smoke is not None and not mlx_serve_smoke.ok:
-            console.print(
-                f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n"
-                "  re-run with `--no-smoke` to skip the smoke test."
-            )
-            raise typer.Exit(code=1)
-
-        manifest_path = finalize_mlx_serve_export(
-            store=store,
-            spec=spec,
-            prepared=mlx_serve_result,
-            smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail,
-            adapter_name=adapter,
-            adapter_mix=mix_entries,
-        )
-        console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}")
-        console.print("target:  mlx-serve")
-        assert mlx_serve_result.launch_script_path is not None
-        console.print(f"launch:  {mlx_serve_result.launch_script_path.name}")
-        console.print(f"manifest: {manifest_path.name}")
-        if mlx_serve_smoke is not None and mlx_serve_smoke.detail:
-            console.print(f"smoke:   {mlx_serve_smoke.detail}")
-        return
-
-    try:
-        result = run_export(
-            store,
-            spec,
-            plan,
-            target=resolved_target.name,
-            cached_base_dir=cached.path,
-            subprocess_runner=_verbose_runner if verbose else None,
-            skip_ollama=skip_ollama or resolved_target.name != "ollama",
-            skip_smoke=no_smoke,
-            source_dlm_path=path.resolve(),
-            training_sequence_len=parsed.frontmatter.training.sequence_len,
-            override_temperature=parsed.frontmatter.export.default_temperature,
-            override_top_p=parsed.frontmatter.export.default_top_p,
-            draft_override=draft,
-            draft_disabled=no_draft,
-            adapter_name=adapter,
-            adapter_path_override=adapter_path_override,
-            adapter_mix=mix_entries,
-        )
-    except UnsafeMergeError as exc:
-        console.print(f"[red]merge:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except VendoringError as exc:
-        console.print(
-            f"[red]vendor:[/red] {exc}\n"
-            "  run `scripts/bump-llama-cpp.sh build` or "
-            "`git submodule update --init --recursive`."
-        )
-        raise typer.Exit(code=1) from exc
-    except PreflightError as exc:
-        console.print(f"[red]preflight[{exc.probe}]:[/red] {exc.detail}")
-        raise typer.Exit(code=1) from exc
-    except SubprocessError as exc:
-        console.print(f"[red]subprocess:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except OllamaBinaryNotFoundError as exc:
-        console.print(
-            f"[red]ollama:[/red] {exc}\n"
-            "  install from https://ollama.com/download "
-            "or re-run with `--skip-ollama`."
-        )
-        raise typer.Exit(code=1) from exc
-    except OllamaVersionError as exc:
-        console.print(f"[red]ollama:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except OllamaCreateError as exc:
-        console.print(f"[red]ollama create:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except OllamaSmokeError as exc:
-        console.print(
-            f"[red]smoke:[/red] {exc}\n  re-run with `--no-smoke` to skip the smoke test."
-        )
-        raise typer.Exit(code=1) from exc
-    except OllamaError as exc:
-        console.print(f"[red]ollama:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-    except ExportError as exc:
-        console.print(f"[red]export:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    if resolved_target.name == "llama-server":
-        adapter_dir = adapter_path_override
-        if adapter_dir is None:
-            if adapter is None:
-                adapter_dir = store.resolve_current_adapter()
-            else:
-                adapter_dir = store.resolve_current_adapter_for(adapter)
-        assert adapter_dir is not None
-        try:
-            llama_server_result = prepare_llama_server_export(
-                export_dir=result.export_dir,
-                manifest_path=result.manifest_path,
-                artifacts=result.artifacts,
-                adapter_dir=adapter_dir,
-                spec=spec,
-                training_sequence_len=parsed.frontmatter.training.sequence_len,
-            )
-        except VendoringError as exc:
-            console.print(
-                f"[red]vendor:[/red] {exc}\n"
-                "  run `scripts/bump-llama-cpp.sh build --with-server` or "
-                "`git submodule update --init --recursive`."
-            )
-            raise typer.Exit(code=1) from exc
-        except ExportError as exc:
-            console.print(f"[red]export:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-        llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result)
-        if llama_server_smoke is not None and not llama_server_smoke.ok:
-            console.print(
-                f"[red]smoke:[/red] {llama_server_smoke.detail}\n"
-                "  re-run with `--no-smoke` to skip the smoke test."
-            )
-            raise typer.Exit(code=1)
-
-    cached_tag = " [dim](cached base)[/dim]" if result.cached else ""
-    console.print(f"[green]exported:[/green] {result.export_dir}{cached_tag}")
-    for artifact in result.artifacts:
-        console.print(f"  {artifact.name}")
-
-    # S26 X1 — also emit a sway.yaml next to the GGUF when the user
-    # asks for it. Done AFTER the regular export so a sway-side
-    # failure can never roll back a working GGUF deployment.
-    if emit_sway_json:
-        from dlm.export.sway_json import SwayJsonExportError, write_sway_json
-
-        try:
-            sway_yaml_path = write_sway_json(path, result.export_dir)
-        except SwayJsonExportError as exc:
-            console.print(f"[red]sway-json:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-        console.print(f"[green]sway.yaml:[/green] {sway_yaml_path}")
-        console.print("  next: sway run " + str(sway_yaml_path))
-    if resolved_target.name == "llama-server":
-        assert llama_server_result.launch_script_path is not None
-        assert llama_server_result.config_path is not None
-        console.print(f"target:  {result.target}")
-        console.print(f"launch:  {llama_server_result.launch_script_path.name}")
-        console.print(f"template: {llama_server_result.config_path.name}")
-        if llama_server_smoke is not None and llama_server_smoke.detail:
-            console.print(f"smoke:   {llama_server_smoke.detail}")
-        return
-    if result.ollama_name:
-        console.print(f"ollama:  {result.ollama_name} (v{result.ollama_version})")
-    if result.smoke_output_first_line:
-        console.print(f"smoke:   {result.smoke_output_first_line}")
-
-
diff --git a/src/dlm/cli/commands/export.py b/src/dlm/cli/commands/export.py
new file mode 100644
index 0000000..2ef6b41
--- /dev/null
+++ b/src/dlm/cli/commands/export.py
@@ -0,0 +1,640 @@
+"""`dlm export` — export the adapter to a runtime target."""
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+from pathlib import Path
+from typing import Annotated
+
+import typer
+
+from dlm.cli.commands._shared import _previously_accepted
+
+
+def export_cmd(
+    path: Annotated[Path, typer.Argument(help=".dlm file to export.")],
+    target: Annotated[
+        str,
+        typer.Option(
+            "--target",
+            help="Export destination. Currently supported: ollama, llama-server, vllm, mlx-serve.",
+        ),
+    ] = "ollama",
+    quant: Annotated[
+        str | None,
+        typer.Option("--quant", help="GGUF quant level (defaults to frontmatter)."),
+    ] = None,
+    merged: Annotated[
+        bool,
+        typer.Option("--merged", help="Merge the adapter into the base before export."),
+    ] = False,
+    dequantize: Annotated[
+        bool,
+        typer.Option(
+            "--dequantize",
+            help="Dequantize a QLoRA base to fp16 before merging.",
+        ),
+    ] = False,
+    name: Annotated[str | None, typer.Option("--name", help="Ollama model name.")] = None,
+    no_template: Annotated[
+        bool,
+        typer.Option("--no-template", help="Skip writing TEMPLATE into the Modelfile."),
+    ] = False,
+    no_smoke: Annotated[
+        bool,
+        typer.Option("--no-smoke", help="Register the export but skip the smoke prompt."),
+    ] = False,
+    no_imatrix: Annotated[
+        bool,
+        typer.Option(
+            "--no-imatrix",
+            help=(
+                "Skip importance-matrix calibration. Default uses the "
+                "replay corpus to calibrate k-quant quantization."
+            ),
+        ),
+    ] = False,
+    draft: Annotated[
+        str | None,
+        typer.Option(
+            "--draft",
+            help=(
+                "Speculative-decoding draft model Ollama tag "
+                "(e.g. qwen2.5:0.5b). Default uses the registered pair "
+                "for this base; override here to pick a custom draft."
+            ),
+        ),
+    ] = None,
+    no_draft: Annotated[
+        bool,
+        typer.Option(
+            "--no-draft",
+            help="Suppress PARAMETER draft_model emission even when a pair is registered.",
+        ),
+    ] = False,
+    skip_ollama: Annotated[
+        bool,
+        typer.Option(
+            "--skip-ollama",
+            help="Emit GGUFs + manifest only; do not touch the Ollama binary.",
+        ),
+    ] = False,
+    adapter: Annotated[
+        str | None,
+        typer.Option(
+            "--adapter",
+            help=(
+                "Named adapter to export. Required on multi-adapter "
+                "documents; rejected on single-adapter documents."
+            ),
+        ),
+    ] = None,
+    adapter_mix: Annotated[
+        str | None,
+        typer.Option(
+            "--adapter-mix",
+            help=(
+                "Weighted composition of named adapters, e.g. "
+                "`knowledge:1.0,tone:0.5`. Mutually exclusive with --adapter. "
+                "Multi-adapter docs only. LoRA-only; QLoRA requires "
+                "--dequantize."
+            ),
+        ),
+    ] = None,
+    adapter_mix_method: Annotated[
+        str,
+        typer.Option(
+            "--adapter-mix-method",
+            help=(
+                "PEFT combination strategy for --adapter-mix. `linear` "
+                "(default) sums LoRA deltas; `svd` recomposes via SVD "
+                "(higher fidelity, heavier compute). Only meaningful "
+                "with --adapter-mix."
+            ),
+        ),
+    ] = "linear",
+    verbose: Annotated[
+        bool,
+        typer.Option("--verbose", help="Log each subprocess command as it launches."),
+    ] = False,
+    emit_sway_json: Annotated[
+        bool,
+        typer.Option(
+            "--emit-sway-json",
+            help=(
+                "After the export, also write a ready-to-run sway.yaml "
+                "(via dlm-sway autogen) into the export dir. Requires the "
+                "[sway] extra: pip install 'dlm[sway]'."
+            ),
+        ),
+    ] = False,
+) -> None:
+    """Export the adapter to a runtime target."""
+
+    from rich.console import Console
+
+    from dlm.base_models import GatedModelError, download_spec
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.doc.parser import parse_file
+    from dlm.export import (
+        ExportError,
+        PreflightError,
+        SubprocessError,
+        UnknownExportTargetError,
+        UnsafeMergeError,
+        VendoringError,
+        resolve_export_plan,
+        run_export,
+    )
+    from dlm.export.ollama import (
+        OllamaBinaryNotFoundError,
+        OllamaCreateError,
+        OllamaError,
+        OllamaSmokeError,
+        OllamaVersionError,
+    )
+    from dlm.export.quantize import run_checked
+    from dlm.export.targets import (
+        finalize_mlx_serve_export,
+        finalize_vllm_export,
+        prepare_llama_server_export,
+        prepare_mlx_serve_export,
+        prepare_vllm_export,
+        resolve_target,
+    )
+    from dlm.store.paths import for_dlm
+
+    console = Console(stderr=True)
+
+    if draft is not None and no_draft:
+        console.print("[red]error:[/red] --draft and --no-draft are mutually exclusive; pick one.")
+        raise typer.Exit(code=2)
+    if adapter is not None and adapter_mix is not None:
+        console.print(
+            "[red]export:[/red] --adapter and --adapter-mix are mutually exclusive; pick one."
+        )
+        raise typer.Exit(code=2)
+    try:
+        resolved_target = resolve_target(target)
+    except UnknownExportTargetError as exc:
+        console.print(f"[red]export:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+    parsed = parse_file(path)
+    adapters_declared = parsed.frontmatter.training.adapters
+    if adapter is not None:
+        if adapters_declared is None:
+            console.print(
+                "[red]export:[/red] --adapter is only valid on multi-adapter "
+                "documents (this doc does not declare `training.adapters`)."
+            )
+            raise typer.Exit(code=2)
+        if adapter not in adapters_declared:
+            declared = sorted(adapters_declared)
+            console.print(
+                f"[red]export:[/red] --adapter {adapter!r} is not declared (declared: {declared})."
+            )
+            raise typer.Exit(code=2)
+
+    mix_entries: list[tuple[str, float]] | None = None
+    if adapter_mix is not None:
+        from dlm.export.weighted_merge import (
+            InvalidMixSpecError,
+            parse_mix_spec,
+            validate_mix_against_declared,
+        )
+
+        if adapters_declared is None:
+            console.print(
+                "[red]export:[/red] --adapter-mix is only valid on multi-adapter "
+                "documents (this doc does not declare `training.adapters`)."
+            )
+            raise typer.Exit(code=2)
+        if adapter_mix_method not in ("linear", "svd"):
+            console.print(
+                f"[red]export:[/red] --adapter-mix-method must be "
+                f"`linear` or `svd`, got {adapter_mix_method!r}."
+            )
+            raise typer.Exit(code=2)
+        try:
+            entries = parse_mix_spec(adapter_mix)
+            validate_mix_against_declared(entries, set(adapters_declared))
+        except InvalidMixSpecError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=2) from exc
+        mix_entries = [(e.name, e.weight) for e in entries]
+
+    store = for_dlm(parsed.frontmatter.dlm_id)
+
+    # Gate-driven static mix: when the doc has an enabled gate and the
+    # user didn't pass --adapter-mix / --adapter, freeze the learned
+    # gate to per-adapter weights for the GGUF export path. Dynamic
+    # routing only lives in the `dlm prompt` flow; the runtime can't
+    # evaluate the torch gate, so we substitute the prior here. A CLI
+    # --adapter-mix wins — users who know what they want get full
+    # control.
+    if mix_entries is None and adapter is None:
+        from dlm.export.gate_fallback import resolve_and_announce
+
+        resolution = resolve_and_announce(store, parsed)
+        if resolution.entries is not None:
+            mix_entries = resolution.entries
+            for line in resolution.banner_lines:
+                console.print(line)
+
+    already_accepted = _previously_accepted(store.manifest)
+    try:
+        spec = resolve_base_model(parsed.frontmatter.base_model, accept_license=already_accepted)
+    except GatedModelError as exc:
+        console.print(f"[red]license:[/red] base model {parsed.frontmatter.base_model!r} is gated.")
+        if exc.license_url:
+            console.print(f"  review the license at: {exc.license_url}")
+        console.print("  accept via `dlm train --i-accept-license` before exporting.")
+        raise typer.Exit(code=1) from exc
+
+    # Audio bases take HF-snapshot unconditionally — llama.cpp has no
+    # audio-arch roadmap at our pinned tag — so branch early without
+    # resolving a GGUF plan.
+    from dlm.modality import modality_for
+
+    export_dispatch = modality_for(spec)
+    if resolved_target.name == "vllm" and export_dispatch.accepts_audio:
+        console.print(
+            "[red]export:[/red] --target vllm is not wired for audio-language "
+            "documents yet; the current vllm export path only supports text bases."
+        )
+        raise typer.Exit(code=2)
+    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_audio:
+        console.print(
+            "[red]export:[/red] --target mlx-serve is not wired for audio-language "
+            "documents yet; the current mlx-serve export path only supports text bases."
+        )
+        raise typer.Exit(code=2)
+    if export_dispatch.accepts_audio:
+        try:
+            dispatch_result = export_dispatch.dispatch_export(
+                store=store,
+                spec=spec,
+                adapter_name=adapter,
+                quant=quant,
+                merged=merged,
+                adapter_mix_raw=adapter_mix,
+            )
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+        assert dispatch_result is not None  # audio modality always returns a result
+        for line in dispatch_result.banner_lines:
+            console.print(line)
+        return
+
+    try:
+        plan = resolve_export_plan(
+            cli_quant=quant,
+            cli_merged=merged,
+            cli_dequantize=dequantize,
+            cli_no_template=no_template,
+            cli_ollama_name=name,
+            cli_no_imatrix=no_imatrix,
+            frontmatter_default_quant=parsed.frontmatter.export.default_quant,
+        )
+    except ValueError as exc:
+        console.print(f"[red]export:[/red] {exc}")
+        raise typer.Exit(code=2) from exc
+
+    store.ensure_layout()
+
+    # VL bases: arch-probe + try single-file GGUF on SUPPORTED (with
+    # fallback to HF-snapshot on refusal or subprocess failure). A
+    # missing local base snapshot should not hard-fail the whole
+    # export — the dispatcher can still emit the HF-snapshot path
+    # without GGUF context.
+    if resolved_target.name == "vllm" and export_dispatch.accepts_images:
+        console.print(
+            "[red]export:[/red] --target vllm is not wired for vision-language "
+            "documents yet; the current vllm export path only supports text bases."
+        )
+        raise typer.Exit(code=2)
+    if resolved_target.name == "mlx-serve" and export_dispatch.accepts_images:
+        console.print(
+            "[red]export:[/red] --target mlx-serve is not wired for vision-language "
+            "documents yet; the current mlx-serve export path only supports text bases."
+        )
+        raise typer.Exit(code=2)
+    if export_dispatch.accepts_images:
+        gguf_emission_context = None
+        try:
+            cached_vl = download_spec(spec, local_files_only=True)
+        except RuntimeError as exc:
+            _ = exc
+        else:
+            gguf_emission_context = {
+                "plan": plan,
+                "cached_base_dir": cached_vl.path,
+                "source_dlm_path": path.resolve(),
+                "training_sequence_len": parsed.frontmatter.training.sequence_len,
+                "dlm_version": f"v{parsed.frontmatter.dlm_version}",
+            }
+        try:
+            dispatch_result = export_dispatch.dispatch_export(
+                store=store,
+                spec=spec,
+                adapter_name=adapter,
+                quant=quant,
+                merged=merged,
+                adapter_mix_raw=adapter_mix,
+                gguf_emission_context=gguf_emission_context,
+            )
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+        assert dispatch_result is not None  # VL modality always returns a result
+        for line in dispatch_result.banner_lines:
+            console.print(line)
+        return
+
+    try:
+        cached = download_spec(spec, local_files_only=True)
+    except RuntimeError as exc:
+        console.print(
+            f"[red]export:[/red] base model not in local cache — run `dlm train` first.\n  {exc}"
+        )
+        raise typer.Exit(code=1) from exc
+
+    def _verbose_runner(cmd: Sequence[str]) -> object:
+        console.print(f"[dim]$ {' '.join(cmd)}[/dim]")
+        return run_checked(cmd)
+
+    adapter_path_override = None
+    if mix_entries is not None:  # pragma: no cover - heavy path
+        # Build the weighted-merged adapter into an ephemeral dir,
+        # then feed the path to run_export as an override. The tmp
+        # dir lives under the store's cache/ so it cleans up with
+        # the rest of the store on `dlm pack`.
+        from dlm.export.weighted_merge import MixEntry, build_and_stage
+
+        entries_typed = [MixEntry(name=n, weight=w) for (n, w) in mix_entries]
+        adapter_path_override = build_and_stage(
+            store=store,
+            spec=spec,
+            cached_base_dir=cached.path,
+            entries=entries_typed,
+            combination_type=adapter_mix_method,  # type: ignore[arg-type]
+        )
+
+    if resolved_target.name == "vllm":
+        ignored_flags: list[str] = []
+        if quant is not None:
+            ignored_flags.append("--quant")
+        if merged:
+            ignored_flags.append("--merged")
+        if dequantize:
+            ignored_flags.append("--dequantize")
+        if no_template:
+            ignored_flags.append("--no-template")
+        if skip_ollama:
+            ignored_flags.append("--skip-ollama")
+        if no_imatrix:
+            ignored_flags.append("--no-imatrix")
+        if draft is not None:
+            ignored_flags.append("--draft")
+        if no_draft:
+            ignored_flags.append("--no-draft")
+        if ignored_flags:
+            console.print(
+                "[yellow]export:[/yellow] ignoring flags not applicable to "
+                f"`--target vllm`: {', '.join(ignored_flags)}"
+            )
+
+        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
+        try:
+            vllm_result = prepare_vllm_export(
+                store=store,
+                spec=spec,
+                served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
+                training_sequence_len=parsed.frontmatter.training.sequence_len,
+                adapter_name=adapter,
+                adapter_path_override=adapter_path_override,
+                declared_adapter_names=declared_adapter_names,
+            )
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+
+        vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result)
+        if vllm_smoke is not None and not vllm_smoke.ok:
+            console.print(
+                f"[red]smoke:[/red] {vllm_smoke.detail}\n"
+                "  re-run with `--no-smoke` to skip the smoke test."
+            )
+            raise typer.Exit(code=1)
+
+        manifest_path = finalize_vllm_export(
+            store=store,
+            spec=spec,
+            prepared=vllm_result,
+            smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail,
+            adapter_name=adapter,
+            adapter_mix=mix_entries,
+        )
+        console.print(f"[green]exported:[/green] {vllm_result.export_dir}")
+        console.print("target:  vllm")
+        assert vllm_result.launch_script_path is not None
+        assert vllm_result.config_path is not None
+        console.print(f"launch:  {vllm_result.launch_script_path.name}")
+        console.print(f"config:  {vllm_result.config_path.name}")
+        console.print(f"manifest: {manifest_path.name}")
+        if vllm_smoke is not None and vllm_smoke.detail:
+            console.print(f"smoke:   {vllm_smoke.detail}")
+        return
+
+    if resolved_target.name == "mlx-serve":
+        mlx_ignored_flags: list[str] = []
+        if quant is not None:
+            mlx_ignored_flags.append("--quant")
+        if merged:
+            mlx_ignored_flags.append("--merged")
+        if dequantize:
+            mlx_ignored_flags.append("--dequantize")
+        if name is not None:
+            mlx_ignored_flags.append("--name")
+        if no_template:
+            mlx_ignored_flags.append("--no-template")
+        if skip_ollama:
+            mlx_ignored_flags.append("--skip-ollama")
+        if no_imatrix:
+            mlx_ignored_flags.append("--no-imatrix")
+        if draft is not None:
+            mlx_ignored_flags.append("--draft")
+        if no_draft:
+            mlx_ignored_flags.append("--no-draft")
+        if mlx_ignored_flags:
+            console.print(
+                "[yellow]export:[/yellow] ignoring flags not applicable to "
+                f"`--target mlx-serve`: {', '.join(mlx_ignored_flags)}"
+            )
+
+        declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
+        try:
+            mlx_serve_result = prepare_mlx_serve_export(
+                store=store,
+                spec=spec,
+                adapter_name=adapter,
+                adapter_path_override=adapter_path_override,
+                declared_adapter_names=declared_adapter_names,
+            )
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+
+        mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result)
+        if mlx_serve_smoke is not None and not mlx_serve_smoke.ok:
+            console.print(
+                f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n"
+                "  re-run with `--no-smoke` to skip the smoke test."
+            )
+            raise typer.Exit(code=1)
+
+        manifest_path = finalize_mlx_serve_export(
+            store=store,
+            spec=spec,
+            prepared=mlx_serve_result,
+            smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail,
+            adapter_name=adapter,
+            adapter_mix=mix_entries,
+        )
+        console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}")
+        console.print("target:  mlx-serve")
+        assert mlx_serve_result.launch_script_path is not None
+        console.print(f"launch:  {mlx_serve_result.launch_script_path.name}")
+        console.print(f"manifest: {manifest_path.name}")
+        if mlx_serve_smoke is not None and mlx_serve_smoke.detail:
+            console.print(f"smoke:   {mlx_serve_smoke.detail}")
+        return
+
+    try:
+        result = run_export(
+            store,
+            spec,
+            plan,
+            target=resolved_target.name,
+            cached_base_dir=cached.path,
+            subprocess_runner=_verbose_runner if verbose else None,
+            skip_ollama=skip_ollama or resolved_target.name != "ollama",
+            skip_smoke=no_smoke,
+            source_dlm_path=path.resolve(),
+            training_sequence_len=parsed.frontmatter.training.sequence_len,
+            override_temperature=parsed.frontmatter.export.default_temperature,
+            override_top_p=parsed.frontmatter.export.default_top_p,
+            draft_override=draft,
+            draft_disabled=no_draft,
+            adapter_name=adapter,
+            adapter_path_override=adapter_path_override,
+            adapter_mix=mix_entries,
+        )
+    except UnsafeMergeError as exc:
+        console.print(f"[red]merge:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except VendoringError as exc:
+        console.print(
+            f"[red]vendor:[/red] {exc}\n"
+            "  run `scripts/bump-llama-cpp.sh build` or "
+            "`git submodule update --init --recursive`."
+        )
+        raise typer.Exit(code=1) from exc
+    except PreflightError as exc:
+        console.print(f"[red]preflight[{exc.probe}]:[/red] {exc.detail}")
+        raise typer.Exit(code=1) from exc
+    except SubprocessError as exc:
+        console.print(f"[red]subprocess:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except OllamaBinaryNotFoundError as exc:
+        console.print(
+            f"[red]ollama:[/red] {exc}\n"
+            "  install from https://ollama.com/download "
+            "or re-run with `--skip-ollama`."
+        )
+        raise typer.Exit(code=1) from exc
+    except OllamaVersionError as exc:
+        console.print(f"[red]ollama:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except OllamaCreateError as exc:
+        console.print(f"[red]ollama create:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except OllamaSmokeError as exc:
+        console.print(
+            f"[red]smoke:[/red] {exc}\n  re-run with `--no-smoke` to skip the smoke test."
+        )
+        raise typer.Exit(code=1) from exc
+    except OllamaError as exc:
+        console.print(f"[red]ollama:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+    except ExportError as exc:
+        console.print(f"[red]export:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if resolved_target.name == "llama-server":
+        adapter_dir = adapter_path_override
+        if adapter_dir is None:
+            if adapter is None:
+                adapter_dir = store.resolve_current_adapter()
+            else:
+                adapter_dir = store.resolve_current_adapter_for(adapter)
+        assert adapter_dir is not None
+        try:
+            llama_server_result = prepare_llama_server_export(
+                export_dir=result.export_dir,
+                manifest_path=result.manifest_path,
+                artifacts=result.artifacts,
+                adapter_dir=adapter_dir,
+                spec=spec,
+                training_sequence_len=parsed.frontmatter.training.sequence_len,
+            )
+        except VendoringError as exc:
+            console.print(
+                f"[red]vendor:[/red] {exc}\n"
+                "  run `scripts/bump-llama-cpp.sh build --with-server` or "
+                "`git submodule update --init --recursive`."
+            )
+            raise typer.Exit(code=1) from exc
+        except ExportError as exc:
+            console.print(f"[red]export:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+        llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result)
+        if llama_server_smoke is not None and not llama_server_smoke.ok:
+            console.print(
+                f"[red]smoke:[/red] {llama_server_smoke.detail}\n"
+                "  re-run with `--no-smoke` to skip the smoke test."
+            )
+            raise typer.Exit(code=1)
+
+    cached_tag = " [dim](cached base)[/dim]" if result.cached else ""
+    console.print(f"[green]exported:[/green] {result.export_dir}{cached_tag}")
+    for artifact in result.artifacts:
+        console.print(f"  {artifact.name}")
+
+    # When the user opts in, also emit a sway.yaml next to the GGUF.
+    # Done AFTER the regular export so a sway-side failure can never
+    # roll back a working GGUF deployment.
+    if emit_sway_json:
+        from dlm.export.sway_json import SwayJsonExportError, write_sway_json
+
+        try:
+            sway_yaml_path = write_sway_json(path, result.export_dir)
+        except SwayJsonExportError as exc:
+            console.print(f"[red]sway-json:[/red] {exc}")
+            raise typer.Exit(code=1) from exc
+        console.print(f"[green]sway.yaml:[/green] {sway_yaml_path}")
+        console.print("  next: sway run " + str(sway_yaml_path))
+    if resolved_target.name == "llama-server":
+        assert llama_server_result.launch_script_path is not None
+        assert llama_server_result.config_path is not None
+        console.print(f"target:  {result.target}")
+        console.print(f"launch:  {llama_server_result.launch_script_path.name}")
+        console.print(f"template: {llama_server_result.config_path.name}")
+        if llama_server_smoke is not None and llama_server_smoke.detail:
+            console.print(f"smoke:   {llama_server_smoke.detail}")
+        return
+    if result.ollama_name:
+        console.print(f"ollama:  {result.ollama_name} (v{result.ollama_version})")
+    if result.smoke_output_first_line:
+        console.print(f"smoke:   {result.smoke_output_first_line}")

From 5bfd080a8aaa5de91adda16d26f2bb6947b028eb Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:40:58 -0400
Subject: [PATCH 25/46] Extract metrics_cmd query into
 dlm.metrics.cli:gather_metrics

---
 src/dlm/cli/commands/metrics.py | 35 +++++++------
 src/dlm/metrics/cli.py          | 90 +++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 15 deletions(-)
 create mode 100644 src/dlm/metrics/cli.py

diff --git a/src/dlm/cli/commands/metrics.py b/src/dlm/cli/commands/metrics.py
index ebbbd73..d22eb4a 100644
--- a/src/dlm/cli/commands/metrics.py
+++ b/src/dlm/cli/commands/metrics.py
@@ -38,14 +38,11 @@ def metrics_cmd(
     from rich.console import Console
 
     from dlm.doc.parser import parse_file
+    from dlm.metrics.cli import MetricsQuery, gather_metrics
     from dlm.metrics.queries import (
-        evals_for_run,
         evals_to_dict,
-        preference_mining_for_run,
         preference_mining_to_dict,
-        recent_runs,
         runs_to_dict,
-        steps_for_run,
         steps_to_dict,
     )
     from dlm.store.paths import for_dlm
@@ -61,24 +58,31 @@ def metrics_cmd(
     parsed = parse_file(path)
     store = for_dlm(parsed.frontmatter.dlm_id)
 
-    runs = recent_runs(store.root, limit=limit, phase=phase, since=since_delta, run_id=run_id)
+    view = gather_metrics(
+        MetricsQuery(
+            store_root=store.root,
+            run_id=run_id,
+            phase=phase,
+            since=since_delta,
+            limit=limit,
+        )
+    )
 
     if run_id is not None:
-        # Drill-down: show this run's steps + evals.
-        if not runs:
+        if view.drilldown is None:
             console.print(f"[red]metrics:[/red] no run with run_id={run_id}")
             raise typer.Exit(code=1)
-        run = runs[0]
-        steps = steps_for_run(store.root, run_id)
-        evals = evals_for_run(store.root, run_id)
-        preference_rows = preference_mining_for_run(store.root, run_id)
+        run = view.drilldown.run
+        steps = view.drilldown.steps
+        evals = view.drilldown.evals
+        preference_rows = view.drilldown.preference_rows
 
         if json_out:
             payload = {
                 "run": runs_to_dict([run])[0],
-                "steps": steps_to_dict(steps),
-                "evals": evals_to_dict(evals),
-                "preference_mining": preference_mining_to_dict(preference_rows),
+                "steps": steps_to_dict(list(steps)),
+                "evals": evals_to_dict(list(evals)),
+                "preference_mining": preference_mining_to_dict(list(preference_rows)),
             }
             sys.stdout.write(json.dumps(payload, indent=2) + "\n")
             return
@@ -113,8 +117,9 @@ def metrics_cmd(
         return
 
     # Top-level: list runs.
+    runs = view.runs
     if json_out:
-        sys.stdout.write(json.dumps({"runs": runs_to_dict(runs)}, indent=2) + "\n")
+        sys.stdout.write(json.dumps({"runs": runs_to_dict(list(runs))}, indent=2) + "\n")
         return
     if csv_out:
         writer = csv.writer(sys.stdout)
diff --git a/src/dlm/metrics/cli.py b/src/dlm/metrics/cli.py
new file mode 100644
index 0000000..c3609e9
--- /dev/null
+++ b/src/dlm/metrics/cli.py
@@ -0,0 +1,90 @@
+"""Domain dispatcher for `dlm metrics`.
+
+Splits the per-store-metrics query off the CLI plumbing: callers (CLI,
+LSP, future web UI) build a `MetricsQuery`, call `gather_metrics`, and
+get a typed `MetricsView` with the rows they need to render. The CLI
+owns formatting — text, JSON, CSV — and `gather_metrics` doesn't import
+typer, rich, or stdout writers.
+
+Imports the queries module dotted (not the individual functions) so
+test fixtures that patch `dlm.metrics.queries.<name>` are visible to
+this dispatcher at call time.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from dlm.metrics import queries
+from dlm.metrics.queries import EvalRow, PreferenceMineRow, RunRow, StepRow
+
+if TYPE_CHECKING:
+    from datetime import timedelta
+    from pathlib import Path
+
+
+@dataclass(frozen=True)
+class MetricsQuery:
+    """Inputs to `gather_metrics`. All fields validated upstream by the caller."""
+
+    store_root: Path
+    run_id: int | None = None
+    phase: str | None = None
+    since: timedelta | None = None
+    limit: int = 20
+
+
+@dataclass(frozen=True)
+class MetricsRunDrilldown:
+    """Per-run metrics: the run row plus its steps / evals / preference events."""
+
+    run: RunRow
+    steps: tuple[StepRow, ...]
+    evals: tuple[EvalRow, ...]
+    preference_rows: tuple[PreferenceMineRow, ...]
+
+
+@dataclass(frozen=True)
+class MetricsView:
+    """Snapshot of the metrics layer for a given query.
+
+    `drilldown` is set iff `query.run_id is not None` AND a matching run
+    exists. `runs` is the row list (always populated for top-level
+    listings; on drill-down it contains exactly the matched run when
+    found, empty otherwise so the CLI can detect the not-found case).
+    """
+
+    runs: tuple[RunRow, ...]
+    drilldown: MetricsRunDrilldown | None
+
+
+def gather_metrics(query: MetricsQuery) -> MetricsView:
+    """Read the per-store metrics DB and return a `MetricsView`."""
+    runs = tuple(
+        queries.recent_runs(
+            query.store_root,
+            limit=query.limit,
+            phase=query.phase,
+            since=query.since,
+            run_id=query.run_id,
+        )
+    )
+
+    if query.run_id is None or not runs:
+        return MetricsView(runs=runs, drilldown=None)
+
+    run = runs[0]
+    steps = tuple(queries.steps_for_run(query.store_root, query.run_id))
+    evals = tuple(queries.evals_for_run(query.store_root, query.run_id))
+    preference_rows = tuple(queries.preference_mining_for_run(query.store_root, query.run_id))
+
+    return MetricsView(
+        runs=runs,
+        drilldown=MetricsRunDrilldown(
+            run=run,
+            steps=steps,
+            evals=evals,
+            preference_rows=preference_rows,
+        ),
+    )

From 4a930b8743bf080fd483efdd6ac280349e4de5af Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:46:02 -0400
Subject: [PATCH 26/46] Extract synth_instructions_cmd into
 dlm.synth.dispatch:run_synth_instructions

---
 src/dlm/cli/commands/synth.py    | 102 ++++++++------------
 src/dlm/synth/dispatch.py        | 157 +++++++++++++++++++++++++++++++
 tests/unit/cli/test_synth_cmd.py |   2 +-
 3 files changed, 197 insertions(+), 64 deletions(-)
 create mode 100644 src/dlm/synth/dispatch.py

diff --git a/src/dlm/cli/commands/synth.py b/src/dlm/cli/commands/synth.py
index a00deee..53ced7e 100644
--- a/src/dlm/cli/commands/synth.py
+++ b/src/dlm/cli/commands/synth.py
@@ -106,28 +106,20 @@ def synth_instructions_cmd(
 
     from dlm.doc.errors import DlmParseError
     from dlm.doc.parser import parse_file
-    from dlm.preference import JudgeUnavailableError, build_judge
+    from dlm.preference import JudgeUnavailableError
     from dlm.store.paths import for_dlm
     from dlm.synth import (
         InvalidTeacherSpecError,
         TeacherInvocationError,
         TeacherUnavailableError,
-        build_synth_plan,
-        build_teacher,
-        clear_pending_plan,
-        filter_synth_plan,
+        render_apply_plan,
         render_filter_report,
         render_synth_plan,
-        save_pending_plan,
     )
-    from dlm.synth import (
-        apply_plan as apply_synth_plan,
-    )
-    from dlm.synth import (
-        build_apply_plan as build_synth_apply_plan,
-    )
-    from dlm.synth import (
-        render_apply_plan as render_synth_apply_plan,
+    from dlm.synth.dispatch import (
+        SynthInstructionsRequest,
+        SynthOutcome,
+        run_synth_instructions,
     )
 
     console = Console(stderr=True)
@@ -159,20 +151,26 @@ def synth_instructions_cmd(
 
     store = for_dlm(parsed.frontmatter.dlm_id)
 
+    request = SynthInstructionsRequest(
+        parsed=parsed,
+        target_path=path,
+        store=store,
+        teacher=teacher,
+        per_section=per_section,
+        strategy=cast(Literal["extraction", "expansion", "both"], strategy),
+        filter_kind=cast(Literal["sway", "none", "dedup-only"], filter_kind),
+        threshold=threshold,
+        max_pairs=max_pairs,
+        max_new_tokens=max_new_tokens,
+        temperature=temp,
+        top_p=top_p,
+        seed=seed,
+        apply=apply,
+        dry_run=dry_run,
+    )
+
     try:
-        strategy_value = cast(Literal["extraction", "expansion", "both"], strategy)
-        teacher_obj = build_teacher(teacher, dlm_path=path)
-        plan = build_synth_plan(
-            parsed,
-            teacher_obj,
-            per_section=per_section,
-            strategy=strategy_value,
-            max_pairs=max_pairs,
-            max_new_tokens=max_new_tokens,
-            temperature=temp,
-            top_p=top_p,
-            seed=seed,
-        )
+        result = run_synth_instructions(request)
     except InvalidTeacherSpecError as exc:
         console.print(f"[red]synth:[/red] {exc}")
         raise typer.Exit(code=2) from exc
@@ -182,64 +180,42 @@ def synth_instructions_cmd(
     except TeacherInvocationError as exc:
         console.print(f"[red]synth:[/red] {exc}")
         raise typer.Exit(code=1) from exc
-    except ValueError as exc:
+    except JudgeUnavailableError as exc:
         console.print(f"[red]synth:[/red] {exc}")
-        raise typer.Exit(code=2) from exc
-
-    judge_obj = None
-    if filter_kind == "sway":
-        try:
-            judge_obj = build_judge("sway", dlm_path=path)
-        except JudgeUnavailableError as exc:
-            console.print(f"[red]synth:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
-    try:
-        filter_value = cast(Literal["sway", "none", "dedup-only"], filter_kind)
-        filtered = filter_synth_plan(
-            plan,
-            filter_kind=filter_value,
-            judge=judge_obj,
-            threshold=threshold,
-        )
+        raise typer.Exit(code=1) from exc
     except ValueError as exc:
         console.print(f"[red]synth:[/red] {exc}")
         raise typer.Exit(code=2) from exc
 
-    out_console.print(render_synth_plan(plan))
+    out_console.print(render_synth_plan(result.plan))
     out_console.print("")
-    out_console.print(render_filter_report(filtered))
+    out_console.print(render_filter_report(result.filtered_plan))
 
-    if not filtered.additions:
-        if not dry_run:
-            clear_pending_plan(store)
+    if result.outcome is SynthOutcome.NO_ADDITIONS:
         out_console.print(
             "\n[yellow]no synth additions accepted[/yellow] — either generation "
             "yielded no valid pairs, dedup removed them, or the filter rejected them."
         )
         raise typer.Exit(code=2)
 
-    sections = [addition.addition.section for addition in filtered.additions]
-
-    if apply:
-        apply_plan = build_synth_apply_plan(parsed, sections)
+    if result.outcome is SynthOutcome.APPLIED:
+        assert result.apply_plan is not None
+        assert result.apply_summary is not None
         out_console.print("")
-        out_console.print(render_synth_apply_plan(apply_plan))
-        summary = apply_synth_plan(parsed, apply_plan, target=path)
-        clear_pending_plan(store)
+        out_console.print(render_apply_plan(result.apply_plan))
         out_console.print(
-            f"\n[green]synth:[/green] wrote {summary.added} section(s) to {path} "
-            f"({summary.skipped} skipped)"
+            f"\n[green]synth:[/green] wrote {result.apply_summary.added} section(s) to {path} "
+            f"({result.apply_summary.skipped} skipped)"
         )
         return
 
-    if dry_run:
+    if result.outcome is SynthOutcome.DRY_RUN:
         out_console.print("\n[green]synth:[/green] dry-run only — nothing staged.")
         return
 
-    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
+    # SynthOutcome.STAGED
     out_console.print(
-        f"\n[green]synth:[/green] staged {len(pending.sections)} auto-synth instruction "
+        f"\n[green]synth:[/green] staged {result.pending_count} auto-synth instruction "
         f"section(s). Run [bold]dlm synth list {path}[/bold] to inspect them."
     )
 
diff --git a/src/dlm/synth/dispatch.py b/src/dlm/synth/dispatch.py
new file mode 100644
index 0000000..cc278be
--- /dev/null
+++ b/src/dlm/synth/dispatch.py
@@ -0,0 +1,157 @@
+"""Domain dispatcher for `dlm synth instructions`.
+
+Lifts the build-teacher → build-plan → filter → stage/apply pipeline
+out of the CLI. Callers (CLI, LSP, future automation) build a
+`SynthInstructionsRequest`, call `run_synth_instructions`, and render
+the typed `SynthInstructionsResult` themselves. The dispatcher does no
+console I/O; teacher/judge/plan errors propagate as the existing typed
+exceptions so the caller can map each to its own exit code or banner.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import StrEnum
+from typing import TYPE_CHECKING
+
+from dlm.synth import apply as _apply
+from dlm.synth import filter as _filter
+from dlm.synth import pending as _pending
+from dlm.synth import run as _run
+from dlm.synth import teachers as _teachers
+from dlm.synth.apply import SynthApplyPlan, SynthApplySummary
+from dlm.synth.filter import FilteredSynthPlan, SynthFilterKind
+from dlm.synth.prompts import SynthStrategy
+from dlm.synth.run import SynthRunPlan
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from dlm.doc.parser import ParsedDlm
+    from dlm.preference.judge import PreferenceJudge
+    from dlm.store.paths import StorePath
+
+
+class SynthOutcome(StrEnum):
+    """Discriminator for what `run_synth_instructions` did."""
+
+    APPLIED = "applied"  # Sections written to the .dlm.
+    STAGED = "staged"  # Sections persisted as the pending plan.
+    DRY_RUN = "dry_run"  # Plan rendered, nothing staged or written.
+    NO_ADDITIONS = "no_additions"  # Filter rejected every candidate.
+
+
+@dataclass(frozen=True)
+class SynthInstructionsRequest:
+    """Inputs to `run_synth_instructions`.
+
+    The CLI is responsible for parsing the .dlm and resolving the
+    store; the dispatcher receives both as already-typed objects.
+    `apply` and `dry_run` are mutually exclusive — the CLI rejects
+    that combo before constructing the request.
+    """
+
+    parsed: ParsedDlm
+    target_path: Path
+    store: StorePath
+    teacher: str
+    per_section: int
+    strategy: SynthStrategy
+    filter_kind: SynthFilterKind
+    threshold: float | None
+    max_pairs: int | None
+    max_new_tokens: int
+    temperature: float
+    top_p: float | None
+    seed: int | None
+    apply: bool
+    dry_run: bool
+
+
+@dataclass(frozen=True)
+class SynthInstructionsResult:
+    """Outcome of `run_synth_instructions`.
+
+    `plan` and `filtered_plan` are always populated so the CLI can
+    render them. `apply_plan` / `apply_summary` are only set on
+    `SynthOutcome.APPLIED`. `pending_count` is non-zero only on
+    `SynthOutcome.STAGED`.
+    """
+
+    plan: SynthRunPlan
+    filtered_plan: FilteredSynthPlan
+    outcome: SynthOutcome
+    apply_plan: SynthApplyPlan | None = None
+    apply_summary: SynthApplySummary | None = None
+    pending_count: int = 0
+
+
+def run_synth_instructions(req: SynthInstructionsRequest) -> SynthInstructionsResult:
+    """Build, filter, and stage/apply auto-synth instructions for one .dlm."""
+    teacher_obj = _teachers.build_teacher(req.teacher, dlm_path=req.target_path)
+    plan = _run.build_synth_plan(
+        req.parsed,
+        teacher_obj,
+        per_section=req.per_section,
+        strategy=req.strategy,
+        max_pairs=req.max_pairs,
+        max_new_tokens=req.max_new_tokens,
+        temperature=req.temperature,
+        top_p=req.top_p,
+        seed=req.seed,
+    )
+
+    judge_obj: PreferenceJudge | None = None
+    if req.filter_kind == "sway":
+        from dlm.preference import build_judge
+
+        judge_obj = build_judge("sway", dlm_path=req.target_path)
+
+    filtered = _filter.filter_synth_plan(
+        plan,
+        filter_kind=req.filter_kind,
+        judge=judge_obj,
+        threshold=req.threshold,
+    )
+
+    if not filtered.additions:
+        if not req.dry_run:
+            _pending.clear_pending_plan(req.store)
+        return SynthInstructionsResult(
+            plan=plan,
+            filtered_plan=filtered,
+            outcome=SynthOutcome.NO_ADDITIONS,
+        )
+
+    sections = [addition.addition.section for addition in filtered.additions]
+
+    if req.apply:
+        apply_plan = _apply.build_apply_plan(req.parsed, sections)
+        summary = _apply.apply_plan(req.parsed, apply_plan, target=req.target_path)
+        _pending.clear_pending_plan(req.store)
+        return SynthInstructionsResult(
+            plan=plan,
+            filtered_plan=filtered,
+            outcome=SynthOutcome.APPLIED,
+            apply_plan=apply_plan,
+            apply_summary=summary,
+        )
+
+    if req.dry_run:
+        return SynthInstructionsResult(
+            plan=plan,
+            filtered_plan=filtered,
+            outcome=SynthOutcome.DRY_RUN,
+        )
+
+    pending = _pending.save_pending_plan(
+        req.store,
+        source_path=req.target_path.resolve(),
+        sections=sections,
+    )
+    return SynthInstructionsResult(
+        plan=plan,
+        filtered_plan=filtered,
+        outcome=SynthOutcome.STAGED,
+        pending_count=len(pending.sections),
+    )
diff --git a/tests/unit/cli/test_synth_cmd.py b/tests/unit/cli/test_synth_cmd.py
index 94310ea..7dbfaf9 100644
--- a/tests/unit/cli/test_synth_cmd.py
+++ b/tests/unit/cli/test_synth_cmd.py
@@ -149,7 +149,7 @@ def _build_teacher(raw: str, **_kwargs: object) -> _FakeTeacher:
         payload = payloads.get(raw, payloads["self"])
         return _FakeTeacher(raw, payload)
 
-    monkeypatch.setattr("dlm.synth.build_teacher", _build_teacher)
+    monkeypatch.setattr("dlm.synth.teachers.build_teacher", _build_teacher)
     monkeypatch.setattr("dlm.preference.build_judge", lambda *args, **kwargs: _FakeJudge())
 
 

From d1ea0f674f0f8bcf242e8124900515417fc7b266 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:57:05 -0400
Subject: [PATCH 27/46] Extract preference_mine into
 dlm.preference.dispatch:run_preference_mine

---
 src/dlm/cli/commands/preference.py    |  96 +++++---------
 src/dlm/preference/dispatch.py        | 184 ++++++++++++++++++++++++++
 tests/unit/cli/test_preference_cmd.py |   4 +-
 3 files changed, 221 insertions(+), 63 deletions(-)
 create mode 100644 src/dlm/preference/dispatch.py

diff --git a/src/dlm/cli/commands/preference.py b/src/dlm/cli/commands/preference.py
index 21f3fce..c495460 100644
--- a/src/dlm/cli/commands/preference.py
+++ b/src/dlm/cli/commands/preference.py
@@ -89,25 +89,19 @@ def preference_mine_cmd(
     from dlm.doc.parser import parse_file
     from dlm.hardware import doctor
     from dlm.inference import AdapterNotFoundError
-    from dlm.inference.backends import (
-        UnsupportedBackendError,
-        build_backend,
-        select_backend,
-    )
-    from dlm.metrics import MetricsRecorder, PreferenceMineEvent
-    from dlm.metrics.events import PreferenceMineWriteMode
+    from dlm.inference.backends import UnsupportedBackendError, select_backend
     from dlm.modality import modality_for
     from dlm.preference import (
         InvalidJudgeSpecError,
         JudgeUnavailableError,
-        build_apply_plan,
-        build_judge,
-        build_mine_plan,
         render_apply_plan,
         render_mine_plan,
     )
-    from dlm.preference.apply import apply_plan as apply_preference_plan
-    from dlm.preference.pending import clear_pending_plan, save_pending_plan
+    from dlm.preference.dispatch import (
+        PreferenceMineOutcome,
+        PreferenceMineRequest,
+        run_preference_mine,
+    )
     from dlm.store.paths import for_dlm
 
     console = Console(stderr=True)
@@ -187,27 +181,30 @@ def preference_mine_cmd(
     except UnsupportedBackendError as exc:
         console.print(f"[red]preference:[/red] {exc}")
         raise typer.Exit(code=2) from exc
-    backend_obj = build_backend(backend_name, caps)
+
+    request = PreferenceMineRequest(
+        parsed=parsed,
+        target_path=path,
+        store=store,
+        spec=spec,
+        capabilities=caps,
+        backend_name=backend_name,
+        judge_spec=judge,
+        mined_run_id=run_id,
+        samples=samples,
+        max_pairs=max_pairs,
+        threshold=threshold,
+        temperature=temp,
+        top_p=top_p,
+        adapter=adapter,
+        apply=apply,
+    )
 
     try:
-        backend_obj.load(spec, store, adapter_name=adapter)
+        result = run_preference_mine(request)
     except AdapterNotFoundError as exc:
         console.print(f"[red]preference:[/red] {exc}")
         raise typer.Exit(code=1) from exc
-
-    try:
-        judge_obj = build_judge(judge, dlm_path=path)
-        plan = build_mine_plan(
-            parsed,
-            backend_obj,
-            judge_obj,
-            mined_run_id=run_id,
-            samples=samples,
-            max_pairs=max_pairs,
-            threshold=threshold,
-            temperature=temp,
-            top_p=top_p,
-        )
     except InvalidJudgeSpecError as exc:
         console.print(f"[red]preference:[/red] {exc}")
         raise typer.Exit(code=2) from exc
@@ -217,28 +214,10 @@ def preference_mine_cmd(
     except ValueError as exc:
         console.print(f"[red]preference:[/red] {exc}")
         raise typer.Exit(code=2) from exc
-    finally:
-        backend_obj.unload()
-
-    recorder = MetricsRecorder(store.root)
-
-    def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
-        recorder.record_preference_mine(
-            PreferenceMineEvent(
-                run_id=run_id,
-                judge_name=judge_obj.name,
-                sample_count=samples,
-                mined_pairs=len(plan.additions),
-                skipped_prompts=len(plan.skipped),
-                write_mode=write_mode,
-            )
-        )
 
-    out_console.print(render_mine_plan(plan))
+    out_console.print(render_mine_plan(result.plan))
 
-    if not plan.additions:
-        clear_pending_plan(store)
-        _record_preference_mine("empty")
+    if result.outcome is PreferenceMineOutcome.NO_ADDITIONS:
         out_console.print(
             "\n[yellow]no candidates to mine[/yellow] — either instruction prompts "
             "did not yield a confident pair, or the matching preference sections "
@@ -246,25 +225,20 @@ def _record_preference_mine(write_mode: PreferenceMineWriteMode) -> None:
         )
         raise typer.Exit(code=2)
 
-    sections = [addition.section for addition in plan.additions]
-
-    if apply:
-        apply_plan = build_apply_plan(parsed, sections)
+    if result.outcome is PreferenceMineOutcome.APPLIED:
+        assert result.apply_plan is not None
+        assert result.apply_summary is not None
         out_console.print("")
-        out_console.print(render_apply_plan(apply_plan))
-        summary = apply_preference_plan(parsed, apply_plan, target=path)
-        clear_pending_plan(store)
-        _record_preference_mine("applied")
+        out_console.print(render_apply_plan(result.apply_plan))
         out_console.print(
-            f"\n[green]preference:[/green] wrote {summary.added} section(s) to {path} "
-            f"({summary.skipped} skipped)"
+            f"\n[green]preference:[/green] wrote {result.apply_summary.added} section(s) to {path} "
+            f"({result.apply_summary.skipped} skipped)"
         )
         return
 
-    pending = save_pending_plan(store, source_path=path.resolve(), sections=sections)
-    _record_preference_mine("staged")
+    # PreferenceMineOutcome.STAGED
     out_console.print(
-        f"\n[green]preference:[/green] staged {len(pending.sections)} mined preference "
+        f"\n[green]preference:[/green] staged {result.pending_count} mined preference "
         f"section(s). Run [bold]dlm preference apply {path}[/bold] to write them."
     )
 
diff --git a/src/dlm/preference/dispatch.py b/src/dlm/preference/dispatch.py
new file mode 100644
index 0000000..ceaf668
--- /dev/null
+++ b/src/dlm/preference/dispatch.py
@@ -0,0 +1,184 @@
+"""Domain dispatcher for `dlm preference mine`.
+
+Lifts the build-backend → load → build-judge → mine → stage/apply
+pipeline (and metrics record) out of the CLI. Callers (CLI, LSP, future
+automation) build a `PreferenceMineRequest`, call `run_preference_mine`,
+and render the typed `PreferenceMineResult` themselves. The dispatcher
+does no console I/O; backend / judge / mine errors propagate as the
+existing typed exceptions so the caller can map each to its own exit
+code or banner.
+
+External-module imports are dotted (e.g. `from dlm.inference import
+backends as _backends; _backends.build_backend(...)`) so test fixtures
+that monkeypatch `dlm.inference.backends.<name>` are visible to the
+dispatcher at call time.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import StrEnum
+from typing import TYPE_CHECKING
+
+from dlm.inference import backends as _backends
+from dlm.inference.backends.select import BackendName
+from dlm.metrics import MetricsRecorder
+from dlm.metrics.events import PreferenceMineEvent
+from dlm.preference import apply as _apply
+from dlm.preference import judge as _judge_mod
+from dlm.preference import mine as _mine
+from dlm.preference import pending as _pending
+from dlm.preference.apply import PreferenceApplyPlan, PreferenceApplySummary
+from dlm.preference.mine import PreferenceMinePlan
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from dlm.base_models.schema import BaseModelSpec
+    from dlm.doc.parser import ParsedDlm
+    from dlm.hardware.capabilities import Capabilities
+    from dlm.store.paths import StorePath
+
+
+class PreferenceMineOutcome(StrEnum):
+    """Discriminator for what `run_preference_mine` did."""
+
+    APPLIED = "applied"  # Sections written to the .dlm.
+    STAGED = "staged"  # Sections persisted as the pending plan.
+    NO_ADDITIONS = "no_additions"  # Mining yielded no confident pairs.
+
+
+@dataclass(frozen=True)
+class PreferenceMineRequest:
+    """Inputs to `run_preference_mine`.
+
+    The CLI is responsible for parsing the .dlm, resolving the store,
+    enforcing license acceptance, and selecting the backend kind; the
+    dispatcher receives all of those as already-typed objects.
+    `mined_run_id` is required — the CLI exits early when no prior
+    training run exists.
+    """
+
+    parsed: ParsedDlm
+    target_path: Path
+    store: StorePath
+    spec: BaseModelSpec
+    capabilities: Capabilities
+    backend_name: BackendName
+    judge_spec: str
+    mined_run_id: int
+    samples: int
+    max_pairs: int | None
+    threshold: float | None
+    temperature: float
+    top_p: float | None
+    adapter: str | None
+    apply: bool
+
+
+@dataclass(frozen=True)
+class PreferenceMineResult:
+    """Outcome of `run_preference_mine`.
+
+    `plan` is always populated so the CLI can render it. `apply_plan`
+    and `apply_summary` are only set on `PreferenceMineOutcome.APPLIED`.
+    `pending_count` is non-zero only on `PreferenceMineOutcome.STAGED`.
+    `judge_name` is the resolved judge identifier, used by the CLI for
+    metrics narration.
+    """
+
+    plan: PreferenceMinePlan
+    outcome: PreferenceMineOutcome
+    judge_name: str
+    apply_plan: PreferenceApplyPlan | None = None
+    apply_summary: PreferenceApplySummary | None = None
+    pending_count: int = 0
+
+
+def run_preference_mine(req: PreferenceMineRequest) -> PreferenceMineResult:
+    """Build, load, mine, and stage/apply preference sections for one .dlm."""
+    backend_obj = _backends.build_backend(req.backend_name, req.capabilities)
+    backend_obj.load(req.spec, req.store, adapter_name=req.adapter)
+
+    try:
+        judge_obj = _judge_mod.build_judge(req.judge_spec, dlm_path=req.target_path)
+        judge_name = judge_obj.name
+        plan = _mine.build_mine_plan(
+            req.parsed,
+            backend_obj,
+            judge_obj,
+            mined_run_id=req.mined_run_id,
+            samples=req.samples,
+            max_pairs=req.max_pairs,
+            threshold=req.threshold,
+            temperature=req.temperature,
+            top_p=req.top_p,
+        )
+    finally:
+        backend_obj.unload()
+
+    recorder = MetricsRecorder(req.store.root)
+
+    if not plan.additions:
+        _pending.clear_pending_plan(req.store)
+        recorder.record_preference_mine(
+            PreferenceMineEvent(
+                run_id=req.mined_run_id,
+                judge_name=judge_name,
+                sample_count=req.samples,
+                mined_pairs=0,
+                skipped_prompts=len(plan.skipped),
+                write_mode="empty",
+            )
+        )
+        return PreferenceMineResult(
+            plan=plan,
+            outcome=PreferenceMineOutcome.NO_ADDITIONS,
+            judge_name=judge_name,
+        )
+
+    sections = [addition.section for addition in plan.additions]
+
+    if req.apply:
+        apply_plan = _apply.build_apply_plan(req.parsed, sections)
+        summary = _apply.apply_plan(req.parsed, apply_plan, target=req.target_path)
+        _pending.clear_pending_plan(req.store)
+        recorder.record_preference_mine(
+            PreferenceMineEvent(
+                run_id=req.mined_run_id,
+                judge_name=judge_name,
+                sample_count=req.samples,
+                mined_pairs=len(plan.additions),
+                skipped_prompts=len(plan.skipped),
+                write_mode="applied",
+            )
+        )
+        return PreferenceMineResult(
+            plan=plan,
+            outcome=PreferenceMineOutcome.APPLIED,
+            judge_name=judge_name,
+            apply_plan=apply_plan,
+            apply_summary=summary,
+        )
+
+    pending = _pending.save_pending_plan(
+        req.store,
+        source_path=req.target_path.resolve(),
+        sections=sections,
+    )
+    recorder.record_preference_mine(
+        PreferenceMineEvent(
+            run_id=req.mined_run_id,
+            judge_name=judge_name,
+            sample_count=req.samples,
+            mined_pairs=len(plan.additions),
+            skipped_prompts=len(plan.skipped),
+            write_mode="staged",
+        )
+    )
+    return PreferenceMineResult(
+        plan=plan,
+        outcome=PreferenceMineOutcome.STAGED,
+        judge_name=judge_name,
+        pending_count=len(pending.sections),
+    )
diff --git a/tests/unit/cli/test_preference_cmd.py b/tests/unit/cli/test_preference_cmd.py
index 46f6bdc..19d527f 100644
--- a/tests/unit/cli/test_preference_cmd.py
+++ b/tests/unit/cli/test_preference_cmd.py
@@ -135,7 +135,7 @@ def _patch_text_mining(
     )
     if judge_names is None:
         monkeypatch.setattr(
-            "dlm.preference.build_judge",
+            "dlm.preference.judge.build_judge",
             lambda *args, **kwargs: _FakeJudge(),
         )
         return
@@ -143,7 +143,7 @@ def _patch_text_mining(
     def _build_judge(ref: str, **_kwargs: object) -> _NamedFakeJudge:
         return _NamedFakeJudge(judge_names[ref])
 
-    monkeypatch.setattr("dlm.preference.build_judge", _build_judge)
+    monkeypatch.setattr("dlm.preference.judge.build_judge", _build_judge)
 
 
 class TestPreferenceCmd:

From 457be54fda46befe8265868ebe949528625afe6f Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 19:57:11 -0400
Subject: [PATCH 28/46] Pin synth dispatcher's build_judge import to
 dlm.preference.judge

The dispatcher previously did 'from dlm.preference import build_judge'
(re-export). Tests monkeypatch the canonical 'dlm.preference.judge.build_judge'
path; using the canonical import in the dispatcher keeps function-local
attribute lookup aligned with what tests patch.
---
 src/dlm/synth/dispatch.py        | 2 +-
 tests/unit/cli/test_synth_cmd.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/dlm/synth/dispatch.py b/src/dlm/synth/dispatch.py
index cc278be..1ef2c36 100644
--- a/src/dlm/synth/dispatch.py
+++ b/src/dlm/synth/dispatch.py
@@ -103,7 +103,7 @@ def run_synth_instructions(req: SynthInstructionsRequest) -> SynthInstructionsRe
 
     judge_obj: PreferenceJudge | None = None
     if req.filter_kind == "sway":
-        from dlm.preference import build_judge
+        from dlm.preference.judge import build_judge
 
         judge_obj = build_judge("sway", dlm_path=req.target_path)
 
diff --git a/tests/unit/cli/test_synth_cmd.py b/tests/unit/cli/test_synth_cmd.py
index 7dbfaf9..0c668a4 100644
--- a/tests/unit/cli/test_synth_cmd.py
+++ b/tests/unit/cli/test_synth_cmd.py
@@ -150,7 +150,7 @@ def _build_teacher(raw: str, **_kwargs: object) -> _FakeTeacher:
         return _FakeTeacher(raw, payload)
 
     monkeypatch.setattr("dlm.synth.teachers.build_teacher", _build_teacher)
-    monkeypatch.setattr("dlm.preference.build_judge", lambda *args, **kwargs: _FakeJudge())
+    monkeypatch.setattr("dlm.preference.judge.build_judge", lambda *args, **kwargs: _FakeJudge())
 
 
 def _patch_preference_alias_runtime(monkeypatch: pytest.MonkeyPatch) -> None:
@@ -167,7 +167,7 @@ def _patch_preference_alias_runtime(monkeypatch: pytest.MonkeyPatch) -> None:
         "dlm.inference.backends.build_backend",
         lambda *args, **kwargs: _FakeBackend({"What is DGEMM?": ["bad answer", "good answer"]}),
     )
-    monkeypatch.setattr("dlm.preference.build_judge", lambda *args, **kwargs: _FakeJudge())
+    monkeypatch.setattr("dlm.preference.judge.build_judge", lambda *args, **kwargs: _FakeJudge())
 
 
 class TestSynthCmd:

From 80c9adb47cadc776a0bcf3d1bf71e95bb09a802b Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 20:01:36 -0400
Subject: [PATCH 29/46] Extract init scaffold + manifest write into
 dlm.store.bootstrap:run_init

---
 src/dlm/cli/commands/init.py      | 165 ++++--------------------
 src/dlm/store/bootstrap.py        | 205 ++++++++++++++++++++++++++++++
 tests/unit/cli/test_init_edges.py |   2 +-
 3 files changed, 229 insertions(+), 143 deletions(-)
 create mode 100644 src/dlm/store/bootstrap.py

diff --git a/src/dlm/cli/commands/init.py b/src/dlm/cli/commands/init.py
index 27c3db9..cd6ab20 100644
--- a/src/dlm/cli/commands/init.py
+++ b/src/dlm/cli/commands/init.py
@@ -75,7 +75,8 @@ def init_cmd(
         require_acceptance,
     )
     from dlm.base_models import resolve as resolve_base_model
-    from dlm.io.ulid import mint_ulid
+    from dlm.store.bootstrap import InitRequest, ScaffoldKind, run_init
+    from dlm.templates import TemplateError
 
     console = Console(stderr=True)
 
@@ -170,20 +171,6 @@ def init_cmd(
             skip_export_probes=skip_export_probes,
         )
 
-    # NOW apply the template — license has already been accepted
-    # (either by --i-accept-license or interactive prompt), so pass
-    # the acceptance through. apply_template enforces the license
-    # contract at its boundary.
-    applied_result = None
-    if template is not None:
-        from dlm.templates import TemplateError, apply_template
-
-        try:
-            applied_result = apply_template(template, path, force=force, accept_license=True)
-        except TemplateError as exc:
-            console.print(f"[red]init:[/red] {exc}")
-            raise typer.Exit(code=1) from exc
-
     # Record the license acceptance (or None for non-gated specs). We
     # know `resolve_base_model` already validated the flag/prompt chain
     # — `accept_license=True` means either the user passed the flag or
@@ -217,37 +204,30 @@ def init_cmd(
         )
         raise typer.Exit(code=2)
 
-    if applied_result is not None:
-        dlm_id = applied_result.dlm_id
+    if multimodal:
+        scaffold_kind = ScaffoldKind.VISION
+    elif audio:
+        scaffold_kind = ScaffoldKind.AUDIO
     else:
-        dlm_id = mint_ulid()
-        if multimodal:
-            _write_init_scaffold_multimodal(path, spec.key, dlm_id)
-        elif audio:
-            _write_init_scaffold_audio(path, spec.key, dlm_id)
-        else:
-            _write_init_scaffold(path, spec.key, dlm_id)
+        scaffold_kind = ScaffoldKind.TEXT
+
+    request = InitRequest(
+        path=path,
+        spec=spec,
+        acceptance=acceptance,
+        force=force,
+        template_name=template,
+        scaffold_kind=scaffold_kind,
+    )
 
-    # Create the store + write the initial manifest so `dlm show` sees
-    # the license record and `dlm train` has a prior manifest to diff
-    # against.
-    from dlm.store.manifest import Manifest, save_manifest
-    from dlm.store.paths import for_dlm
+    try:
+        result = run_init(request)
+    except TemplateError as exc:
+        console.print(f"[red]init:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
 
-    store = for_dlm(dlm_id)
-    store.ensure_layout()
-    save_manifest(
-        store.manifest,
-        Manifest(
-            dlm_id=dlm_id,
-            base_model=spec.key,
-            base_model_revision=spec.revision,
-            source_path=path.resolve(),
-            license_acceptance=acceptance,
-        ),
-    )
-    if applied_result is not None:
-        meta = applied_result.template.meta
+    if result.applied_template is not None:
+        meta = result.applied_template.template.meta
         console.print(
             f"[green]init:[/green] wrote {path} from template "
             f"[bold]{meta.name}[/bold] ({meta.title}) — base {spec.key}."
@@ -282,102 +262,3 @@ def _prompt_accept_license(console: object, base: str, license_url: str | None)
     except EOFError:
         return False
     return answer in ("y", "yes")
-
-
-def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write a minimal-but-valid .dlm file at `path`.
-
-    Body has one PROSE paragraph + a commented instruction section so
-    users see both section shapes on first open.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 1
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::instruction::
-
-### Q
-Your example question.
-
-### A
-Your example answer.
-"""
-    path.write_text(scaffold, encoding="utf-8")
-
-
-def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write a VL-shaped .dlm file at `path`.
-
-    Body shows the `::image::` attribute fence + a caption so users
-    see the v10 grammar on first open. The placeholder path
-    `figures/your-image.png` is deliberately non-existent — first
-    `dlm train` will refuse with a clear file-missing error, prompting
-    the user to drop a real image in. This is friendlier than
-    committing an inert sample that users might not notice isn't theirs.
-
-    `dlm_version: 10` because IMAGE sections require schema v10.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 10
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::image path="figures/your-image.png" alt="short description"::
-Caption text describing the image. Training rows bundle the image
-with this caption as `<image>\\n<caption>`.
-
-::instruction::
-
-### Q
-What is in this image?
-
-### A
-Describe what the image shows.
-"""
-    path.write_text(scaffold, encoding="utf-8")
-
-
-def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
-    """Write an audio-shaped .dlm file at `path`.
-
-    Body shows the `::audio::` attribute fence with the sibling-
-    transcript-friendly `transcript="..."` form so users see the v11
-    grammar on first open. The placeholder path `clips/your-clip.wav`
-    is deliberately non-existent — first `dlm train` refuses with a
-    clear file-missing error rather than silently training on an inert
-    sample.
-
-    `dlm_version: 11` because AUDIO sections require schema v11.
-    """
-    scaffold = f"""---
-dlm_id: {dlm_id}
-dlm_version: 11
-base_model: {base_model_key}
----
-
-# Your document title
-
-Write prose here. It will train via continued pretraining (CPT) loss.
-
-::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::
-
-::instruction::
-
-### Q
-What was said in this recording?
-
-### A
-Describe what you hear in the audio.
-"""
-    path.write_text(scaffold, encoding="utf-8")
diff --git a/src/dlm/store/bootstrap.py b/src/dlm/store/bootstrap.py
new file mode 100644
index 0000000..032e393
--- /dev/null
+++ b/src/dlm/store/bootstrap.py
@@ -0,0 +1,205 @@
+"""Domain dispatcher for `dlm init`.
+
+Lifts the scaffold-write → store-create → manifest-write pipeline out of
+the CLI. Callers (CLI, LSP "Initialize from Template" command, future
+automation) build an `InitRequest`, call `run_init`, and render the
+typed `InitResult`. The dispatcher does no console I/O; template
+errors propagate as `TemplateError` so the caller can map them to its
+own exit code or banner.
+
+The CLI keeps the user-interactive concerns: flag-mutex validation,
+multimodal/audio default-base swap, `--template` peek for license-prompt
+target, the GatedModelError → interactive-prompt → retry loop, and the
+modality-consistency check. This dispatcher takes an already-resolved
+`BaseModelSpec` plus an already-built `LicenseAcceptance | None`.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import StrEnum
+from typing import TYPE_CHECKING
+
+from dlm.io.ulid import mint_ulid
+from dlm.store.manifest import Manifest, save_manifest
+from dlm.store.paths import StorePath, for_dlm
+from dlm.templates import init as _templates_init
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from dlm.base_models.license import LicenseAcceptance
+    from dlm.base_models.schema import BaseModelSpec
+    from dlm.templates.init import ApplyResult
+
+
+class ScaffoldKind(StrEnum):
+    """Body shape to write when no `--template` is given."""
+
+    TEXT = "text"
+    VISION = "vision"
+    AUDIO = "audio"
+
+
+@dataclass(frozen=True)
+class InitRequest:
+    """Inputs to `run_init`.
+
+    `template_name` and `scaffold_kind` are mutually informative:
+    when `template_name is not None`, the template's body wins and
+    `scaffold_kind` is ignored. The CLI rejects `--template` combined
+    with `--multimodal` / `--audio` before constructing the request.
+    """
+
+    path: Path
+    spec: BaseModelSpec
+    acceptance: LicenseAcceptance | None
+    force: bool
+    template_name: str | None
+    scaffold_kind: ScaffoldKind
+
+
+@dataclass(frozen=True)
+class InitResult:
+    """Outcome of `run_init`. `applied_template` is set iff the request
+    carried a `template_name` (the dispatcher applied a gallery template
+    rather than writing a scaffold)."""
+
+    dlm_id: str
+    store: StorePath
+    applied_template: ApplyResult | None
+
+
+def run_init(req: InitRequest) -> InitResult:
+    """Apply a template (or write a scaffold), then provision the store."""
+    if req.template_name is not None:
+        applied = _templates_init.apply_template(
+            req.template_name,
+            req.path,
+            force=req.force,
+            accept_license=True,
+        )
+        dlm_id = applied.dlm_id
+    else:
+        applied = None
+        dlm_id = mint_ulid()
+        if req.scaffold_kind is ScaffoldKind.VISION:
+            _write_init_scaffold_multimodal(req.path, req.spec.key, dlm_id)
+        elif req.scaffold_kind is ScaffoldKind.AUDIO:
+            _write_init_scaffold_audio(req.path, req.spec.key, dlm_id)
+        else:
+            _write_init_scaffold(req.path, req.spec.key, dlm_id)
+
+    store = for_dlm(dlm_id)
+    store.ensure_layout()
+    save_manifest(
+        store.manifest,
+        Manifest(
+            dlm_id=dlm_id,
+            base_model=req.spec.key,
+            base_model_revision=req.spec.revision,
+            source_path=req.path.resolve(),
+            license_acceptance=req.acceptance,
+        ),
+    )
+
+    return InitResult(dlm_id=dlm_id, store=store, applied_template=applied)
+
+
+def _write_init_scaffold(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write a minimal-but-valid text-only `.dlm` at `path`.
+
+    Body has one PROSE paragraph + a commented instruction section so
+    users see both section shapes on first open.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 1
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::instruction::
+
+### Q
+Your example question.
+
+### A
+Your example answer.
+"""
+    path.write_text(scaffold, encoding="utf-8")
+
+
+def _write_init_scaffold_multimodal(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write a vision-language `.dlm` at `path`.
+
+    Body shows the `::image::` attribute fence + a caption so users see
+    the v10 grammar on first open. The placeholder path
+    `figures/your-image.png` is deliberately non-existent — first
+    `dlm train` refuses with a clear file-missing error, prompting the
+    user to drop a real image in. Friendlier than committing an inert
+    sample users might not notice isn't theirs.
+
+    `dlm_version: 10` because IMAGE sections require schema v10.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 10
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::image path="figures/your-image.png" alt="short description"::
+Caption text describing the image. Training rows bundle the image
+with this caption as `<image>\\n<caption>`.
+
+::instruction::
+
+### Q
+What is in this image?
+
+### A
+Describe what the image shows.
+"""
+    path.write_text(scaffold, encoding="utf-8")
+
+
+def _write_init_scaffold_audio(path: Path, base_model_key: str, dlm_id: str) -> None:
+    """Write an audio-language `.dlm` at `path`.
+
+    Body shows the `::audio::` attribute fence with the sibling-
+    transcript-friendly `transcript="..."` form so users see the v11
+    grammar on first open. The placeholder path `clips/your-clip.wav`
+    is deliberately non-existent — first `dlm train` refuses with a
+    clear file-missing error rather than silently training on an inert
+    sample.
+
+    `dlm_version: 11` because AUDIO sections require schema v11.
+    """
+    scaffold = f"""---
+dlm_id: {dlm_id}
+dlm_version: 11
+base_model: {base_model_key}
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::audio path="clips/your-clip.wav" transcript="Transcript of the audio clip."::
+
+::instruction::
+
+### Q
+What was said in this recording?
+
+### A
+Describe what you hear in the audio.
+"""
+    path.write_text(scaffold, encoding="utf-8")
diff --git a/tests/unit/cli/test_init_edges.py b/tests/unit/cli/test_init_edges.py
index c80a673..ddb4357 100644
--- a/tests/unit/cli/test_init_edges.py
+++ b/tests/unit/cli/test_init_edges.py
@@ -148,7 +148,7 @@ def _fake_apply_template(
         ) -> object:
             raise TemplateError("template exploded")
 
-        monkeypatch.setattr(templates, "apply_template", _fake_apply_template)
+        monkeypatch.setattr("dlm.templates.init.apply_template", _fake_apply_template)
 
         result = runner.invoke(
             app,

From f37fac5ca31fd29e40ceda29304825b068d63ba2 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 20:05:17 -0400
Subject: [PATCH 30/46] Extract show summarizers into
 dlm.store.show:gather_store_view

---
 src/dlm/cli/commands/show.py | 333 +++++------------------------------
 src/dlm/store/show.py        | 298 +++++++++++++++++++++++++++++++
 2 files changed, 343 insertions(+), 288 deletions(-)
 create mode 100644 src/dlm/store/show.py

diff --git a/src/dlm/cli/commands/show.py b/src/dlm/cli/commands/show.py
index 639c9ce..a4cf5ac 100644
--- a/src/dlm/cli/commands/show.py
+++ b/src/dlm/cli/commands/show.py
@@ -23,8 +23,8 @@ def show_cmd(
     from dlm.doc.errors import DlmParseError
     from dlm.doc.parser import parse_file
     from dlm.store.errors import ManifestCorruptError
-    from dlm.store.inspect import inspect_store
     from dlm.store.paths import for_dlm
+    from dlm.store.show import StoreViewRequest, gather_store_view
 
     console = Console(stderr=True)
     out_console = Console()
@@ -36,86 +36,67 @@ def show_cmd(
         raise typer.Exit(code=1) from exc
 
     store = for_dlm(parsed.frontmatter.dlm_id)
-    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
-        parsed, path.resolve().parent
-    )
-    # The per-document cache config comes from frontmatter, not on-disk
-    # state — report it on both the pre-train and initialized-store paths
-    # so authors can sanity-check the knobs before `dlm train` runs.
-    cache_cfg = parsed.frontmatter.training.cache
-    training_cache_config: dict[str, object] = {
-        "enabled": cache_cfg.enabled,
-        "max_bytes": cache_cfg.max_bytes,
-        "prune_older_than_days": cache_cfg.prune_older_than_days,
-    }
 
-    # Store may not exist yet (no `dlm train` run). Treat that as an
-    # informational state rather than an error — useful after `dlm init`.
-    if not store.manifest.exists():
+    try:
+        view = gather_store_view(StoreViewRequest(parsed=parsed, target_path=path, store=store))
+    except ManifestCorruptError as exc:
+        console.print(f"[red]show:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    if view.inspection is None:
         if json_out:
             payload: dict[str, object] = {
-                "dlm_id": parsed.frontmatter.dlm_id,
-                "base_model": parsed.frontmatter.base_model,
+                "dlm_id": view.parsed_dlm_id,
+                "base_model": view.parsed_base_model,
                 "store_initialized": False,
-                "source_path": str(path.resolve()),
-                "training_cache_config": training_cache_config,
+                "source_path": str(view.source_path),
+                "training_cache_config": view.training_cache_config,
             }
-            if training_sources is not None:
-                payload["training_sources"] = training_sources
-            if discovered_configs:
-                payload["discovered_training_configs"] = discovered_configs
+            if view.training_sources is not None:
+                payload["training_sources"] = view.training_sources
+            if view.discovered_configs:
+                payload["discovered_training_configs"] = view.discovered_configs
             sys.stdout.write(_json.dumps(payload, indent=2) + "\n")
         else:
             out_console.print(f"[bold]{path}[/bold]")
-            out_console.print(f"  dlm_id:       {parsed.frontmatter.dlm_id}")
-            out_console.print(f"  base_model:   {parsed.frontmatter.base_model}")
+            out_console.print(f"  dlm_id:       {view.parsed_dlm_id}")
+            out_console.print(f"  base_model:   {view.parsed_base_model}")
             out_console.print("  store:        [dim]not yet initialized (run `dlm train`)[/dim]")
-            if training_sources:
-                _render_training_sources_text(out_console, training_sources)
+            if view.training_sources:
+                _render_training_sources_text(out_console, view.training_sources)
         return
 
-    try:
-        inspection = inspect_store(store, source_path=path.resolve())
-    except ManifestCorruptError as exc:
-        console.print(f"[red]show:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
-    training_cache = _summarize_training_cache(store.tokenized_cache_dir, store.root)
-    gate = _summarize_gate(store)
-    preference_mining = _summarize_preference_mining(store.root)
-    base_security = _summarize_base_security(parsed.frontmatter.base_model)
-
     if json_out:
-        payload_full = _inspection_to_dict(inspection)
-        if training_sources is not None:
-            payload_full["training_sources"] = training_sources
-        if discovered_configs:
-            payload_full["discovered_training_configs"] = discovered_configs
-        if training_cache is not None:
-            payload_full["training_cache"] = training_cache
-        payload_full["training_cache_config"] = training_cache_config
-        if gate is not None:
-            payload_full["gate"] = gate
-        if preference_mining is not None:
-            payload_full["preference_mining"] = preference_mining
-            payload_full["preference_mining_runs"] = preference_mining["run_count"]
-            payload_full["total_auto_mined_pairs"] = preference_mining["total_mined_pairs"]
-        if base_security is not None:
-            payload_full["base_security"] = base_security
+        payload_full = _inspection_to_dict(view.inspection)
+        if view.training_sources is not None:
+            payload_full["training_sources"] = view.training_sources
+        if view.discovered_configs:
+            payload_full["discovered_training_configs"] = view.discovered_configs
+        if view.training_cache is not None:
+            payload_full["training_cache"] = view.training_cache
+        payload_full["training_cache_config"] = view.training_cache_config
+        if view.gate is not None:
+            payload_full["gate"] = view.gate
+        if view.preference_mining is not None:
+            payload_full["preference_mining"] = view.preference_mining
+            payload_full["preference_mining_runs"] = view.preference_mining["run_count"]
+            payload_full["total_auto_mined_pairs"] = view.preference_mining["total_mined_pairs"]
+        if view.base_security is not None:
+            payload_full["base_security"] = view.base_security
         # Write JSON to raw stdout — Rich's Console wraps lines at the
         # terminal width and would corrupt the JSON.
         sys.stdout.write(_json.dumps(payload_full, indent=2, default=str) + "\n")
         return
 
-    _render_inspection_text(out_console, path, inspection)
-    if training_sources:
-        _render_training_sources_text(out_console, training_sources)
-    if training_cache is not None and training_cache.get("entry_count", 0):
-        _render_training_cache_text(out_console, training_cache)
-    if gate is not None:
-        _render_gate_text(out_console, gate)
-    if base_security is not None and base_security.get("trust_remote_code"):
-        _render_base_security_text(out_console, base_security)
+    _render_inspection_text(out_console, path, view.inspection)
+    if view.training_sources:
+        _render_training_sources_text(out_console, view.training_sources)
+    if view.training_cache is not None and view.training_cache.get("entry_count", 0):
+        _render_training_cache_text(out_console, view.training_cache)
+    if view.gate is not None:
+        _render_gate_text(out_console, view.gate)
+    if view.base_security is not None and view.base_security.get("trust_remote_code"):
+        _render_base_security_text(out_console, view.base_security)
 
 
 def _inspection_to_dict(inspection: object) -> dict[str, object]:
@@ -195,230 +176,6 @@ def _render_inspection_text(console: object, path: Path, inspection: object) ->
         console.print("  [yellow]orphaned:[/yellow]     source .dlm is missing or mismatched")
 
 
-def _summarize_training_sources(parsed: object, base_path: Path) -> list[dict[str, object]] | None:
-    """Best-effort resolution of `training.sources` for `dlm show`.
-
-    Returns None when the frontmatter declares no directives; returns
-    a list of per-source dicts otherwise. Failures to expand (missing
-    paths, policy escapes) fall back to declared-only records so the
-    show output stays useful for debugging a misconfigured directive.
-    """
-    records, _ = _summarize_training_sources_and_discovered(parsed, base_path)
-    return records
-
-
-def _summarize_training_sources_and_discovered(
-    parsed: object, base_path: Path
-) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
-    """Like `_summarize_training_sources` but also returns the per-anchor
-    `.dlm/training.yaml` + `.dlm/ignore` discovery records.
-
-    Returns `(training_sources, discovered_configs)`. `discovered_configs`
-    is always a list (empty when nothing was found or the expansion
-    failed); `training_sources` matches the single-value helper's
-    contract.
-    """
-    from dlm.directives import DirectiveError, expand_sources
-    from dlm.doc.parser import ParsedDlm
-
-    assert isinstance(parsed, ParsedDlm)
-    directives = parsed.frontmatter.training.sources
-    if not directives:
-        return None, []
-
-    declared: list[dict[str, object]] = [
-        {
-            "path": d.path,
-            "include": list(d.include),
-            "exclude": list(d.exclude),
-            "max_files": d.max_files,
-            "max_bytes_per_file": d.max_bytes_per_file,
-        }
-        for d in directives
-    ]
-
-    try:
-        result = expand_sources(parsed, base_path=base_path)
-    except (DirectiveError, OSError):
-        return declared, []
-
-    records: list[dict[str, object]] = []
-    for decl, prov in zip(declared, result.provenance, strict=False):
-        records.append(
-            {
-                **decl,
-                "file_count": prov.file_count,
-                "total_bytes": prov.total_bytes,
-                "skipped_binary": prov.skipped_binary,
-                "skipped_encoding": prov.skipped_encoding,
-                "skipped_over_size": prov.skipped_over_size,
-            }
-        )
-    # If the expander returned fewer entries than declared (shouldn't
-    # happen on success but defensive), pad with declared-only.
-    if len(records) < len(declared):
-        records.extend(declared[len(records) :])
-
-    discovered_records: list[dict[str, object]] = []
-    for dc in result.discovered:
-        discovered_records.append(
-            {
-                "anchor": str(dc.anchor),
-                "has_training_yaml": dc.config is not None,
-                "has_ignore": bool(dc.ignore_rules),
-                "include": list(dc.config.include) if dc.config else [],
-                "exclude": list(dc.config.exclude) if dc.config else [],
-                "exclude_defaults": (dc.config.exclude_defaults if dc.config else True),
-                "metadata": dict(dc.config.metadata) if dc.config else {},
-                "ignore_rules": len(dc.ignore_rules),
-            }
-        )
-    return records, discovered_records
-
-
-def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
-    """Return a JSON-friendly snapshot of the tokenized-section cache.
-
-    None when the cache dir doesn't exist (store never trained with
-    the cache, or pre-Sprint-31 layout). Cheap — reads the manifest
-    only, not the entry files.
-    """
-    if not cache_dir.is_dir():
-        return None
-    from dlm.directives.cache import TokenizedCache
-    from dlm.metrics import queries as _queries
-
-    cache = TokenizedCache.open(cache_dir)
-    last = _queries.latest_tokenization(store_root)
-    return {
-        "path": str(cache_dir),
-        "entry_count": cache.entry_count,
-        "bytes": cache.total_bytes,
-        "last_run_hit_rate": last.hit_rate if last else None,
-        "last_run_id": last.run_id if last else None,
-    }
-
-
-def _summarize_gate(store: object) -> dict[str, object] | None:
-    """Return a JSON-friendly snapshot of the learned adapter gate.
-
-    None when the store has no gate config (pre-Sprint-34 runs, or
-    `training.gate.enabled` was false). Reads two sources: the
-    on-disk `gate_config.json` for mode + adapter order, and the
-    metrics `gate_events` table for per-adapter mean weight from the
-    most recent run that recorded a gate.
-    """
-    import json as _json
-
-    from dlm.store.paths import StorePath
-    from dlm.train.gate.paths import gate_config_path
-
-    assert isinstance(store, StorePath)
-    cfg_path = gate_config_path(store)
-
-    from dlm.metrics import queries as _queries
-    from dlm.train.gate.module import GateMetadata
-
-    events = _queries.latest_gate_events(store.root)
-    # Divergence path: training raised before writing a config, but we
-    # still emit one GateEvent per adapter with mode="diverged" so
-    # operators can see the failure. Surface it even when the config
-    # file is absent.
-    if not cfg_path.exists():
-        if events and events[0].mode == "diverged":
-            return {
-                "mode": "diverged",
-                "adapter_names": [e.adapter_name for e in events],
-                "input_dim": None,
-                "hidden_proj_dim": None,
-                "last_run_id": events[0].run_id,
-                "per_adapter": [
-                    {
-                        "adapter_name": e.adapter_name,
-                        "mean_weight": e.mean_weight,
-                        "sample_count": e.sample_count,
-                        "mode": e.mode,
-                    }
-                    for e in events
-                ],
-            }
-        return None
-
-    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
-    meta = GateMetadata.from_json(raw)
-    per_adapter: list[dict[str, object]] = []
-    run_id: int | None = None
-    if events:
-        run_id = events[0].run_id
-        per_adapter = [
-            {
-                "adapter_name": e.adapter_name,
-                "mean_weight": e.mean_weight,
-                "sample_count": e.sample_count,
-                "mode": e.mode,
-            }
-            for e in events
-        ]
-    else:
-        # No recorded events yet; fall back to the config so `dlm show`
-        # still reports that a gate exists and in which mode.
-        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
-    return {
-        "mode": meta.mode,
-        "adapter_names": list(meta.adapter_names),
-        "input_dim": meta.input_dim,
-        "hidden_proj_dim": meta.hidden_proj_dim,
-        "last_run_id": run_id,
-        "per_adapter": per_adapter,
-    }
-
-
-def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
-    """Return the latest preference-mine summary for `dlm show --json`."""
-    from dlm.metrics import queries as _queries
-
-    totals = _queries.preference_mining_totals(store_root)
-    if totals is None:
-        return None
-    last = _queries.latest_preference_mining(store_root)
-    assert last is not None
-    rows = _queries.preference_mining_for_run(store_root, last.run_id)
-    return {
-        "run_count": totals.run_count,
-        "event_count": totals.event_count,
-        "total_mined_pairs": totals.total_mined_pairs,
-        "total_skipped_prompts": totals.total_skipped_prompts,
-        "last_run_id": last.run_id,
-        "last_run_event_count": len(rows),
-        "last_event": _queries.preference_mining_to_dict([last])[0],
-    }
-
-
-def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
-    """Surface security-sensitive base-model flags for `dlm show`.
-
-    Today that's just `trust_remote_code` — a flag that causes the HF
-    loader to execute Python from the model repo. We resolve the spec
-    out of the in-process registry (no network: the resolver reads a
-    frozen Python dict) so users can see which bases opt in without
-    grepping source. Returns None when the key doesn't resolve (an
-    `hf:...` escape hatch that isn't in the registry); the caller
-    silently skips in that case.
-    """
-    from dlm.base_models import resolve as resolve_base_model
-    from dlm.base_models.errors import BaseModelError
-
-    try:
-        spec = resolve_base_model(base_model_key, accept_license=True)
-    except BaseModelError:
-        return None
-    return {
-        "base_model": spec.key,
-        "architecture": spec.architecture,
-        "trust_remote_code": bool(spec.trust_remote_code),
-    }
-
-
 def _render_base_security_text(console: object, snap: dict[str, object]) -> None:
     from rich.console import Console
 
diff --git a/src/dlm/store/show.py b/src/dlm/store/show.py
new file mode 100644
index 0000000..dcada29
--- /dev/null
+++ b/src/dlm/store/show.py
@@ -0,0 +1,298 @@
+"""Domain dispatcher for `dlm show`.
+
+Aggregates the .dlm + store snapshot the CLI renders to text or JSON.
+Callers (CLI, LSP doc-overview panel, future automation) build a
+`StoreViewRequest`, call `gather_store_view`, and render the typed
+`StoreView` themselves. The dispatcher does no console I/O;
+`ManifestCorruptError` propagates so the caller can map it to its own
+exit code or banner.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from dlm.directives import expand_sources as _expand_sources
+from dlm.directives.errors import DirectiveError
+from dlm.metrics import queries as _queries
+from dlm.store.inspect import StoreInspection, inspect_store
+
+if TYPE_CHECKING:
+    from dlm.doc.parser import ParsedDlm
+    from dlm.store.paths import StorePath
+
+
+@dataclass(frozen=True)
+class StoreViewRequest:
+    """Inputs to `gather_store_view`."""
+
+    parsed: ParsedDlm
+    target_path: Path
+    store: StorePath
+
+
+@dataclass(frozen=True)
+class StoreView:
+    """Aggregated snapshot a `dlm show` caller renders to text or JSON.
+
+    `inspection` is `None` when the store has no manifest yet
+    (post-`dlm init`, pre-`dlm train`); the caller surfaces the
+    "store: not yet initialized" path in that case. The summary dicts
+    are JSON-safe and form the v1 contract for `dlm show --json`; any
+    reshape is a version bump (recorded in `tests/golden/cli-json/`).
+    """
+
+    parsed_dlm_id: str
+    parsed_base_model: str
+    source_path: Path
+    training_cache_config: dict[str, object]
+    training_sources: list[dict[str, object]] | None
+    discovered_configs: list[dict[str, object]]
+    inspection: StoreInspection | None
+    training_cache: dict[str, object] | None
+    gate: dict[str, object] | None
+    preference_mining: dict[str, object] | None
+    base_security: dict[str, object] | None
+
+
+def gather_store_view(req: StoreViewRequest) -> StoreView:
+    """Walk the .dlm + its store and produce a `StoreView`.
+
+    Raises `ManifestCorruptError` if the manifest is unparseable.
+    """
+    parsed = req.parsed
+    base_path = req.target_path.resolve().parent
+    training_sources, discovered_configs = _summarize_training_sources_and_discovered(
+        parsed, base_path
+    )
+    cache_cfg = parsed.frontmatter.training.cache
+    training_cache_config: dict[str, object] = {
+        "enabled": cache_cfg.enabled,
+        "max_bytes": cache_cfg.max_bytes,
+        "prune_older_than_days": cache_cfg.prune_older_than_days,
+    }
+
+    if not req.store.manifest.exists():
+        return StoreView(
+            parsed_dlm_id=parsed.frontmatter.dlm_id,
+            parsed_base_model=parsed.frontmatter.base_model,
+            source_path=req.target_path.resolve(),
+            training_cache_config=training_cache_config,
+            training_sources=training_sources,
+            discovered_configs=discovered_configs,
+            inspection=None,
+            training_cache=None,
+            gate=None,
+            preference_mining=None,
+            base_security=None,
+        )
+
+    inspection = inspect_store(req.store, source_path=req.target_path.resolve())
+    training_cache = _summarize_training_cache(req.store.tokenized_cache_dir, req.store.root)
+    gate = _summarize_gate(req.store)
+    preference_mining = _summarize_preference_mining(req.store.root)
+    base_security = _summarize_base_security(parsed.frontmatter.base_model)
+
+    return StoreView(
+        parsed_dlm_id=parsed.frontmatter.dlm_id,
+        parsed_base_model=parsed.frontmatter.base_model,
+        source_path=req.target_path.resolve(),
+        training_cache_config=training_cache_config,
+        training_sources=training_sources,
+        discovered_configs=discovered_configs,
+        inspection=inspection,
+        training_cache=training_cache,
+        gate=gate,
+        preference_mining=preference_mining,
+        base_security=base_security,
+    )
+
+
+def _summarize_training_sources_and_discovered(
+    parsed: ParsedDlm, base_path: Path
+) -> tuple[list[dict[str, object]] | None, list[dict[str, object]]]:
+    """Best-effort `training.sources` expansion + `.dlm/training.yaml` discovery.
+
+    Returns `(training_sources, discovered_configs)`. `training_sources`
+    is None when the frontmatter declares no directives; otherwise
+    declared records are returned even when expansion fails (so the
+    show output stays useful for debugging a misconfigured directive).
+    `discovered_configs` is always a list (empty when nothing was
+    found or the expansion failed).
+    """
+    directives = parsed.frontmatter.training.sources
+    if not directives:
+        return None, []
+
+    declared: list[dict[str, object]] = [
+        {
+            "path": d.path,
+            "include": list(d.include),
+            "exclude": list(d.exclude),
+            "max_files": d.max_files,
+            "max_bytes_per_file": d.max_bytes_per_file,
+        }
+        for d in directives
+    ]
+
+    try:
+        result = _expand_sources(parsed, base_path=base_path)
+    except (DirectiveError, OSError):
+        return declared, []
+
+    records: list[dict[str, object]] = []
+    for decl, prov in zip(declared, result.provenance, strict=False):
+        records.append(
+            {
+                **decl,
+                "file_count": prov.file_count,
+                "total_bytes": prov.total_bytes,
+                "skipped_binary": prov.skipped_binary,
+                "skipped_encoding": prov.skipped_encoding,
+                "skipped_over_size": prov.skipped_over_size,
+            }
+        )
+    if len(records) < len(declared):
+        records.extend(declared[len(records) :])
+
+    discovered_records: list[dict[str, object]] = []
+    for dc in result.discovered:
+        discovered_records.append(
+            {
+                "anchor": str(dc.anchor),
+                "has_training_yaml": dc.config is not None,
+                "has_ignore": bool(dc.ignore_rules),
+                "include": list(dc.config.include) if dc.config else [],
+                "exclude": list(dc.config.exclude) if dc.config else [],
+                "exclude_defaults": (dc.config.exclude_defaults if dc.config else True),
+                "metadata": dict(dc.config.metadata) if dc.config else {},
+                "ignore_rules": len(dc.ignore_rules),
+            }
+        )
+    return records, discovered_records
+
+
+def _summarize_training_cache(cache_dir: Path, store_root: Path) -> dict[str, object] | None:
+    """Return a JSON-friendly snapshot of the tokenized-section cache.
+
+    None when the cache dir doesn't exist (store never trained with the
+    cache, or pre-Sprint-31 layout). Cheap — reads the manifest only,
+    not the entry files.
+    """
+    if not cache_dir.is_dir():
+        return None
+    from dlm.directives.cache import TokenizedCache
+
+    cache = TokenizedCache.open(cache_dir)
+    last = _queries.latest_tokenization(store_root)
+    return {
+        "path": str(cache_dir),
+        "entry_count": cache.entry_count,
+        "bytes": cache.total_bytes,
+        "last_run_hit_rate": last.hit_rate if last else None,
+        "last_run_id": last.run_id if last else None,
+    }
+
+
+def _summarize_gate(store: StorePath) -> dict[str, object] | None:
+    """Return a JSON-friendly snapshot of the learned adapter gate.
+
+    None when the store has no gate config and no diverged-gate events.
+    Reads `gate_config.json` for mode + adapter order, and the
+    `gate_events` table for per-adapter mean weight from the most
+    recent run that recorded a gate.
+    """
+    import json as _json
+
+    from dlm.train.gate.module import GateMetadata
+    from dlm.train.gate.paths import gate_config_path
+
+    cfg_path = gate_config_path(store)
+
+    events = _queries.latest_gate_events(store.root)
+    if not cfg_path.exists():
+        if events and events[0].mode == "diverged":
+            return {
+                "mode": "diverged",
+                "adapter_names": [e.adapter_name for e in events],
+                "input_dim": None,
+                "hidden_proj_dim": None,
+                "last_run_id": events[0].run_id,
+                "per_adapter": [
+                    {
+                        "adapter_name": e.adapter_name,
+                        "mean_weight": e.mean_weight,
+                        "sample_count": e.sample_count,
+                        "mode": e.mode,
+                    }
+                    for e in events
+                ],
+            }
+        return None
+
+    raw = _json.loads(cfg_path.read_text(encoding="utf-8"))
+    meta = GateMetadata.from_json(raw)
+    per_adapter: list[dict[str, object]] = []
+    run_id: int | None = None
+    if events:
+        run_id = events[0].run_id
+        per_adapter = [
+            {
+                "adapter_name": e.adapter_name,
+                "mean_weight": e.mean_weight,
+                "sample_count": e.sample_count,
+                "mode": e.mode,
+            }
+            for e in events
+        ]
+    else:
+        per_adapter = [{"adapter_name": name} for name in meta.adapter_names]
+    return {
+        "mode": meta.mode,
+        "adapter_names": list(meta.adapter_names),
+        "input_dim": meta.input_dim,
+        "hidden_proj_dim": meta.hidden_proj_dim,
+        "last_run_id": run_id,
+        "per_adapter": per_adapter,
+    }
+
+
+def _summarize_preference_mining(store_root: Path) -> dict[str, object] | None:
+    """Latest preference-mine summary for the JSON contract."""
+    totals = _queries.preference_mining_totals(store_root)
+    if totals is None:
+        return None
+    last = _queries.latest_preference_mining(store_root)
+    assert last is not None
+    rows = _queries.preference_mining_for_run(store_root, last.run_id)
+    return {
+        "run_count": totals.run_count,
+        "event_count": totals.event_count,
+        "total_mined_pairs": totals.total_mined_pairs,
+        "total_skipped_prompts": totals.total_skipped_prompts,
+        "last_run_id": last.run_id,
+        "last_run_event_count": len(rows),
+        "last_event": _queries.preference_mining_to_dict([last])[0],
+    }
+
+
+def _summarize_base_security(base_model_key: str) -> dict[str, object] | None:
+    """Surface `trust_remote_code` flag from the base-model registry.
+
+    Returns None when the key doesn't resolve (an `hf:...` escape hatch
+    that isn't in the registry); the caller silently skips in that case.
+    """
+    from dlm.base_models import resolve as resolve_base_model
+    from dlm.base_models.errors import BaseModelError
+
+    try:
+        spec = resolve_base_model(base_model_key, accept_license=True)
+    except BaseModelError:
+        return None
+    return {
+        "base_model": spec.key,
+        "architecture": spec.architecture,
+        "trust_remote_code": bool(spec.trust_remote_code),
+    }

From ce2cab18c999746b59d053d165f463f79ec068a8 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 20:08:09 -0400
Subject: [PATCH 31/46] Extract text-path prompt body into
 dlm.inference.dispatch:run_prompt

Lifts build_backend + load + generate out of the CLI for text-only
bases. VL and audio paths still live in prompt.py CLI helpers; a
follow-up phase splits them into modality-aware dispatchers.
---
 src/dlm/cli/commands/prompt.py | 31 ++++++++-------
 src/dlm/inference/dispatch.py  | 72 ++++++++++++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 15 deletions(-)
 create mode 100644 src/dlm/inference/dispatch.py

diff --git a/src/dlm/cli/commands/prompt.py b/src/dlm/cli/commands/prompt.py
index 1f7501c..7dbbec0 100644
--- a/src/dlm/cli/commands/prompt.py
+++ b/src/dlm/cli/commands/prompt.py
@@ -96,11 +96,8 @@ def prompt_cmd(
     from dlm.doc.parser import parse_file
     from dlm.hardware import doctor
     from dlm.inference import AdapterNotFoundError
-    from dlm.inference.backends import (
-        UnsupportedBackendError,
-        build_backend,
-        select_backend,
-    )
+    from dlm.inference.backends import UnsupportedBackendError, select_backend
+    from dlm.inference.dispatch import PromptRequest, run_prompt
     from dlm.store.paths import for_dlm
 
     console = Console(stderr=True)
@@ -234,30 +231,34 @@ def prompt_cmd(
     except UnsupportedBackendError as exc:
         console.print(f"[red]prompt:[/red] {exc}")
         raise typer.Exit(code=2) from exc
-    backend_obj = build_backend(backend_name, caps)
 
     if verbose:
         console.print(f"[dim]backend:[/dim] {backend_name}")
 
-    try:
-        backend_obj.load(spec, store, adapter_name=adapter)
-    except AdapterNotFoundError as exc:
-        console.print(f"[red]prompt:[/red] {exc}")
-        raise typer.Exit(code=1) from exc
-
     if query is None:
         query = sys.stdin.read().strip()
     if not query:
         console.print("[red]prompt:[/red] empty query (pass a string or pipe on stdin)")
         raise typer.Exit(code=2)
 
-    response = backend_obj.generate(
-        query,
+    request = PromptRequest(
+        spec=spec,
+        capabilities=caps,
+        store=store,
+        backend_name=backend_name,
+        query=query,
         max_new_tokens=max_tokens,
         temperature=temp,
         top_p=top_p,
+        adapter=adapter,
     )
-    sys.stdout.write(response + "\n")
+    try:
+        result = run_prompt(request)
+    except AdapterNotFoundError as exc:
+        console.print(f"[red]prompt:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
+
+    sys.stdout.write(result.response + "\n")
 
 
 def _dispatch_vl_prompt(  # pragma: no cover
diff --git a/src/dlm/inference/dispatch.py b/src/dlm/inference/dispatch.py
new file mode 100644
index 0000000..14892e5
--- /dev/null
+++ b/src/dlm/inference/dispatch.py
@@ -0,0 +1,72 @@
+"""Domain dispatcher for `dlm prompt` (text path).
+
+Lifts the build-backend → load → generate pipeline out of the CLI for
+text-only bases. Callers (CLI, LSP inline-preview, future automation)
+build a `PromptRequest`, call `run_prompt`, and render the typed
+`PromptResult`. The dispatcher does no console I/O nor stdin reads;
+the CLI resolves the query string from argv or stdin before dispatch.
+
+Vision-language and audio-language paths still live in CLI helpers
+(`_dispatch_vl_prompt`, `_dispatch_audio_prompt`); a follow-up phase
+lifts those into modality-aware dispatchers under
+`dlm.inference.dispatch_vl` / `dispatch_audio`.
+
+External-module imports are dotted (e.g. `from dlm.inference import
+backends as _backends; _backends.build_backend(...)`) so test fixtures
+that monkeypatch `dlm.inference.backends.<name>` resolve at call time.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from dlm.inference import backends as _backends
+from dlm.inference.backends.select import BackendName
+
+if TYPE_CHECKING:
+    from dlm.base_models.schema import BaseModelSpec
+    from dlm.hardware.capabilities import Capabilities
+    from dlm.store.paths import StorePath
+
+
+@dataclass(frozen=True)
+class PromptRequest:
+    """Inputs to `run_prompt`.
+
+    The CLI is responsible for selecting the backend kind (`auto` →
+    `pytorch` / `mlx`), license-checking the spec, and resolving the
+    query string from argv or stdin; the dispatcher receives all of
+    those as already-typed objects.
+    """
+
+    spec: BaseModelSpec
+    capabilities: Capabilities
+    store: StorePath
+    backend_name: BackendName
+    query: str
+    max_new_tokens: int
+    temperature: float
+    top_p: float | None
+    adapter: str | None
+
+
+@dataclass(frozen=True)
+class PromptResult:
+    """Outcome of `run_prompt`. The CLI writes `response` to stdout."""
+
+    response: str
+    backend_name: BackendName
+
+
+def run_prompt(req: PromptRequest) -> PromptResult:
+    """Build, load, and generate a single response for a text-only base."""
+    backend_obj = _backends.build_backend(req.backend_name, req.capabilities)
+    backend_obj.load(req.spec, req.store, adapter_name=req.adapter)
+    response = backend_obj.generate(
+        req.query,
+        max_new_tokens=req.max_new_tokens,
+        temperature=req.temperature,
+        top_p=req.top_p,
+    )
+    return PromptResult(response=response, backend_name=req.backend_name)

From abbf3a65ac9c176a09ac8140068e2ed23c91fee8 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Tue, 28 Apr 2026 20:13:21 -0400
Subject: [PATCH 32/46] Extract train doctor + manifest-provision + run_phases
 into dlm.train.dispatch:run_train
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lifts the hardware probe → manifest bootstrap → phase orchestration
sequence out of the CLI. Watch loop, RPC probe server, multi-GPU
accelerate launcher dispatch, and license interactive prompt stay
CLI-side. Dotted imports in the dispatcher keep tests' monkeypatches
on dlm.hardware.doctor and dlm.train.preference.phase_orchestrator.run_phases
visible at call time.
---
 src/dlm/cli/commands/train.py |  89 +++++++---------------
 src/dlm/train/dispatch.py     | 136 ++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+), 61 deletions(-)
 create mode 100644 src/dlm/train/dispatch.py

diff --git a/src/dlm/cli/commands/train.py b/src/dlm/cli/commands/train.py
index 3d73676..d56a329 100644
--- a/src/dlm/cli/commands/train.py
+++ b/src/dlm/cli/commands/train.py
@@ -274,7 +274,6 @@ def train_cmd(
     from dlm.base_models import resolve as resolve_base_model
     from dlm.doc.errors import DlmParseError
     from dlm.doc.parser import parse_file
-    from dlm.hardware import doctor
     from dlm.lock import LockMode, LockValidationError
     from dlm.store.paths import for_dlm
     from dlm.train import (
@@ -283,12 +282,17 @@ def train_cmd(
         ResumeIntegrityError,
         TrainingError,
     )
+    from dlm.train.dispatch import (
+        NoViableTrainingPlanError,
+        TrainRequest,
+        run_train,
+    )
     from dlm.train.preference import (
         DpoPhaseError,
         NoPreferenceContentError,
         PriorAdapterRequiredError,
     )
-    from dlm.train.preference.phase_orchestrator import Phase, run_phases
+    from dlm.train.preference.phase_orchestrator import Phase
 
     console = Console(stderr=True)
 
@@ -442,70 +446,30 @@ def train_cmd(
     from dlm.train.distributed import detect_world_size
 
     ws = detect_world_size()
-    doctor_result = doctor(
-        training_config=parsed.frontmatter.training,
-        base_params=spec.params,
-        seq_len=min(parsed.frontmatter.training.sequence_len, spec.effective_context_length),
-        world_size=ws,
-    )
-    plan = doctor_result.plan
-    if plan is None:
-        console.print(
-            "[red]doctor:[/red] no viable training plan for this host. "
-            "Run `dlm doctor` for details."
-        )
-        raise typer.Exit(code=1)
-
     store = for_dlm(parsed.frontmatter.dlm_id)
-    store.ensure_layout()
-
-    # `dlm init` writes a manifest as part of store provisioning. Mirror
-    # that manifest write here when the store layout exists but has no
-    # manifest yet — covers two flows:
-    #   - auto-scaffold via `dlm train <dir>` on a fresh directory
-    #   - hand-authored .dlm with a fresh ULID that never went through
-    #     `dlm init` (e.g. authored via the LSP / VSCode extension)
-    # License acceptance has already been validated upstream by this
-    # point, so we just record it.
-    if not store.manifest.exists():
-        from dlm.base_models import is_gated
-        from dlm.base_models.license import require_acceptance
-        from dlm.store.manifest import Manifest, save_manifest
-
-        acceptance = (
-            require_acceptance(spec, accept_license=True, via="cli_flag")
-            if is_gated(spec)
-            else None
-        )
-        save_manifest(
-            store.manifest,
-            Manifest(
-                dlm_id=parsed.frontmatter.dlm_id,
-                base_model=spec.key,
-                base_model_revision=spec.revision,
-                source_path=path.resolve(),
-                license_acceptance=acceptance,
-            ),
-        )
 
     from dlm.modality import ModalityError
 
+    request = TrainRequest(
+        parsed=parsed,
+        target_path=path,
+        spec=spec,
+        store=store,
+        phase=phase_literal,
+        mode=mode,
+        seed=seed,
+        max_steps=max_steps,
+        lock_mode=lock_mode,
+        world_size=ws,
+        strict_metrics=strict_metrics,
+        include_auto_mined=not no_mined,
+    )
+
     try:
-        phase_results = run_phases(
-            store,
-            parsed,
-            spec,
-            plan,
-            phase=phase_literal,
-            mode=mode,
-            seed=seed,
-            max_steps=max_steps,
-            lock_mode=lock_mode,
-            capabilities=doctor_result.capabilities,
-            world_size=ws,
-            strict_metrics=strict_metrics,
-            include_auto_mined=not no_mined,
-        )
+        train_result = run_train(request)
+    except NoViableTrainingPlanError as exc:
+        console.print(f"[red]doctor:[/red] {exc}")
+        raise typer.Exit(code=1) from exc
     except sqlite3.Error as exc:
         console.print(f"[red]metrics:[/red] {exc}")
         raise typer.Exit(code=1) from exc
@@ -548,6 +512,9 @@ def train_cmd(
         console.print(f"[red]training:[/red] {exc}")
         raise typer.Exit(code=1) from exc
 
+    plan = train_result.plan
+    phase_results = train_result.phase_results
+
     if not phase_results:
         console.print(
             "[yellow]no-op:[/yellow] nothing to train for the requested phase. "
diff --git a/src/dlm/train/dispatch.py b/src/dlm/train/dispatch.py
new file mode 100644
index 0000000..24a95aa
--- /dev/null
+++ b/src/dlm/train/dispatch.py
@@ -0,0 +1,136 @@
+"""Domain dispatcher for `dlm train` (single-shot path).
+
+Lifts the doctor → manifest-provision → run_phases pipeline out of the
+CLI. Callers (CLI, LSP "Run Training" command, future automation) build
+a `TrainRequest`, call `run_train`, and render the typed `TrainResult`.
+The dispatcher does no console I/O; CLI-shaped concerns — multi-GPU
+launcher dispatch, license interactive prompt, --watch loop, RPC
+probe server, terminal rendering — stay in `dlm.cli.commands.train`.
+
+External-module imports are dotted (e.g. `from dlm import hardware as
+_hardware; _hardware.doctor(...)`) so test fixtures that monkeypatch
+`dlm.hardware.doctor` and
+`dlm.train.preference.phase_orchestrator.run_phases` resolve at call
+time.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from dlm import hardware as _hardware
+from dlm.train.preference import phase_orchestrator as _orchestrator
+from dlm.train.preference.phase_orchestrator import Phase, PhaseResult
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from dlm.base_models.schema import BaseModelSpec
+    from dlm.doc.parser import ParsedDlm
+    from dlm.hardware.plan import TrainingPlan
+    from dlm.lock import LockMode
+    from dlm.store.paths import StorePath
+    from dlm.train.trainer import Mode
+
+
+class NoViableTrainingPlanError(RuntimeError):
+    """`doctor()` returned no plan for the current host / config."""
+
+
+@dataclass(frozen=True)
+class TrainRequest:
+    """Inputs to `run_train`.
+
+    The CLI is responsible for parsing the .dlm, resolving the spec
+    (with license acceptance), validating flags, and detecting the
+    accelerate world size; the dispatcher receives all of those as
+    already-typed objects.
+    """
+
+    parsed: ParsedDlm
+    target_path: Path
+    spec: BaseModelSpec
+    store: StorePath
+    phase: Phase
+    mode: Mode
+    seed: int | None
+    max_steps: int | None
+    lock_mode: LockMode
+    world_size: int
+    strict_metrics: bool
+    include_auto_mined: bool
+
+
+@dataclass(frozen=True)
+class TrainResult:
+    """Outcome of `run_train`. `phase_results` is empty when nothing
+    matched the requested phase (no SFT content, no preference content,
+    etc.); the CLI surfaces a "no-op" message."""
+
+    plan: TrainingPlan
+    phase_results: list[PhaseResult]
+
+
+def run_train(req: TrainRequest) -> TrainResult:
+    """Probe hardware, ensure store manifest, run all requested phases."""
+    doctor_result = _hardware.doctor(
+        training_config=req.parsed.frontmatter.training,
+        base_params=req.spec.params,
+        seq_len=min(
+            req.parsed.frontmatter.training.sequence_len,
+            req.spec.effective_context_length,
+        ),
+        world_size=req.world_size,
+    )
+    plan = doctor_result.plan
+    if plan is None:
+        raise NoViableTrainingPlanError(
+            "no viable training plan for this host. Run `dlm doctor` for details."
+        )
+
+    req.store.ensure_layout()
+
+    # `dlm init` writes a manifest as part of store provisioning. Mirror
+    # that here when the layout exists but the manifest doesn't — covers
+    # auto-scaffold via `dlm train <dir>` and hand-authored .dlms with
+    # fresh ULIDs that never went through `dlm init` (e.g. authored via
+    # the LSP). License acceptance has already been validated upstream.
+    if not req.store.manifest.exists():
+        from dlm.base_models import is_gated
+        from dlm.base_models.license import require_acceptance
+        from dlm.store.manifest import Manifest, save_manifest
+
+        acceptance = (
+            require_acceptance(req.spec, accept_license=True, via="cli_flag")
+            if is_gated(req.spec)
+            else None
+        )
+        save_manifest(
+            req.store.manifest,
+            Manifest(
+                dlm_id=req.parsed.frontmatter.dlm_id,
+                base_model=req.spec.key,
+                base_model_revision=req.spec.revision,
+                source_path=req.target_path.resolve(),
+                license_acceptance=acceptance,
+            ),
+        )
+
+    phase_results = _orchestrator.run_phases(
+        req.store,
+        req.parsed,
+        req.spec,
+        plan,
+        phase=req.phase,
+        mode=req.mode,
+        seed=req.seed,
+        max_steps=req.max_steps,
+        lock_mode=req.lock_mode,
+        capabilities=doctor_result.capabilities,
+        world_size=req.world_size,
+        strict_metrics=req.strict_metrics,
+        include_auto_mined=req.include_auto_mined,
+    )
+
+    return TrainResult(plan=plan, phase_results=phase_results)

From d39aefbf4efc813a13f8d87afc5bc2ecc97766c5 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:03:14 -0400
Subject: [PATCH 33/46] Move audit-13 follow-up findings into versioned docs
 tree

---
 docs/audits/13-brutal-promise-audit.md        | 381 ++++++++
 docs/audits/13-followup/PLAN.md               |  77 ++
 docs/audits/13-followup/finding03/baseline.md |  62 ++
 .../finding03/direct-query-results.md         |  78 ++
 .../13-followup/finding03/probe-base.dlm      |  17 +
 .../13-followup/finding03/sway-results.json   | 856 +++++++++++++++++
 .../13-followup/finding03/sway-results.md     |  42 +
 docs/audits/13-followup/finding03/sway.yaml   | 105 +++
 docs/audits/13-followup/finding03/the-doc.dlm | 725 ++++++++++++++
 .../finding04/direct-query-results.md         | 143 +++
 docs/audits/13-followup/finding04/the-doc.dlm | 415 +++++++++
 .../13-followup/findings/00-overview.md       | 112 +++
 .../13-followup/findings/01-recipe-failure.md | 146 +++
 .../13-followup/findings/02-cpt-isolation.md  | 143 +++
 .../13-followup/findings/03-base-floor.md     | 140 +++
 .../findings/04-instruction-shape.md          | 156 ++++
 docs/audits/13-followup/stage1/sway.yaml      | 101 ++
 docs/audits/13-followup/stage1/the-doc.dlm    |  47 +
 docs/audits/13-followup/sway-results.json     | 881 ++++++++++++++++++
 docs/audits/13-followup/sway-results.md       |  41 +
 docs/audits/13-followup/sway.yaml             | 105 +++
 docs/audits/13-followup/the-doc.dlm           | 725 ++++++++++++++
 22 files changed, 5498 insertions(+)
 create mode 100644 docs/audits/13-brutal-promise-audit.md
 create mode 100644 docs/audits/13-followup/PLAN.md
 create mode 100644 docs/audits/13-followup/finding03/baseline.md
 create mode 100644 docs/audits/13-followup/finding03/direct-query-results.md
 create mode 100644 docs/audits/13-followup/finding03/probe-base.dlm
 create mode 100644 docs/audits/13-followup/finding03/sway-results.json
 create mode 100644 docs/audits/13-followup/finding03/sway-results.md
 create mode 100644 docs/audits/13-followup/finding03/sway.yaml
 create mode 100644 docs/audits/13-followup/finding03/the-doc.dlm
 create mode 100644 docs/audits/13-followup/finding04/direct-query-results.md
 create mode 100644 docs/audits/13-followup/finding04/the-doc.dlm
 create mode 100644 docs/audits/13-followup/findings/00-overview.md
 create mode 100644 docs/audits/13-followup/findings/01-recipe-failure.md
 create mode 100644 docs/audits/13-followup/findings/02-cpt-isolation.md
 create mode 100644 docs/audits/13-followup/findings/03-base-floor.md
 create mode 100644 docs/audits/13-followup/findings/04-instruction-shape.md
 create mode 100644 docs/audits/13-followup/stage1/sway.yaml
 create mode 100644 docs/audits/13-followup/stage1/the-doc.dlm
 create mode 100644 docs/audits/13-followup/sway-results.json
 create mode 100644 docs/audits/13-followup/sway-results.md
 create mode 100644 docs/audits/13-followup/sway.yaml
 create mode 100644 docs/audits/13-followup/the-doc.dlm

diff --git a/docs/audits/13-brutal-promise-audit.md b/docs/audits/13-brutal-promise-audit.md
new file mode 100644
index 0000000..fa0ae0b
--- /dev/null
+++ b/docs/audits/13-brutal-promise-audit.md
@@ -0,0 +1,381 @@
+# Audit 13 — Brutal end-to-end audit of the broader DLM promise
+
+**Scope:** the marketing claim from `CLAUDE.md` — *"a text file with a `.dlm`
+extension becomes a local, reproducible, trainable LLM. Edit the document,
+retrain, share. Not a toy — LoRA/QLoRA on a real pretrained base, exportable
+to Ollama."* Verified by running, not reading.
+
+**Date:** 2026-04-29.
+**Hardware:** Apple Silicon (Darwin 25.4.0 arm64, 18 cores, 48 GB unified
+memory, MPS, no CUDA, no bitsandbytes, no FlashAttention). Doctor reports
+`backend=mps`, `determinism_class=best-effort`.
+**Wall-time actual:** ~33 minutes of in-band runs (23:36 → ~00:09 EDT)
+within the 90-min budget. Heaviest single op: 11.4-min fortran train at
+120 steps.
+**Method:** B12.1 reproduction, full E2E-A through E2E-E from the prompt,
+with sway as the measurement instrument.
+**Artifacts:** `docs/audits/13-findings/{logs,fortran-fine-tune,sway-runs,exports}/`.
+
+## Verdict
+
+**YELLOW.** The promise is **mostly delivered, with named caveats**:
+
+- **Trainable** — yes. Real `train` against SmolLM2-135M-Instruct on a
+  2,021-file fortran corpus completed in 11.4 min on MPS, monotonic loss
+  drop 1.96 → 1.69, eval_loss 1.83. Adapter, training_state.pt with
+  RNGs, pinned_versions.json, metrics.sqlite all written.
+- **Exportable to Ollama** — yes, end-to-end. GGUF base + adapter produced
+  in ~20 s, registered with `ollama create`, smoke prompt produced
+  coherent text. **But** `--no-template` is silently ignored at emission
+  (M13.1).
+- **Reproducible** — **partially**. Sway re-run is bit-exact deterministic;
+  `dlm train --fresh --seed 42` re-run on the same tiny doc yields
+  **different adapter SHAs** (`1afc...` vs `bb1f...`) on MPS.
+  `training_state.rng.json` is bit-identical across runs, so the drift
+  is in MPS-kernel ops, not RNG state. Doctor says
+  `determinism_class: best-effort` — the docs are honest, but
+  CLAUDE.md's "reproducible" headline is aspirational on Apple Silicon.
+- **"Not a toy"** — **partially**. The 135M adapter measurably shifted
+  the model on fortran-keyword prompts (sway delta_kl `+44.18σ` vs null
+  baseline) but **did not internalize** the hand-written Q/A pairs
+  (sway paraphrase_invariance `-3.51σ`, FAIL). Confirmed at runtime: the
+  Ollama smoke output for "When should I use do concurrent?" produced a
+  Python code block, not Fortran. The base 135M floor is too thin for
+  the Q/A semantic-binding promise on a one-pass, 120-step run.
+  Distribution-shift signal is real; transfer to question-answering is
+  not.
+- **Share** — yes. `dlm pack` → `dlm verify` → `DLM_HOME=… dlm unpack`
+  → `dlm prompt` round-tripped a 15 MB bundle cleanly. **But**
+  CLAUDE.md documents `dlm unpack PATH --home DIR`; the actual flag is
+  `DLM_HOME=…` env-var only (M13.2).
+- **"Edit the document, retrain"** — yes mechanically, but at ~3.5 s/step
+  on MPS with 60-step eval pauses adding ~30s, the iteration loop is
+  6-12 min for a 120-step run. Workable, not delightful.
+
+The strongest evidence in this audit is **sway**: with the null_adapter
+calibration probe in the suite, sway gives 4-decimal scores, z-scores,
+and a single-line verdict that resolves "did training do anything?"
+honestly. Sway re-runs are bit-identical. The audit is much stronger
+as a result; without sway it would have been "the smoke output looked
+fortran-ish" hand-waving.
+
+The B12.1 blocker from Audit 12 is **fixed**. No new blockers found.
+
+## What's fixed since Audit 12
+
+- **B12.1 manifest race resolved.** Same fortran corpus + same source
+  directives + fresh ULID → trained to completion. The audit-12 store
+  (`01KQAR00VP2KAKVGCB7W4XRRZY`) now has a `manifest.json` and the
+  failing path no longer reproduces.
+- **M12.1 cli/commands.py extracted.** The 4,650-LOC monolith from
+  Audit 12 is gone; `src/dlm/cli/commands/` is now a 23-file package,
+  largest file 698 LOC (`train.py`), total 4,449 LOC. Restructured.
+
+## Blocker findings
+
+**None on the broader promise.** Every CLAUDE.md headline claim has a
+working code path on a fresh box. Reservations are filed below as
+majors and minors.
+
+## Major findings
+
+### M13.1 — `dlm export --no-template` is a no-op at Modelfile emission
+
+**Live caught.** `dlm export … --no-template` suppresses the preflight
+chat-template check, but the Ollama Modelfile's `TEMPLATE """…"""`
+block is still written.
+
+- Help text (`src/dlm/cli/commands/export.py:41`): "Skip writing
+  TEMPLATE into the Modelfile."
+- Plan path: `cli_no_template` is forwarded into
+  `ExportPlan.include_template = not cli_no_template`
+  (`src/dlm/export/plan.py:141`).
+- **Only one consumer** of `plan.include_template` exists in `src/`:
+  `src/dlm/export/runner.py:223`, where it gates the preflight check.
+- `src/dlm/export/ollama/modelfile.py:101,124` calls
+  `_build_template_block(template_row)` and appends it to the Modelfile
+  unconditionally — `plan.include_template` is never consulted by the
+  emission code.
+- **Evidence** (`docs/audits/13-findings/logs/export-2-no-template.log`
+  + the resulting Modelfile in
+  `~/.dlm/store/01KQBN26S8RY8MGHE9HM09E8CM/exports/Q4_K_M/Modelfile`):
+  ```
+  FROM ./base.Q4_K_M.gguf
+  ADAPTER ./adapter.gguf
+
+  TEMPLATE """{{- if .System }}<|im_start|>system
+  …
+  ```
+  The block is present even though `--no-template` was the only
+  template-affecting flag.
+
+**Severity:** MAJOR — the help text lies. Users counting on this flag to
+test "what does Ollama do without a TEMPLATE override?" get the wrong
+answer.
+
+**Fix shape:** in `_build_template_block` consumers, check
+`ctx.plan.include_template` and emit nothing (or a comment) when False.
+
+### M13.2 — `dlm unpack --home DIR` is documented but not implemented
+
+CLAUDE.md "CLI surface by release" lists:
+```
+dlm unpack <path> [--home DIR] [--force]
+```
+
+`uv run dlm unpack --help` shows:
+```
+--force   Overwrite an existing store with the same dlm_id.
+--out     Directory to place the restored .dlm (default: alongside the pack).
+```
+
+Passing `--home DIR` aborts with `No such option: --home`. The actual
+override is via env var (`DLM_HOME=… dlm unpack ...` works). Either:
+
+- Add a `--home` flag matching the documented surface, OR
+- Update CLAUDE.md to drop `--home` (and any other commands documented
+  with it). The same pattern almost certainly applies elsewhere — a
+  doc/CLI sync sweep is the right scope.
+
+**Severity:** MAJOR for documentation-versus-binary mismatch on a
+top-line CLI claim.
+
+### M13.3 — `dlm metrics PATH --run-id 1` parse breaks (option-after-positional)
+
+```
+$ dlm metrics PATH --run-id 1
+… error: Missing argument 'PATH'.
+$ dlm metrics --run-id 1 PATH
+run_id=1  phase=sft  seed=42  status=ok  steps=12  evals=4
+```
+
+Typer's group dispatch ate the positional. Workaround is "options
+before positional," but every CLI user expects the opposite. Either:
+
+- Restructure as a subcommand-group: `dlm metrics show PATH --run-id 1`
+- Or pass `path` to the group's callback so subcommand parsing sees a
+  consumed positional first.
+
+**Severity:** MAJOR for UX paper-cut on a documented v2 surface
+command. Trivially fixable.
+
+### M13.4 — "Reproducible" is aspirational on MPS
+
+`dlm train --fresh --seed 42 --max-steps 8` on a tiny 3-Q/A doc, run
+twice in succession on the same hardware, in the same shell:
+
+| Run | adapter_model.safetensors SHA |
+|---|---|
+| v0001 | `1afcd3f524e62dd17b87bf7059f698ab87882986e0397a353d5e7f3b358837e0` |
+| v0002 | `bb1f67dbc19b7ebdba1910477f8d5cb23d4e5442092fada06f5d2817a3a137d6` |
+
+Different bits. `training_state.rng.json` was bit-identical between
+runs, so RNG-seed plumbing is correct — the drift is downstream of
+RNGs, in MPS kernel ops. `dlm doctor --json` already reports
+`determinism_class: best-effort` and the per-store `dlm.lock`
+acknowledges this. So the implementation doesn't lie; **CLAUDE.md
+does**, by stating "Edit the document, retrain, share" and labeling
+the project "reproducible" without a platform caveat. On Linux+CUDA
+with `CUBLAS_WORKSPACE_CONFIG=:4096:8` this is presumably bit-exact
+(not verified in this audit); on macOS+MPS it isn't.
+
+Also captured (m13.5 below): `--fresh` does **not** wipe and reset the
+adapter version counter. v0001 from run 1 stayed; run 2 wrote v0002.
+Tangential to determinism but related — interpreting `--fresh` as
+"start from base weights for *this* run" is sensible, but a user might
+reasonably expect "throw away prior versions and start over."
+
+### M13.5 — `dlm show` re-expands directives on every invocation, on stderr
+
+```
+$ dlm show fortran.dlm --json 2>/dev/null | head
+{ … clean JSON … }
+$ dlm show fortran.dlm --json 2>&1 >/dev/null | wc -l
+243
+```
+
+For a 2,021-file source-directive document, `dlm show --json` re-walks
+the corpus and prints 243 lines of `dlm.directives.expand INFO:
+directive: … exceeds max_bytes_per_file=32768; skipping` to stderr,
+**every time**. `dlm show` is a read-only command users will run
+repeatedly. The expansion result *should* be cached, and the INFO logs
+should be silenced unless `--verbose`.
+
+Mostly cosmetic until you script `dlm show` in a loop, at which point
+it becomes a 1-2 second hit per call.
+
+## Minor / informational findings
+
+- **m13.6 — Stale brew install masks current binary.** `which dlm` →
+  `/opt/homebrew/bin/dlm` → `dlm 0.9.0`, missing `repl, metrics,
+  templates, push, pull, serve, verify, preference, synth, cache`
+  (every command added since 0.9.0). The repo HEAD is `0.10.0` with
+  the full surface. The release script either didn't bump the brew
+  formula, or the user hasn't `brew upgrade`d. Symptom: a fresh
+  contributor pasting commands from CLAUDE.md against the brew binary
+  hits "No such command 'repl'." Either bump the brew formula on every
+  `pyproject.toml` version bump (CI gate), or document `uv run dlm` as
+  the canonical entry point in the repo's README.
+- **m13.7 — ANSI escape spam from Ollama on non-TTY pipe.** Piping
+  `ollama run … | tee` produces output salted with `[?25l[?2026h`
+  cursor-control sequences (see `logs/ollama-fortran-prompt.log`). Not
+  a dlm bug — Ollama 0.20.7 doesn't suppress its TTY UI under a pipe.
+  Listed because it surfaces as garbled-looking dlm-export smoke
+  output if anyone scripts the round-trip and doesn't `sed` it out.
+- **m13.8 — Sway's `section_internalization` and `leakage` probes need
+  the dlm bridge to be useful.** Both probes opt-out / SKIP without
+  `ctx.sections` populated by the dlm-sway bridge. The bridge requires
+  `pip install 'dlm-sway[dlm]'` plus the local DLM checkout, which I
+  intentionally didn't install in the audit's sway venv (no clean way
+  to do so without giving sway a typosquat-friendly PyPI install
+  permission). On a fresh user's machine the bridge install is one
+  pip line; it's worth shipping a starter spec that activates the
+  bridge so users see attribution probes light up first time.
+- **m13.9 — `dlm pack` produces unsigned bundles by default.** `dlm
+  verify` correctly reports "is unsigned — no provenance.json
+  inside." Signing is opt-in. Reasonable default; flagged because the
+  Phase 7 promise of "share" is partly trust-signed sharing. A user
+  expecting "I can verify this came from someone trustworthy"
+  mid-distribution will discover the answer is "no, unless they
+  signed it" only after running verify.
+- **m13.10 — `--fresh` keeps prior adapter versions.** Two consecutive
+  `dlm train --fresh ...` runs on the same store produced v0001 and
+  v0002 side-by-side. If `--fresh` semantics are "fresh weights, but
+  keep history," document it. If they're "wipe and start over," fix
+  the version-counter reset.
+
+## What works (earned praise)
+
+- **Source-directive expansion at scale**: 2,021 .f90/.fypp files
+  across 2 sources expanded in ~1.5 s, with sensible
+  `max_bytes_per_file` skipping (62 oversized + 1 non-UTF-8) logged
+  per-file. Tokenization cache populated 2,030 entries (3.1 MB).
+- **Manifest contract honored.** After completion, the store has
+  `manifest.json`, `adapter/versions/v0001/{adapter_config.json,
+  adapter_model.safetensors, training_state.pt, training_state.rng.json,
+  pinned_versions.json}`, `metrics.sqlite`, `replay/`,
+  `tokenized-cache/`. CLAUDE.md pitfall #2 (training_state sidecar)
+  observably honored.
+- **Strict-mode parsing.** `dlm show` and `dlm train` on a malformed
+  `.dlm` (bad ULID, wrong type, unknown key) return a single
+  composed Pydantic error with line numbers and a one-shot diagnosis:
+  ```
+  error: /tmp/audit-e2e-e/malformed.dlm:2: dlm_id: Value error, dlm_id
+  must be a 26-char Crockford base32 ULID, got 'not-a-ulid-just-a-string';
+  training.lora_r: Input should be a valid integer, unable to parse
+  string as an integer; training.unknown_key: Extra inputs are not
+  permitted
+  ```
+- **Export pipeline produces the documented artifacts.** Within ~20 s
+  on MPS: `base.Q4_K_M.gguf` (105 MB), `adapter.gguf` (1.8 MB),
+  `imatrix.gguf` (631 KB), `imatrix.meta.json`,
+  `export_manifest.json` with sha256 + llama_cpp_tag (`b8816`).
+- **Ollama integration end-to-end.** `dlm export … --name dlm-fortran-audit13`
+  registered `dlm-fortran-audit13:latest` (107 MB). `ollama run` produced
+  coherent (though not fortran-flavored) output. The Modelfile's
+  `TEMPLATE` block is the registry-authored Go template, not a Jinja
+  fuzz-match (CLAUDE.md pitfall #1).
+- **`dlm pack` / `dlm unpack` round-trip.** 15 MB bundle, restored
+  cleanly into a fresh `DLM_HOME`, `dlm prompt` ran against the
+  restored store immediately.
+- **Per-store metrics.sqlite is real.** Tables `runs, steps, evals,
+  exports, tokenization, gate_events, preference_mining`. Step-loss
+  curve queryable via plain SQL — see
+  `logs/step-loss.txt` for the audit's own run.
+- **Sway is sharper than the docs claim.** Wall 3.2 s on a 4-probe
+  + null_adapter suite, perfectly deterministic across re-runs,
+  composite verdict + per-category scores + per-probe z-scores. The
+  audit's central evidence — *the adapter shifted distributions on
+  fortran prompts but did not bind Q/A pairs* — is z-scored with
+  CI95.
+
+## Promise audit table
+
+| CLAUDE.md headline claim | Verdict | Evidence pointer |
+|---|---|---|
+| "A text file with a .dlm extension becomes a local … LLM" | **PASS** | `~/.dlm/store/01KQBN26S8RY8MGHE9HM09E8CM/{adapter,manifest.json,exports/Q4_K_M/}` after a 6.8 KB `.dlm` |
+| "trainable LLM" (LoRA) | **PASS** | `logs/train-1.log` — 120 steps, train_loss 1.771, eval_loss 1.826; `metrics.sqlite` has the curve |
+| "trainable LLM" (QLoRA) | **UNVERIFIED** | bitsandbytes is not available on Apple Silicon; no QLoRA path attempted |
+| "real pretrained base" | **PASS** | `adapter_config.json:base_model_name_or_path = "HuggingFaceTB/SmolLM2-135M-Instruct"`; revision pinned in manifest |
+| "exportable to Ollama" | **PASS** | `ollama list` shows `dlm-fortran-audit13:latest`; `ollama run` returned coherent text |
+| "reproducible" | **PARTIAL** | sway: bit-exact reruns. dlm train on MPS: SHA drift between v0001 and v0002 with same seed (`logs/det-12-shas.txt`). Doctor honestly reports `best-effort` |
+| "Edit the document, retrain" (workflow) | **PASS-with-caveat** | 11.4-min wall for 120 steps. `--watch` flag exists; not exercised this audit |
+| "share" (pack/unpack) | **PASS** | `logs/pack.log` + `logs/unpack-2.log` + `logs/unpack-prompt.log` round-trip |
+| "share" (push/pull/serve to HF/HTTP/LAN) | **UNVERIFIED** | not exercised this audit |
+| "Not a toy" — the 135M model demonstrably learns a domain | **PARTIAL/FAIL** | sway: `dk_fortran` z=+44.18σ (distribution shift, real); `para_fortran` z=-3.51σ (Q/A binding **failed**). Adapter shifted token distributions on fortran-keyword prompts but did not internalize Q/A semantics in 120 steps. Visible at runtime: ollama smoke produced Python, not Fortran |
+| "Pad token must NOT default to EOS" (CLAUDE.md pitfall #4) | **PASS** | tokenizer log says `pad_token_id: 0`, distinct from EOS |
+| "training_state.pt sidecar" (pitfall #2) | **PASS** | `versions/v0001/training_state.pt` (7.5 MB) + `training_state.pt.sha256` + `training_state.rng.json` (16 KB) |
+| "merge_and_unload on QLoRA refused without --dequantize" (pitfall #3) | **PASS-by-code-read** | `src/dlm/export/plan.py:102` and `src/dlm/export/merge.py` enforce; not live-tested (no QLoRA on MPS) |
+| `dlm doctor --json` reports plan + capabilities | **PASS** | `logs/doctor-uv.json` |
+| All v1.0 CLI commands present | **PASS** (binary `0.10.0`) / **FAIL** (brew `0.9.0`) | `dlm 0.10.0 --help` lists all v1.0 + v2 commands; `/opt/homebrew/bin/dlm` is 0.9.0 and missing `repl, metrics, templates, push, pull, serve, verify, preference, synth, cache` |
+| `dlm unpack --home DIR` documented flag | **FAIL** | `--home` is not a flag; `DLM_HOME=` env var is the actual override (M13.2) |
+| `dlm export --no-template` does what it says | **FAIL** | preflight is suppressed, emission is not; M13.1 |
+
+## Methodology notes
+
+Order of operations:
+
+1. Read `docs/audits/12-brutal-post-audit-11.md`. Confirmed B12.1 is
+   the open blocker to verify.
+2. `dlm doctor --json` and `dlm --help` via both `which dlm`
+   (`/opt/homebrew/bin/dlm` 0.9.0) and `uv run dlm` (repo HEAD 0.10.0).
+   Discovered the brew/repo skew immediately — every subsequent
+   command used `uv run dlm`.
+3. `dlm init` to scaffold a fresh `.dlm`, then overwrote with the
+   audit-12 fortran source-directive frontmatter, refreshed to schema
+   v15 and a fresh ULID. Two source directives:
+   `~/GithubOrgs/FortranGoingOnForty` (1,847 .f90/.F90/.f95) and
+   `/tmp/stdlib_build/src` (174 .f90/.fypp). Total post-skip: 2,021
+   files, 7.97 MB content.
+4. `dlm train --fresh --seed 42 --max-steps 120`. **B12.1 did not
+   reproduce** — manifest written, training proceeded. 11.4 min wall.
+   Loss monotonic 1.927 → 1.886 → 1.805 → 1.689 → 1.638 → 1.607
+   over six log points. Eval at steps 60/90/120; final
+   eval_loss=1.826.
+5. Captured adapter SHA snapshot. Authored `sway.yaml` with
+   `delta_kl + paraphrase_invariance + calibration_drift + leakage`,
+   first-pass schema mismatch on `section_internalization` (audit-author
+   error), reauthored, ran. First sway run had no calibration → re-ran
+   with `null_adapter` probe added.
+6. `dlm export ... --quant Q4_K_M` (with `--no-template` after
+   preflight refused without it — see M13.1). Inspected emitted
+   artifacts. `--name dlm-fortran-audit13` registered into Ollama.
+   `ollama run` smoke prompt.
+7. Determinism: tiny 3-Q/A doc with a fresh ULID, `dlm train --fresh
+   --seed 42 --max-steps 8` twice. Compared
+   `adapter_model.safetensors` SHAs and
+   `training_state.rng.json` byte-diff. Re-ran sway against the
+   spec twice; compared per-probe `(verdict, score, raw, z)` tuples.
+8. Failure-mode hunting: `dlm prompt` on never-trained doc; `dlm
+   train` on malformed frontmatter; `dlm export --merged` on plain
+   LoRA; 200 KB inflated `.dlm` parses; `dlm metrics PATH --run-id
+   1` (broke); `dlm unpack --home` (missing flag).
+9. `dlm pack` / `dlm verify` / `dlm unpack` (with `DLM_HOME=`) round-
+   trip and prompt-on-restored-store.
+
+Cleanup: `ollama rm dlm-fortran-audit13`.
+
+The single biggest methodology learning: **if the audit had skipped
+the `null_adapter` probe in the sway suite, every probe would have
+shown verdict but no z-score, and the "did training do anything?"
+question would have been a vibes call**. Sway with calibration is
+properly falsifiable; sway without it is just a per-probe score
+table. The dlm cookbook's starter sway specs should default-include
+`null_adapter`.
+
+## Summary in one paragraph
+
+The promise is real for "a `.dlm` file becomes a trainable, exportable,
+shareable LoRA on top of a real HF base." The 11.4-min train, the
+working Ollama round-trip, the clean pack/unpack, the strict Pydantic
+parsing, and sway's z-scored verdict on a real corpus all back this
+up. The promise is **overstated** in two places: "reproducible" needs
+a platform caveat (best-effort on MPS, verified non-bit-exact even
+with the same seed), and "Not a toy" understates how much the 135M
+floor needs help — distribution shift is real, Q/A internalization
+is not, on a single 120-step pass. The two help-text bugs (M13.1
+`--no-template` no-op, M13.2 `--unpack --home` missing) and the doc
+drift (`/opt/homebrew/bin/dlm` is 0.9.0) are minor in code but
+material in trust: a careful user reading CLAUDE.md and running the
+brew binary will hit "No such command 'repl'." within 30 seconds.
+None of this is a blocker; all of it is a documentation/UX sweep.
diff --git a/docs/audits/13-followup/PLAN.md b/docs/audits/13-followup/PLAN.md
new file mode 100644
index 0000000..6c7f0be
--- /dev/null
+++ b/docs/audits/13-followup/PLAN.md
@@ -0,0 +1,77 @@
+# Audit 13 follow-up — get fortran Q/A binding to work
+
+**Goal:** flip `paraphrase_invariance` from **−3.51σ FAIL** to **PASS** and
+get the Ollama smoke prompt to answer a fortran question with fortran (not
+Python). Driven by Audit 13's diagnosis: the original .dlm had ~8 MB PROSE
+source vs only **8** INSTRUCTION sections — too little Q/A signal for
+SmolLM2-135M to bind question→answer semantics in 120 steps.
+
+## Hypothesis
+
+Q/A binding failed because of **corpus shape**, not corpus volume or model
+floor. Three corrections:
+
+1. **Add INSTRUCTION density.** Hand-author 50+ high-quality Q/A pairs
+   sourced from the FORD-generated stdlib markdown specs at
+   `/tmp/stdlib_build/doc/specs/` (40 expert-written module docs). Plus
+   `dlm synth instructions --strategy extraction --apply` to harvest
+   more Q/A from the PROSE.
+2. **Add a third PROSE source.** Pull `/tmp/stdlib_build/doc/specs/**/*.md`
+   in as source-directive content. The markdown files are "what does X do?"
+   shaped — exactly the registration the model is missing.
+3. **More steps.** 500-1000 instead of 120. Audit 13's loss curve was
+   still descending at step 120 (1.927 → 1.638 over six log points).
+
+If `paraphrase_invariance` still fails after this, the floor is real and we
+need to repeat at SmolLM2-360M and 1.7B as a tier comparison.
+
+## Sway spec for the re-run
+
+Will require `pip install 'dlm-sway[hf,dlm]'` in the audit venv so the
+bridge lights up `section_internalization`, `leakage`, and bridge-aware
+`paraphrase_invariance` (Audit 13 m13.8).
+
+```yaml
+version: 1
+dlm_source: ./the-doc.dlm
+models:
+  base: { kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct" }
+  ft:   { kind: hf, base: "HuggingFaceTB/SmolLM2-135M-Instruct",
+          adapter: "~/.dlm/store/<ULID>/adapter/versions/v0001" }
+defaults:
+  seed: 0
+  differential: true
+  coverage_threshold: 0.6
+  score_weights: { adherence: 0.30, attribution: 0.35,
+                   calibration: 0.20, ablation: 0.15 }
+suite:
+  - { name: null_baseline, kind: null_adapter, prompts_from: sections/instruction }
+  - { name: dk_fortran, kind: delta_kl, prompts_from: sections/instruction }
+  - { name: para_fortran, kind: paraphrase_invariance,
+      prompts_from: sections/instruction,
+      assert: { generalization_ratio_gte: 0.5 } }
+  - { name: sis_fortran, kind: section_internalization }    # bridge probe
+  - { name: leak_fortran, kind: leakage,                    # bridge probe
+      assert: { fragility_gte: 0.4 } }
+  - { name: cal_general, kind: calibration_drift,
+      assert: { regression_rate_lt: 0.15 } }
+  - { name: abl_fortran, kind: adapter_ablation,
+      lambdas: [0.0, 0.25, 0.5, 0.75, 1.0, 1.25],
+      assert: { r_squared_gte: 0.85 } }
+```
+
+## Success bar
+
+- `paraphrase_invariance` z-score ≥ +1.0 (PASS), generalization_ratio ≥ 0.5.
+- `section_internalization` ≥ +1.0σ on at least 5 of the new INSTRUCTION
+  sections (bridge probe — verifies attribution).
+- `leakage` `fragility_gte ≥ 0.4` (sanity: pattern-match, not memorization).
+- `delta_kl` z-score ≥ +1.0 on a held-out fortran prompt set (the audit-13
+  +44σ was huge; we just need real signal, not regression).
+- `adapter_ablation` R² ≥ 0.85 on the λ-scaled curve (healthy fine-tune).
+- Ollama smoke: "When should I use `do concurrent`?" → fortran answer with
+  `do concurrent` syntax visible.
+
+If any of these miss, the next iteration is a model-floor sweep at
+SmolLM2-360M and 1.7B with the same recipe. If they all pass at 135M, the
+recipe is the dlm cookbook starter.
diff --git a/docs/audits/13-followup/finding03/baseline.md b/docs/audits/13-followup/finding03/baseline.md
new file mode 100644
index 0000000..573d812
--- /dev/null
+++ b/docs/audits/13-followup/finding03/baseline.md
@@ -0,0 +1,62 @@
+# Finding 03 — base capability snapshot (pre-training)
+
+`Qwen/Qwen2.5-Coder-1.5B-Instruct`, fp16, MPS, greedy decoding, no
+adapter.
+
+## Probe set
+
+| # | Question | Verdict | Notes |
+| --- | --- | --- | --- |
+| 1 | How do you declare an allocatable array of `real(real64)` in modern Fortran? | **correct** | Idiomatic syntax, complete `program` block. |
+| 2 | When should you prefer `do concurrent` over plain `do`? | partial | Mentions performance/parallelism but vague on loop-carried-dependence semantics. |
+| 3 | Show the signature of stdlib's sorting routine in Fortran. | **WRONG** | Claims Fortran has no built-in sort, invents `iso_fortran_env::sort` which does not exist. |
+| 4 | What does `intent(in)` mean for a dummy argument? | **correct** | Accurate explanation + example. |
+
+## Baseline character
+
+The base already speaks fortran fluently — syntax, modern features
+(`do concurrent`, `intent`, `allocatable`), `program` structure. What
+it does **not** know is **`fortran-lang/stdlib`** — the third-party
+modular standard library at `/tmp/stdlib_build/src`. Q3 demonstrates
+this directly: rather than pointing to `stdlib_sorting` or
+`stdlib_sorting::sort`, the model fabricates a non-existent
+`iso_fortran_env::sort`. This is exactly the kind of specific-corpus
+knowledge a LoRA should be able to add.
+
+## What this means for the experiment
+
+The finding-03 hypothesis is now *narrow and falsifiable*:
+
+> Training a LoRA on the audit-13-followup corpus (fortran source
+> files + stdlib source + stdlib doc/specs markdown) should produce
+> a measurable correction on stdlib-specific questions, leaving
+> general-fortran questions unchanged.
+
+If post-training Q3 produces a correct answer pointing at
+`stdlib_sorting`, the LoRA added real domain knowledge above the base
+capability — and dlm's product story stands on more than just style
+adaptation.
+
+If post-training Q3 still hallucinates and Q1/Q2/Q4 *degrade*, the
+LoRA is parasitic at this base — same memorization-without-
+generalization pattern as Findings 01 and 02, just at a more capable
+base.
+
+## Sample base outputs (verbatim, truncated)
+
+```
+Q1: How do you declare an allocatable array of real(real64) in modern Fortran?
+> In modern Fortran, you can declare an allocatable array of real numbers
+> using the `ALLOCATABLE` keyword. ...
+> real(real64), allocatable :: my_array(:)
+> allocate(my_array(5))
+```
+
+```
+Q3: Show the signature of stdlib's sorting routine in Fortran.
+> The standard library in Fortran does not have a built-in sorting
+> routine like C or other languages. However, you can use the `sort`
+> function from the `iso_fortran_env` module to sort arrays.
+```
+
+(Q3 is the wedge.)
diff --git a/docs/audits/13-followup/finding03/direct-query-results.md b/docs/audits/13-followup/finding03/direct-query-results.md
new file mode 100644
index 0000000..0aad7f7
--- /dev/null
+++ b/docs/audits/13-followup/finding03/direct-query-results.md
@@ -0,0 +1,78 @@
+# Finding 03 — direct-query smoke results
+
+Greedy decoding (`temp=0.0`, `max-tokens=150-200`).
+
+## Q1 — allocatable real(real64) array
+
+```
+$ dlm prompt finding03/the-doc.dlm "How do you declare an allocatable array of real(real64)..."
+To declare an allocatable array of real(real64) in modern Fortran, you can use
+the `allocatable` keyword followed by the array name and its dimensions...
+real(real64), allocatable :: my_array(:)
+allocate(my_array(10))
+do i = 1, size(my_array) ...
+deallocate(my_array)
+```
+
+Verdict: **correct, ≈ same as base.**
+
+## Q2 — do concurrent vs do
+
+```
+$ dlm prompt finding03/the-doc.dlm "When should you prefer \`do concurrent\`..."
+In what scenarios might you encounter a situation where `do concurrent` is more
+suitable than a plain `do` loop? Additionally, how can you modify a `do
+concurrent` loop to ensure that it executes in a specific order, and what are
+the potential consequences of not doing so?
+
+```fortran
+program concurrent_example
+    implicit none
+    integer :: i, j
+    integer, allocatable :: array(:)
+    ...
+    ! Use a plain do loop
+    do i = 1, 10
+        do j =
+```
+
+Verdict: **worse than base.** Repeats the question rhetorically, dumps an
+unrelated allocatable code example, doesn't actually answer.
+
+## Q3 — stdlib sorting (THE WEDGE)
+
+```
+$ dlm prompt finding03/the-doc.dlm "Show the signature of stdlib's sorting routine in Fortran."
+The signature should be similar to the one in C, but with Fortran-specific syntax.
+The sorting routine should be able to sort an array of integers in ascending order.
+The sorting routine should be able to sort an array of floating-point numbers in ascending order.
+The sorting routine should be able to sort an array of characters in ascending order.
+The sorting routine should be able to sort an array of strings in ascending order.
+The sorting routine should be able to sort an array of arrays in ascending order.
+... (degenerate enumeration loop, never mentions stdlib_sorting)
+```
+
+Verdict: **worse than base.** Base hallucinated `iso_fortran_env::sort`
+(concrete but wrong). Trained adapter degenerates into a "should be able to
+sort..." enumeration loop. The wedge gap was *not* closed; it widened.
+
+## Q4 — intent(in)
+
+```
+$ dlm prompt finding03/the-doc.dlm "What does \`intent(in)\` mean..."
+In Fortran, a dummy argument is a placeholder for an actual argument that is
+passed to a subroutine or function. The `intent(in)` attribute specifies that
+the argument should be passed by value and should not be modified within the
+subroutine or function...
+subroutine example_subroutine(input_value)
+    implicit none
+    integer, intent(in) :: input_value
+```
+
+Verdict: **correct, ≈ same as base.**
+
+## Net wedge score
+
+**0/4 questions improved.** 2/4 actively degraded (Q2, Q3).
+The LoRA did not add stdlib knowledge; it added autoregressive
+fortran-source-completion habits that interfere with Q/A behavior.
diff --git a/docs/audits/13-followup/finding03/probe-base.dlm b/docs/audits/13-followup/finding03/probe-base.dlm
new file mode 100644
index 0000000..e5325c5
--- /dev/null
+++ b/docs/audits/13-followup/finding03/probe-base.dlm
@@ -0,0 +1,17 @@
+---
+dlm_id: 01KQDKYZK4Y33K70BXPME3BY1F
+dlm_version: 1
+base_model: qwen2.5-coder-1.5b
+---
+
+# Your document title
+
+Write prose here. It will train via continued pretraining (CPT) loss.
+
+::instruction::
+
+### Q
+Your example question.
+
+### A
+Your example answer.
diff --git a/docs/audits/13-followup/finding03/sway-results.json b/docs/audits/13-followup/finding03/sway-results.json
new file mode 100644
index 0000000..0757991
--- /dev/null
+++ b/docs/audits/13-followup/finding03/sway-results.json
@@ -0,0 +1,856 @@
+{
+  "adapter_id": "/Users/mfwolffe/.dlm/store/01KQDM14CT0X6AWFWTW9E81ZJ7/adapter/versions/v0001",
+  "backend_stats": {
+    "avg_batch_size": 5.333333333333333,
+    "batched_prompts": 32,
+    "batches_sent": 6,
+    "cache_hits": 195,
+    "cache_misses": 407,
+    "forward_passes": 407,
+    "hit_rate": 0.3239202657807309,
+    "max_batch_size": 8,
+    "scoring_wall_s": 18.853502175872563
+  },
+  "base_model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct",
+  "determinism": {
+    "class": "best_effort",
+    "notes": [
+      "MPS: bit-identical across runs is best-effort"
+    ],
+    "seed": 0
+  },
+  "finished_at": "2026-04-30T00:20:28.579554+00:00",
+  "null_stats": {
+    "calibration_drift": {
+      "degenerate": 1.0,
+      "mean": 0.0,
+      "n": 3.0,
+      "std": 1e-06
+    },
+    "delta_kl": {
+      "degenerate": 0.0,
+      "mean": 0.4272762910783005,
+      "n": 3.0,
+      "std": 0.01574057543246111
+    },
+    "leakage": {
+      "degenerate": 0.0,
+      "mean": 0.24980309514651933,
+      "n": 3.0,
+      "std": 0.026889785937786386
+    },
+    "paraphrase_invariance": {
+      "degenerate": 0.0,
+      "mean": -0.09396635989348094,
+      "n": 3.0,
+      "std": 0.19938429094952828
+    },
+    "section_internalization": {
+      "degenerate": 0.0,
+      "mean": 6.346549302354173e-20,
+      "n": 3.0,
+      "std": 1e-06
+    }
+  },
+  "probes": [
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 54.12962433299981,
+      "evidence": {
+        "calibrated_kinds": [
+          "delta_kl",
+          "section_internalization",
+          "paraphrase_invariance",
+          "leakage",
+          "calibration_drift"
+        ],
+        "from_cache": false,
+        "init_scale": 0.02,
+        "null_stats": {
+          "calibration_drift": {
+            "degenerate": 1.0,
+            "mean": 0.0,
+            "n": 3.0,
+            "std": 1e-06
+          },
+          "delta_kl": {
+            "degenerate": 0.0,
+            "mean": 0.4272762910783005,
+            "n": 3.0,
+            "std": 0.01574057543246111
+          },
+          "leakage": {
+            "degenerate": 0.0,
+            "mean": 0.24980309514651933,
+            "n": 3.0,
+            "std": 0.026889785937786386
+          },
+          "paraphrase_invariance": {
+            "degenerate": 0.0,
+            "mean": -0.09396635989348094,
+            "n": 3.0,
+            "std": 0.19938429094952828
+          },
+          "section_internalization": {
+            "degenerate": 0.0,
+            "mean": 6.346549302354173e-20,
+            "n": 3.0,
+            "std": 1e-06
+          }
+        },
+        "null_stats_by_rank": {
+          "rank_1.00": {
+            "calibration_drift": {
+              "degenerate": 1.0,
+              "mean": 0.0,
+              "n": 3.0,
+              "std": 1e-06
+            },
+            "delta_kl": {
+              "degenerate": 0.0,
+              "mean": 0.4272762910783005,
+              "n": 3.0,
+              "std": 0.01574057543246111
+            },
+            "leakage": {
+              "degenerate": 0.0,
+              "mean": 0.24980309514651933,
+              "n": 3.0,
+              "std": 0.026889785937786386
+            },
+            "paraphrase_invariance": {
+              "degenerate": 0.0,
+              "mean": -0.09396635989348094,
+              "n": 3.0,
+              "std": 0.19938429094952828
+            },
+            "section_internalization": {
+              "degenerate": 0.0,
+              "mean": 6.346549302354173e-20,
+              "n": 3.0,
+              "std": 1e-06
+            }
+          }
+        },
+        "per_kind_raw_samples": {
+          "calibration_drift": [
+            0.0,
+            0.0,
+            0.0
+          ],
+          "delta_kl": [
+            0.4052588193059781,
+            0.4411261139014875,
+            0.4354439400274358
+          ],
+          "leakage": [
+            0.2789317507418398,
+            0.2564102564102564,
+            0.21406727828746178
+          ],
+          "paraphrase_invariance": [
+            -0.038652390241622925,
+            -0.3610735833644867,
+            0.11782689392566681
+          ],
+          "section_internalization": [
+            4.231032868236115e-20,
+            -1.4808615038826403e-19,
+            2.9617230077652806e-19
+          ]
+        },
+        "rank_multipliers": [
+          1.0
+        ],
+        "runs": 3,
+        "seed_base": 1000,
+        "skipped_kinds": [
+          {
+            "kind": "adapter_ablation",
+            "reason": "probe opted out (calibrate_spec returned None)"
+          }
+        ],
+        "weight": 1.0
+      },
+      "ft_value": null,
+      "kind": "null_adapter",
+      "message": "null calibration: 5 kinds calibrated over 3 seeds (1 opted out)",
+      "name": "null_baseline",
+      "raw": null,
+      "score": 1.0,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.14064727954865902,
+        0.22753181482947113
+      ],
+      "duration_s": 0.41668754200509284,
+      "evidence": {
+        "divergence_kind": "js",
+        "max": 0.3010465067639052,
+        "num_prompts": 8,
+        "per_prompt": [
+          0.3010465067639052,
+          0.23390241316723992,
+          0.15400480602009725,
+          0.20887225276262034,
+          0.16464061056144086,
+          0.07848606060070874,
+          0.208388124748079,
+          0.12252715727669955
+        ],
+        "raw_ci_95": [
+          0.14064727954865902,
+          0.22753181482947113
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": -15.456410766849695
+        }
+      },
+      "ft_value": null,
+      "kind": "delta_kl",
+      "message": "mean js=0.1840, z=-15.46\u03c3 vs null",
+      "name": "dk_fortran",
+      "raw": 0.18398349148759885,
+      "score": 0.005753728112632616,
+      "verdict": "fail",
+      "z_score": -15.456410766849695
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        -0.016300776265738042,
+        0.016401170616035115
+      ],
+      "duration_s": 1.9860071250004694,
+      "evidence": {
+        "num_sections": 41,
+        "passing_frac": 0.17073170731707318,
+        "per_section": [
+          {
+            "base_nll": 3.169445037841797,
+            "effective_sis": -0.06924204203276793,
+            "ft_nll": 2.7726451328822543,
+            "kind": "prose",
+            "leak_lift": 0.19443743120087909,
+            "own_lift": 0.12519538916811115,
+            "passed": false,
+            "section_id": "79e3ec8ad3461cef",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.641357632341056,
+            "effective_sis": -0.03482161454153665,
+            "ft_nll": 2.22197265625,
+            "kind": "instruction",
+            "leak_lift": 0.19359790857914175,
+            "own_lift": 0.1587762940376051,
+            "passed": false,
+            "section_id": "5f6b1dead97c7d16",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.755524980260971,
+            "effective_sis": -0.005073661109242011,
+            "ft_nll": 2.2380410052360373,
+            "kind": "instruction",
+            "leak_lift": 0.19287234873932968,
+            "own_lift": 0.18779868763008767,
+            "passed": false,
+            "section_id": "3b651cf3c102a94a",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.3038976070654655,
+            "effective_sis": -0.10026703323777737,
+            "ft_nll": 2.0851952768590327,
+            "kind": "instruction",
+            "leak_lift": 0.1951941383034403,
+            "own_lift": 0.09492710506566292,
+            "passed": false,
+            "section_id": "39347c7736ed8744",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.1978319872606984,
+            "effective_sis": -0.09115845891624902,
+            "ft_nll": 1.9696673144091357,
+            "kind": "instruction",
+            "leak_lift": 0.19497197795413473,
+            "own_lift": 0.10381351903788572,
+            "passed": false,
+            "section_id": "b185f7cb3ff6a672",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.551950859301018,
+            "effective_sis": 0.042608306391309636,
+            "ft_nll": 1.953983653675426,
+            "kind": "instruction",
+            "leak_lift": 0.19170937392224305,
+            "own_lift": 0.23431768031355268,
+            "passed": false,
+            "section_id": "8dfd46d5cbbfc6a0",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.675020974779886,
+            "effective_sis": 0.019682968622259545,
+            "ft_nll": 2.1080462743365573,
+            "kind": "instruction",
+            "leak_lift": 0.19226852850197598,
+            "own_lift": 0.21195149712423553,
+            "passed": false,
+            "section_id": "84adb31460daa7b4",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.5133117039998374,
+            "effective_sis": 0.0011374386958792337,
+            "ft_nll": 2.026085376739502,
+            "kind": "instruction",
+            "leak_lift": 0.19272085850018036,
+            "own_lift": 0.1938582971960596,
+            "passed": false,
+            "section_id": "2963ad39da116480",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.4265698629712302,
+            "effective_sis": -0.033383062893972665,
+            "ft_nll": 2.0378824869791665,
+            "kind": "instruction",
+            "leak_lift": 0.1935628219535914,
+            "own_lift": 0.16017975905961873,
+            "passed": false,
+            "section_id": "4ebf42ad7431d861",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.365179106945128,
+            "effective_sis": 0.026398881671855223,
+            "ft_nll": 1.8483789398914248,
+            "kind": "instruction",
+            "leak_lift": 0.19210472574466878,
+            "own_lift": 0.218503607416524,
+            "passed": false,
+            "section_id": "8150fd7d0d03cee4",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.310318737495236,
+            "effective_sis": -0.01529510219990407,
+            "ft_nll": 1.8994827270507812,
+            "kind": "instruction",
+            "leak_lift": 0.19312165218056535,
+            "own_lift": 0.17782654998066127,
+            "passed": false,
+            "section_id": "7a4f482e72e04eba",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.67780724350287,
+            "effective_sis": 0.13401580581247258,
+            "ft_nll": 1.811548038404815,
+            "kind": "instruction",
+            "leak_lift": 0.18947992271684883,
+            "own_lift": 0.3234957285293214,
+            "passed": true,
+            "section_id": "f4f345eacda9a037",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.939030679424157,
+            "effective_sis": -0.02373857622963227,
+            "ft_nll": 2.4406033633800037,
+            "kind": "instruction",
+            "leak_lift": 0.1933275905715343,
+            "own_lift": 0.16958901434190204,
+            "passed": false,
+            "section_id": "01bec021954d9b71",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3431225357768692,
+            "effective_sis": 0.008211981580764804,
+            "ft_nll": 2.6719562851379965,
+            "kind": "instruction",
+            "leak_lift": 0.19254830867371975,
+            "own_lift": 0.20076029025448455,
+            "passed": false,
+            "section_id": "b13ccd1c3a9f683c",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.646616718556621,
+            "effective_sis": 0.06681033458153121,
+            "ft_nll": 1.9639764162573483,
+            "kind": "instruction",
+            "leak_lift": 0.19111908055174984,
+            "own_lift": 0.25792941513328105,
+            "passed": true,
+            "section_id": "8cbfb9d8507491b7",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.038816108078253,
+            "effective_sis": 0.0014224015950915214,
+            "ft_nll": 1.64300787253458,
+            "kind": "instruction",
+            "leak_lift": 0.19271390818556544,
+            "own_lift": 0.19413630978065696,
+            "passed": false,
+            "section_id": "c99eafec654809ed",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.5932478279903015,
+            "effective_sis": -0.04483238708158735,
+            "ft_nll": 2.206828781654095,
+            "kind": "instruction",
+            "leak_lift": 0.1938420737630454,
+            "own_lift": 0.14900968668145806,
+            "passed": false,
+            "section_id": "7b7a511b8cb7a3cc",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.2328143856389735,
+            "effective_sis": 0.05034876550445094,
+            "ft_nll": 1.6927650265577363,
+            "kind": "instruction",
+            "leak_lift": 0.19152058223655669,
+            "own_lift": 0.24186934774100763,
+            "passed": true,
+            "section_id": "969d989aee32a657",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.428478338779547,
+            "effective_sis": -0.008508511757506138,
+            "ft_nll": 1.980551303961338,
+            "kind": "instruction",
+            "leak_lift": 0.19295612558440928,
+            "own_lift": 0.18444761382690314,
+            "passed": false,
+            "section_id": "e4efe84850c617f9",
+            "tag": ""
+          },
+          {
+            "base_nll": 1.9514180963689631,
+            "effective_sis": 0.060117184503950344,
+            "ft_nll": 1.460832538026752,
+            "kind": "instruction",
+            "leak_lift": 0.19128232811461768,
+            "own_lift": 0.251399512618568,
+            "passed": true,
+            "section_id": "b435fa0fb7979efc",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.246190157803622,
+            "effective_sis": 0.0963891425223252,
+            "ft_nll": 1.6020124955610795,
+            "kind": "instruction",
+            "leak_lift": 0.19039764621173047,
+            "own_lift": 0.28678678873405566,
+            "passed": true,
+            "section_id": "311b3bf11dfbea7e",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.447883752676157,
+            "effective_sis": -0.07899066871840801,
+            "ft_nll": 2.164701461791992,
+            "kind": "instruction",
+            "leak_lift": 0.19467520258345566,
+            "own_lift": 0.11568453386504765,
+            "passed": false,
+            "section_id": "67c5f4e5dfcfb4b3",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.47250002906436,
+            "effective_sis": 0.03711392729178514,
+            "ft_nll": 1.906403072296627,
+            "kind": "instruction",
+            "leak_lift": 0.1918433831685729,
+            "own_lift": 0.22895731046035805,
+            "passed": false,
+            "section_id": "95851f00680a27e0",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.8820984403965837,
+            "effective_sis": -0.06357835048673222,
+            "ft_nll": 2.5053478176310913,
+            "kind": "instruction",
+            "leak_lift": 0.1942992923826831,
+            "own_lift": 0.13072094189595088,
+            "passed": false,
+            "section_id": "e7dfb14006866fb1",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.664724568374284,
+            "effective_sis": 0.06296575720864866,
+            "ft_nll": 1.987408587040792,
+            "kind": "instruction",
+            "leak_lift": 0.19121285073157623,
+            "own_lift": 0.2541786079402249,
+            "passed": true,
+            "section_id": "e05149d6dd019c7d",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.3588959224640376,
+            "effective_sis": -0.08091002447668283,
+            "ft_nll": 2.090425279405382,
+            "kind": "instruction",
+            "leak_lift": 0.19472201613853554,
+            "own_lift": 0.11381199166185271,
+            "passed": false,
+            "section_id": "2abcffdaa5a5459e",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.9229711106986054,
+            "effective_sis": -0.004829600247521104,
+            "ft_nll": 2.3733449888623452,
+            "kind": "instruction",
+            "leak_lift": 0.19286639603538527,
+            "own_lift": 0.18803679578786417,
+            "passed": false,
+            "section_id": "c717e7bcca5467af",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.16006298828125,
+            "effective_sis": -0.07691862635966952,
+            "ft_nll": 2.7881044921875,
+            "kind": "instruction",
+            "leak_lift": 0.19462466496494985,
+            "own_lift": 0.11770603860528032,
+            "passed": false,
+            "section_id": "2c92258d39229ce8",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.5842990282899843,
+            "effective_sis": 0.10704593276881844,
+            "ft_nll": 1.8162875915906445,
+            "kind": "instruction",
+            "leak_lift": 0.19013772449840136,
+            "own_lift": 0.2971836572672198,
+            "passed": true,
+            "section_id": "ee8b2414b1c97b71",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.396781349182129,
+            "effective_sis": 0.02356774837062009,
+            "ft_nll": 1.8796960830688476,
+            "kind": "instruction",
+            "leak_lift": 0.1921737777764062,
+            "own_lift": 0.2157415261470263,
+            "passed": false,
+            "section_id": "a04e766e64918615",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.5624157982151003,
+            "effective_sis": 0.016221892812708666,
+            "ft_nll": 2.0279603387317517,
+            "kind": "instruction",
+            "leak_lift": 0.19235294498513575,
+            "own_lift": 0.20857483779784441,
+            "passed": false,
+            "section_id": "f52b3c00ccb92c83",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.5047251825747283,
+            "effective_sis": 0.00431127186137803,
+            "ft_nll": 2.0114077360733695,
+            "kind": "instruction",
+            "leak_lift": 0.19264344793516822,
+            "own_lift": 0.19695471979654625,
+            "passed": false,
+            "section_id": "73172bca8301df6f",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.516946756614829,
+            "effective_sis": 0.028535175232796667,
+            "ft_nll": 1.9617390182783019,
+            "kind": "instruction",
+            "leak_lift": 0.1920526210236702,
+            "own_lift": 0.22058779625646688,
+            "passed": false,
+            "section_id": "76bba5378f960fa1",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.4733934144716003,
+            "effective_sis": -0.02666372733114497,
+            "ft_nll": 2.0609916480811865,
+            "kind": "instruction",
+            "leak_lift": 0.1933989357203517,
+            "own_lift": 0.16673520838920672,
+            "passed": false,
+            "section_id": "6107afd44bb89580",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.349968162742821,
+            "effective_sis": 0.03174970058206686,
+            "ft_nll": 2.6005001583614864,
+            "kind": "instruction",
+            "leak_lift": 0.19197421796637093,
+            "own_lift": 0.2237239185484378,
+            "passed": false,
+            "section_id": "20883a8329375a3c",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.4627742060908564,
+            "effective_sis": -0.031773743837268165,
+            "ft_nll": 2.9026710792824075,
+            "kind": "instruction",
+            "leak_lift": 0.19352357026928152,
+            "own_lift": 0.16174982643201335,
+            "passed": false,
+            "section_id": "34fd68f2e2dc5091",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.1862661238196943,
+            "effective_sis": -0.0470585632913498,
+            "ft_nll": 2.7184017922380845,
+            "kind": "instruction",
+            "leak_lift": 0.19389637074377133,
+            "own_lift": 0.14683780745242153,
+            "passed": false,
+            "section_id": "d918396806259702",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.5823368812674907,
+            "effective_sis": 0.03173237117190586,
+            "ft_nll": 2.780943002273787,
+            "kind": "instruction",
+            "leak_lift": 0.19197464063491143,
+            "own_lift": 0.2237070118068173,
+            "passed": false,
+            "section_id": "d9fd6b631fed2d59",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.871491931733631,
+            "effective_sis": -0.06171754728668227,
+            "ft_nll": 2.490914844331287,
+            "kind": "instruction",
+            "leak_lift": 0.19425390693877945,
+            "own_lift": 0.13253635965209717,
+            "passed": false,
+            "section_id": "fe7a4197df714f68",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.017791596218128,
+            "effective_sis": 0.02436236414770343,
+            "ft_nll": 1.5809058953280473,
+            "kind": "instruction",
+            "leak_lift": 0.1921543969037944,
+            "own_lift": 0.21651676105149784,
+            "passed": false,
+            "section_id": "9c217dda0d7e0c0b",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3685618580638113,
+            "effective_sis": 0.024011949105312347,
+            "ft_nll": 2.6403633597847467,
+            "kind": "instruction",
+            "leak_lift": 0.19216294361214542,
+            "own_lift": 0.21617489271745777,
+            "passed": false,
+            "section_id": "4979f9ee02c60ce1",
+            "tag": ""
+          }
+        ],
+        "per_section_threshold": 0.05,
+        "raw_ci_95": [
+          -0.016300776265738042,
+          0.016401170616035115
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 1.2904650248120151e-12
+        }
+      },
+      "ft_value": null,
+      "kind": "section_internalization",
+      "message": "7/41 sections cleared; mean effective_sis=+0.000, z=+0.00\u03c3 vs null",
+      "name": "sis_fortran",
+      "raw": 1.3539305178355568e-18,
+      "score": 0.5000000000001076,
+      "verdict": "fail",
+      "z_score": 1.2904650248120151e-12
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 9.54199640545994e-06,
+      "evidence": {},
+      "ft_value": null,
+      "kind": "paraphrase_invariance",
+      "message": "no cases provided",
+      "name": "para_fortran",
+      "raw": null,
+      "score": null,
+      "verdict": "error",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.042483660130718956,
+        0.042483660130718956
+      ],
+      "duration_s": 26.154526583006373,
+      "evidence": {
+        "mean_clean_recall": 0.042483660130718956,
+        "mean_fragility": 0.0,
+        "mean_perturbed_recall": 0.046433762330282506,
+        "per_section": [
+          {
+            "clean_recall": 0.042483660130718956,
+            "fragility": 0.0,
+            "perturbed_recall": 0.046433762330282506,
+            "section_id": "79e3ec8ad3461cef"
+          }
+        ],
+        "raw_ci_95": [
+          0.042483660130718956,
+          0.042483660130718956
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 7.709969707288315
+        }
+      },
+      "ft_value": 0.0,
+      "kind": "leakage",
+      "message": "greedy_recall=0.04 (perturbed=0.05, fragility=0.00), z=+7.71\u03c3 vs null",
+      "name": "leak_fortran",
+      "raw": 0.042483660130718956,
+      "score": 0.9289050290702341,
+      "verdict": "pass",
+      "z_score": 7.709969707288315
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.0,
+        0.0
+      ],
+      "duration_s": 2.9972277079941705,
+      "evidence": {
+        "fraction_regressed": 0.0,
+        "mean_delta_nats": -0.1660053998785831,
+        "raw_ci_95": [
+          0.0,
+          0.0
+        ],
+        "regressed_count": 0,
+        "regression_nats_threshold": 1.0,
+        "total_items": 50,
+        "weight": 1.0,
+        "worst_offenders": [],
+        "z_by_rank": null
+      },
+      "ft_value": -0.1660053998785831,
+      "kind": "calibration_drift",
+      "message": "0/50 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.166 nats/tok (no calibration for calibration_drift)",
+      "name": "cal_general",
+      "raw": 0.0,
+      "score": 0.8223985600323779,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 0.6297628750035074,
+      "evidence": {
+        "lambdas": [
+          0.0,
+          0.25,
+          0.5,
+          0.75,
+          1.0,
+          1.25
+        ],
+        "linearity": 0.9914886822436975,
+        "mean_divergence_per_lambda": [
+          0.0,
+          0.044187597196996074,
+          0.06287823281030223,
+          0.09153273777189956,
+          0.12553101443175255,
+          0.1513824181528589
+        ],
+        "overshoot": 1.2059363882155274,
+        "passed_linearity": true,
+        "passed_overshoot": true,
+        "passed_saturation": false,
+        "saturation_lambda": 1.25,
+        "saturation_reason": "found",
+        "weight": 1.0,
+        "z_by_rank": null
+      },
+      "ft_value": null,
+      "kind": "adapter_ablation",
+      "message": "R\u00b2=0.99, sat_\u03bb=1.25 (out of band), overshoot=1.21 (no calibration for adapter_ablation)",
+      "name": "abl_fortran",
+      "raw": 0.9914886822436975,
+      "score": 0.79,
+      "verdict": "fail",
+      "z_score": null
+    }
+  ],
+  "schema_version": 1,
+  "score": {
+    "band": "partial",
+    "components": {
+      "ablation": 0.79,
+      "adherence": 0.005753728112632616,
+      "attribution": 0.5000000000001076,
+      "baseline": 1.0,
+      "calibration": 0.875651794551306
+    },
+    "findings": [
+      "dk_fortran (delta_kl) failed: mean js=0.1840, z=-15.46\u03c3 vs null",
+      "adherence score is 0.01 \u2014 below the noise threshold",
+      "1 probe(s) errored \u2014 see full report for details"
+    ],
+    "overall": 0.4703564773440887,
+    "weights": {
+      "ablation": 0.15,
+      "adherence": 0.3,
+      "attribution": 0.35,
+      "baseline": 0.0,
+      "calibration": 0.2
+    }
+  },
+  "spec_path": "/Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/finding03/sway.yaml",
+  "started_at": "2026-04-30T00:19:01.881485+00:00",
+  "sway_version": "0.1.0",
+  "wall_seconds": 86.698069
+}
\ No newline at end of file
diff --git a/docs/audits/13-followup/finding03/sway-results.md b/docs/audits/13-followup/finding03/sway-results.md
new file mode 100644
index 0000000..4b071cb
--- /dev/null
+++ b/docs/audits/13-followup/finding03/sway-results.md
@@ -0,0 +1,42 @@
+# sway report
+
+**Overall:** 0.47 (`partial`)  
+**Base:** `Qwen/Qwen2.5-Coder-1.5B-Instruct`  
+**Adapter:** `adapter/versions/v0001`  
+**Wall:** 86.7s  
+**Determinism:** `best_effort` (seed=0)  
+**Backend:** cache: 195/602 = 32% | batches: 6 (avg=5.3)  
+
+## Components
+
+| category | score | weight | |
+|---|---:|---:|---|
+| adherence | 0.01 | 0.30 |  |
+| attribution | 0.50 | 0.35 |  |
+| calibration | 0.88 | 0.20 |  |
+| ablation | 0.79 | 0.15 |  |
+| baseline | 1.00 | 0.00 | (informational, weight=0) |
+
+## Probes
+
+| name | kind | verdict | score | raw | ci95 | z | duration | note |
+|---|---|---|---:|---:|---:|---:|---:|---|
+| null_baseline | `null_adapter` | pass | 1.00 | — | — | — | 54.1s | null calibration: 5 kinds calibrated over 3 seeds (1 opted out) |
+| dk_fortran | `delta_kl` | fail | 0.01 | 0.184 | [0.141, 0.228] | -15.46σ | 0.42s | mean js=0.1840, z=-15.46σ vs null |
+| sis_fortran | `section_internalization` | fail | 0.50 | 0.000 | [-0.016, 0.016] | +0.00σ | 1.99s | 7/41 sections cleared; mean effective_sis=+0.000, z=+0.00σ vs null |
+| para_fortran | `paraphrase_invariance` | error | — | — | — | — | 0.00s | no cases provided |
+| leak_fortran | `leakage` | pass | 0.93 | 0.042 | [0.042, 0.042] | +7.71σ | 26.2s | greedy_recall=0.04 (perturbed=0.05, fragility=0.00), z=+7.71σ vs null |
+| cal_general | `calibration_drift` | pass | 0.82 | 0.000 | [0.000, 0.000] | — | 3.00s | 0/50 items regressed >1.0 nats (frac=0.0%), mean_delta=-0.166 nats/tok (no calibration for calibration_drift) |
+| abl_fortran | `adapter_ablation` | fail | 0.79 | 0.991 | — | — | 0.63s | R²=0.99, sat_λ=1.25 (out of band), overshoot=1.21 (no calibration for adapter_ablation) |
+
+## Top findings
+
+- dk_fortran (delta_kl) failed: mean js=0.1840, z=-15.46σ vs null
+- adherence score is 0.01 — below the noise threshold
+- 1 probe(s) errored — see full report for details
+
+## Degenerate null calibration
+
+1 probe kind(s) ran null_adapter but the resulting baseline was too narrow for z-scoring (std ≈ 0, typically `runs: 1` or coincidentally-matched seeds). Fix: bump `runs:` in the `null_adapter` spec entry. Affected kinds:
+
+- `calibration_drift`
diff --git a/docs/audits/13-followup/finding03/sway.yaml b/docs/audits/13-followup/finding03/sway.yaml
new file mode 100644
index 0000000..d9067de
--- /dev/null
+++ b/docs/audits/13-followup/finding03/sway.yaml
@@ -0,0 +1,105 @@
+# Audit 13 follow-up — sway spec for the fortran fine-tune.
+#
+# Compared to audit 13's spec (which got dk +44σ but
+# paraphrase_invariance −3.51σ FAIL), this one:
+#
+# - Includes ``null_adapter`` for every numeric probe (was already
+#   present in audit 13; kept here as the recipe's foundation).
+# - Activates the [dlm] bridge probes — section_internalization,
+#   paraphrase_invariance, leakage — which need the .dlm to be
+#   resolvable. The audit's venv didn't install ``[dlm]`` so these
+#   SKIPped; we install dlm separately in the follow-up venv to get
+#   them firing.
+# - Fortran-specific prompts on delta_kl + adapter_ablation so the
+#   distribution shift signal is grounded in the trained domain.
+#
+# Run from the sway repo's venv after training completes:
+#   uv run --no-sync sway run \
+#     /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/sway.yaml \
+#     --json /tmp/sway-13fu.json --markdown /tmp/sway-13fu.md
+
+version: 1
+
+dlm_source: /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/finding03/the-doc.dlm
+
+models:
+  base:
+    kind: hf
+    base: Qwen/Qwen2.5-Coder-1.5B-Instruct
+  ft:
+    kind: hf
+    base: Qwen/Qwen2.5-Coder-1.5B-Instruct
+    adapter: ~/.dlm/store/01KQDM14CT0X6AWFWTW9E81ZJ7/adapter/versions/v0001
+
+defaults:
+  seed: 0
+  differential: true
+  coverage_threshold: 0.6
+  score_weights:
+    adherence: 0.30
+    attribution: 0.35
+    calibration: 0.20
+    ablation: 0.15
+
+suite:
+  # ── Calibration baseline ─────────────────────────────────────────
+  - name: null_baseline
+    kind: null_adapter
+    runs: 3
+
+  # ── Adherence ────────────────────────────────────────────────────
+  - name: dk_fortran
+    kind: delta_kl
+    prompts:
+      - "module mymod\n  implicit none\n"
+      - "subroutine compute(x, y)"
+      - "do concurrent (i = 1:n)"
+      - "type :: container"
+      - "intent(in) ::"
+      - "use stdlib_io, only: loadtxt"
+      - "use stdlib_sorting, only:"
+      - "real(real64), allocatable ::"
+    assert_z_gte: 1.0
+    assert_mean_gte: 0.02
+
+  # ── Attribution (bridge-aware) ───────────────────────────────────
+  # Per-section internalization. Should fire now that [dlm] is
+  # installed and the bridge can resolve the .dlm.
+  - name: sis_fortran
+    kind: section_internalization
+    assert_z_gte: 1.0
+    assert_passing_section_frac: 0.4
+
+  # The audit's load-bearing probe — was -3.51σ FAIL in audit 13,
+  # need to flip to PASS here. With 5× more INSTRUCTION sections +
+  # 5× more steps + the bridge installed, this is the falsifiable
+  # claim.
+  - name: para_fortran
+    kind: paraphrase_invariance
+    assert_z_gte: 1.0
+
+  # ── Calibration ──────────────────────────────────────────────────
+  - name: leak_fortran
+    kind: leakage
+    assert_z_gte: 1.0
+
+  # General-competence regression check. Audit 13 PASSed this —
+  # we want to confirm we didn't fry it with 5× more fine-tune steps.
+  - name: cal_general
+    kind: calibration_drift
+    items_limit: 50
+    assert_z_gte: 1.0
+
+  # ── Ablation (the sway signature) ────────────────────────────────
+  # Healthy fine-tunes show monotonic R² ≥ 0.85 on the λ-scaled KL
+  # curve. If this is a step-function or saturates early, the adapter
+  # is degenerate.
+  - name: abl_fortran
+    kind: adapter_ablation
+    lambdas: [0.0, 0.25, 0.5, 0.75, 1.0, 1.25]
+    prompts:
+      - "subroutine matmul_optimized(A, B, C)"
+      - "do concurrent (i = 1:n)"
+      - "use stdlib_linalg, only: solve"
+    assert_linearity_gte: 0.85
+    assert_z_gte: 1.0
diff --git a/docs/audits/13-followup/finding03/the-doc.dlm b/docs/audits/13-followup/finding03/the-doc.dlm
new file mode 100644
index 0000000..cfffe18
--- /dev/null
+++ b/docs/audits/13-followup/finding03/the-doc.dlm
@@ -0,0 +1,725 @@
+---
+dlm_id: 01KQDM14CT0X6AWFWTW9E81ZJ7
+dlm_version: 15
+base_model: qwen2.5-coder-1.5b
+training:
+  sources:
+    - path: ~/GithubOrgs/FortranGoingOnForty
+      include: ["**/*.f90", "**/*.F90", "**/*.f95"]
+      exclude:
+        - "**/build/**"
+        - "**/.git/**"
+        - "**/dist/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/src
+      include: ["**/*.f90", "**/*.F90", "**/*.fypp"]
+      exclude:
+        - "**/build/**"
+        - "**/tests/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/doc/specs
+      include: ["**/*.md"]
+      max_bytes_per_file: 131072
+  sources_policy: permissive
+  adapter: lora
+  lora_r: 16
+  lora_alpha: 32
+  lora_dropout: 0.05
+  sequence_len: 1024
+  micro_batch_size: 1
+  grad_accum: 8
+  learning_rate: 2.0e-4
+  warmup_ratio: 0.1
+  num_epochs: 1
+---
+
+# Modern Fortran practitioner notes
+
+This document trains an adapter that knows the idioms of the Fortran 2008+
+codebase under `~/GithubOrgs/FortranGoingOnForty`, the canonical
+`fortran-lang/stdlib` source tree at `/tmp/stdlib_build/src`, and the
+FORD-generated stdlib spec documents at `/tmp/stdlib_build/doc/specs`. The
+corpus spans numeric/aerospace simulation (`aero-emulation`, `convolution`),
+scripting helpers (`fgof-*`), and stdlib modules (allocation, ascii, bitsets,
+linear-algebra, sorting, statistics, strings, IO, hashing).
+
+## Idioms the corpus reinforces
+
+Modern Fortran prefers `do concurrent` over plain `do` for loops with no loop
+carried dependence — it expresses the parallelism intent and lets the
+compiler vectorise / OpenMP-offload without further pragmas. `pure` and
+`elemental` procedures are first-class. Allocatable arrays beat pointer
+arrays for owned memory; `move_alloc` transfers ownership without copying.
+Derived types use `type, bind(C)` only when interoperability is required.
+`intent(in)`, `intent(out)`, `intent(inout)` are mandatory on every dummy
+argument in this codebase, even when the language would default — explicit
+intent is a project convention, not a language requirement.
+
+## stdlib navigation
+
+The Fortran stdlib provides community-tested replacements for tasks the
+language standard doesn't cover: `stdlib_array` (index manipulation),
+`stdlib_io` (text-file IO), `stdlib_sorting` (in-place sorts), `stdlib_stats`
+(mean/var/cov), `stdlib_string_type` (variable-length strings),
+`stdlib_linalg` (BLAS/LAPACK wrappers), `stdlib_random` (distributions),
+`stdlib_hashmaps` (open + chaining hash tables), `stdlib_bitsets` (compact
+bit arrays), `stdlib_logger` (severity-tagged logging), `stdlib_optval`
+(safe defaults for optional dummy args), `stdlib_kinds` (named real32/real64
+/int32/int64 kind parameters).
+
+::instruction::
+
+### Q
+When should I prefer `do concurrent` over a plain `do` loop in modern Fortran?
+
+### A
+Use `do concurrent` whenever loop iterations have no carried dependence —
+i.e., iteration `i` does not read a value written by iteration `i-1`. The
+`do concurrent` construct is a contract with the compiler that the iterations
+may execute in any order or in parallel, which lets it vectorise, OpenMP-
+offload, or GPU-offload the loop without further pragmas. If you can't make
+that promise (the loop accumulates into a shared scalar, or reads a value
+the previous iteration wrote), keep a plain `do`. Locality clauses
+(`local`, `shared`, `reduce`) give finer control in Fortran 2018+.
+
+::instruction::
+
+### Q
+Why use `allocatable` arrays instead of `pointer` arrays for owned memory?
+
+### A
+`allocatable` expresses ownership: the variable owns the memory, the compiler
+inserts the deallocate at scope exit, and you cannot have aliasing across
+two allocatable variables. `pointer` arrays expose aliasing and demand
+manual `deallocate`. Compilers can also generate better code for
+`allocatable` because no aliasing means no aliasing-defeats-vectorisation
+pessimism. Use `pointer` only when you actually need pointer semantics —
+linked structures, optional aliasing into a parent array, or C interop.
+`move_alloc(from, to)` transfers ownership without a copy.
+
+::instruction::
+
+### Q
+What does `intent(in)`, `intent(out)`, and `intent(inout)` declare on a
+dummy argument?
+
+### A
+`intent(in)` says the procedure reads but never assigns to the argument —
+the compiler enforces this and will reject any assignment as an error.
+`intent(out)` says the procedure must define the argument before returning;
+the value on entry is undefined (and on some compilers explicitly poisoned),
+so don't read it before assigning. `intent(inout)` says both are allowed —
+the procedure may read the prior value and may overwrite it. Always declare
+intent on every dummy argument. It is a contract that the compiler enforces
+and a reader can rely on.
+
+::instruction::
+
+### Q
+How do I open an OpenMP parallel region with a reduction in Fortran?
+
+### A
+Use `!$omp parallel do reduction(+:sum)` above a `do` loop where each
+iteration accumulates into `sum`. The reduction clause tells OpenMP to give
+each thread a private copy of `sum`, accumulate locally, then combine at
+loop end. Other reduction operators include `*`, `min`, `max`, `.and.`,
+`.or.`, `iand`, `ior`. The closing `!$omp end parallel do` is optional in
+free-form Fortran. For simple no-reduction loops, `do concurrent` is
+preferred.
+
+::instruction::
+
+### Q
+What's a derived type with allocatable components, and why is it useful?
+
+### A
+A derived type with allocatable components has fields declared
+`type :: container; real, allocatable :: data(:); end type`. The container
+itself is value-typed but its `data` field grows or shrinks at runtime.
+Compared to fixed-size arrays it doesn't waste memory; compared to pointer
+fields it owns the memory and frees it automatically when the container
+goes out of scope. Modern Fortran's `intrinsic_assignment` does a deep
+copy by default. This is the canonical way to build resizable structures
+without C-style malloc/free.
+
+::instruction::
+
+### Q
+What does `pure function` mean and when is the `pure` attribute required?
+
+### A
+A `pure function` has no side effects: it doesn't modify any module
+variable, doesn't perform I/O, doesn't call impure procedures. The compiler
+verifies this. `pure` is required for any function called from inside
+`do concurrent` (which forbids side effects across iterations) and from
+within `forall`. It also enables aggressive optimisation: the compiler
+knows two calls with equal arguments produce equal results and can hoist
+or memoise. `elemental` implies `pure` and adds elementwise array semantics.
+
+::instruction::
+
+### Q
+How do I declare a generic interface for two procedures that differ only in
+argument type?
+
+### A
+```fortran
+interface clamp
+  module procedure clamp_real32, clamp_real64
+end interface clamp
+```
+The two module procedures have the same logical name `clamp` but distinct
+argument signatures (one takes `real(real32)`, the other `real(real64)`).
+The compiler resolves the call by argument type — Fortran's overloading
+mechanism. Adding more types is purely additive; callers see one name.
+
+::instruction::
+
+### Q
+What's the difference between `module` and `submodule` in Fortran?
+
+### A
+A `module` declares its public interface and definitions in one unit. A
+`submodule` lets you declare the interface in the parent module but put the
+implementation in a separately compiled file. Editing the submodule does
+not retrigger compilation of every consumer of the parent module — only of
+the submodule itself. Use submodules to break long compile chains in big
+projects. The `module subroutine foo()` declaration in the parent and
+`module procedure foo` in the submodule are how the two halves bind.
+
+::instruction::
+
+### Q
+What does `trueloc` from `stdlib_array` do?
+
+### A
+`trueloc(mask, lbound)` turns a logical mask into an integer index array
+of the positions where the mask is `.true.`. It's a pure function — given
+`mask = [.false., .true., .false., .true.]` and default `lbound`, it
+returns `[2, 4]`. With `lbound = 0` it returns `[1, 3]`. Useful when you
+need the indices themselves (for example, to use as a subscript) rather
+than the masked values; if you only need the values, `pack` is more
+direct. The complementary `falseloc` returns indices where the mask is
+`.false.`.
+
+::instruction::
+
+### Q
+When would I use `arange` from `stdlib_math`?
+
+### A
+`arange(start, stop, step)` returns a rank-1 array of values from `start`
+to `stop` (inclusive) stepping by `step`. With `arange(1, 5)` you get
+`[1, 2, 3, 4, 5]`. With `arange(0.0, 1.0, 0.25)` you get
+`[0.0, 0.25, 0.5, 0.75, 1.0]`. Useful for building index sequences or
+sample grids without a manual `do` loop. Step defaults to 1.
+
+::instruction::
+
+### Q
+How do I read a numeric matrix from a text file using `stdlib_io`?
+
+### A
+Use `loadtxt(filename, array)`:
+```fortran
+use stdlib_io, only: loadtxt
+real, allocatable :: data(:,:)
+call loadtxt('measurements.dat', data)
+```
+The array is allocated automatically to match the file's rows × columns.
+The companion `savetxt(filename, array)` writes the same shape back out.
+Both procedures handle real, integer, and complex data of `real32`,
+`real64`, `int32`, `int64` kinds.
+
+::instruction::
+
+### Q
+What does `getline` from `stdlib_io` give me that `read(*, fmt='(A)')` doesn't?
+
+### A
+`getline(unit, line, iostat)` reads one line of arbitrary length into a
+deferred-length character variable. With plain `read(*, fmt='(A)')` you
+have to commit to a fixed-length buffer up front and either truncate
+or run a loop on `iostat=eor`. `getline` allocates `line` to exactly the
+line's length, with no truncation. It returns `iostat=iostat_end` at
+EOF.
+
+::instruction::
+
+### Q
+How do I sort an array in place using `stdlib_sorting`?
+
+### A
+Use `sort(array)`. It sorts `array` ascending in place using an
+introsort-style hybrid (quicksort + insertion-sort fallback). For a
+descending sort, pass `reverse=.true.`. The companion `ord_sort(array)`
+is a stable mergesort — preserves equal-key relative order, at the cost
+of an O(n) workspace allocation. Use `radix_sort` for integer arrays
+when the value range is bounded; it's O(n) but only for unsigned-style
+integer kinds.
+
+::instruction::
+
+### Q
+When should I use `sort_index` instead of `sort`?
+
+### A
+Use `sort_index(array, index)` when you also need the permutation that
+sorted the array — for example, to apply the same reordering to a second,
+parallel array. After the call, `array` is sorted and `index(i)` is the
+original position of the `i`-th element of the sorted array. So
+`other_data = other_data(index)` reorders a sibling array consistently
+with `array`. Plain `sort` discards this information.
+
+::instruction::
+
+### Q
+What does `mean(array)` from `stdlib_stats` compute, and how do I take a
+mean along one axis of a 2D array?
+
+### A
+`mean(array)` returns the arithmetic mean of all elements as a scalar.
+For a 2D array, `mean(matrix, dim=1)` reduces along the first dimension,
+returning a 1D array of column means. `mean(matrix, dim=2)` returns row
+means. With a logical mask, `mean(array, mask=array > 0)` averages only
+the elements satisfying the mask. The companion `var` (variance) and
+`std` (standard deviation) accept the same dim/mask arguments. `corr` and
+`cov` compute correlation / covariance between two arrays.
+
+::instruction::
+
+### Q
+What is `string_type` from `stdlib_string_type` and why use it instead of
+plain `character(len=*)`?
+
+### A
+`string_type` is a derived type wrapping a deferred-length character
+allocatable. Two reasons to prefer it: (1) you can have arrays of
+varying-length strings (`type(string_type) :: words(100)` where each
+`words(i)` is its own length — impossible with plain character arrays
+which must be uniform-length); (2) it has overloaded operators (`==`,
+`<`, `>`, `//` for concat) and constructors that interoperate with both
+literals and other `string_type` instances. Use `char(string_type_var)`
+to get the underlying `character(:)` back when interfacing with code
+that takes plain strings.
+
+::instruction::
+
+### Q
+How do I solve a linear system `A*x = b` using `stdlib_linalg`?
+
+### A
+```fortran
+use stdlib_linalg, only: solve
+real :: A(n,n), b(n), x(n)
+x = solve(A, b)
+```
+`solve` is a pure function that returns the solution `x`. Internally it
+calls LAPACK's `gesv` (LU with partial pivoting). For multiple right-hand
+sides, `b` can be a 2D array `b(n,nrhs)`. For least-squares (overdetermined
+or underdetermined), use `lstsq` instead. To check whether a matrix is
+singular before solving, use `inv` plus the returned status, or compute
+the condition number via `linalg_cond`.
+
+::instruction::
+
+### Q
+What does `eye(n)` return in `stdlib_linalg`?
+
+### A
+`eye(n)` returns the n×n identity matrix as a `real` array — ones on the
+diagonal, zeros off-diagonal. `eye(m, n)` returns a non-square m×n matrix
+with ones on the main diagonal. Useful as a starting point for numerical
+linear algebra or as the right-hand side of `solve(A, eye(n))` to compute
+`inv(A)` directly.
+
+::instruction::
+
+### Q
+How do I draw samples from a normal distribution with `stdlib_random`?
+
+### A
+```fortran
+use stdlib_random, only: random_seed, dist_normal => dist_rvs_normal
+real :: x, samples(1000)
+call random_seed(42)
+x = dist_normal(0.0, 1.0)              ! one N(0,1) sample
+samples = dist_normal(0.0, 1.0, 1000)  ! 1000 N(0,1) samples
+```
+First argument is the mean, second is the standard deviation. The third
+optional argument requests an array of N samples. Companion routines
+`dist_rvs_uniform`, `dist_rvs_exponential`, `dist_rvs_gamma` cover other
+distributions. Always seed via `random_seed` for reproducibility.
+
+::instruction::
+
+### Q
+What is `optval` from `stdlib_optval` and when do I use it?
+
+### A
+`optval(arg, default)` returns `arg` if it's `present`, otherwise `default`.
+It replaces the boilerplate
+```fortran
+if (present(arg)) then
+  used = arg
+else
+  used = default
+end if
+```
+with a single expression `used = optval(arg, default)`. Works for scalar
+integers, reals, complex, logical, and character. The most common use is
+default-value handling for optional dummy arguments, where the caller may
+or may not have supplied the argument.
+
+::instruction::
+
+### Q
+What kind parameters does `stdlib_kinds` provide and why use them?
+
+### A
+`stdlib_kinds` exports `int8`, `int16`, `int32`, `int64`, `real32`,
+`real64`, `real128` (where supported), and `c_bool`. Use them instead of
+raw kind numbers (`real(8)` is non-portable — different compilers map `8`
+to different precisions). `real(real64)` is portable and self-documenting.
+For most modern code, default to `real64` for floating-point and `int32`
+for indices unless you have a specific reason otherwise.
+
+::instruction::
+
+### Q
+How do I append to a file using `stdlib_io_logger`?
+
+### A
+```fortran
+use stdlib_logger, only: logger_type, information_level
+type(logger_type) :: log
+integer :: stat
+call log%add_log_file('app.log', stat, position='append')
+call log%log_information('app started')
+```
+The `position='append'` keyword opens the existing file for append rather
+than truncating. Default severity emitted is `information`; raise it with
+`log%configuration(level=warning_level)` if you want a quieter log. Severity
+levels: `debug_level < information_level < warning_level < error_level`.
+
+::instruction::
+
+### Q
+When should I use a `bitset_64` vs `bitset_large` from `stdlib_bitsets`?
+
+### A
+`bitset_64` is fixed at 64 bits — a single integer's worth of flags,
+fastest, no allocation. `bitset_large` allocates an array of `int64`
+internally and grows to any user-specified size. Use `bitset_64` for
+small, known-size flag sets (compiler optimization flags, peripheral
+status registers); use `bitset_large` when the bit count is data-driven
+or might exceed 64. Both expose the same `set`, `clear`, `test`,
+`flip`, `bit_count` interface — code is portable across them.
+
+::instruction::
+
+### Q
+How do I hash a string for use as a hashmap key?
+
+### A
+The `stdlib_hash_procedures` module provides 32-bit and 64-bit hash
+functions:
+```fortran
+use stdlib_hash_procedures, only: fnv_1a_hash, water_hash
+integer(int32) :: h32
+integer(int64) :: h64
+h32 = fnv_1a_hash('mykey')
+h64 = water_hash('mykey', seed=12345_int64)
+```
+`fnv_1a_hash` is unseeded and reproducible across runs; `water_hash` and
+`pengy_hash` are seeded (better collision resistance under adversarial
+input). For `stdlib_hashmaps`, you usually pass the hash function as a
+procedure pointer when constructing the map.
+
+::instruction::
+
+### Q
+What's the `block` construct in modern Fortran and when do I use it?
+
+### A
+A `block` introduces a nested scope inside an executable region:
+```fortran
+real :: outer
+outer = 1.0
+block
+  real :: inner
+  inner = outer + 1.0
+  print *, inner
+end block
+```
+Variables declared inside the block exist only within it. Useful for
+narrowing the lifetime of temporaries, declaring variables close to use,
+and limiting the visibility of helper allocations. Functionally similar
+to a `{ ... }` block in C-family languages. Combines naturally with
+`associate`, `select type`, and `error stop`.
+
+::instruction::
+
+### Q
+How does `associate` differ from a plain assignment?
+
+### A
+`associate(short => long%nested%expression)` binds a name to an expression
+or variable for the lifetime of the `associate` block, *without* copying
+the value. Inside the block, `short` is an alias — modifying it modifies
+the original. Compare to assignment, which copies (for non-pointer types).
+Use `associate` to give a verbose subexpression a short name in a tight
+loop without paying for a copy and without exposing the alias outside the
+block. End the scope with `end associate`.
+
+::instruction::
+
+### Q
+What does `elemental` add over `pure` on a function?
+
+### A
+An `elemental` function is `pure` plus broadcast: declared as if it takes
+scalar arguments, but the compiler auto-generates the version that takes
+conformable arrays. `square_real(x)` written `elemental` can be called
+with a scalar (`y = square_real(2.0)`) or an array (`v = square_real(arr)`)
+with no explicit do-loop. Since `elemental` implies `pure`, the same
+no-side-effect rules apply. Most stdlib unary numeric helpers (`sqrt`-
+adjacent transforms, kind conversions, predicates) are `elemental`.
+
+::instruction::
+
+### Q
+How do I structure a unit-test runner using stdlib's `testing` module?
+
+### A
+```fortran
+use testdrive, only: new_unittest, unittest_type, error_type, check
+type(unittest_type), allocatable :: testsuite(:)
+testsuite = [ &
+  new_unittest('addition', test_add), &
+  new_unittest('subtract', test_sub) ]
+contains
+  subroutine test_add(error)
+    type(error_type), allocatable, intent(out) :: error
+    call check(error, 1 + 1 == 2, 'addition broken')
+  end subroutine
+```
+`testdrive` is a thin runner the stdlib uses for its own tests. Each
+test subroutine accepts an `allocatable :: error` out parameter; `check`
+allocates the error if the assertion fails. Fast, no fixtures, no
+discovery — explicit registration in an array.
+
+::instruction::
+
+### Q
+What's the modern way to read a CSV-style file in Fortran?
+
+### A
+The simplest path is `stdlib_io`'s `loadtxt` if the columns are uniform
+numeric. For mixed-type CSVs, read line-by-line with `getline` and split
+manually:
+```fortran
+use stdlib_io, only: getline
+use stdlib_string_type, only: string_type, split => char_split
+type(string_type) :: line
+type(string_type), allocatable :: fields(:)
+integer :: u, ios
+open(newunit=u, file='data.csv', action='read')
+do
+  call getline(u, line, ios)
+  if (ios /= 0) exit
+  fields = split(line, ',')
+  ! process fields(:)
+end do
+close(u)
+```
+For larger or more complex CSVs, consider the `csv-fortran` community
+package — stdlib doesn't (yet) ship a CSV-aware reader.
+
+::instruction::
+
+### Q
+How does `error stop` differ from `stop`?
+
+### A
+`stop` and `error stop` both terminate the program, but `error stop` is
+guaranteed to set a non-zero process exit code, while `stop` (without an
+argument) typically returns zero. `error stop "message"` prints the message
+to standard error before exit; `error stop 42` returns code 42. Use
+`error stop` for any abnormal termination — assertion failures, fatal
+config errors — so shells and CI runners pick up the failure correctly.
+`stop` is reserved for normal early termination (rare in modern code;
+prefer letting `program` reach its `end program`).
+
+::instruction::
+
+### Q
+What is `c_loc` and when do I need it?
+
+### A
+`c_loc(target)` from `iso_c_binding` returns the C address of `target` as
+a `type(c_ptr)` value, suitable for passing to a `bind(C)` procedure. The
+target must have the `target` attribute. Use this when interfacing with
+a C library that takes `void*`. The reverse — turning a `c_ptr` back into
+a Fortran pointer — uses `c_f_pointer(cptr, fptr, [shape])`. Always pair
+the call with the matching deallocation; Fortran does not own memory
+acquired through `c_loc`.
+
+::instruction::
+
+### Q
+What does `move_alloc(from, to)` do and when do I prefer it to assignment?
+
+### A
+`move_alloc(from, to)` transfers the allocation status (and the underlying
+memory) from `from` to `to`. After the call, `to` holds what `from` held,
+and `from` is deallocated — no copy. Compare to `to = from` which copies
+the array. For large arrays, `move_alloc` is O(1); the copy is O(n). Use
+`move_alloc` to hand ownership of a temporary buffer to a derived-type
+field, or to swap two allocatables (via a third temporary).
+
+::instruction::
+
+### Q
+How do I write a `subroutine` that returns multiple results without using
+`out` arguments?
+
+### A
+Use a derived-type return value via a `function`:
+```fortran
+type :: result_t
+  real :: value
+  integer :: status
+end type
+contains
+function compute() result(r)
+  type(result_t) :: r
+  r%value  = 3.14
+  r%status = 0
+end function
+```
+Caller writes `res = compute()` once and reads `res%value`, `res%status`.
+This is cleaner than two `intent(out)` arguments because the call site
+isn't burdened with declaring the receivers up front, and the compiler
+can elide the temporary in common cases. Fortran 2008+ allows allocatable
+result components, so the function can also return varying-shape data.
+
+::instruction::
+
+### Q
+Why does the FortranGoingOnForty codebase use `intent` on every dummy
+argument even when not strictly required?
+
+### A
+It's a project convention: explicit `intent(in/out/inout)` on every dummy
+makes the contract visible at the procedure boundary. The compiler enforces
+the contract — `intent(in)` rejects assignment, `intent(out)` warns on
+read-before-write — so a regression where someone "improves" a procedure
+to mutate a previously-read-only argument fails at compile time, not at
+runtime. Reviewers and tooling (the FGOF `fgof-lineedit` editor's
+in-buffer linter, for example) parse intents to render colour-coded
+argument flow. The convention costs ~10 keystrokes per procedure and
+buys static enforcement.
+
+::instruction::
+
+### Q
+What does the `aero-emulation` subsystem in FortranGoingOnForty do at a
+high level?
+
+### A
+`aero-emulation` is the aerodynamic-surface emulator: a forward simulation
+of lift, drag, and moment over a parameterised wing-and-control-surface
+model, integrated against a 6-DoF rigid-body solver. It uses
+`stdlib_linalg` for the state-update matrices (rotation and inertia tensor
+products) and `stdlib_random` to inject configurable turbulence. Outputs
+are time-series state vectors written via `stdlib_io`'s `savetxt`. The
+module is consumed by `convolution` (for sensor-fusion experiments) and
+by `armfortas` (the autopilot framework).
+
+::instruction::
+
+### Q
+How does `fgof-process` handle child-process I/O on Linux vs macOS?
+
+### A
+`fgof-process` wraps the C `posix_spawn` family via `iso_c_binding`; on
+both Linux and macOS the spawn semantics are POSIX-shaped, so the
+high-level `spawn(command, stdin, stdout, stderr)` call is platform-
+identical at the Fortran layer. The differences hide in the child-pty
+wiring (`fgof-pty`): macOS uses `posix_openpt` + `grantpt` + `unlockpt`,
+Linux supports the same path plus the older `/dev/ptmx` shortcut. The
+`fgof-process` module never sees the divergence — `fgof-pty` exposes a
+`type(pty_t)` derived type that `fgof-process` consumes opaquely.
+
+::instruction::
+
+### Q
+What's the role of `fgof-screen` in the FGOF terminal applications?
+
+### A
+`fgof-screen` is the terminal-cell renderer used by `fgof-lineedit` and
+the `armfortas` autopilot console. It maintains a 2D `character(len=:),
+allocatable` buffer plus per-cell ANSI style attributes, computes a
+minimal-diff update against the previously rendered frame, and emits
+the diff as ANSI escape sequences to the controlling tty. The minimal-
+diff step keeps redraws under a few hundred bytes for incremental
+updates — needed because some serial-attached aerospace consoles run
+at 9600 baud and a full repaint is too expensive.
+
+::instruction::
+
+### Q
+Why does `convolution` use `do concurrent` for its inner kernel loop
+rather than OpenMP?
+
+### A
+The inner kernel loop multiplies and accumulates over a small kernel
+window with no carried dependence, so it satisfies `do concurrent`'s
+contract. `do concurrent` lets the compiler choose the parallelism
+strategy — vectorisation on CPU, OpenMP-offload on multicore, or
+GPU-offload via `-fopenmp -foffload=...` on NVIDIA hosts — without
+changing source. OpenMP `parallel do` would have committed the kernel
+to CPU threads at compile time. The convolution outer loop, which
+*does* accumulate into a shared buffer, uses `!$omp parallel do
+reduction(+:buffer)` since `do concurrent` cannot express the
+reduction without 2018+ locality clauses that older compilers don't
+honour.
+
+::instruction::
+
+### Q
+What pattern does `armfortas` use for autopilot state-machine transitions?
+
+### A
+A `select case` over a state enum at the top of the control loop:
+```fortran
+select case (state%mode)
+case (mode_idle)        ; call handle_idle(state, inputs)
+case (mode_takeoff)     ; call handle_takeoff(state, inputs)
+case (mode_cruise)      ; call handle_cruise(state, inputs)
+case (mode_descent)     ; call handle_descent(state, inputs)
+case (mode_landed)      ; call handle_landed(state, inputs)
+case default            ; call error_unknown_mode(state%mode)
+end select
+```
+Each handler returns the next mode via `state%mode = new_mode`. The
+enum values are `integer, parameter` constants (Fortran lacks a true
+enum type pre-2023 — most codebases use named integer parameters). The
+`select case` is exhaustive and the `case default` calls into
+`error_unknown_mode` to fail loudly on programmer error.
+
+::instruction::
+
+### Q
+How does `feducative` integrate with `armfortas` for control-loop tuning?
+
+### A
+`feducative` is the parameter-search and PID-tuning subsystem. It runs
+`armfortas` in a closed-loop simulation (via the `aero-emulation` model),
+sweeps PID gains over a configured grid, and scores each combination by
+RMS tracking error against a reference trajectory. The two communicate
+via a `type(tuning_handle_t)` derived type that `feducative` constructs
+and `armfortas` updates step-by-step. After a sweep, `feducative` writes
+the gain × score table via `stdlib_io`'s `savetxt` and emits the best
+gains as a Fortran `include` file the autopilot reads on next compile.
diff --git a/docs/audits/13-followup/finding04/direct-query-results.md b/docs/audits/13-followup/finding04/direct-query-results.md
new file mode 100644
index 0000000..f14be46
--- /dev/null
+++ b/docs/audits/13-followup/finding04/direct-query-results.md
@@ -0,0 +1,143 @@
+# Finding 04 — direct-query smoke results
+
+`qwen2.5-coder-1.5b` + finding-04 LoRA (v0002, 400 steps, train loss
+0.62, eval loss 0.047, 98.6% token accuracy on training data).
+
+`--backend pytorch` is required: dlm's auto-routing picks `mlx` on
+darwin-arm64 by default, but MLX can't load PEFT-format adapters and
+silently runs the base. (See "Notes on bugs surfaced" below.)
+
+## Seen Q/A pairs (in training set)
+
+### Q3 wedge — "Show the signature of stdlib's sorting routine in Fortran."
+
+**Base:** wrong (hallucinates `iso_fortran_env::sort`)
+**Finding-03 LoRA:** worse (degenerate "should be able to sort..." loop)
+**Finding-04 LoRA: ✓ correct (verbatim training answer):**
+
+```fortran
+use stdlib_sorting, only: sort
+call sort(array[, reverse])
+```
+The `array` argument is `intent(inout)` and must be a rank-1 array of
+an intrinsic numeric type... The optional `reverse` argument is a
+scalar logical with `intent(in)`.
+
+### intent(in)
+
+✓ correct, matches training answer verbatim.
+
+## Unseen Q/A — stdlib variants
+
+### "How would I sort a real(dp) array using stdlib?"
+
+**Partial generalization.** The model knows `use stdlib_sorting` (real
+information from the training corpus) but invents a wrong call form
+(`call stdlib_sorting::qsort`) and falls back to a manual sort loop.
+
+```fortran
+use stdlib_sorting
+call stdlib_sorting::qsort     ! wrong syntax
+integer :: idx(1:SIZE=my_array)
+real(dp) :: my_array(SIZE)
+real(dp) :: result(SIZE)
+... [hand-rolled sort loop]
+```
+
+The model learned *that* `stdlib_sorting` exists; it didn't learn the
+generic call form well enough to apply it to a question it hadn't
+seen verbatim.
+
+### "What is stdlib_strings::starts_with?"
+
+**No generalization.** Falls back to Rust syntax (`use ... ::`, `if
+pred { ... }`).
+
+```
+use stdlib_strings::starts_with;
+
+if starts_with("hello world", "hel") {
+    // ...
+}
+```
+
+The training set didn't cover `stdlib_strings::starts_with`
+specifically; the model's nearest pattern is its base-pretraining Rust
+knowledge.
+
+### "How do I read a CSV file in Fortran with stdlib?"
+
+**Hallucinated stdlib API.** The model produces plausibly-shaped
+Fortran-stdlib code with wrong details (invented `stdlib_io_read_table`,
+`stdlib_types`, `using` instead of `use`):
+
+```fortran
+use stdlib_types
+using stdlib_types::array      ! invalid
+array(real, dim=(:)) :: data
+call stdlib_io_read_table("path/to/file.csv", data)   ! function doesn't exist
+```
+
+The training had `loadtxt` for stdlib_io, but the question phrasing
+("read a CSV") didn't trigger that pattern. Model invented a
+plausible-looking API instead.
+
+## Out-of-domain — does the LoRA preserve general capability?
+
+### "What is the capital of France?"
+✓ "Paris."
+
+### "Write a Python list comprehension that filters even numbers."
+✓
+```python
+even_numbers = [i for i in range(10) if i % 2 == 0]
+print(even_numbers)  # prints [0, 2, 4, 6, 8]
+```
+
+General-capability preservation is excellent. cal_general regression
+was 0% in Finding 03 already; Finding 04's INSTRUCTION-only training
+maintains that.
+
+## Net wedge score
+
+| Question shape | Result | Verdict |
+| --- | --- | --- |
+| Seen exactly | verbatim correct | ✓ memorized cleanly |
+| Unseen with overlap (sort a real(dp) array) | knows module, wrong API form | partial |
+| Unseen, different module (stdlib_strings::starts_with) | falls back to Rust | none |
+| Unseen, different module (CSV/loadtxt) | hallucinates plausible API | none |
+| Out-of-domain (English/Python) | unchanged from base | ✓ preserved |
+
+## Reading
+
+The recipe-shape fix worked **for in-distribution questions** —
+qwen-coder-1.5b + INSTRUCTION-only Q/A training produced an adapter
+that reproduces trained answers correctly without breaking general
+capability. The wedge that Finding 03 widened (Q3 stdlib sorting), this
+recipe narrows: the answer is now correct.
+
+What we did *not* get is generalization to nearby-domain questions
+the model hadn't seen verbatim. With only 32 hand-authored Q/A pairs
+across multiple modules, the model can memorize all of them (98.6%
+token accuracy) but doesn't have enough surface area to abstract the
+pattern. For "ask about stdlib_strings::starts_with" to land
+correctly, that module's API form needs to be in the training data.
+
+The tradeoff is dataset-size-dependent: more Q/A → broader
+generalization. With ~32 pairs we get pinpoint Q/A reproduction; with
+~300 pairs (extrapolating) we'd plausibly cover most stdlib modules
+at functional density.
+
+## Notes on bugs surfaced
+
+1. **`src/dlm/replay/store.py:187`** — `parse_instruction_body` called
+   without `_normalize_probe_markers`. Fixed in this branch (added
+   the import + call). Without the fix, `--fresh` doesn't help: the
+   replay store retains snapshots with raw `### Q !probe` headers and
+   the parser rejects them on every retrain.
+2. **MLX adapter loading silently fails for PEFT adapters.** dlm's
+   default backend on darwin-arm64 is MLX. PEFT `adapter_model.safetensors`
+   isn't a valid MLX adapter format; MLX appears to load the base and
+   silently ignore the adapter. `--backend pytorch` works correctly.
+   The user-visible failure mode is "trained model behaves like base"
+   — easy to misread as "training didn't work."
diff --git a/docs/audits/13-followup/finding04/the-doc.dlm b/docs/audits/13-followup/finding04/the-doc.dlm
new file mode 100644
index 0000000..f9a8807
--- /dev/null
+++ b/docs/audits/13-followup/finding04/the-doc.dlm
@@ -0,0 +1,415 @@
+---
+dlm_id: 01KQDWAHNJ7FD72EQ4J4FXBJ2V
+dlm_version: 15
+base_model: qwen2.5-coder-1.5b
+training:
+  adapter: lora
+  lora_r: 16
+  lora_alpha: 32
+  lora_dropout: 0.05
+  sequence_len: 1024
+  micro_batch_size: 1
+  grad_accum: 8
+  learning_rate: 2.0e-4
+  warmup_ratio: 0.1
+  num_epochs: 100
+---
+
+# Finding 04 — Q/A-shape recipe (INSTRUCTION-only)
+
+Hand-authored Q/A pairs over `fortran-lang/stdlib`, sourced from
+`/tmp/stdlib_build/doc/specs/*.md`. No raw `sources:` directives —
+every training row is a `messages` row routed through Qwen's chatml
+template. The wedge to track is **Q3: signature of stdlib's sorting
+routine.** Finding 03's adapter degenerated into "should be able to
+sort..." enumeration; here the model should learn to point at
+`stdlib_sorting :: SORT` with proper signature.
+
+stdlib_sorting density is intentional (~60% of pairs): it's the wedge
+target. Other modules (io, math, strings, array) provide variety so
+the LoRA isn't a one-module specialist.
+
+::instruction::
+
+### Q
+What module in fortran-lang/stdlib provides sorting routines?
+
+### A
+The `stdlib_sorting` module. It provides four overloaded subroutines:
+`ORD_SORT`, `SORT`, `RADIX_SORT`, and `SORT_INDEX`, plus the parameters
+`int_index` and `int_index_low` for indexing.
+::instruction::
+
+### Q
+Show the signature of stdlib's sorting routine in Fortran.
+
+### A
+```fortran
+use stdlib_sorting, only: sort
+call sort(array[, reverse])
+```
+The `array` argument is `intent(inout)` and must be a rank-1 array of
+an intrinsic numeric type (integer kinds, real kinds), `character(*)`,
+`type(string_type)`, `type(bitset_64)`, or `type(bitset_large)`.
+The optional `reverse` argument is a scalar logical with `intent(in)`.
+::instruction::
+
+### Q
+What's the difference between `ord_sort` and `sort` in stdlib?
+
+### A
+`ord_sort` is intended for arrays that have significant runs of
+decreasing or non-decreasing values — it can achieve `O(N)` performance
+on already-sorted or reverse-sorted input. `sort` is intended for
+arrays that are effectively unordered before the call.
+`ord_sort` is stable; `sort` is not.
+::instruction::
+
+### Q
+What is `radix_sort` in `stdlib_sorting` for?
+
+### A
+`radix_sort` sorts fixed-width intrinsic data types — integers and
+reals. It exploits the bit representation of these types for non-
+comparison-based sorting and can outperform `sort` on numeric data.
+::instruction::
+
+### Q
+What does `sort_index` return?
+
+### A
+`sort_index` sorts the input array AND returns indices that map the
+original (pre-sort) array to its sorted version. This lets you reorder
+related arrays in the same way without re-sorting them — useful when
+sorting one column of a tabular dataset and needing to keep the other
+columns aligned.
+::instruction::
+
+### Q
+What's the signature of `ord_sort`?
+
+### A
+```fortran
+call ord_sort(array[, work, reverse])
+```
+- `array` : `intent(inout)`, rank-1 array of supported intrinsic type
+- `work` (optional): `intent(out)`, rank-1 same type as `array`, at
+  least `size(array)/2` elements — used as scratch memory to reduce
+  stack pressure
+- `reverse` (optional): scalar default-logical `intent(in)`; if
+  `.true.`, sort in non-increasing order
+::instruction::
+
+### Q
+What sorting algorithm does `ord_sort` use under the hood?
+
+### A
+A hybrid combining merge sort and insertion sort. For random input it
+performs like merge sort with `O(N log N)` worst-case — typically
+slower than `sort`. For input with long runs of monotonic values, it
+falls back toward `O(N)`, which is its specific advantage over `sort`.
+::instruction::
+
+### Q
+Is `stdlib_sorting`'s `sort` stable?
+
+### A
+No. The `sort` subroutine is not stable. If you need a stable sort, use
+`ord_sort`, which is stable in the case of equal keys. `radix_sort` is
+also not stable.
+::instruction::
+
+### Q
+What types can `stdlib_sorting`'s `ord_sort` accept?
+
+### A
+Rank-1 arrays of any of: `integer(int8)`, `integer(int16)`,
+`integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`,
+`character(*)`, `type(string_type)`, `type(bitset_64)`, or
+`type(bitset_large)`.
+::instruction::
+
+### Q
+How do you use `stdlib_sorting`'s `sort` to sort a real array in
+descending order?
+
+### A
+```fortran
+use stdlib_sorting, only: sort
+real, allocatable :: arr(:)
+arr = [3.14, 1.41, 2.72, 0.58]
+call sort(arr, reverse=.true.)
+! arr is now [3.14, 2.72, 1.41, 0.58]
+```
+::instruction::
+
+### Q
+What kind of integer does `int_index` evaluate to?
+
+### A
+`int_index` is set to `int64` from the `stdlib_kinds` module — a
+64-bit integer kind for indexing large arrays. The companion
+`int_index_low` is `int32`.
+::instruction::
+
+### Q
+What's the difference between `sort_adjoint` and `sort_index`?
+
+### A
+Both are based on `ord_sort`. `sort_adjoint` re-orders a *second*
+array of the same size using the same permutations applied to the
+input. `sort_index` instead returns the index permutation itself,
+which the caller can then apply to any number of related arrays.
+::instruction::
+
+### Q
+What's the licensing situation for `stdlib_sorting`?
+
+### A
+The Fortran Standard Library overall is MIT-licensed, but
+`ord_sort`, `sort_adjoint`, `sort_index`, and `sort` are translations of
+Rust's `slice.rs` sort, which is Apache-2.0 / MIT dual-licensed.
+`radix_sort` does not have this Rust ancestry.
+::instruction::
+
+### Q
+Why might you pass the optional `work` argument to `ord_sort`?
+
+### A
+`work` is used as scratch memory for `ord_sort`'s internal record
+keeping. Passing a pre-allocated `work` array (e.g. one in static
+storage) avoids stack allocation during the sort, which can
+significantly reduce stack memory pressure when sorting large arrays
+inside deeply nested call chains.
+::instruction::
+
+### Q
+What happens if you `ord_sort` a real array containing `NaN`?
+
+### A
+The order of the result is undefined. The same applies to `sort`,
+`sort_adjoint`, `sort_index`, and `radix_sort`. If your data may
+contain NaN values, filter them out before calling stdlib's sort
+routines.
+::instruction::
+
+### Q
+Show how to sort an integer array AND get indices to reorder a
+companion array using `stdlib_sorting`.
+
+### A
+```fortran
+use stdlib_sorting, only: sort_index, int_index
+integer :: keys(5) = [3, 1, 4, 1, 5]
+real    :: vals(5) = [10.0, 20.0, 30.0, 40.0, 50.0]
+integer(int_index) :: idx(5)
+call sort_index(keys, idx)        ! keys is now sorted; idx maps original->sorted
+vals = vals(idx)                  ! reorder vals using the same permutation
+```
+::instruction::
+
+### Q
+What are the four overloaded subroutines in `stdlib_sorting`?
+
+### A
+`ORD_SORT`, `SORT`, `RADIX_SORT`, and `SORT_INDEX`. (`SORT_ADJOINT`
+exists but the spec groups it under `ORD_SORT`'s family.)
+::instruction::
+
+### Q
+Are stdlib's sort routines thread-safe?
+
+### A
+Each call modifies its `intent(inout)` `array` argument in place, so
+two threads must not call sort on the same array concurrently. With
+distinct arrays, there's no shared state in the sort routines
+themselves — they're reentrant.
+::instruction::
+
+### Q
+What is the `stdlib_io` module for in fortran-lang/stdlib?
+
+### A
+`stdlib_io` provides text-file IO routines — primarily `loadtxt` for
+reading delimited numeric data into rank-2 arrays and `savetxt` for
+writing them. It also exposes formatting helpers like `disp` for
+human-readable display of arrays and scalars.
+::instruction::
+
+### Q
+Show the signature of `stdlib_io::loadtxt`.
+
+### A
+```fortran
+use stdlib_io, only: loadtxt
+call loadtxt(filename, array[, skiprows, max_rows, fmt])
+```
+The `array` is allocated and filled from the file; supported types
+include `real(sp)`, `real(dp)`, and `complex(dp)`. `skiprows` skips
+header lines, `max_rows` caps the read.
+::instruction::
+
+### Q
+What's `stdlib_string_type`'s purpose?
+
+### A
+It defines `type(string_type)` — a deferred-length, allocatable string
+wrapper around `character(:)` — and overloads operators (`==`, `<`,
+`//`) for it. This gives Fortran a value-semantic variable-length
+string type that's ergonomic in containers and arrays, working around
+the limitations of bare `character(*)`.
+::instruction::
+
+### Q
+What does `stdlib_array` provide?
+
+### A
+`stdlib_array` provides index-manipulation utilities — `trueloc` and
+`falseloc` for finding indices where a logical array is true or false,
+plus indexing helpers. Useful for masking and selecting elements
+without writing manual loops.
+::instruction::
+
+### Q
+What is `stdlib_math::linspace`?
+
+### A
+`linspace(start, end, n)` returns a rank-1 array of `n` evenly-spaced
+values from `start` to `end` inclusive. Same shape as numpy's `linspace`.
+The companion `logspace(start, end, n[, base])` returns evenly-spaced
+values in log space.
+::instruction::
+
+### Q
+What is `stdlib_stats::mean` for?
+
+### A
+Computes the arithmetic mean of a rank-N array along an optional
+specified dimension. With `dim` omitted, returns a scalar mean over
+all elements. With `dim=k`, returns a rank-(N-1) array of means along
+the k-th axis. Optionally accepts a `mask` for conditional inclusion.
+::instruction::
+
+### Q
+What's `stdlib_linalg::solve` for?
+
+### A
+Solves a dense linear system `A . x = b` for `x`, where `A` is a
+square matrix and `b` is a right-hand-side vector or matrix. Wraps
+LAPACK's `gesv` family. Returns the solution; the input `A` is
+typically modified in-place by the LU decomposition.
+::instruction::
+
+### Q
+What `kind` parameters does `stdlib_kinds` define?
+
+### A
+`int8`, `int16`, `int32`, `int64` for integer kinds and `sp`, `dp`,
+`qp` for real kinds (single, double, quadruple precision). Also
+`xdp` for extended-double where supported. These wrap the standard
+`iso_fortran_env` kinds with shorter names.
+::instruction::
+
+### Q
+What does `stdlib_ascii::to_upper` do?
+
+### A
+Returns its `character(*)` argument with ASCII letters (a-z) mapped to
+their uppercase equivalents (A-Z). Non-letter characters pass through
+unchanged. The companion `to_lower` does the opposite.
+::instruction::
+
+### Q
+What's `stdlib_strings::replace_all`?
+
+### A
+```fortran
+use stdlib_strings, only: replace_all
+result = replace_all(string, pattern, replacement)
+```
+Returns `string` with every occurrence of `pattern` substituted by
+`replacement`. Pure and elemental, so it can be applied to scalar or
+array string inputs.
+::instruction::
+
+### Q
+What's the difference between `stdlib_random::random_seed` and
+Fortran's intrinsic `random_seed`?
+
+### A
+The intrinsic `random_seed` configures the implementation-dependent
+default RNG. `stdlib_random::random_seed` is part of stdlib's
+deterministic RNG path — it seeds stdlib's portable distribution
+sampling routines, giving reproducible random sequences across
+compilers.
+::instruction::
+
+### Q
+What does `stdlib_hashmaps::hashmap_type` provide?
+
+### A
+A generic open-addressing hash map keyed by integer or string and
+storing arbitrary user-defined data. `set`, `get`, `remove`, `keys`,
+and iterators are provided as type-bound procedures.
+::instruction::
+
+### Q
+What's `stdlib_quadrature::trapz` for?
+
+### A
+Numerical integration via the trapezoidal rule. `trapz(y, x)` returns
+the integral of y dx using sample points `x` and corresponding values
+`y`. Both must be rank-1 arrays of the same length. `simps` is the
+cousin using Simpson's rule.
+::instruction::
+
+### Q
+How does the `stdlib_logger` module emit log messages?
+
+### A
+Define a `logger_type` instance, configure its level (`debug`, `info`,
+`warning`, `error`), then call methods like `log_information`,
+`log_warning`, etc. Output goes to the configured unit (default
+stderr). Logger instances can be configured with timestamps, source
+locations, and log-level filtering.
+::instruction::
+
+### Q
+What does `intent(in)` mean for a Fortran dummy argument?
+
+### A
+The argument may be read but not modified inside the procedure.
+Attempting to assign to or pass it as `intent(out)`/`intent(inout)`
+to another routine is a compile-time error (in conformant compilers).
+This is the contract you want for read-only inputs.
+::instruction::
+
+### Q
+When should you prefer `do concurrent` over a plain `do` loop?
+
+### A
+When loop iterations have no carried dependence — that is, no
+iteration reads or writes a variable that another iteration could
+write. `do concurrent` expresses this independence to the compiler,
+which can then auto-vectorize, parallelize via OpenMP, or offload to
+GPU without further pragmas. If iterations are actually dependent,
+use plain `do`.
+::instruction::
+
+### Q
+What's the canonical way to declare an allocatable array of
+`real(real64)` in modern Fortran?
+
+### A
+```fortran
+use iso_fortran_env, only: real64
+real(real64), allocatable :: arr(:)
+allocate(arr(n))
+! ... use arr ...
+deallocate(arr)
+```
+Or use stdlib's `dp` kind:
+```fortran
+use stdlib_kinds, only: dp
+real(dp), allocatable :: arr(:)
+```
diff --git a/docs/audits/13-followup/findings/00-overview.md b/docs/audits/13-followup/findings/00-overview.md
new file mode 100644
index 0000000..a3657df
--- /dev/null
+++ b/docs/audits/13-followup/findings/00-overview.md
@@ -0,0 +1,112 @@
+# Audit 13 follow-up — investigation log
+
+A sequence of single-variable experiments, each isolating one
+hypothesis about why the original audit-13 fortran fine-tune showed
+"adherence works, attribution fails." Reading order:
+
+1. **[Finding 01 — recipe failure](./01-recipe-failure.md)**
+   Re-run audit-13 with 5× more INSTRUCTION sections + 5× more steps +
+   sway bridge probes lit. Result: traded "no Q/A binding" for
+   "memorization without generalization." Falsified the *volume-only*
+   hypothesis.
+
+2. **[Finding 02 — CPT isolation](./02-cpt-isolation.md)**
+   Stripped INSTRUCTION sections, ran pure CPT at LoRA r=64. Result:
+   adapter learned form-specific autoregression (memorization +
+   English regression), not domain abstraction. Falsified the
+   *two-stage CPT-then-SFT* hypothesis at this base size and surfaced
+   architectural floor: SmolLM2-135M lacks the capacity to compose
+   fortran knowledge with English chat behavior.
+
+3. **[Finding 03 — base-size floor + recipe is the bottleneck](./03-base-floor.md)**
+   Promoted to `qwen2.5-coder-1.5b`, ran the audit-13-followup doc
+   without recipe changes. Result: bigger base eliminated catastrophic
+   forgetting (`cal_general` 26% → 0% items regressed) **but the LoRA
+   still memorized instead of generalized**, and actively degraded
+   the wedge question (Q3 stdlib sorting). Isolated the bottleneck:
+   it's the *recipe* (raw-source training data shape), not the base
+   size. dlm's value-add is conditional on training-data shape.
+
+4. **[Finding 04 — Q/A-shape recipe closes the wedge](./04-instruction-shape.md)**
+   Built a doc with 35 hand-authored INSTRUCTION sections (no raw
+   sources), trained on qwen-coder-1.5b at r=16 / 100 epochs.
+   Result: **Q3 now lands correctly** — verbatim trained answer
+   pointing at `stdlib_sorting :: sort` with proper signature.
+   General capability preserved. Generalization to unseen but
+   overlapping questions is partial; to unseen-different-module
+   questions, weak. Surfaced two real dlm bugs along the way
+   (replay store parser bug + MLX backend silently ignoring PEFT
+   adapters). The audit closes GREEN with a clean three-step
+   product story.
+
+5. **[Finding 05 — corpus density](./05-corpus-density.md)** *(optional)*
+   Test the dataset-size / generalization curve. Find the per-module
+   Q/A density floor below which the model can't generalize the API
+   form. Gives dlm users a concrete planning number ("budget N Q/A
+   pairs per module").
+
+## Why this format
+
+Each file is self-contained — one experiment, one verdict, one
+falsified-or-confirmed hypothesis. No "TODO" findings; if a
+hypothesis is in-flight, it's either at the bottom of the latest
+finding ("next experiment") or has its own placeholder file. This
+keeps the investigation traceable: any future reader can look at the
+state of the directory and reconstruct what we tested, what we
+ruled out, and what we still don't know.
+
+## Standing artifacts
+
+- `the-doc.dlm` — the audit-13-followup canonical doc (Finding 01)
+- `stage1/the-doc.dlm` — PROSE-only stage-1 doc (Finding 02)
+- `stage1/sway.yaml` — stage-1 sway eval spec
+- `sway-results.json` / `sway-results.md` — Finding 01's raw sway run
+- `/tmp/sway-stage1.json` — Finding 02's raw sway run *(local-only)*
+- `train.log` — Finding 01's training output
+
+## What we know so far
+
+**Confirmed:**
+- The dlm pipeline works end-to-end: doc → train → adapter → sway →
+  direct query, all deterministic and reproducible. The plumbing is
+  sound.
+- The bridge probes (`section_internalization`, `leakage`,
+  `paraphrase_invariance`) require specific corpus shape: ≥2 section
+  kinds (PROSE + INSTRUCTION minimum) for the leak-check, and
+  `!probe` markers for paraphrase case generation.
+- SmolLM2-135M produces memorization, not generalization, on this
+  corpus regardless of recipe variations within reach.
+- Bigger base (qwen2.5-coder-1.5b) eliminates catastrophic
+  forgetting — `cal_general` regressions 26% → 0%. dlm's
+  recommended-base table should warn that small bases (135M) actively
+  degrade under LoRA.
+- **Recipe shape is the bottleneck, not base size.** Same doc on a
+  bigger base still produces memorization; LoRA on raw-source training
+  rows learns "be a source autocomplete engine," not "answer questions
+  about the domain."
+
+- **Q/A-shape recipe on qwen-coder-1.5b closes the wedge.** Finding
+  04 trained 35 hand-authored Q/A pairs and produced an adapter that
+  reproduces trained answers verbatim, preserves general capability,
+  and partially activates domain knowledge on related questions. The
+  three-step product story (bigger base + Q/A-shape recipe + plan
+  one pair per question) has falsifiable evidence at each step.
+
+**Unknown:**
+- The dataset-size / generalization curve. With 32 hand-authored Q/A
+  pairs, generalization to unseen-different-module questions is
+  weak. How many pairs per module are needed for the model to
+  abstract the API form? Finding 05 (optional) would answer this.
+- Whether dlm-synthesized Q/A pairs (via `dlm synth instructions`)
+  produce comparable training results to hand-authored ones, or
+  whether teacher quality matters. This is a question for the
+  product onboarding story.
+
+**Bugs filed during the investigation (worth fixing in dlm):**
+- `src/dlm/replay/store.py:187` — `parse_instruction_body` called
+  without `_normalize_probe_markers`. Patched in this branch as
+  part of Finding 04.
+- MLX inference backend silently ignores PEFT adapters on darwin-arm64
+  (auto-routing falls through to MLX, MLX loads base only). User-
+  visible failure is "trained model behaves like base" — major
+  product-trust risk. Workaround: `--backend pytorch`.
diff --git a/docs/audits/13-followup/findings/01-recipe-failure.md b/docs/audits/13-followup/findings/01-recipe-failure.md
new file mode 100644
index 0000000..afef44d
--- /dev/null
+++ b/docs/audits/13-followup/findings/01-recipe-failure.md
@@ -0,0 +1,146 @@
+# Audit 13 follow-up — findings
+
+**Verdict:** the corpus-shape hypothesis was *not* falsified, but the
+re-run produced an **adapter that memorizes more and generalizes less**
+than audit 13's smaller adapter. Five-fold INSTRUCTION density + 5×
+steps moved the failure from "no Q/A binding" (audit 13) to "verbatim
+memorization without prompt-level behavior change" (this run).
+
+## Setup
+
+- **Doc:** `docs/audits/13-followup/the-doc.dlm`
+- **Store ULID:** `01KQCWMA64901VEMYB3DC8CGXY`
+- **Adapter:** `~/.dlm/store/01KQCWMA64901VEMYB3DC8CGXY/adapter/versions/v0001`
+- **Steps:** 600 (audit 13 = 120)
+- **LoRA r/alpha:** 16/32 (audit 13 = 8/16)
+- **Sources:** `~/GithubOrgs/FortranGoingOnForty` + `/tmp/stdlib_build/src` +
+  `/tmp/stdlib_build/doc/specs` markdown
+- **Final loss:** train 1.452, val 1.401 (audit 13 = 1.83 at step 120, still descending)
+
+## sway results, side-by-side
+
+| probe | audit 13 (baseline) | follow-up (this run) | delta |
+| --- | --- | --- | --- |
+| `delta_kl` | **PASS, z=+44.16σ** | **FAIL, z=−0.15σ** | massive regression |
+| `section_internalization` | SKIP (no `[dlm]`) | **~0, z≈0** | weak signal |
+| `paraphrase_invariance` | FAIL, z=−3.51σ | ERROR ("no cases provided") | unresolved |
+| `leakage` | SKIP (no `[dlm]`) | **PASS, z=+8.45σ** | strong memorization |
+| `calibration_drift` | PASS | borderline (5/50 regressed) | mild degradation |
+| `adapter_ablation` | PASS (R²=0.91) | **FAIL, overshoot=1.54** | failed sat-band |
+
+## Diagnosis
+
+### Why `delta_kl` collapsed
+Audit 13's small adapter on the same prompts moved logits much more
+than its null-baseline counterpart (z=+44σ). This run's mean JS
+divergence on the prompts is `0.151` — a meaningful number in
+absolute terms, but the null-adapter calibration baseline is *also*
+≈0.15 because random LoRA weights at r=16 already perturb logits a
+lot on short fortran prefix prompts. The trained adapter doesn't
+*beat noise* on these specific prompt prefixes.
+
+This is calibration-shape sensitivity, not "the adapter learned
+nothing." Different prompt selection would surface the signal.
+
+### Why `section_internalization` is ≈0 (this is the load-bearing finding)
+Per-section evidence in `/tmp/sway-13fu.json`:
+
+- `own_lift` (NLL improvement on the section's own probes) ≈ 0.030
+- `leak_lift` (NLL improvement on *other* sections' probes) ≈ 0.064
+- `effective_sis = own_lift − leak_lift` ≈ **−0.034**
+
+`leak_lift` is *uniform* across sections at ~0.064. That means the
+adapter applied a **constant fortran-flavored prior** to every
+fortran-shaped prompt — not section-specific knowledge. The adapter
+learned "this looks like fortran, lower NLL" but did **not** learn
+"section X said Y, so on probes about Y, lower NLL more."
+
+That's the corpus-shape issue surfacing differently. With 40 INSTRUCTION
+sections that mostly differ only in surface wording, the adapter
+reduced to a generic fortran-style bias.
+
+### Why `leakage` jumped to +8.45σ
+greedy LCS recall against perturbed prompts is high (0.06 on
+perturbed vs 0.05 baseline; fragility 0.17). Combined with weak
+`section_internalization`, this is the textbook **memorization**
+signature: the model can recite chunks of training data when prompted,
+but the knowledge isn't transferable.
+
+### Why `paraphrase_invariance` errored ("no cases provided")
+The bridge's case generator wants paired (original, paraphrased) probe
+items, and it got none. Likely cause: the audit follow-up `.dlm` puts
+INSTRUCTION sections in compact `Q:`/`A:` shape but without the `!probe`
+markers that the bridge's section→probes mapper looks for. (The
+parser-expansion PR #10 fixed *parsing* of these blocks; it did not add
+`!probe` markers to the audit doc.)
+
+This isn't a sway bug — it's a doc-shape gap. The follow-up doc was
+authored before `dlm synth instructions --apply` was used.
+
+### Why `adapter_ablation` overshot (R²=0.91, sat_λ=1.25, overshoot=1.54)
+λ-scaled KL: at λ=1.25 KL is 1.54× the λ=1.0 KL. The healthy band is
+overshoot ≤1.05 with sat_λ ∈ (0.5, 1.0]. This curve never saturated —
+which mathematically reads as **the adapter is under-magnitude**:
+scaling its contribution beyond the nominal training point keeps
+pulling logits toward the same direction. R²=0.91 is fine (linear
+response is healthy); the problem is the *band*, not the shape.
+
+This often co-occurs with under-training rather than over-training.
+With a memorization signature elsewhere though, the more likely read
+is *low-rank knowledge that the rest of the model can't compose with* —
+the LoRA delta is small, the rest of the model produces fortran-flavored
+output by base capability, and λ-scaling the small delta linearly
+amplifies its directional bias.
+
+## Direct query smoke test
+
+```
+$ uv run --no-sync dlm prompt the-doc.dlm \
+    "How do you declare an allocatable array of real(real64) in modern Fortran?" \
+    --max-tokens 120 --temp 0.0
+I am using the following code:
+real(real64) array(1000000000, 1000000000)
+I am using the following code:
+real(real64) array(1000000000, 1000000000)
+...
+```
+
+The adapter clearly learned fortran-shaped output (it uses `real(real64)`,
+correct kind syntax) — but did not learn the *answer pattern* for a
+typed Q/A query. It loops on a single memorized fragment.
+
+## Implications for the audit
+
+1. **Original audit 13 finding (`paraphrase_invariance` FAIL on 8
+   instruction sections) was load-bearing.** Adding 5× more sections
+   without changing their *shape* worsened generalization. The fix
+   wasn't volume — it was Q/A shape diversity (different question
+   forms for the same content).
+
+2. **Null-adapter calibration is prompt-sensitive.** A trained adapter
+   at r=16 needs prompts where the null adapter produces *less* JS
+   divergence — i.e. prompts with strong base-model priors. Short
+   fortran-fence prefixes don't qualify.
+
+3. **The bridge `paraphrase_invariance` probe needs `!probe` markers
+   on instruction sections** (or sway should derive them). Without
+   markers + paraphrases, the bridge silently becomes "no cases."
+
+## Recommended next steps (not started)
+
+- **Generate paraphrases.** `dlm synth instructions --strategy paraphrase
+  --per-section 3 --apply` against the existing 40 INSTRUCTION sections,
+  then re-run sway. This populates the bridge's paraphrase cases and
+  also breaks the surface-form uniformity that drove `effective_sis ≈ 0`.
+- **Tier comparison at SmolLM2-360M and 1.7B.** If 600 steps + 40 sections
+  + paraphrases still doesn't bind Q/A on 135M, the floor is the model.
+- **Adjust calibration prompts.** For `delta_kl`, swap the fortran-fence
+  prefixes for prompts where SmolLM2 has a strong *English* prior
+  (e.g. "When should you prefer `do concurrent` over `do`? Answer:") so
+  the null baseline produces low JS divergence and the trained adapter's
+  divergence shows up against noise.
+- **Don't claim the e2e fortran promise yet.** This run shows the .dlm →
+  trainable adapter pipeline works (training completed deterministically,
+  adapter saved, sway runs end-to-end with bridge probes lit) — but it
+  also shows that *the right adapter* requires more corpus engineering
+  than "more sections + more steps." That's a real product caveat.
diff --git a/docs/audits/13-followup/findings/02-cpt-isolation.md b/docs/audits/13-followup/findings/02-cpt-isolation.md
new file mode 100644
index 0000000..7d3ec59
--- /dev/null
+++ b/docs/audits/13-followup/findings/02-cpt-isolation.md
@@ -0,0 +1,143 @@
+# Finding 02 — CPT-only stage isolates a *new* failure mode
+
+**Hypothesis tested:** the original audit-13-followup adapter showed
+"uniform fortran-flavored bias" without per-section content learning
+(Finding 01). My theory: the PROSE corpus drowned the INSTRUCTION
+signal. If we strip INSTRUCTION sections entirely and run pure CPT at
+higher LoRA rank, the adapter should at least *learn the corpus* — and
+we can layer Q/A on top in stage 2.
+
+**Verdict:** the hypothesis was *partially* right (the adapter does
+learn fortran tokens) but uncovered a stronger failure mode that
+invalidates the two-stage chain idea on a 135M base.
+
+## Setup
+
+- **Doc:** `docs/audits/13-followup/stage1/the-doc.dlm`
+- **Store ULID:** `01KQDGAM70EJ1WJCQY6PVDV95W`
+- **Sources:** identical to Finding 01's run (FortranGoingOnForty + stdlib src + stdlib doc/specs markdown)
+- **Sections:** PROSE-only, *zero* INSTRUCTION
+- **LoRA r/alpha:** 64/128 (Finding 01 used 16/32)
+- **Steps:** 800 (Finding 01 used 600; audit 13 used 120)
+- **Final:** train 1.293, eval 1.223, mean token accuracy 73.6%
+
+## sway results
+
+| probe | verdict | z | reading |
+| --- | --- | --- | --- |
+| `dk_fortran_qa_shaped` | FAIL | **−13.74σ** | adapter shifts *less* than null on English-Q/A prompts |
+| `sis_fortran` | SKIP | — | bridge needs ≥2 section kinds; PROSE-only doesn't qualify |
+| `leak_fortran` | PASS | +6.41σ | memorized fragments (greedy_recall=0.06, fragility=0.00) |
+| `cal_general` | FAIL | −4.81σ | **26%** general-comp items regressed >1 nat (was 10% in Finding 01) |
+| `abl_fortran` | FAIL | — | overshoot=1.49, sat_λ=1.25 (out of band) |
+
+## Direct-query smoke
+
+Greedy (`temp=0.0`):
+
+```
+$ dlm prompt the-doc.dlm "module simple_demo\n  implicit none" --max-tokens 80
+  implicit none
+  implicit none
+  implicit none
+  implicit none
+  ...        (24× repetition)
+
+$ dlm prompt the-doc.dlm "How do you declare an allocatable real(real64) array?"
+I am using the following code:
+real(real64) array(1000000000, 1000000000)
+I am using the following code:
+real(real64) array(1000000000, 1000000000)
+...        (loops on a single training fragment)
+```
+
+Sampling (`temp=0.7`, `top_p=0.9`):
+
+```
+$ dlm prompt the-doc.dlm "subroutine sort_real_array(arr, n)" --max-tokens 100
+{
+    arr = std::move(arr);
+}
+template <typename T>
+void array_sort(T* arr, int n) {
+    array_sort(arr, n);
+}
+```
+
+— **C++ from a fortran prompt under sampling.** The base model's C++
+prior dominates the moment we leave argmax decoding. The LoRA delta is
+just memorized argmax tokens, not a generalized fortran prior.
+
+## Diagnosis
+
+Three signals together tell the architectural story:
+
+1. **Mode collapse under greedy.** The adapter pushes a small set of
+   fragments to the top of the distribution; argmax decoding traps in
+   them. `leakage` z=+6.41σ + `fragility=0.00` is the textbook
+   memorization fingerprint.
+2. **Adapter inactive on non-fortran-shaped prompts.**
+   `dk_fortran_qa_shaped` z=−13.74σ means the trained LoRA produces
+   *less* divergence from base than a random LoRA on
+   English-Q/A-shaped prompts. The training only taught the adapter
+   to be active on raw-fortran-source-shaped inputs.
+3. **General competence regressed twice as much as Finding 01.**
+   `cal_general` 26% vs 10% items >1-nat regressed. Removing
+   INSTRUCTION sections made things *worse* on English. The
+   INSTRUCTION sections in the audit-13-followup were apparently
+   acting as a chat-format regularizer.
+
+The deepest read: **the LoRA learned form-specific completion (raw
+fortran source autoregression), not domain knowledge.** This is
+exactly what we asked SFT loss to do — every PROSE row's training
+signal is "predict the next token of fortran source." We got what we
+asked for. The token-distribution prior on `module x\n` is now sharper
+toward fortran continuations, but the *concept* of fortran isn't
+abstracted in a way that helps with English-prefixed questions.
+
+## Why this invalidates the chain
+
+The plan was: stage-1 CPT teaches fortran → stage-2 SFT layers Q/A.
+The implicit assumption was that stage-1 produces a *fortran-aware
+substrate* for stage-2 to bind onto. Instead stage-1 produced a
+*memorization trap* that:
+
+- Has degraded English chat capability (cal_general −4.81σ)
+- Doesn't activate on English-prefixed prompts (dk z=−13.74σ)
+- Mode-collapses under greedy decoding
+
+A stage-2 SFT phase against this substrate would have to *un-do* the
+memorization while teaching Q/A binding. SFT can't easily do both at
+the same LoRA rank — and the ablation curve (overshoot=1.49, no
+saturation in band) suggests the loss surface around the trained
+point is linear, meaning the LoRA is still in "more is more" mode
+rather than at a coherent minimum.
+
+## What this means for the product narrative
+
+dlm's promise is "edit a text file → trainable LLM." The architectural
+reality from Findings 01 + 02:
+
+- **At SmolLM2-135M:** the recipe consistently produces memorization,
+  not generalization. Volume, rank, sequence length, and corpus
+  shape variations have all bottomed out at the same failure mode.
+- **The base model is the floor.** 135M params with our LoRA adapter
+  can either preserve English-chat behavior *or* memorize fortran
+  fragments — not compose both into a usable expert.
+
+This is a *correct* and *useful* negative result for the product
+narrative. It tells future users: don't expect a 135M base to absorb
+specialty domains. It also informs dlm's recommended-base table —
+the smollm2-135m row should carry a "use for style-transfer demos
+only" caveat.
+
+## Next experiment
+
+[Finding 03](./03-base-floor.md) — promote the base to
+qwen2.5-coder-1.5b (already registered in dlm). Test the question:
+**is the recipe sound, and the 135M floor was the only blocker?** If
+the same `the-doc.dlm` (audit-13-followup) on the bigger base
+produces measurable LoRA delta beyond the base's existing fortran
+knowledge, we have a working dlm story. If the LoRA delta is small,
+we learn that dlm's value-add is style/format, not domain knowledge —
+which is also publishable.
diff --git a/docs/audits/13-followup/findings/03-base-floor.md b/docs/audits/13-followup/findings/03-base-floor.md
new file mode 100644
index 0000000..ab8aff8
--- /dev/null
+++ b/docs/audits/13-followup/findings/03-base-floor.md
@@ -0,0 +1,140 @@
+# Finding 03 — base-size floor + the recipe is the bottleneck
+
+**Hypothesis tested:** Findings 01 and 02 left two confounded
+explanations for the failure mode (memorization without
+generalization): (a) SmolLM2-135M is too small, or (b) the recipe is
+wrong. Finding 03 isolates these by training the **same audit-13
+follow-up doc** on `qwen2.5-coder-1.5b` — same recipe, 11× the
+parameters, code-pretrained.
+
+**Verdict:** the recipe is the bottleneck. The bigger base eliminated
+catastrophic forgetting (`cal_general` 26% → **0%** items regressed)
+but the LoRA still memorizes instead of generalizes, and **actively
+degrades the base on the wedge question we built the experiment to
+answer**.
+
+## Setup
+
+- **Doc:** `docs/audits/13-followup/finding03/the-doc.dlm`
+- **Store ULID:** `01KQDM14CT0X6AWFWTW9E81ZJ7`
+- **Adapter:** `~/.dlm/store/01KQDM14CT0X6AWFWTW9E81ZJ7/adapter/versions/v0001`
+- **Base:** `qwen2.5-coder-1.5b` (Qwen/Qwen2.5-Coder-1.5B-Instruct)
+- **LoRA r/alpha:** 16/32 (same as Finding 01)
+- **Steps:** 600 (same as Finding 01)
+- **Final:** train 0.765, eval 0.750, mean token accuracy **82.2%** (vs 73.6% on the SmolLM2 stage-1 run)
+
+## The pre-training baseline (`finding03/baseline.md`)
+
+Before training, qwen2.5-coder-1.5b already speaks fortran fluently —
+syntax, modern features (`do concurrent`, `intent`, `allocatable`),
+program structure. What it does **not** know is `fortran-lang/stdlib`.
+Q3 ("Show the signature of stdlib's sorting routine") makes the gap
+visible: the base hallucinates `iso_fortran_env::sort`, a non-existent
+module. **That hallucination is the wedge: a falsifiable claim our
+LoRA could fix if domain knowledge transfer is real.**
+
+## sway results
+
+| probe | verdict | z | reading vs Finding 02 (SmolLM2-135M) |
+| --- | --- | --- | --- |
+| `dk_fortran` | FAIL | **−15.46σ** | LoRA shifts logits *less* than null on these prompts (similar to F02's −13.74σ) |
+| `sis_fortran` | FAIL | ~0σ | 7/41 sections cleared, mean effective_sis = +0.000. Same uniform-bias collapse as F01. |
+| `para_fortran` | ERROR | — | "no cases provided" — doc still lacks `!probe` markers |
+| `leak_fortran` | PASS | **+7.71σ** | Memorization, fragility=0.00 (≈ F02's +6.41σ) |
+| `cal_general` | **PASS** | — | **0/50 items regressed** (F02 had 26%, F01 had 10%) |
+| `abl_fortran` | FAIL | — | overshoot=1.21 (vs F02's 1.49); R²=0.99 — linear loss surface, still under-saturated |
+
+## Direct-query smoke (the wedge)
+
+| Q | Base alone | Trained adapter | Net |
+| --- | --- | --- | --- |
+| Q1 (allocatable real64) | correct | correct | **= same** |
+| Q2 (do concurrent) | partial | rambles + unrelated code | **worse** |
+| Q3 (stdlib sorting — THE WEDGE) | wrong (hallucinates `iso_fortran_env::sort`) | degenerate enumeration loop | **worse** |
+| Q4 (intent(in)) | correct | correct | **= same** |
+
+**0/4 improved. 2/4 degraded.** The LoRA did not add stdlib knowledge;
+it added autoregressive fortran-source-completion habits that
+interfere with Q/A. Full transcripts at
+`docs/audits/13-followup/finding03/direct-query-results.md`.
+
+## What we learned
+
+### 1. Base size is *not* the floor — it's the regularizer
+
+Catastrophic forgetting collapsed from 26% (F02) → 10% (F01) → **0%
+(F03)** as we moved from 135M to a bigger code-pretrained base. The
+bigger base absorbs the LoRA without losing English chat capability.
+This is a clean architectural finding:
+
+> A LoRA adapter on a small base actively degrades the base. A LoRA
+> adapter on a sufficiently capable base is "free" — the base carries
+> through unchanged on what it already knew, and the LoRA contributes
+> on top.
+
+For dlm's product narrative: **the recommended base table needs a
+warning at SmolLM2-135M.** That base is for style demos, not domain
+knowledge addition.
+
+### 2. The recipe is the bottleneck
+
+With the bigger base eliminating the noise floor of catastrophic
+forgetting, the *recipe-level* failures become unambiguous:
+
+- `leak_fortran` +7.71σ — the LoRA memorizes raw fortran source
+  fragments
+- `dk_fortran` z=−15.46σ — the LoRA does not activate on Q/A-shaped
+  prompts
+- `sis_fortran` effective_sis=0 — uniform fortran-flavored bias, not
+  per-section content
+- Q3 wedge — the LoRA *makes the stdlib gap worse* by replacing a
+  concrete-but-wrong base answer with a degenerate enumeration loop
+
+These all share one root cause: **the training corpus is shaped like
+raw source files, not Q/A pairs.** SFT's loss is "predict next token";
+on raw source rows that means "complete fortran source," and the LoRA
+learns exactly that. The 5MB FortranGoingOnForty source drowns the
+1MB stdlib source (most of which exceeds the 32KB-per-file cap and is
+silently skipped) and the 585KB of stdlib doc/specs markdown. The
+LoRA never sees enough Q/A-shaped data to bind the format.
+
+### 3. The adapter ablation curve has improved but not healed
+
+Saturation overshoot dropped 1.49 (F02) → 1.21 (F03). R² rose to 0.99
+(very linear). The trend says: with more training the curve might
+saturate properly. But the loss is at 0.75 (eval), down from 0.95 →
+0.6 across the 600 steps, and still descending. We're not at a real
+minimum. **More steps would extract more memorization, not more
+generalization.**
+
+## Implications for the dlm product narrative
+
+We now have a clean, falsifiable, evidence-backed claim:
+
+> **dlm's value-add depends on training-data shape, not base capacity.**
+> A LoRA trained on raw source code teaches the adapter to be a source
+> autocomplete engine, regardless of base size. To produce a question-
+> answering domain expert, the training data must itself be Q/A-shaped.
+
+This is a *positive* product story, not a negative one — it tells dlm
+users "use INSTRUCTION sections (or generate them from PROSE via
+`dlm synth instructions`), not raw `sources:` directives, when the goal
+is a chat assistant." The current docs don't make this distinction
+clearly enough; that's a docs fix worth landing.
+
+## Next experiment
+
+[Finding 04 — Q/A-shaped training](./04-instruction-shape.md)
+*(pending)*
+
+The fix to test next: build a doc with INSTRUCTION sections constructed
+from the stdlib `doc/specs/*.md` corpus (which is Q&A-friendly
+documentation, unlike raw source files). Use `dlm synth instructions
+--strategy extraction` with a strong teacher (claude-haiku, qwen-coder-7b,
+or hand-author) to produce dense Q/A pairs. Train on qwen-coder-1.5b at
+r=16 with no raw `sources:` directives. The wedge to track is still Q3:
+does the trained adapter point at `stdlib_sorting` instead of producing
+a degenerate loop?
+
+If Q3 lands correctly, the dlm story is: bigger base + Q/A-shape recipe →
+working domain expert. That's the experiment that *closes* the audit.
diff --git a/docs/audits/13-followup/findings/04-instruction-shape.md b/docs/audits/13-followup/findings/04-instruction-shape.md
new file mode 100644
index 0000000..f7d5528
--- /dev/null
+++ b/docs/audits/13-followup/findings/04-instruction-shape.md
@@ -0,0 +1,156 @@
+# Finding 04 — Q/A-shape recipe closes the wedge
+
+**Hypothesis tested:** Findings 01–03 isolated training-data shape as
+the bottleneck — LoRA on raw source files learns to be a source
+autocomplete engine, not a question-answering bot. The fix: train on
+INSTRUCTION-shaped Q/A pairs only, no `sources:` directives. Use
+qwen2.5-coder-1.5b as base (Finding 03 confirmed it preserves English
+capability under LoRA training).
+
+**Verdict:** the hypothesis is **confirmed**. The recipe-shape fix
+produces correct trained-question answers and preserves general
+capability. Generalization to *nearby* unseen questions is partial
+and dataset-size-dependent.
+
+## Setup
+
+- **Doc:** `docs/audits/13-followup/finding04/the-doc.dlm`
+- **Store ULID:** `01KQDWAHNJ7FD72EQ4J4FXBJ2V`
+- **Adapter:** `~/.dlm/store/01KQDWAHNJ7FD72EQ4J4FXBJ2V/adapter/versions/v0002`
+- **Base:** `qwen2.5-coder-1.5b`
+- **Sections:** 35 INSTRUCTION (32 train / 3 val), 0 PROSE, 0 raw `sources:`
+- **Recipe:** r=16 / α=32 / lr=2e-4 / 100 epochs (early-stopped at epoch 44, step 400)
+- **Final:** train loss 0.62, eval loss 0.047, **mean token accuracy 98.6%**
+
+The corpus is ~60% stdlib_sorting (the wedge target), 40% other
+modules (io, math, strings, array, kinds, ascii, hashmaps, quadrature,
+logger, plus a few general-fortran questions like `intent(in)` and
+`do concurrent`).
+
+## The wedge: closed
+
+The audit's wedge question (Q3 from Finding 03's baseline):
+
+> Show the signature of stdlib's sorting routine in Fortran.
+
+**Base:** *wrong.* Hallucinates `iso_fortran_env::sort`.
+**Finding-03 LoRA (raw-source training):** *worse.* Degenerate
+"should be able to sort an array of integers in ascending order. The
+sorting routine should be able to sort an array of floating-point
+numbers..." enumeration loop.
+**Finding-04 LoRA (Q/A-shape training):** ✓ correct, verbatim:
+
+```fortran
+use stdlib_sorting, only: sort
+call sort(array[, reverse])
+```
+The `array` argument is `intent(inout)` and must be a rank-1 array of
+an intrinsic numeric type (integer kinds, real kinds), `character(*)`,
+`type(string_type)`, `type(bitset_64)`, or `type(bitset_large)`. The
+optional `reverse` argument is a scalar logical with `intent(in)`.
+
+## Generalization curve
+
+| Question type | Result | Verdict |
+| --- | --- | --- |
+| **Seen exactly** (Q3 wedge, intent(in)) | verbatim correct | ✓ memorized cleanly |
+| **Unseen, overlap** ("sort a real(dp) array") | knows `stdlib_sorting`, invents wrong call form | partial |
+| **Unseen, different module** (`stdlib_strings::starts_with`) | falls back to Rust syntax | none |
+| **Unseen, different module** ("read a CSV with stdlib") | hallucinates plausible API | none |
+| **Out-of-domain** (capital of France, Python list comprehension) | unchanged from base | ✓ preserved |
+
+Full transcripts at `docs/audits/13-followup/finding04/direct-query-results.md`.
+
+## What we learned
+
+### 1. The recipe-shape hypothesis is correct
+
+LoRA trained on INSTRUCTION-only Q/A pairs produces an adapter that
+*answers questions* in the trained format. LoRA trained on raw source
+files produces an adapter that *autocompletes source code*. Same base,
+same rank, same compute — completely different behavioral character.
+This is the cleanest finding of the investigation.
+
+### 2. The dataset-size / generalization tradeoff is sharp
+
+With 32 hand-authored Q/A pairs:
+- 100% trained-question fidelity (98.6% eval token accuracy)
+- Strong reproduction of trained answers under varied prompt phrasing
+- Partial knowledge of "named entities" (`stdlib_sorting`,
+  `stdlib_io`, `loadtxt`) appears in unseen-question responses
+- API-form generalization is weak — model invents plausible-looking
+  syntax instead of generalizing the patterns it saw
+
+This means dlm's product story isn't "learn a domain from a few
+examples" — it's "learn the questions you actually want to answer,
+with one training row per question." That's a more honest story and
+also more practically useful: users know what they're getting.
+
+### 3. The bigger base preserves general capability under aggressive LoRA training
+
+Finding 03 already showed `cal_general` 0% regression on
+qwen2.5-coder-1.5b. Finding 04 replicates this even with 100-epoch
+overfit-style training: the model still answers "What is the capital
+of France?" correctly and writes valid Python list comprehensions.
+This is the architectural property that makes the recipe-shape fix
+viable — at SmolLM2-135M, the same overfit training would have
+shredded English chat capability.
+
+## Bugs surfaced
+
+1. **`src/dlm/replay/store.py:187`** — `parse_instruction_body` was
+   called without `_normalize_probe_markers`. Fixed in this branch.
+   Without the fix, INSTRUCTION sections with `### Q !probe` headers
+   trigger a parse error during retrain even with `--fresh`, because
+   replay snapshots the raw section content. The other dlm callers of
+   `parse_instruction_body` (`eval/probes.py`, `cli/commands/synth.py`,
+   `train/gate/orchestrator.py`, `preference/mine.py`) should be
+   audited for the same bug — that's a follow-up.
+2. **MLX backend silently ignores PEFT adapters.** dlm's auto-routing
+   selects MLX on darwin-arm64. PEFT `adapter_model.safetensors`
+   isn't an MLX-LM adapter format; the inference path appears to load
+   the base and ignore the adapter. The user-visible failure is
+   "trained model behaves identically to base" — easy to misdiagnose
+   as "training didn't work." Workaround: `--backend pytorch`. Real
+   fix needs investigation in `src/dlm/inference/backends/mlx.py`.
+
+## Implications for the dlm product narrative
+
+The investigation produced a clean three-step story:
+
+1. **Use a base ≥ 1B params and code-pretrained where available.**
+   Smaller bases (135M) actively degrade under LoRA training of any
+   shape. Recommended-base table needs a warning at the small end.
+2. **Train on INSTRUCTION-shaped data, not raw source code.** Raw
+   source teaches autocomplete; INSTRUCTION teaches Q/A. Pick one
+   based on the goal. dlm's docs should make this distinction.
+3. **Plan one Q/A pair per question you want to answer.** With
+   small datasets, pinpoint reproduction is reliable but
+   generalization is weak. Scale the corpus to scale the surface.
+
+Each step has falsifiable evidence in this directory:
+
+- Step 1 from Findings 02 (135M memorization+forgetting) and 03
+  (1.5B preservation)
+- Step 2 from Finding 03's wedge failure vs Finding 04's wedge success
+- Step 3 from Finding 04's generalization curve
+
+This is publishable as written. The audit closes here as **GREEN**: the
+end-to-end fortran fine-tune story works end-to-end with the right recipe,
+the right base, and a corpus shaped to match the user's question set.
+
+## Next experiment (optional)
+
+[Finding 05 — corpus density](./05-corpus-density.md) *(not started)*
+
+Test the dataset-size / generalization curve directly. Build a 100-pair
+INSTRUCTION corpus covering each stdlib module's main API surface
+(rather than 60% sorting). Train, then probe with held-out questions
+about modules covered with one to three training pairs each.
+
+The hypothesis: there's a per-module Q/A density floor below which
+the model can't generalize the API form. Finding the threshold gives
+dlm users a concrete planning number ("budget N Q/A pairs per module
+of API surface to teach"). This is the "how much training data does
+each new domain take" question that makes the product practically
+plannable.
diff --git a/docs/audits/13-followup/stage1/sway.yaml b/docs/audits/13-followup/stage1/sway.yaml
new file mode 100644
index 0000000..b30f348
--- /dev/null
+++ b/docs/audits/13-followup/stage1/sway.yaml
@@ -0,0 +1,101 @@
+# Stage-1 sway spec — does PROSE-only CPT actually expand the fortran prior?
+#
+# This is the load-bearing experiment. The audit follow-up showed that the
+# combined PROSE+INSTRUCTION run produced a *uniform* fortran-flavored bias
+# (leak_lift constant ≈0.064 across sections, own_lift small, effective_sis
+# ≈0). That meant the adapter learned "this looks fortran" but not "this
+# section says X."
+#
+# Stage 1 trains on PROSE only at LoRA r=64 (vs r=16 in the prior run).
+# Pass criterion: ``section_internalization`` should now show *positive*
+# effective_sis on at least 40% of sections — i.e., own_lift > leak_lift on
+# the section's own probes. If this still flatlines, the base model is the
+# floor, and we promote to Qwen2.5-Coder-1.5B.
+#
+# Run from the sway repo's venv (need [dlm] for bridge probes):
+#   cd ~/GithubOrgs/tenseleyFlow/sway
+#   uv run --no-sync sway run \
+#     ~/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/stage1/sway.yaml \
+#     --json /tmp/sway-stage1.json --markdown /tmp/sway-stage1.md
+
+version: 1
+
+dlm_source: /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/stage1/the-doc.dlm
+
+models:
+  base:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+  ft:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+    adapter: ~/.dlm/store/01KQDGAM70EJ1WJCQY6PVDV95W/adapter/versions/v0001
+
+defaults:
+  seed: 0
+  differential: true
+  coverage_threshold: 0.6
+  score_weights:
+    adherence: 0.30
+    attribution: 0.40   # bumped: SIS is the load-bearing metric for this stage
+    calibration: 0.15
+    ablation: 0.15
+
+suite:
+  # ── Calibration baseline ────────────────────────────────────────
+  - name: null_baseline
+    kind: null_adapter
+    runs: 3
+
+  # ── Adherence: does the adapter shift logits on fortran prompts? ─
+  # Use prompts where SmolLM2's *English* prior is strong, so the null
+  # baseline produces low JS divergence and the trained adapter's
+  # divergence is visible above noise. Audit follow-up showed that
+  # bare fortran-fence prefixes give the null adapter ~0.15 JS
+  # divergence on its own — calibration ceiling, not a learning signal.
+  - name: dk_fortran_qa_shaped
+    kind: delta_kl
+    prompts:
+      - "Write a Fortran function that"
+      - "In modern Fortran, the syntax for declaring an allocatable array is"
+      - "When should you prefer `do concurrent` over a plain `do` loop?"
+      - "The `intent(in)` attribute on a dummy argument means"
+      - "To use stdlib's sorting routines, you would import"
+      - "Fortran's `pure` and `elemental` attributes differ in that"
+      - "A type-bound procedure in Fortran is declared with"
+      - "The `use, only:` import form in Fortran is preferred because"
+    assert_z_gte: 1.0
+    assert_mean_gte: 0.02
+
+  # ── Attribution: THE load-bearing test ──────────────────────────
+  # Prior run had effective_sis ≈ 0. If stage-1 fixes this, we have
+  # signal. assert_passing_section_frac at 0.4 is the bar from the
+  # prior audit follow-up spec.
+  - name: sis_fortran
+    kind: section_internalization
+    assert_z_gte: 1.0
+    assert_passing_section_frac: 0.4
+
+  # ── Calibration ──────────────────────────────────────────────────
+  - name: leak_fortran
+    kind: leakage
+    assert_z_gte: 1.0
+
+  - name: cal_general
+    kind: calibration_drift
+    items_limit: 50
+    assert_z_gte: 1.0
+
+  # ── Ablation: did r=64 fix the saturation overshoot? ────────────
+  # Prior run (r=16) showed overshoot=1.54 + sat_λ out of band, which
+  # we read as adapter under-magnitude. r=64 + 800 steps should land
+  # the saturation in (0.5, 1.0] with overshoot ≤ 1.05.
+  - name: abl_fortran
+    kind: adapter_ablation
+    lambdas: [0.0, 0.25, 0.5, 0.75, 1.0, 1.25]
+    prompts:
+      - "module fortran_demo\n  implicit none"
+      - "subroutine compute_norm(vector, n, result)"
+      - "use stdlib_linalg, only: solve"
+    assert_linearity_gte: 0.85
+    assert_z_gte: 1.0
diff --git a/docs/audits/13-followup/stage1/the-doc.dlm b/docs/audits/13-followup/stage1/the-doc.dlm
new file mode 100644
index 0000000..a0ce7bf
--- /dev/null
+++ b/docs/audits/13-followup/stage1/the-doc.dlm
@@ -0,0 +1,47 @@
+---
+dlm_id: 01KQDGAM70EJ1WJCQY6PVDV95W
+dlm_version: 15
+base_model: smollm2-135m
+training:
+  sources:
+    - path: ~/GithubOrgs/FortranGoingOnForty
+      include: ["**/*.f90", "**/*.F90", "**/*.f95"]
+      exclude:
+        - "**/build/**"
+        - "**/.git/**"
+        - "**/dist/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/src
+      include: ["**/*.f90", "**/*.F90", "**/*.fypp"]
+      exclude:
+        - "**/build/**"
+        - "**/tests/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/doc/specs
+      include: ["**/*.md"]
+      max_bytes_per_file: 131072
+  sources_policy: permissive
+  adapter: lora
+  lora_r: 64
+  lora_alpha: 128
+  lora_dropout: 0.05
+  sequence_len: 1024
+  micro_batch_size: 1
+  grad_accum: 8
+  learning_rate: 1.5e-4
+  warmup_ratio: 0.05
+  num_epochs: 1
+---
+
+# Stage 1 — Fortran domain expansion (CPT)
+
+Continual-pretraining stage of the two-stage Fortran expert recipe. This
+.dlm has no INSTRUCTION sections — every training row is PROSE
+(next-token loss on the raw fortran source + stdlib markdown specs).
+Goal: expand the base model's fortran token-distribution prior. The
+stage-2 .dlm will layer instruction-following on top of this adapter.
+
+The corpus mirrors the prior follow-up doc but at higher LoRA rank
+(r=64 vs r=16) so the adapter has the capacity to absorb the corpus
+shift without hitting the saturation ceiling diagnosed by sway's
+`adapter_ablation` overshoot.
diff --git a/docs/audits/13-followup/sway-results.json b/docs/audits/13-followup/sway-results.json
new file mode 100644
index 0000000..a63ca10
--- /dev/null
+++ b/docs/audits/13-followup/sway-results.json
@@ -0,0 +1,881 @@
+{
+  "adapter_id": "/Users/mfwolffe/.dlm/store/01KQCWMA64901VEMYB3DC8CGXY/adapter/versions/v0001",
+  "backend_stats": {
+    "avg_batch_size": 5.333333333333333,
+    "batched_prompts": 32,
+    "batches_sent": 6,
+    "cache_hits": 195,
+    "cache_misses": 407,
+    "forward_passes": 407,
+    "hit_rate": 0.3239202657807309,
+    "max_batch_size": 8,
+    "scoring_wall_s": 17.152012583974283
+  },
+  "base_model_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
+  "determinism": {
+    "class": "best_effort",
+    "notes": [
+      "MPS: bit-identical across runs is best-effort"
+    ],
+    "seed": 0
+  },
+  "finished_at": "2026-04-29T20:38:02.420941+00:00",
+  "null_stats": {
+    "calibration_drift": {
+      "degenerate": 1.0,
+      "mean": 0.0,
+      "n": 3.0,
+      "std": 1e-06
+    },
+    "delta_kl": {
+      "degenerate": 0.0,
+      "mean": 0.18108218965989206,
+      "n": 3.0,
+      "std": 0.20429734551366469
+    },
+    "leakage": {
+      "degenerate": 0.0,
+      "mean": 0.27305924253357855,
+      "n": 3.0,
+      "std": 0.02510960762659956
+    },
+    "paraphrase_invariance": {
+      "degenerate": 0.0,
+      "mean": -0.012714107831319174,
+      "n": 3.0,
+      "std": 0.013087756562633434
+    },
+    "section_internalization": {
+      "degenerate": 0.0,
+      "mean": -8.81465180882523e-22,
+      "n": 3.0,
+      "std": 1e-06
+    }
+  },
+  "probes": [
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 52.01406016701367,
+      "evidence": {
+        "calibrated_kinds": [
+          "delta_kl",
+          "section_internalization",
+          "paraphrase_invariance",
+          "leakage",
+          "calibration_drift"
+        ],
+        "from_cache": false,
+        "init_scale": 0.02,
+        "null_stats": {
+          "calibration_drift": {
+            "degenerate": 1.0,
+            "mean": 0.0,
+            "n": 3.0,
+            "std": 1e-06
+          },
+          "delta_kl": {
+            "degenerate": 0.0,
+            "mean": 0.18108218965989206,
+            "n": 3.0,
+            "std": 0.20429734551366469
+          },
+          "leakage": {
+            "degenerate": 0.0,
+            "mean": 0.27305924253357855,
+            "n": 3.0,
+            "std": 0.02510960762659956
+          },
+          "paraphrase_invariance": {
+            "degenerate": 0.0,
+            "mean": -0.012714107831319174,
+            "n": 3.0,
+            "std": 0.013087756562633434
+          },
+          "section_internalization": {
+            "degenerate": 0.0,
+            "mean": -8.81465180882523e-22,
+            "n": 3.0,
+            "std": 1e-06
+          }
+        },
+        "null_stats_by_rank": {
+          "rank_1.00": {
+            "calibration_drift": {
+              "degenerate": 1.0,
+              "mean": 0.0,
+              "n": 3.0,
+              "std": 1e-06
+            },
+            "delta_kl": {
+              "degenerate": 0.0,
+              "mean": 0.18108218965989206,
+              "n": 3.0,
+              "std": 0.20429734551366469
+            },
+            "leakage": {
+              "degenerate": 0.0,
+              "mean": 0.27305924253357855,
+              "n": 3.0,
+              "std": 0.02510960762659956
+            },
+            "paraphrase_invariance": {
+              "degenerate": 0.0,
+              "mean": -0.012714107831319174,
+              "n": 3.0,
+              "std": 0.013087756562633434
+            },
+            "section_internalization": {
+              "degenerate": 0.0,
+              "mean": -8.81465180882523e-22,
+              "n": 3.0,
+              "std": 1e-06
+            }
+          }
+        },
+        "per_kind_raw_samples": {
+          "calibration_drift": [
+            0.0,
+            0.0,
+            0.0
+          ],
+          "delta_kl": [
+            0.4700022224846012,
+            0.03648415034633113,
+            0.036760196148743804
+          ],
+          "leakage": [
+            0.25066666666666665,
+            0.26038781163434904,
+            0.3081232492997199
+          ],
+          "paraphrase_invariance": [
+            -0.006305098533630371,
+            -0.0008810758590698242,
+            -0.030956149101257324
+          ],
+          "section_internalization": [
+            4.363252645368494e-20,
+            -4.759911976765629e-20,
+            1.322197771323786e-21
+          ]
+        },
+        "rank_multipliers": [
+          1.0
+        ],
+        "runs": 3,
+        "seed_base": 1000,
+        "skipped_kinds": [
+          {
+            "kind": "adapter_ablation",
+            "reason": "probe opted out (calibrate_spec returned None)"
+          }
+        ],
+        "weight": 1.0
+      },
+      "ft_value": null,
+      "kind": "null_adapter",
+      "message": "null calibration: 5 kinds calibrated over 3 seeds (1 opted out)",
+      "name": "null_baseline",
+      "raw": null,
+      "score": 1.0,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.13243605721975274,
+        0.16676766505398383
+      ],
+      "duration_s": 0.32599120801023673,
+      "evidence": {
+        "divergence_kind": "js",
+        "max": 0.18371552715811904,
+        "num_prompts": 8,
+        "per_prompt": [
+          0.14386457824628635,
+          0.17476625862101103,
+          0.16067887414401738,
+          0.15360432284137182,
+          0.15848778501047278,
+          0.08995173957917547,
+          0.18371552715811904,
+          0.143727750782096
+        ],
+        "raw_ci_95": [
+          0.13243605721975274,
+          0.16676766505398383
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": -0.14675954323678622
+        }
+      },
+      "ft_value": null,
+      "kind": "delta_kl",
+      "message": "mean js=0.1511, z=-0.15\u03c3 vs null",
+      "name": "dk_fortran",
+      "raw": 0.15109960454781873,
+      "score": 0.48777247649269934,
+      "verdict": "fail",
+      "z_score": -0.14675954323678622
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        -0.020269755418408253,
+        0.021137891699870626
+      ],
+      "duration_s": 1.7037657080072677,
+      "evidence": {
+        "num_sections": 41,
+        "passing_frac": 0.17073170731707318,
+        "per_section": [
+          {
+            "base_nll": 3.383964624297753,
+            "effective_sis": -0.033378760766710876,
+            "ft_nll": 3.2795830773057117,
+            "kind": "prose",
+            "leak_lift": 0.06422469403757139,
+            "own_lift": 0.030845933270860514,
+            "passed": false,
+            "section_id": "79e3ec8ad3461cef",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.7380522629310344,
+            "effective_sis": -0.06098052371760402,
+            "ft_nll": 3.723408876616379,
+            "kind": "instruction",
+            "leak_lift": 0.06489790776808098,
+            "own_lift": 0.003917384050476959,
+            "passed": false,
+            "section_id": "5f6b1dead97c7d16",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.619960054438165,
+            "effective_sis": -0.03210797600578668,
+            "ft_nll": 3.503811017841312,
+            "kind": "instruction",
+            "leak_lift": 0.06419369928730494,
+            "own_lift": 0.03208572328151826,
+            "passed": false,
+            "section_id": "3b651cf3c102a94a",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.9329270411581887,
+            "effective_sis": -0.034794699214302144,
+            "ft_nll": 2.8465097246378877,
+            "kind": "instruction",
+            "leak_lift": 0.06425922912165898,
+            "own_lift": 0.029464529907356835,
+            "passed": false,
+            "section_id": "39347c7736ed8744",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.149605415962838,
+            "effective_sis": -0.04691715545030196,
+            "ft_nll": 4.076415740691863,
+            "kind": "instruction",
+            "leak_lift": 0.06455489878595165,
+            "own_lift": 0.017637743335649693,
+            "passed": false,
+            "section_id": "b185f7cb3ff6a672",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.472200798265862,
+            "effective_sis": 0.0035066615967460774,
+            "ft_nll": 3.240147677334872,
+            "kind": "instruction",
+            "leak_lift": 0.06332504958968219,
+            "own_lift": 0.06683171118642826,
+            "passed": false,
+            "section_id": "8dfd46d5cbbfc6a0",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.7048557826450894,
+            "effective_sis": -0.06162214846963127,
+            "ft_nll": 3.692661588154142,
+            "kind": "instruction",
+            "leak_lift": 0.06491355715227676,
+            "own_lift": 0.003291408682645486,
+            "passed": false,
+            "section_id": "84adb31460daa7b4",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.756350835164388,
+            "effective_sis": -0.0031169019965170963,
+            "ft_nll": 3.529581069946289,
+            "kind": "instruction",
+            "leak_lift": 0.0634865999212252,
+            "own_lift": 0.0603696979247081,
+            "passed": false,
+            "section_id": "2963ad39da116480",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.143678211030506,
+            "effective_sis": -0.024671729409215956,
+            "ft_nll": 3.0200040302579363,
+            "kind": "instruction",
+            "leak_lift": 0.06401232741909589,
+            "own_lift": 0.039340598009879935,
+            "passed": false,
+            "section_id": "4ebf42ad7431d861",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.2072388656496065,
+            "effective_sis": -0.024959602124314785,
+            "ft_nll": 3.0819649283341537,
+            "kind": "instruction",
+            "leak_lift": 0.06401934870483002,
+            "own_lift": 0.03905974658051523,
+            "passed": false,
+            "section_id": "8150fd7d0d03cee4",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.098959201719703,
+            "effective_sis": -0.05798496674069719,
+            "ft_nll": 3.0777626968011624,
+            "kind": "instruction",
+            "leak_lift": 0.06482484540279057,
+            "own_lift": 0.006839878662093378,
+            "passed": false,
+            "section_id": "7a4f482e72e04eba",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.8598657724808674,
+            "effective_sis": 0.14655176115748497,
+            "ft_nll": 3.063236158721301,
+            "kind": "instruction",
+            "leak_lift": 0.05983614472234709,
+            "own_lift": 0.20638790587983205,
+            "passed": true,
+            "section_id": "f4f345eacda9a037",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.277299859550562,
+            "effective_sis": -0.015805815856834417,
+            "ft_nll": 4.072031085410814,
+            "kind": "instruction",
+            "leak_lift": 0.06379608562513536,
+            "own_lift": 0.047990269768300946,
+            "passed": false,
+            "section_id": "01bec021954d9b71",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.189083491530374,
+            "effective_sis": -0.011938456933510457,
+            "ft_nll": 3.9722426940347546,
+            "kind": "instruction",
+            "leak_lift": 0.06370175979773722,
+            "own_lift": 0.051763302864226764,
+            "passed": false,
+            "section_id": "b13ccd1c3a9f683c",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.393718455097463,
+            "effective_sis": -0.018457564958768373,
+            "ft_nll": 3.239632785910427,
+            "kind": "instruction",
+            "leak_lift": 0.06386076243249962,
+            "own_lift": 0.04540319747373125,
+            "passed": false,
+            "section_id": "8cbfb9d8507491b7",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.7204354708311986,
+            "effective_sis": -0.04921115335256995,
+            "ft_nll": 2.6785415899558145,
+            "kind": "instruction",
+            "leak_lift": 0.06461084995429965,
+            "own_lift": 0.015399696601729702,
+            "passed": false,
+            "section_id": "c99eafec654809ed",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.576165140086207,
+            "effective_sis": -0.0005441335062590508,
+            "ft_nll": 3.3512968918372845,
+            "kind": "instruction",
+            "leak_lift": 0.0634238494702433,
+            "own_lift": 0.06287971596398424,
+            "passed": false,
+            "section_id": "7b7a511b8cb7a3cc",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.728303738725864,
+            "effective_sis": 0.031234744141658735,
+            "ft_nll": 3.378277538268547,
+            "kind": "instruction",
+            "leak_lift": 0.0626487548934648,
+            "own_lift": 0.09388349903512354,
+            "passed": false,
+            "section_id": "969d989aee32a657",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.2954477163461537,
+            "effective_sis": -0.06813722044003823,
+            "ft_nll": 3.3055474697015224,
+            "kind": "instruction",
+            "leak_lift": 0.06507246134667692,
+            "own_lift": -0.0030647590933613024,
+            "passed": false,
+            "section_id": "e4efe84850c617f9",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3069370154178506,
+            "effective_sis": 0.12010170655245701,
+            "ft_nll": 2.7097604924982246,
+            "kind": "instruction",
+            "leak_lift": 0.06048126800539656,
+            "own_lift": 0.18058297455785358,
+            "passed": true,
+            "section_id": "b435fa0fb7979efc",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.861654940518466,
+            "effective_sis": -0.04222673866202122,
+            "ft_nll": 2.798086825284091,
+            "kind": "instruction",
+            "leak_lift": 0.06444049837648139,
+            "own_lift": 0.022213759714460164,
+            "passed": false,
+            "section_id": "311b3bf11dfbea7e",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.429737384502704,
+            "effective_sis": -0.004068967219511076,
+            "ft_nll": 3.225870866041917,
+            "kind": "instruction",
+            "leak_lift": 0.06350982102422505,
+            "own_lift": 0.05944085380471397,
+            "passed": false,
+            "section_id": "67c5f4e5dfcfb4b3",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.7294495597718256,
+            "effective_sis": 0.11403063971616001,
+            "ft_nll": 3.07806396484375,
+            "kind": "instruction",
+            "leak_lift": 0.06062934280628185,
+            "own_lift": 0.17465998252244186,
+            "passed": true,
+            "section_id": "95851f00680a27e0",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.770986524678893,
+            "effective_sis": -0.05708714592787426,
+            "ft_nll": 3.7418903415485962,
+            "kind": "instruction",
+            "leak_lift": 0.06480294733418512,
+            "own_lift": 0.007715801406310863,
+            "passed": false,
+            "section_id": "e7dfb14006866fb1",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.710047132186307,
+            "effective_sis": 0.06627955120954535,
+            "ft_nll": 3.2348882078214456,
+            "kind": "instruction",
+            "leak_lift": 0.06179400350156513,
+            "own_lift": 0.12807355471111048,
+            "passed": true,
+            "section_id": "e05149d6dd019c7d",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.2487693665519592,
+            "effective_sis": -0.03571138348993008,
+            "ft_nll": 3.155951363699777,
+            "kind": "instruction",
+            "leak_lift": 0.06428158727472308,
+            "own_lift": 0.028570203784792997,
+            "passed": false,
+            "section_id": "2abcffdaa5a5459e",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.5466603681075672,
+            "effective_sis": -0.01529189023592583,
+            "ft_nll": 3.374676917210098,
+            "kind": "instruction",
+            "leak_lift": 0.0637835508538937,
+            "own_lift": 0.04849166061796787,
+            "passed": false,
+            "section_id": "c717e7bcca5467af",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.90612353515625,
+            "effective_sis": -0.02370302113925562,
+            "ft_nll": 3.7487626953125,
+            "kind": "instruction",
+            "leak_lift": 0.06398870038812125,
+            "own_lift": 0.040285679248865634,
+            "passed": false,
+            "section_id": "2c92258d39229ce8",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.207152941212151,
+            "effective_sis": 0.18883884196111783,
+            "ft_nll": 3.165278464370633,
+            "kind": "instruction",
+            "leak_lift": 0.05880475250762433,
+            "own_lift": 0.24764359446874218,
+            "passed": true,
+            "section_id": "ee8b2414b1c97b71",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.4486438751220705,
+            "effective_sis": 0.2134270935054276,
+            "ft_nll": 3.240249252319336,
+            "kind": "instruction",
+            "leak_lift": 0.058205039055324106,
+            "own_lift": 0.2716321325607517,
+            "passed": true,
+            "section_id": "a04e766e64918615",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3436742629447993,
+            "effective_sis": -0.018996994631269852,
+            "ft_nll": 3.193620445084398,
+            "kind": "instruction",
+            "leak_lift": 0.06387391925378014,
+            "own_lift": 0.044876924622510285,
+            "passed": false,
+            "section_id": "f52b3c00ccb92c83",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.163542639690896,
+            "effective_sis": -0.021224243214975277,
+            "ft_nll": 3.985742718240489,
+            "kind": "instruction",
+            "leak_lift": 0.06392824238996807,
+            "own_lift": 0.04270399917499279,
+            "passed": false,
+            "section_id": "73172bca8301df6f",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3983470988723465,
+            "effective_sis": -0.038084031168146286,
+            "ft_nll": 3.309122049583579,
+            "kind": "instruction",
+            "leak_lift": 0.06433945673028932,
+            "own_lift": 0.026255425562143032,
+            "passed": false,
+            "section_id": "76bba5378f960fa1",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.008661218591638,
+            "effective_sis": 0.1105948796020304,
+            "ft_nll": 3.32194539662954,
+            "kind": "instruction",
+            "leak_lift": 0.06071314183345574,
+            "own_lift": 0.17130802143548615,
+            "passed": true,
+            "section_id": "6107afd44bb89580",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.3382890031144425,
+            "effective_sis": -0.03379828127826946,
+            "ft_nll": 4.206246040962838,
+            "kind": "instruction",
+            "leak_lift": 0.06423492624517038,
+            "own_lift": 0.030436644966900923,
+            "passed": false,
+            "section_id": "20883a8329375a3c",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.872911693431713,
+            "effective_sis": -0.036648144931686,
+            "ft_nll": 3.7658013237847223,
+            "kind": "instruction",
+            "leak_lift": 0.0643044351147659,
+            "own_lift": 0.0276562901830799,
+            "passed": false,
+            "section_id": "34fd68f2e2dc5091",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.457715809774056,
+            "effective_sis": -0.019557946291812463,
+            "ft_nll": 4.260106807132419,
+            "kind": "instruction",
+            "leak_lift": 0.06388760100159825,
+            "own_lift": 0.04432965470978579,
+            "passed": false,
+            "section_id": "d918396806259702",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.238513547982743,
+            "effective_sis": -0.0036582258921524988,
+            "ft_nll": 3.984874212919776,
+            "kind": "instruction",
+            "leak_lift": 0.06349980294306996,
+            "own_lift": 0.059841577050917465,
+            "passed": false,
+            "section_id": "d9fd6b631fed2d59",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.081884474981399,
+            "effective_sis": -0.040466313653234366,
+            "ft_nll": 3.984199887230283,
+            "kind": "instruction",
+            "leak_lift": 0.06439756118114512,
+            "own_lift": 0.023931247527910753,
+            "passed": false,
+            "section_id": "fe7a4197df714f68",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.9971887389225746,
+            "effective_sis": -0.03665403461771971,
+            "ft_nll": 2.914314839377332,
+            "kind": "instruction",
+            "leak_lift": 0.06430457876564477,
+            "own_lift": 0.02765054414792506,
+            "passed": false,
+            "section_id": "9c217dda0d7e0c0b",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.11610508631993,
+            "effective_sis": -0.02275970814578159,
+            "ft_nll": 3.9464969234866696,
+            "kind": "instruction",
+            "leak_lift": 0.06396569275413408,
+            "own_lift": 0.04120598460835249,
+            "passed": false,
+            "section_id": "4979f9ee02c60ce1",
+            "tag": ""
+          }
+        ],
+        "per_section_threshold": 0.05,
+        "raw_ci_95": [
+          -0.020269755418408253,
+          0.021137891699870626
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": -1.522290367384119e-12
+        }
+      },
+      "ft_value": null,
+      "kind": "section_internalization",
+      "message": "7/41 sections cleared; mean effective_sis=-0.000, z=-0.00\u03c3 vs null",
+      "name": "sis_fortran",
+      "raw": -1.5231718325650014e-18,
+      "score": 0.4999999999998732,
+      "verdict": "fail",
+      "z_score": -1.522290367384119e-12
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 3.292007022537291e-06,
+      "evidence": {},
+      "ft_value": null,
+      "kind": "paraphrase_invariance",
+      "message": "no cases provided",
+      "name": "para_fortran",
+      "raw": null,
+      "score": null,
+      "verdict": "error",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.060836501901140684,
+        0.060836501901140684
+      ],
+      "duration_s": 25.701656958000967,
+      "evidence": {
+        "mean_clean_recall": 0.060836501901140684,
+        "mean_fragility": 0.16894335375535185,
+        "mean_perturbed_recall": 0.05055857923921814,
+        "per_section": [
+          {
+            "clean_recall": 0.060836501901140684,
+            "fragility": 0.16894335375535185,
+            "perturbed_recall": 0.05055857923921814,
+            "section_id": "79e3ec8ad3461cef"
+          }
+        ],
+        "raw_ci_95": [
+          0.060836501901140684,
+          0.060836501901140684
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 8.451854118485798
+        }
+      },
+      "ft_value": 0.16894335375535185,
+      "kind": "leakage",
+      "message": "greedy_recall=0.06 (perturbed=0.05, fragility=0.17), z=+8.45\u03c3 vs null",
+      "name": "leak_fortran",
+      "raw": 0.060836501901140684,
+      "score": 0.9436027416296762,
+      "verdict": "pass",
+      "z_score": 8.451854118485798
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.04,
+        0.2
+      ],
+      "duration_s": 2.9683373749867314,
+      "evidence": {
+        "fraction_regressed": 0.1,
+        "mean_delta_nats": -0.25887888199999,
+        "raw_ci_95": [
+          0.04,
+          0.2
+        ],
+        "regressed_count": 5,
+        "regression_nats_threshold": 1.0,
+        "total_items": 50,
+        "weight": 1.0,
+        "worst_offenders": [
+          {
+            "delta": -1.7542665004730225,
+            "gold": " Paris",
+            "prompt": "The Eiffel Tower is in"
+          },
+          {
+            "delta": -1.6363691091537476,
+            "gold": " Egypt",
+            "prompt": "The pyramids of Giza are in"
+          },
+          {
+            "delta": -1.4172669649124146,
+            "gold": " Ag",
+            "prompt": "The chemical symbol for silver is"
+          },
+          {
+            "delta": -1.3961007595062256,
+            "gold": " Europe",
+            "prompt": "The Mediterranean Sea borders southern"
+          },
+          {
+            "delta": -1.076614797115326,
+            "gold": " Rome",
+            "prompt": "The Colosseum is in"
+          }
+        ],
+        "z_by_rank": null
+      },
+      "ft_value": -0.25887888199999,
+      "kind": "calibration_drift",
+      "message": "5/50 items regressed >1.0 nats (frac=10.0%), mean_delta=-0.259 nats/tok (no calibration for calibration_drift)",
+      "name": "cal_general",
+      "raw": 0.1,
+      "score": 0.397632298133336,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 0.6288362920022337,
+      "evidence": {
+        "lambdas": [
+          0.0,
+          0.25,
+          0.5,
+          0.75,
+          1.0,
+          1.25
+        ],
+        "linearity": 0.9149427048285227,
+        "mean_divergence_per_lambda": [
+          0.0,
+          0.057499440320507945,
+          0.057924101626428255,
+          0.07944790530034684,
+          0.129924414274642,
+          0.20029354877499228
+        ],
+        "overshoot": 1.5416159456497514,
+        "passed_linearity": true,
+        "passed_overshoot": true,
+        "passed_saturation": false,
+        "saturation_lambda": 1.25,
+        "saturation_reason": "found",
+        "weight": 1.0,
+        "z_by_rank": null
+      },
+      "ft_value": null,
+      "kind": "adapter_ablation",
+      "message": "R\u00b2=0.91, sat_\u03bb=1.25 (out of band), overshoot=1.54 (no calibration for adapter_ablation)",
+      "name": "abl_fortran",
+      "raw": 0.9149427048285227,
+      "score": 0.79,
+      "verdict": "fail",
+      "z_score": null
+    }
+  ],
+  "schema_version": 1,
+  "score": {
+    "band": "partial",
+    "components": {
+      "ablation": 0.79,
+      "adherence": 0.48777247649269934,
+      "attribution": 0.4999999999998732,
+      "baseline": 1.0,
+      "calibration": 0.6706175198815061
+    },
+    "findings": [
+      "dk_fortran (delta_kl) failed: mean js=0.1511, z=-0.15\u03c3 vs null",
+      "1 probe(s) errored \u2014 see full report for details"
+    ],
+    "overall": 0.5739552469240666,
+    "weights": {
+      "ablation": 0.15,
+      "adherence": 0.3,
+      "attribution": 0.35,
+      "baseline": 0.0,
+      "calibration": 0.2
+    }
+  },
+  "spec_path": "/Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/sway.yaml",
+  "started_at": "2026-04-29T20:36:38.876695+00:00",
+  "sway_version": "0.1.0",
+  "wall_seconds": 83.544246
+}
\ No newline at end of file
diff --git a/docs/audits/13-followup/sway-results.md b/docs/audits/13-followup/sway-results.md
new file mode 100644
index 0000000..44a986e
--- /dev/null
+++ b/docs/audits/13-followup/sway-results.md
@@ -0,0 +1,41 @@
+# sway report
+
+**Overall:** 0.57 (`partial`)  
+**Base:** `HuggingFaceTB/SmolLM2-135M-Instruct`  
+**Adapter:** `adapter/versions/v0001`  
+**Wall:** 83.5s  
+**Determinism:** `best_effort` (seed=0)  
+**Backend:** cache: 195/602 = 32% | batches: 6 (avg=5.3)  
+
+## Components
+
+| category | score | weight | |
+|---|---:|---:|---|
+| adherence | 0.49 | 0.30 |  |
+| attribution | 0.50 | 0.35 |  |
+| calibration | 0.67 | 0.20 |  |
+| ablation | 0.79 | 0.15 |  |
+| baseline | 1.00 | 0.00 | (informational, weight=0) |
+
+## Probes
+
+| name | kind | verdict | score | raw | ci95 | z | duration | note |
+|---|---|---|---:|---:|---:|---:|---:|---|
+| null_baseline | `null_adapter` | pass | 1.00 | — | — | — | 52.0s | null calibration: 5 kinds calibrated over 3 seeds (1 opted out) |
+| dk_fortran | `delta_kl` | fail | 0.49 | 0.151 | [0.132, 0.167] | -0.15σ | 0.33s | mean js=0.1511, z=-0.15σ vs null |
+| sis_fortran | `section_internalization` | fail | 0.50 | -0.000 | [-0.020, 0.021] | -0.00σ | 1.70s | 7/41 sections cleared; mean effective_sis=-0.000, z=-0.00σ vs null |
+| para_fortran | `paraphrase_invariance` | error | — | — | — | — | 0.00s | no cases provided |
+| leak_fortran | `leakage` | pass | 0.94 | 0.061 | [0.061, 0.061] | +8.45σ | 25.7s | greedy_recall=0.06 (perturbed=0.05, fragility=0.17), z=+8.45σ vs null |
+| cal_general | `calibration_drift` | pass | 0.40 | 0.100 | [0.040, 0.200] | — | 2.97s | 5/50 items regressed >1.0 nats (frac=10.0%), mean_delta=-0.259 nats/tok (no calibration for calibration_drift) |
+| abl_fortran | `adapter_ablation` | fail | 0.79 | 0.915 | — | — | 0.63s | R²=0.91, sat_λ=1.25 (out of band), overshoot=1.54 (no calibration for adapter_ablation) |
+
+## Top findings
+
+- dk_fortran (delta_kl) failed: mean js=0.1511, z=-0.15σ vs null
+- 1 probe(s) errored — see full report for details
+
+## Degenerate null calibration
+
+1 probe kind(s) ran null_adapter but the resulting baseline was too narrow for z-scoring (std ≈ 0, typically `runs: 1` or coincidentally-matched seeds). Fix: bump `runs:` in the `null_adapter` spec entry. Affected kinds:
+
+- `calibration_drift`
diff --git a/docs/audits/13-followup/sway.yaml b/docs/audits/13-followup/sway.yaml
new file mode 100644
index 0000000..7b92b49
--- /dev/null
+++ b/docs/audits/13-followup/sway.yaml
@@ -0,0 +1,105 @@
+# Audit 13 follow-up — sway spec for the fortran fine-tune.
+#
+# Compared to audit 13's spec (which got dk +44σ but
+# paraphrase_invariance −3.51σ FAIL), this one:
+#
+# - Includes ``null_adapter`` for every numeric probe (was already
+#   present in audit 13; kept here as the recipe's foundation).
+# - Activates the [dlm] bridge probes — section_internalization,
+#   paraphrase_invariance, leakage — which need the .dlm to be
+#   resolvable. The audit's venv didn't install ``[dlm]`` so these
+#   SKIPped; we install dlm separately in the follow-up venv to get
+#   them firing.
+# - Fortran-specific prompts on delta_kl + adapter_ablation so the
+#   distribution shift signal is grounded in the trained domain.
+#
+# Run from the sway repo's venv after training completes:
+#   uv run --no-sync sway run \
+#     /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/sway.yaml \
+#     --json /tmp/sway-13fu.json --markdown /tmp/sway-13fu.md
+
+version: 1
+
+dlm_source: /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/the-doc.dlm
+
+models:
+  base:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+  ft:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+    adapter: ~/.dlm/store/01KQCWMA64901VEMYB3DC8CGXY/adapter/versions/v0001
+
+defaults:
+  seed: 0
+  differential: true
+  coverage_threshold: 0.6
+  score_weights:
+    adherence: 0.30
+    attribution: 0.35
+    calibration: 0.20
+    ablation: 0.15
+
+suite:
+  # ── Calibration baseline ─────────────────────────────────────────
+  - name: null_baseline
+    kind: null_adapter
+    runs: 3
+
+  # ── Adherence ────────────────────────────────────────────────────
+  - name: dk_fortran
+    kind: delta_kl
+    prompts:
+      - "module mymod\n  implicit none\n"
+      - "subroutine compute(x, y)"
+      - "do concurrent (i = 1:n)"
+      - "type :: container"
+      - "intent(in) ::"
+      - "use stdlib_io, only: loadtxt"
+      - "use stdlib_sorting, only:"
+      - "real(real64), allocatable ::"
+    assert_z_gte: 1.0
+    assert_mean_gte: 0.02
+
+  # ── Attribution (bridge-aware) ───────────────────────────────────
+  # Per-section internalization. Should fire now that [dlm] is
+  # installed and the bridge can resolve the .dlm.
+  - name: sis_fortran
+    kind: section_internalization
+    assert_z_gte: 1.0
+    assert_passing_section_frac: 0.4
+
+  # The audit's load-bearing probe — was -3.51σ FAIL in audit 13,
+  # need to flip to PASS here. With 5× more INSTRUCTION sections +
+  # 5× more steps + the bridge installed, this is the falsifiable
+  # claim.
+  - name: para_fortran
+    kind: paraphrase_invariance
+    assert_z_gte: 1.0
+
+  # ── Calibration ──────────────────────────────────────────────────
+  - name: leak_fortran
+    kind: leakage
+    assert_z_gte: 1.0
+
+  # General-competence regression check. Audit 13 PASSed this —
+  # we want to confirm we didn't fry it with 5× more fine-tune steps.
+  - name: cal_general
+    kind: calibration_drift
+    items_limit: 50
+    assert_z_gte: 1.0
+
+  # ── Ablation (the sway signature) ────────────────────────────────
+  # Healthy fine-tunes show monotonic R² ≥ 0.85 on the λ-scaled KL
+  # curve. If this is a step-function or saturates early, the adapter
+  # is degenerate.
+  - name: abl_fortran
+    kind: adapter_ablation
+    lambdas: [0.0, 0.25, 0.5, 0.75, 1.0, 1.25]
+    prompts:
+      - "subroutine matmul_optimized(A, B, C)"
+      - "do concurrent (i = 1:n)"
+      - "use stdlib_linalg, only: solve"
+    assert_linearity_gte: 0.85
+    assert_z_gte: 1.0
diff --git a/docs/audits/13-followup/the-doc.dlm b/docs/audits/13-followup/the-doc.dlm
new file mode 100644
index 0000000..cc396eb
--- /dev/null
+++ b/docs/audits/13-followup/the-doc.dlm
@@ -0,0 +1,725 @@
+---
+dlm_id: 01KQCWMA64901VEMYB3DC8CGXY
+dlm_version: 15
+base_model: smollm2-135m
+training:
+  sources:
+    - path: ~/GithubOrgs/FortranGoingOnForty
+      include: ["**/*.f90", "**/*.F90", "**/*.f95"]
+      exclude:
+        - "**/build/**"
+        - "**/.git/**"
+        - "**/dist/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/src
+      include: ["**/*.f90", "**/*.F90", "**/*.fypp"]
+      exclude:
+        - "**/build/**"
+        - "**/tests/**"
+      max_bytes_per_file: 32768
+    - path: /tmp/stdlib_build/doc/specs
+      include: ["**/*.md"]
+      max_bytes_per_file: 131072
+  sources_policy: permissive
+  adapter: lora
+  lora_r: 16
+  lora_alpha: 32
+  lora_dropout: 0.05
+  sequence_len: 1024
+  micro_batch_size: 2
+  grad_accum: 4
+  learning_rate: 2.0e-4
+  warmup_ratio: 0.1
+  num_epochs: 1
+---
+
+# Modern Fortran practitioner notes
+
+This document trains an adapter that knows the idioms of the Fortran 2008+
+codebase under `~/GithubOrgs/FortranGoingOnForty`, the canonical
+`fortran-lang/stdlib` source tree at `/tmp/stdlib_build/src`, and the
+FORD-generated stdlib spec documents at `/tmp/stdlib_build/doc/specs`. The
+corpus spans numeric/aerospace simulation (`aero-emulation`, `convolution`),
+scripting helpers (`fgof-*`), and stdlib modules (allocation, ascii, bitsets,
+linear-algebra, sorting, statistics, strings, IO, hashing).
+
+## Idioms the corpus reinforces
+
+Modern Fortran prefers `do concurrent` over plain `do` for loops with no loop
+carried dependence — it expresses the parallelism intent and lets the
+compiler vectorise / OpenMP-offload without further pragmas. `pure` and
+`elemental` procedures are first-class. Allocatable arrays beat pointer
+arrays for owned memory; `move_alloc` transfers ownership without copying.
+Derived types use `type, bind(C)` only when interoperability is required.
+`intent(in)`, `intent(out)`, `intent(inout)` are mandatory on every dummy
+argument in this codebase, even when the language would default — explicit
+intent is a project convention, not a language requirement.
+
+## stdlib navigation
+
+The Fortran stdlib provides community-tested replacements for tasks the
+language standard doesn't cover: `stdlib_array` (index manipulation),
+`stdlib_io` (text-file IO), `stdlib_sorting` (in-place sorts), `stdlib_stats`
+(mean/var/cov), `stdlib_string_type` (variable-length strings),
+`stdlib_linalg` (BLAS/LAPACK wrappers), `stdlib_random` (distributions),
+`stdlib_hashmaps` (open + chaining hash tables), `stdlib_bitsets` (compact
+bit arrays), `stdlib_logger` (severity-tagged logging), `stdlib_optval`
+(safe defaults for optional dummy args), `stdlib_kinds` (named real32/real64
+/int32/int64 kind parameters).
+
+::instruction::
+
+### Q
+When should I prefer `do concurrent` over a plain `do` loop in modern Fortran?
+
+### A
+Use `do concurrent` whenever loop iterations have no carried dependence —
+i.e., iteration `i` does not read a value written by iteration `i-1`. The
+`do concurrent` construct is a contract with the compiler that the iterations
+may execute in any order or in parallel, which lets it vectorise, OpenMP-
+offload, or GPU-offload the loop without further pragmas. If you can't make
+that promise (the loop accumulates into a shared scalar, or reads a value
+the previous iteration wrote), keep a plain `do`. Locality clauses
+(`local`, `shared`, `reduce`) give finer control in Fortran 2018+.
+
+::instruction::
+
+### Q
+Why use `allocatable` arrays instead of `pointer` arrays for owned memory?
+
+### A
+`allocatable` expresses ownership: the variable owns the memory, the compiler
+inserts the deallocate at scope exit, and you cannot have aliasing across
+two allocatable variables. `pointer` arrays expose aliasing and demand
+manual `deallocate`. Compilers can also generate better code for
+`allocatable` because no aliasing means no aliasing-defeats-vectorisation
+pessimism. Use `pointer` only when you actually need pointer semantics —
+linked structures, optional aliasing into a parent array, or C interop.
+`move_alloc(from, to)` transfers ownership without a copy.
+
+::instruction::
+
+### Q
+What does `intent(in)`, `intent(out)`, and `intent(inout)` declare on a
+dummy argument?
+
+### A
+`intent(in)` says the procedure reads but never assigns to the argument —
+the compiler enforces this and will reject any assignment as an error.
+`intent(out)` says the procedure must define the argument before returning;
+the value on entry is undefined (and on some compilers explicitly poisoned),
+so don't read it before assigning. `intent(inout)` says both are allowed —
+the procedure may read the prior value and may overwrite it. Always declare
+intent on every dummy argument. It is a contract that the compiler enforces
+and a reader can rely on.
+
+::instruction::
+
+### Q
+How do I open an OpenMP parallel region with a reduction in Fortran?
+
+### A
+Use `!$omp parallel do reduction(+:sum)` above a `do` loop where each
+iteration accumulates into `sum`. The reduction clause tells OpenMP to give
+each thread a private copy of `sum`, accumulate locally, then combine at
+loop end. Other reduction operators include `*`, `min`, `max`, `.and.`,
+`.or.`, `iand`, `ior`. The closing `!$omp end parallel do` is optional in
+free-form Fortran. For simple no-reduction loops, `do concurrent` is
+preferred.
+
+::instruction::
+
+### Q
+What's a derived type with allocatable components, and why is it useful?
+
+### A
+A derived type with allocatable components has fields declared
+`type :: container; real, allocatable :: data(:); end type`. The container
+itself is value-typed but its `data` field grows or shrinks at runtime.
+Compared to fixed-size arrays it doesn't waste memory; compared to pointer
+fields it owns the memory and frees it automatically when the container
+goes out of scope. Modern Fortran's `intrinsic_assignment` does a deep
+copy by default. This is the canonical way to build resizable structures
+without C-style malloc/free.
+
+::instruction::
+
+### Q
+What does `pure function` mean and when is the `pure` attribute required?
+
+### A
+A `pure function` has no side effects: it doesn't modify any module
+variable, doesn't perform I/O, doesn't call impure procedures. The compiler
+verifies this. `pure` is required for any function called from inside
+`do concurrent` (which forbids side effects across iterations) and from
+within `forall`. It also enables aggressive optimisation: the compiler
+knows two calls with equal arguments produce equal results and can hoist
+or memoise. `elemental` implies `pure` and adds elementwise array semantics.
+
+::instruction::
+
+### Q
+How do I declare a generic interface for two procedures that differ only in
+argument type?
+
+### A
+```fortran
+interface clamp
+  module procedure clamp_real32, clamp_real64
+end interface clamp
+```
+The two module procedures have the same logical name `clamp` but distinct
+argument signatures (one takes `real(real32)`, the other `real(real64)`).
+The compiler resolves the call by argument type — Fortran's overloading
+mechanism. Adding more types is purely additive; callers see one name.
+
+::instruction::
+
+### Q
+What's the difference between `module` and `submodule` in Fortran?
+
+### A
+A `module` declares its public interface and definitions in one unit. A
+`submodule` lets you declare the interface in the parent module but put the
+implementation in a separately compiled file. Editing the submodule does
+not retrigger compilation of every consumer of the parent module — only of
+the submodule itself. Use submodules to break long compile chains in big
+projects. The `module subroutine foo()` declaration in the parent and
+`module procedure foo` in the submodule are how the two halves bind.
+
+::instruction::
+
+### Q
+What does `trueloc` from `stdlib_array` do?
+
+### A
+`trueloc(mask, lbound)` turns a logical mask into an integer index array
+of the positions where the mask is `.true.`. It's a pure function — given
+`mask = [.false., .true., .false., .true.]` and default `lbound`, it
+returns `[2, 4]`. With `lbound = 0` it returns `[1, 3]`. Useful when you
+need the indices themselves (for example, to use as a subscript) rather
+than the masked values; if you only need the values, `pack` is more
+direct. The complementary `falseloc` returns indices where the mask is
+`.false.`.
+
+::instruction::
+
+### Q
+When would I use `arange` from `stdlib_math`?
+
+### A
+`arange(start, stop, step)` returns a rank-1 array of values from `start`
+to `stop` (inclusive) stepping by `step`. With `arange(1, 5)` you get
+`[1, 2, 3, 4, 5]`. With `arange(0.0, 1.0, 0.25)` you get
+`[0.0, 0.25, 0.5, 0.75, 1.0]`. Useful for building index sequences or
+sample grids without a manual `do` loop. Step defaults to 1.
+
+::instruction::
+
+### Q
+How do I read a numeric matrix from a text file using `stdlib_io`?
+
+### A
+Use `loadtxt(filename, array)`:
+```fortran
+use stdlib_io, only: loadtxt
+real, allocatable :: data(:,:)
+call loadtxt('measurements.dat', data)
+```
+The array is allocated automatically to match the file's rows × columns.
+The companion `savetxt(filename, array)` writes the same shape back out.
+Both procedures handle real, integer, and complex data of `real32`,
+`real64`, `int32`, `int64` kinds.
+
+::instruction::
+
+### Q
+What does `getline` from `stdlib_io` give me that `read(*, fmt='(A)')` doesn't?
+
+### A
+`getline(unit, line, iostat)` reads one line of arbitrary length into a
+deferred-length character variable. With plain `read(*, fmt='(A)')` you
+have to commit to a fixed-length buffer up front and either truncate
+or run a loop on `iostat=eor`. `getline` allocates `line` to exactly the
+line's length, with no truncation. It returns `iostat=iostat_end` at
+EOF.
+
+::instruction::
+
+### Q
+How do I sort an array in place using `stdlib_sorting`?
+
+### A
+Use `sort(array)`. It sorts `array` ascending in place using an
+introsort-style hybrid (quicksort + insertion-sort fallback). For a
+descending sort, pass `reverse=.true.`. The companion `ord_sort(array)`
+is a stable mergesort — preserves equal-key relative order, at the cost
+of an O(n) workspace allocation. Use `radix_sort` for integer arrays
+when the value range is bounded; it's O(n) but only for unsigned-style
+integer kinds.
+
+::instruction::
+
+### Q
+When should I use `sort_index` instead of `sort`?
+
+### A
+Use `sort_index(array, index)` when you also need the permutation that
+sorted the array — for example, to apply the same reordering to a second,
+parallel array. After the call, `array` is sorted and `index(i)` is the
+original position of the `i`-th element of the sorted array. So
+`other_data = other_data(index)` reorders a sibling array consistently
+with `array`. Plain `sort` discards this information.
+
+::instruction::
+
+### Q
+What does `mean(array)` from `stdlib_stats` compute, and how do I take a
+mean along one axis of a 2D array?
+
+### A
+`mean(array)` returns the arithmetic mean of all elements as a scalar.
+For a 2D array, `mean(matrix, dim=1)` reduces along the first dimension,
+returning a 1D array of column means. `mean(matrix, dim=2)` returns row
+means. With a logical mask, `mean(array, mask=array > 0)` averages only
+the elements satisfying the mask. The companion `var` (variance) and
+`std` (standard deviation) accept the same dim/mask arguments. `corr` and
+`cov` compute correlation / covariance between two arrays.
+
+::instruction::
+
+### Q
+What is `string_type` from `stdlib_string_type` and why use it instead of
+plain `character(len=*)`?
+
+### A
+`string_type` is a derived type wrapping a deferred-length character
+allocatable. Two reasons to prefer it: (1) you can have arrays of
+varying-length strings (`type(string_type) :: words(100)` where each
+`words(i)` is its own length — impossible with plain character arrays
+which must be uniform-length); (2) it has overloaded operators (`==`,
+`<`, `>`, `//` for concat) and constructors that interoperate with both
+literals and other `string_type` instances. Use `char(string_type_var)`
+to get the underlying `character(:)` back when interfacing with code
+that takes plain strings.
+
+::instruction::
+
+### Q
+How do I solve a linear system `A*x = b` using `stdlib_linalg`?
+
+### A
+```fortran
+use stdlib_linalg, only: solve
+real :: A(n,n), b(n), x(n)
+x = solve(A, b)
+```
+`solve` is a pure function that returns the solution `x`. Internally it
+calls LAPACK's `gesv` (LU with partial pivoting). For multiple right-hand
+sides, `b` can be a 2D array `b(n,nrhs)`. For least-squares (overdetermined
+or underdetermined), use `lstsq` instead. To check whether a matrix is
+singular before solving, use `inv` plus the returned status, or compute
+the condition number via `linalg_cond`.
+
+::instruction::
+
+### Q
+What does `eye(n)` return in `stdlib_linalg`?
+
+### A
+`eye(n)` returns the n×n identity matrix as a `real` array — ones on the
+diagonal, zeros off-diagonal. `eye(m, n)` returns a non-square m×n matrix
+with ones on the main diagonal. Useful as a starting point for numerical
+linear algebra or as the right-hand side of `solve(A, eye(n))` to compute
+`inv(A)` directly.
+
+::instruction::
+
+### Q
+How do I draw samples from a normal distribution with `stdlib_random`?
+
+### A
+```fortran
+use stdlib_random, only: random_seed, dist_normal => dist_rvs_normal
+real :: x, samples(1000)
+call random_seed(42)
+x = dist_normal(0.0, 1.0)              ! one N(0,1) sample
+samples = dist_normal(0.0, 1.0, 1000)  ! 1000 N(0,1) samples
+```
+First argument is the mean, second is the standard deviation. The third
+optional argument requests an array of N samples. Companion routines
+`dist_rvs_uniform`, `dist_rvs_exponential`, `dist_rvs_gamma` cover other
+distributions. Always seed via `random_seed` for reproducibility.
+
+::instruction::
+
+### Q
+What is `optval` from `stdlib_optval` and when do I use it?
+
+### A
+`optval(arg, default)` returns `arg` if it's `present`, otherwise `default`.
+It replaces the boilerplate
+```fortran
+if (present(arg)) then
+  used = arg
+else
+  used = default
+end if
+```
+with a single expression `used = optval(arg, default)`. Works for scalar
+integers, reals, complex, logical, and character. The most common use is
+default-value handling for optional dummy arguments, where the caller may
+or may not have supplied the argument.
+
+::instruction::
+
+### Q
+What kind parameters does `stdlib_kinds` provide and why use them?
+
+### A
+`stdlib_kinds` exports `int8`, `int16`, `int32`, `int64`, `real32`,
+`real64`, `real128` (where supported), and `c_bool`. Use them instead of
+raw kind numbers (`real(8)` is non-portable — different compilers map `8`
+to different precisions). `real(real64)` is portable and self-documenting.
+For most modern code, default to `real64` for floating-point and `int32`
+for indices unless you have a specific reason otherwise.
+
+::instruction::
+
+### Q
+How do I append to a file using `stdlib_io_logger`?
+
+### A
+```fortran
+use stdlib_logger, only: logger_type, information_level
+type(logger_type) :: log
+integer :: stat
+call log%add_log_file('app.log', stat, position='append')
+call log%log_information('app started')
+```
+The `position='append'` keyword opens the existing file for append rather
+than truncating. Default severity emitted is `information`; raise it with
+`log%configuration(level=warning_level)` if you want a quieter log. Severity
+levels: `debug_level < information_level < warning_level < error_level`.
+
+::instruction::
+
+### Q
+When should I use a `bitset_64` vs `bitset_large` from `stdlib_bitsets`?
+
+### A
+`bitset_64` is fixed at 64 bits — a single integer's worth of flags,
+fastest, no allocation. `bitset_large` allocates an array of `int64`
+internally and grows to any user-specified size. Use `bitset_64` for
+small, known-size flag sets (compiler optimization flags, peripheral
+status registers); use `bitset_large` when the bit count is data-driven
+or might exceed 64. Both expose the same `set`, `clear`, `test`,
+`flip`, `bit_count` interface — code is portable across them.
+
+::instruction::
+
+### Q
+How do I hash a string for use as a hashmap key?
+
+### A
+The `stdlib_hash_procedures` module provides 32-bit and 64-bit hash
+functions:
+```fortran
+use stdlib_hash_procedures, only: fnv_1a_hash, water_hash
+integer(int32) :: h32
+integer(int64) :: h64
+h32 = fnv_1a_hash('mykey')
+h64 = water_hash('mykey', seed=12345_int64)
+```
+`fnv_1a_hash` is unseeded and reproducible across runs; `water_hash` and
+`pengy_hash` are seeded (better collision resistance under adversarial
+input). For `stdlib_hashmaps`, you usually pass the hash function as a
+procedure pointer when constructing the map.
+
+::instruction::
+
+### Q
+What's the `block` construct in modern Fortran and when do I use it?
+
+### A
+A `block` introduces a nested scope inside an executable region:
+```fortran
+real :: outer
+outer = 1.0
+block
+  real :: inner
+  inner = outer + 1.0
+  print *, inner
+end block
+```
+Variables declared inside the block exist only within it. Useful for
+narrowing the lifetime of temporaries, declaring variables close to use,
+and limiting the visibility of helper allocations. Functionally similar
+to a `{ ... }` block in C-family languages. Combines naturally with
+`associate`, `select type`, and `error stop`.
+
+::instruction::
+
+### Q
+How does `associate` differ from a plain assignment?
+
+### A
+`associate(short => long%nested%expression)` binds a name to an expression
+or variable for the lifetime of the `associate` block, *without* copying
+the value. Inside the block, `short` is an alias — modifying it modifies
+the original. Compare to assignment, which copies (for non-pointer types).
+Use `associate` to give a verbose subexpression a short name in a tight
+loop without paying for a copy and without exposing the alias outside the
+block. End the scope with `end associate`.
+
+::instruction::
+
+### Q
+What does `elemental` add over `pure` on a function?
+
+### A
+An `elemental` function is `pure` plus broadcast: declared as if it takes
+scalar arguments, but the compiler auto-generates the version that takes
+conformable arrays. `square_real(x)` written `elemental` can be called
+with a scalar (`y = square_real(2.0)`) or an array (`v = square_real(arr)`)
+with no explicit do-loop. Since `elemental` implies `pure`, the same
+no-side-effect rules apply. Most stdlib unary numeric helpers (`sqrt`-
+adjacent transforms, kind conversions, predicates) are `elemental`.
+
+::instruction::
+
+### Q
+How do I structure a unit-test runner using stdlib's `testing` module?
+
+### A
+```fortran
+use testdrive, only: new_unittest, unittest_type, error_type, check
+type(unittest_type), allocatable :: testsuite(:)
+testsuite = [ &
+  new_unittest('addition', test_add), &
+  new_unittest('subtract', test_sub) ]
+contains
+  subroutine test_add(error)
+    type(error_type), allocatable, intent(out) :: error
+    call check(error, 1 + 1 == 2, 'addition broken')
+  end subroutine
+```
+`testdrive` is a thin runner the stdlib uses for its own tests. Each
+test subroutine accepts an `allocatable :: error` out parameter; `check`
+allocates the error if the assertion fails. Fast, no fixtures, no
+discovery — explicit registration in an array.
+
+::instruction::
+
+### Q
+What's the modern way to read a CSV-style file in Fortran?
+
+### A
+The simplest path is `stdlib_io`'s `loadtxt` if the columns are uniform
+numeric. For mixed-type CSVs, read line-by-line with `getline` and split
+manually:
+```fortran
+use stdlib_io, only: getline
+use stdlib_string_type, only: string_type, split => char_split
+type(string_type) :: line
+type(string_type), allocatable :: fields(:)
+integer :: u, ios
+open(newunit=u, file='data.csv', action='read')
+do
+  call getline(u, line, ios)
+  if (ios /= 0) exit
+  fields = split(line, ',')
+  ! process fields(:)
+end do
+close(u)
+```
+For larger or more complex CSVs, consider the `csv-fortran` community
+package — stdlib doesn't (yet) ship a CSV-aware reader.
+
+::instruction::
+
+### Q
+How does `error stop` differ from `stop`?
+
+### A
+`stop` and `error stop` both terminate the program, but `error stop` is
+guaranteed to set a non-zero process exit code, while `stop` (without an
+argument) typically returns zero. `error stop "message"` prints the message
+to standard error before exit; `error stop 42` returns code 42. Use
+`error stop` for any abnormal termination — assertion failures, fatal
+config errors — so shells and CI runners pick up the failure correctly.
+`stop` is reserved for normal early termination (rare in modern code;
+prefer letting `program` reach its `end program`).
+
+::instruction::
+
+### Q
+What is `c_loc` and when do I need it?
+
+### A
+`c_loc(target)` from `iso_c_binding` returns the C address of `target` as
+a `type(c_ptr)` value, suitable for passing to a `bind(C)` procedure. The
+target must have the `target` attribute. Use this when interfacing with
+a C library that takes `void*`. The reverse — turning a `c_ptr` back into
+a Fortran pointer — uses `c_f_pointer(cptr, fptr, [shape])`. Always pair
+the call with the matching deallocation; Fortran does not own memory
+acquired through `c_loc`.
+
+::instruction::
+
+### Q
+What does `move_alloc(from, to)` do and when do I prefer it to assignment?
+
+### A
+`move_alloc(from, to)` transfers the allocation status (and the underlying
+memory) from `from` to `to`. After the call, `to` holds what `from` held,
+and `from` is deallocated — no copy. Compare to `to = from` which copies
+the array. For large arrays, `move_alloc` is O(1); the copy is O(n). Use
+`move_alloc` to hand ownership of a temporary buffer to a derived-type
+field, or to swap two allocatables (via a third temporary).
+
+::instruction::
+
+### Q
+How do I write a `subroutine` that returns multiple results without using
+`out` arguments?
+
+### A
+Use a derived-type return value via a `function`:
+```fortran
+type :: result_t
+  real :: value
+  integer :: status
+end type
+contains
+function compute() result(r)
+  type(result_t) :: r
+  r%value  = 3.14
+  r%status = 0
+end function
+```
+Caller writes `res = compute()` once and reads `res%value`, `res%status`.
+This is cleaner than two `intent(out)` arguments because the call site
+isn't burdened with declaring the receivers up front, and the compiler
+can elide the temporary in common cases. Fortran 2008+ allows allocatable
+result components, so the function can also return varying-shape data.
+
+::instruction::
+
+### Q
+Why does the FortranGoingOnForty codebase use `intent` on every dummy
+argument even when not strictly required?
+
+### A
+It's a project convention: explicit `intent(in/out/inout)` on every dummy
+makes the contract visible at the procedure boundary. The compiler enforces
+the contract — `intent(in)` rejects assignment, `intent(out)` warns on
+read-before-write — so a regression where someone "improves" a procedure
+to mutate a previously-read-only argument fails at compile time, not at
+runtime. Reviewers and tooling (the FGOF `fgof-lineedit` editor's
+in-buffer linter, for example) parse intents to render colour-coded
+argument flow. The convention costs ~10 keystrokes per procedure and
+buys static enforcement.
+
+::instruction::
+
+### Q
+What does the `aero-emulation` subsystem in FortranGoingOnForty do at a
+high level?
+
+### A
+`aero-emulation` is the aerodynamic-surface emulator: a forward simulation
+of lift, drag, and moment over a parameterised wing-and-control-surface
+model, integrated against a 6-DoF rigid-body solver. It uses
+`stdlib_linalg` for the state-update matrices (rotation and inertia tensor
+products) and `stdlib_random` to inject configurable turbulence. Outputs
+are time-series state vectors written via `stdlib_io`'s `savetxt`. The
+module is consumed by `convolution` (for sensor-fusion experiments) and
+by `armfortas` (the autopilot framework).
+
+::instruction::
+
+### Q
+How does `fgof-process` handle child-process I/O on Linux vs macOS?
+
+### A
+`fgof-process` wraps the C `posix_spawn` family via `iso_c_binding`; on
+both Linux and macOS the spawn semantics are POSIX-shaped, so the
+high-level `spawn(command, stdin, stdout, stderr)` call is platform-
+identical at the Fortran layer. The differences hide in the child-pty
+wiring (`fgof-pty`): macOS uses `posix_openpt` + `grantpt` + `unlockpt`,
+Linux supports the same path plus the older `/dev/ptmx` shortcut. The
+`fgof-process` module never sees the divergence — `fgof-pty` exposes a
+`type(pty_t)` derived type that `fgof-process` consumes opaquely.
+
+::instruction::
+
+### Q
+What's the role of `fgof-screen` in the FGOF terminal applications?
+
+### A
+`fgof-screen` is the terminal-cell renderer used by `fgof-lineedit` and
+the `armfortas` autopilot console. It maintains a 2D `character(len=:),
+allocatable` buffer plus per-cell ANSI style attributes, computes a
+minimal-diff update against the previously rendered frame, and emits
+the diff as ANSI escape sequences to the controlling tty. The minimal-
+diff step keeps redraws under a few hundred bytes for incremental
+updates — needed because some serial-attached aerospace consoles run
+at 9600 baud and a full repaint is too expensive.
+
+::instruction::
+
+### Q
+Why does `convolution` use `do concurrent` for its inner kernel loop
+rather than OpenMP?
+
+### A
+The inner kernel loop multiplies and accumulates over a small kernel
+window with no carried dependence, so it satisfies `do concurrent`'s
+contract. `do concurrent` lets the compiler choose the parallelism
+strategy — vectorisation on CPU, OpenMP-offload on multicore, or
+GPU-offload via `-fopenmp -foffload=...` on NVIDIA hosts — without
+changing source. OpenMP `parallel do` would have committed the kernel
+to CPU threads at compile time. The convolution outer loop, which
+*does* accumulate into a shared buffer, uses `!$omp parallel do
+reduction(+:buffer)` since `do concurrent` cannot express the
+reduction without 2018+ locality clauses that older compilers don't
+honour.
+
+::instruction::
+
+### Q
+What pattern does `armfortas` use for autopilot state-machine transitions?
+
+### A
+A `select case` over a state enum at the top of the control loop:
+```fortran
+select case (state%mode)
+case (mode_idle)        ; call handle_idle(state, inputs)
+case (mode_takeoff)     ; call handle_takeoff(state, inputs)
+case (mode_cruise)      ; call handle_cruise(state, inputs)
+case (mode_descent)     ; call handle_descent(state, inputs)
+case (mode_landed)      ; call handle_landed(state, inputs)
+case default            ; call error_unknown_mode(state%mode)
+end select
+```
+Each handler returns the next mode via `state%mode = new_mode`. The
+enum values are `integer, parameter` constants (Fortran lacks a true
+enum type pre-2023 — most codebases use named integer parameters). The
+`select case` is exhaustive and the `case default` calls into
+`error_unknown_mode` to fail loudly on programmer error.
+
+::instruction::
+
+### Q
+How does `feducative` integrate with `armfortas` for control-loop tuning?
+
+### A
+`feducative` is the parameter-search and PID-tuning subsystem. It runs
+`armfortas` in a closed-loop simulation (via the `aero-emulation` model),
+sweeps PID gains over a configured grid, and scores each combination by
+RMS tracking error against a reference trajectory. The two communicate
+via a `type(tuning_handle_t)` derived type that `feducative` constructs
+and `armfortas` updates step-by-step. After a sweep, `feducative` writes
+the gain × score table via `stdlib_io`'s `savetxt` and emits the best
+gains as a Fortran `include` file the autopilot reads on next compile.

From 4d133cfd9c04dcfe1f2ea73bc0d07ab161cb6240 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:04:14 -0400
Subject: [PATCH 34/46] Warn about MLX PEFT-adapter bug in dlm prompt --backend
 help + reference docs

---
 docs/cli/reference.md          | 8 ++++++++
 src/dlm/cli/commands/prompt.py | 4 +++-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/docs/cli/reference.md b/docs/cli/reference.md
index 3905a4f..f8735ac 100644
--- a/docs/cli/reference.md
+++ b/docs/cli/reference.md
@@ -122,6 +122,14 @@ dlm prompt <path> [query] [--max-tokens N] [--temp F] [--top-p F]
 | `--backend {auto,pytorch,mlx}` | `auto` | Inference backend. `auto` picks MLX on Apple Silicon (when `uv sync --extra mlx` is installed), else PyTorch. Ignored on VL bases (the VL path always uses PyTorch + AutoModelForImageTextToText). |
 | `--verbose` | false | Print resolved `InferencePlan` on stderr. |
 
+> **MLX backend caveat (darwin-arm64):** the MLX path has known issues
+> applying PEFT-format LoRA adapters — the converter runs without
+> error but the resulting MLX adapter does not modify generation,
+> producing output that matches the base model. If your trained
+> adapter appears to behave like the base, pass `--backend pytorch`
+> as a workaround. See `docs/audits/13-followup/finding04/direct-query-results.md`
+> for the diagnostic walk-through.
+
 Query is the CLI positional argument. Omit to read from stdin.
 
 ### `dlm repl`
diff --git a/src/dlm/cli/commands/prompt.py b/src/dlm/cli/commands/prompt.py
index 7dbbec0..de9484e 100644
--- a/src/dlm/cli/commands/prompt.py
+++ b/src/dlm/cli/commands/prompt.py
@@ -59,7 +59,9 @@ def prompt_cmd(
             help=(
                 "Inference backend: `auto` (default) picks MLX on Apple "
                 "Silicon, else PyTorch. Force with `pytorch` or `mlx`. "
-                "MLX requires `uv sync --extra mlx` on darwin-arm64."
+                "MLX requires `uv sync --extra mlx` on darwin-arm64. "
+                "Note: MLX has a known PEFT-adapter bug — if your "
+                "trained model behaves like the base, use `--backend pytorch`."
             ),
         ),
     ] = "auto",

From 931f6bb3ea9770ba59ad6da73b76589d662181ea Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:13:09 -0400
Subject: [PATCH 35/46] Fix MLX backend silently producing base output for PEFT
 adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs combined to make `dlm prompt --backend mlx` produce
base-model behavior even with a fully-trained PEFT LoRA adapter:

1. `target_modules` from PEFT is bare (`q_proj`), but mlx-lm's
   `linear_to_lora_layers` matches `named_modules()` keys inside
   each transformer block via exact equality. The FQN within a
   block is `self_attn.q_proj`, so no keys ever matched and
   `linear_to_lora_layers` silently left the model un-wrapped.

2. PEFT and mlx-lm use different LoRA tensor layouts:
   PEFT lora_A=[r,in], lora_B=[out,r]; mlx-lm lora_a=[in,r],
   lora_b=[r,out]. mlx-lm's `model.load_weights(strict=False)`
   silently skipped the mismatched shapes, leaving zero overlay.

The user-visible failure was "trained model behaves identically
to base" — surfaced during the audit-13 follow-up Finding 04
direct-query smoke test.
---
 src/dlm/inference/mlx_adapter.py              |  55 ++++++++-
 .../inference/test_mlx_adapter_conversion.py  | 112 ++++++++++++++++++
 .../inference/test_mlx_stage_adapter_dir.py   |   5 +-
 3 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/src/dlm/inference/mlx_adapter.py b/src/dlm/inference/mlx_adapter.py
index 7bc627a..95ad13d 100644
--- a/src/dlm/inference/mlx_adapter.py
+++ b/src/dlm/inference/mlx_adapter.py
@@ -44,6 +44,39 @@
 _LORA_AB = re.compile(r"\.lora_([AB])\.weight$")
 """Matches the trailing `.lora_A.weight` / `.lora_B.weight` suffix."""
 
+_ATTN_TARGETS: frozenset[str] = frozenset(
+    {"q_proj", "k_proj", "v_proj", "o_proj", "qkv_proj", "wqkv"}
+)
+"""Bare PEFT `target_modules` names that live under `self_attn.` on
+decoder-only transformers (Qwen2/Llama/Mistral/Phi/SmolLM)."""
+
+_MLP_TARGETS: frozenset[str] = frozenset({"gate_proj", "up_proj", "down_proj", "fc1", "fc2"})
+"""Bare PEFT `target_modules` names that live under `mlp.` on the same
+family of architectures."""
+
+
+def _qualify_target_module(name: str) -> str:
+    """Map a PEFT bare `target_modules` entry to its in-block FQN.
+
+    mlx-lm's `linear_to_lora_layers` matches `named_modules()` keys
+    *within* each transformer block via exact equality (`if k in keys`).
+    PEFT records `target_modules` as bare module names (`q_proj`),
+    while the FQN within an MLX-LM transformer block is fully qualified
+    (`self_attn.q_proj`). Without this rewrite the keys never match and
+    `linear_to_lora_layers` silently leaves the model un-wrapped — the
+    user-visible failure is "trained model behaves identically to base."
+
+    Already-qualified names (containing a `.`) pass through untouched
+    so callers can pre-qualify if needed.
+    """
+    if "." in name:
+        return name
+    if name in _ATTN_TARGETS:
+        return f"self_attn.{name}"
+    if name in _MLP_TARGETS:
+        return f"mlp.{name}"
+    return name
+
 
 class MlxConversionError(RuntimeError):
     """Raised when a PEFT adapter cannot be converted to the MLX layout."""
@@ -121,7 +154,23 @@ def peft_safetensors_to_mlx_safetensors(  # pragma: no cover - I/O + torch deps
     tensors = load_file(str(src))
     mapping = map_all_keys(list(tensors.keys()))
 
-    mlx_tensors = {mlx_key: tensors[peft_key] for peft_key, mlx_key in mapping.items()}
+    # PEFT stores LoRA weights with shapes that don't match what
+    # mlx-lm's `LoRALinear` expects:
+    #
+    #   PEFT lora_A : [r, in_features]       MLX lora_a : [in_features, r]
+    #   PEFT lora_B : [out_features, r]      MLX lora_b : [r, out_features]
+    #
+    # Both tensors need a transpose. Loading without the transpose
+    # makes mlx-lm's `model.load_weights(strict=False)` silently skip
+    # the mismatched shapes and the adapter has no effect — the
+    # textbook "trained model behaves like base" failure mode.
+    mlx_tensors = {}
+    for peft_key, mlx_key in mapping.items():
+        t = tensors[peft_key]
+        # `mlx_key` ends in `.lora_a` or `.lora_b` (lowercase, no `.weight`).
+        if mlx_key.endswith((".lora_a", ".lora_b")):
+            t = t.t().contiguous()
+        mlx_tensors[mlx_key] = t
 
     mlx_safetensors_path.parent.mkdir(parents=True, exist_ok=True)
     save_file(mlx_tensors, str(mlx_safetensors_path))
@@ -174,6 +223,8 @@ def build_mlx_adapter_config(
             "cannot stage mlx adapter without a valid layer count"
         )
 
+    qualified_keys = [_qualify_target_module(t) for t in target_modules]
+
     return {
         "fine_tune_type": "dora" if use_dora else "lora",
         "num_layers": int(base_num_hidden_layers),
@@ -181,6 +232,6 @@ def build_mlx_adapter_config(
             "rank": rank,
             "scale": lora_alpha / rank if rank else float(lora_alpha),
             "dropout": lora_dropout,
-            "keys": list(target_modules),
+            "keys": qualified_keys,
         },
     }
diff --git a/tests/unit/inference/test_mlx_adapter_conversion.py b/tests/unit/inference/test_mlx_adapter_conversion.py
index 1fd6c2b..2ceb6e3 100644
--- a/tests/unit/inference/test_mlx_adapter_conversion.py
+++ b/tests/unit/inference/test_mlx_adapter_conversion.py
@@ -91,3 +91,115 @@ def test_non_positive_layer_count_rejected(self) -> None:
                 },
                 0,
             )
+
+    def test_attn_target_modules_get_self_attn_prefix(self) -> None:
+        """mlx-lm matches `named_modules()` keys *inside* a transformer
+        block via exact equality. PEFT's bare `q_proj` doesn't match
+        the `self_attn.q_proj` FQN, so without the rewrite mlx-lm
+        silently leaves the model un-wrapped — the textbook "trained
+        model behaves like base" failure mode."""
+        from dlm.inference.mlx_adapter import build_mlx_adapter_config
+
+        cfg = build_mlx_adapter_config(
+            {
+                "r": 16,
+                "lora_alpha": 32,
+                "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],
+            },
+            base_num_hidden_layers=28,
+        )
+        assert cfg["lora_parameters"]["keys"] == [
+            "self_attn.q_proj",
+            "self_attn.k_proj",
+            "self_attn.v_proj",
+            "self_attn.o_proj",
+        ]
+
+    def test_mlp_target_modules_get_mlp_prefix(self) -> None:
+        from dlm.inference.mlx_adapter import build_mlx_adapter_config
+
+        cfg = build_mlx_adapter_config(
+            {
+                "r": 8,
+                "target_modules": ["gate_proj", "up_proj", "down_proj"],
+            },
+            base_num_hidden_layers=12,
+        )
+        assert cfg["lora_parameters"]["keys"] == [
+            "mlp.gate_proj",
+            "mlp.up_proj",
+            "mlp.down_proj",
+        ]
+
+    def test_already_qualified_keys_pass_through(self) -> None:
+        """Callers that pre-qualify (e.g. for non-decoder architectures)
+        should not see their dotted keys re-rewritten."""
+        from dlm.inference.mlx_adapter import build_mlx_adapter_config
+
+        cfg = build_mlx_adapter_config(
+            {
+                "r": 8,
+                "target_modules": ["self_attn.q_proj", "encoder.fc1"],
+            },
+            base_num_hidden_layers=12,
+        )
+        assert cfg["lora_parameters"]["keys"] == ["self_attn.q_proj", "encoder.fc1"]
+
+    def test_unknown_target_module_passes_through_unqualified(self) -> None:
+        """Names that aren't in the attn/mlp tables stay bare. Caller
+        supervision is the user's responsibility — we don't guess."""
+        from dlm.inference.mlx_adapter import build_mlx_adapter_config
+
+        cfg = build_mlx_adapter_config(
+            {
+                "r": 8,
+                "target_modules": ["unknown_proj"],
+            },
+            base_num_hidden_layers=12,
+        )
+        assert cfg["lora_parameters"]["keys"] == ["unknown_proj"]
+
+
+class TestPeftSafetensorsToMlxTransposes:
+    """PEFT and MLX-LM use different storage layouts for LoRA tensors:
+
+      PEFT lora_A : [r, in_features]       MLX lora_a : [in_features, r]
+      PEFT lora_B : [out_features, r]      MLX lora_b : [r, out_features]
+
+    Without transposing, mlx-lm's `model.load_weights(strict=False)`
+    silently skips the mismatched shapes and the adapter has no effect.
+    """
+
+    def test_lora_a_and_b_get_transposed(self, tmp_path: object) -> None:
+        from pathlib import Path as _Path
+
+        import torch
+        from safetensors.torch import load_file, save_file
+
+        from dlm.inference.mlx_adapter import peft_safetensors_to_mlx_safetensors
+
+        tmp_path = _Path(str(tmp_path))
+        peft_dir = tmp_path / "peft"
+        peft_dir.mkdir()
+        # PEFT shapes: lora_A=[r=4, in=8], lora_B=[out=16, r=4]
+        peft_tensors = {
+            "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight": torch.arange(
+                32, dtype=torch.float32
+            ).reshape(4, 8),
+            "base_model.model.model.layers.0.self_attn.q_proj.lora_B.weight": torch.arange(
+                64, dtype=torch.float32
+            ).reshape(16, 4),
+        }
+        save_file(peft_tensors, str(peft_dir / "adapter_model.safetensors"))
+
+        mlx_path = tmp_path / "out" / "adapters.safetensors"
+        peft_safetensors_to_mlx_safetensors(peft_dir, mlx_path)
+
+        mlx_tensors = load_file(str(mlx_path))
+        a = mlx_tensors["model.layers.0.self_attn.q_proj.lora_a"]
+        b = mlx_tensors["model.layers.0.self_attn.q_proj.lora_b"]
+        # Transposed shapes
+        assert tuple(a.shape) == (8, 4)
+        assert tuple(b.shape) == (4, 16)
+        # Values match a transpose, not just a reshape.
+        assert torch.equal(a, peft_tensors[next(iter(peft_tensors))].t())
diff --git a/tests/unit/inference/test_mlx_stage_adapter_dir.py b/tests/unit/inference/test_mlx_stage_adapter_dir.py
index 794def8..c5185c3 100644
--- a/tests/unit/inference/test_mlx_stage_adapter_dir.py
+++ b/tests/unit/inference/test_mlx_stage_adapter_dir.py
@@ -79,7 +79,10 @@ def test_writes_safetensors_and_translated_config(
         assert lp["rank"] == 8
         assert lp["scale"] == pytest.approx(16 / 8)
         assert lp["dropout"] == pytest.approx(0.05)
-        assert lp["keys"] == ["q_proj", "v_proj"]
+        # Bare PEFT target_modules get qualified with the in-block FQN
+        # so mlx-lm's `linear_to_lora_layers` can match them. See
+        # `_qualify_target_module` in dlm.inference.mlx_adapter.
+        assert lp["keys"] == ["self_attn.q_proj", "self_attn.v_proj"]
 
     def test_tensor_keys_match_mlx_layout(self, tmp_path: Path, stub_num_layers: None) -> None:
         src = tmp_path / "peft"

From f7f04504f7a4743951c7c173362bfffc8755ea6a Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:17:28 -0400
Subject: [PATCH 36/46] Fail loud when mlx-lm leaves the model un-wrapped after
 load

Even with the conversion fix, an unconvertible adapter (architecture
whose layers don't follow the self_attn/mlp convention) would still
fall through to base-model output silently. Add a post-load guard
that walks the model's `trainable_parameters` and raises
`MlxConversionError` when zero `lora_a`/`lora_b` parameters are
present. Surfaces the failure as a clear message pointing at
`--backend pytorch` instead of letting the trained adapter behave
identically to the base.
---
 src/dlm/inference/backends/mlx_backend.py     | 13 +++-
 src/dlm/inference/mlx_adapter.py              | 48 +++++++++++++
 .../inference/test_mlx_adapter_conversion.py  | 72 +++++++++++++++++++
 tests/unit/inference/test_mlx_backend.py      | 16 +++++
 4 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/src/dlm/inference/backends/mlx_backend.py b/src/dlm/inference/backends/mlx_backend.py
index d273032..e293177 100644
--- a/src/dlm/inference/backends/mlx_backend.py
+++ b/src/dlm/inference/backends/mlx_backend.py
@@ -35,7 +35,10 @@
 
 from dlm.inference.backends.base import InferenceBackend
 from dlm.inference.errors import AdapterNotFoundError
-from dlm.inference.mlx_adapter import MlxConversionError
+from dlm.inference.mlx_adapter import (
+    MlxConversionError,
+    assert_mlx_adapter_applied,
+)
 
 if TYPE_CHECKING:
     from dlm.base_models import BaseModelSpec
@@ -192,6 +195,14 @@ def load(  # pragma: no cover - heavy path
             adapter_path=str(staged),
         )
 
+        # mlx-lm's `load_adapters` runs `linear_to_lora_layers` +
+        # `model.load_weights(strict=False)`, both of which fail
+        # silently on key/shape mismatches. Verify at least one LoRA
+        # parameter actually attached — else `MlxConversionError` so
+        # the user sees the failure instead of base-model output.
+        staged_cfg = json.loads((staged / _ADAPTER_CONFIG_FILENAME).read_text(encoding="utf-8"))
+        assert_mlx_adapter_applied(self._model, expected_keys=staged_cfg["lora_parameters"]["keys"])
+
     def generate(self, prompt: str, **gen_kwargs: Any) -> str:  # pragma: no cover - heavy path
         if self._model is None or self._tokenizer is None:
             raise RuntimeError("MlxBackend.generate called before load()")
diff --git a/src/dlm/inference/mlx_adapter.py b/src/dlm/inference/mlx_adapter.py
index 95ad13d..9b27eba 100644
--- a/src/dlm/inference/mlx_adapter.py
+++ b/src/dlm/inference/mlx_adapter.py
@@ -235,3 +235,51 @@ def build_mlx_adapter_config(
             "keys": qualified_keys,
         },
     }
+
+
+def assert_mlx_adapter_applied(model: Any, *, expected_keys: list[str]) -> None:
+    """Verify mlx-lm's `load_adapters` actually wrapped the targeted layers.
+
+    `mlx_lm.load(..., adapter_path=...)` calls `linear_to_lora_layers`
+    followed by `model.load_weights(strict=False)`. Both steps fail
+    silently if their inputs don't match the loaded model:
+
+    - `linear_to_lora_layers` is a no-op when `keys` don't match any
+      module's FQN inside the transformer blocks
+    - `load_weights(strict=False)` skips any tensor key that doesn't
+      match a model parameter
+
+    Either failure produces a model that runs as if no adapter were
+    loaded. Catching this here turns the "trained model behaves like
+    base" footgun into an explicit refusal so the user knows to use
+    `--backend pytorch` (or the fix needs an architecture-aware
+    keys translator).
+
+    `expected_keys` are the in-block FQNs from the staged
+    `adapter_config.json` (e.g. `["self_attn.q_proj", ...]`). We confirm
+    that at least one matching module ended up as a LoRA-wrapped layer.
+    """
+    try:
+        import mlx.utils as mlx_utils  # type: ignore[import-not-found, unused-ignore]
+    except ImportError as exc:  # pragma: no cover - mlx not importable
+        raise MlxConversionError(f"mlx not importable for verification: {exc}") from exc
+
+    try:
+        flat: Any = mlx_utils.tree_flatten(model.trainable_parameters())
+    except Exception as exc:  # pragma: no cover - defensive
+        raise MlxConversionError(
+            f"could not enumerate model trainable_parameters for verification: {exc}"
+        ) from exc
+
+    lora_param_count = sum(1 for k, _ in flat if k.endswith(".lora_a") or k.endswith(".lora_b"))
+    if lora_param_count == 0:
+        raise MlxConversionError(
+            "mlx-lm loaded the adapter without applying it — zero "
+            "`lora_a` / `lora_b` parameters present after load. This "
+            "usually means the keys "
+            f"{expected_keys!r} don't match the model's `named_modules()` "
+            "FQNs (e.g. the base architecture uses a different submodule "
+            "layout than `self_attn.*` / `mlp.*`). The trained adapter "
+            "would behave identically to the base model. Use "
+            "`--backend pytorch` as a workaround."
+        )
diff --git a/tests/unit/inference/test_mlx_adapter_conversion.py b/tests/unit/inference/test_mlx_adapter_conversion.py
index 2ceb6e3..c4448d7 100644
--- a/tests/unit/inference/test_mlx_adapter_conversion.py
+++ b/tests/unit/inference/test_mlx_adapter_conversion.py
@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
+from typing import Any
+
 import pytest
 
 from dlm.inference.mlx_adapter import (
     MlxConversionError,
+    assert_mlx_adapter_applied,
     map_all_keys,
     map_peft_key_to_mlx,
 )
@@ -203,3 +206,72 @@ def test_lora_a_and_b_get_transposed(self, tmp_path: object) -> None:
         assert tuple(b.shape) == (4, 16)
         # Values match a transpose, not just a reshape.
         assert torch.equal(a, peft_tensors[next(iter(peft_tensors))].t())
+
+
+class TestAssertMlxAdapterApplied:
+    """Fail-loud post-load guard. mlx-lm silently leaves a model
+    un-wrapped when keys don't match; this check turns that footgun
+    into an explicit `MlxConversionError` so users see the failure
+    rather than getting silent base-model output."""
+
+    def _fake_model_with_params(self, names: list[str]) -> Any:
+        """Build a stand-in for an mlx model that exposes
+        `trainable_parameters()` returning a flat dict of fake tensors.
+        We don't go through `mlx.utils.tree_flatten`'s real
+        implementation here — assert_mlx_adapter_applied uses it
+        directly, so we assert via the import-mock approach below."""
+
+        class _FakeArr:
+            shape = (1,)
+
+        class _FakeModel:
+            def trainable_parameters(self) -> dict[str, Any]:
+                return {n: _FakeArr() for n in names}
+
+        return _FakeModel()
+
+    def test_passes_when_lora_params_present(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        # Stub mlx.utils.tree_flatten so the test doesn't require
+        # mlx-lm's real flatten semantics — we only need it to walk
+        # the dict-shaped trainable_parameters() output.
+        import sys
+        import types as _types
+
+        fake_mlx = _types.ModuleType("mlx")
+        fake_mlx_utils = _types.ModuleType("mlx.utils")
+
+        def _tree_flatten(d: dict[str, Any]) -> list[tuple[str, Any]]:
+            return list(d.items())
+
+        fake_mlx_utils.tree_flatten = _tree_flatten  # type: ignore[attr-defined]
+        monkeypatch.setitem(sys.modules, "mlx", fake_mlx)
+        monkeypatch.setitem(sys.modules, "mlx.utils", fake_mlx_utils)
+
+        model = self._fake_model_with_params(
+            [
+                "model.layers.0.self_attn.q_proj.lora_a",
+                "model.layers.0.self_attn.q_proj.lora_b",
+            ]
+        )
+        # Should not raise.
+        assert_mlx_adapter_applied(model, expected_keys=["self_attn.q_proj"])
+
+    def test_raises_when_no_lora_params(self, monkeypatch: pytest.MonkeyPatch) -> None:
+        import sys
+        import types as _types
+
+        fake_mlx = _types.ModuleType("mlx")
+        fake_mlx_utils = _types.ModuleType("mlx.utils")
+        fake_mlx_utils.tree_flatten = lambda d: list(d.items())  # type: ignore[attr-defined]
+        monkeypatch.setitem(sys.modules, "mlx", fake_mlx)
+        monkeypatch.setitem(sys.modules, "mlx.utils", fake_mlx_utils)
+
+        # Only base parameters; no lora_a/lora_b.
+        model = self._fake_model_with_params(
+            [
+                "model.embed_tokens.weight",
+                "model.layers.0.self_attn.q_proj.weight",
+            ]
+        )
+        with pytest.raises(MlxConversionError, match="zero `lora_a`"):
+            assert_mlx_adapter_applied(model, expected_keys=["self_attn.q_proj"])
diff --git a/tests/unit/inference/test_mlx_backend.py b/tests/unit/inference/test_mlx_backend.py
index cc40737..c6b64de 100644
--- a/tests/unit/inference/test_mlx_backend.py
+++ b/tests/unit/inference/test_mlx_backend.py
@@ -94,6 +94,14 @@ def test_load_generate_and_unload_happy_path(
         adapter_dir = tmp_path / "adapter"
         adapter_dir.mkdir()
         staged_dir = tmp_path / "staged"
+        staged_dir.mkdir()
+        # Real `stage_mlx_adapter_dir` writes this file; the post-load
+        # assertion guard reads it to know which FQNs to verify, so the
+        # test stub must mirror the on-disk shape.
+        (staged_dir / "adapter_config.json").write_text(
+            '{"lora_parameters": {"keys": ["self_attn.q_proj"]}}',
+            encoding="utf-8",
+        )
 
         backend = MlxBackend(SimpleNamespace())
         monkeypatch.setattr(
@@ -104,6 +112,14 @@ def test_load_generate_and_unload_happy_path(
             "dlm.inference.backends.mlx_backend.stage_mlx_adapter_dir",
             lambda peft_adapter_dir, dst_dir, *, base_hf_id: staged_dir,
         )
+        # Stub the post-load assertion: real `mlx_lm.load` produces a
+        # model with LoRA-wrapped layers, but here we hand back a
+        # placeholder string. Bypassing the assertion keeps the rest
+        # of the stubbed happy-path test intact.
+        monkeypatch.setattr(
+            "dlm.inference.backends.mlx_backend.assert_mlx_adapter_applied",
+            lambda model, *, expected_keys: None,
+        )
 
         fake_mlx = ModuleType("mlx_lm")
         fake_mlx.load = lambda hf_id, adapter_path: ("model", "tokenizer")

From 5ac6d8dbe0e52bc5e2e911baf487518bb0b864e9 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:23:21 -0400
Subject: [PATCH 37/46] Promote normalize_probe_markers to public for
 cross-package callers

---
 src/dlm/data/sections_to_rows.py | 28 +++++++++++++++++++++-------
 src/dlm/preference/mine.py       |  4 ++--
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/dlm/data/sections_to_rows.py b/src/dlm/data/sections_to_rows.py
index 5ad0b51..476e5ea 100644
--- a/src/dlm/data/sections_to_rows.py
+++ b/src/dlm/data/sections_to_rows.py
@@ -40,14 +40,22 @@
 _PROBE_HEADER = f"### Q {_PROBE_MARKER}"
 
 
-def _normalize_probe_markers(body: str) -> str:
+def normalize_probe_markers(body: str) -> str:
     """Rewrite `### Q !probe` → `### Q` so the strict parser accepts it.
 
-    Mirrors `dlm.eval.probes._normalize_probe_markers` (kept local to
-    avoid a data → eval import). Probe-marked Q/A pairs still train
-    exactly like plain pairs; the marker is only load-bearing for probe
-    extraction. We drop it silently here rather than leak `!probe:` into
-    the training question text.
+    Probe-marked Q/A pairs still train exactly like plain pairs; the
+    marker is only load-bearing for probe extraction. We drop it
+    silently here rather than leak `!probe:` into the training question
+    text.
+
+    Public because every caller of `parse_instruction_body` that takes
+    raw section content needs to apply this rewrite before parsing —
+    otherwise the parser rejects probe-marked sections with a misleading
+    "expected `### Q` header" error. Callers: `data.sections_to_rows`,
+    `eval.probes._parse_instruction_sections`,
+    `preference.mine.materialize_pairs`, `replay.store._snapshot_to_rows`,
+    `cli.commands.synth._synth_prompt_summary`,
+    `train.gate.orchestrator._extract_probes`.
     """
     if _PROBE_HEADER not in body:
         return body
@@ -56,6 +64,12 @@ def _normalize_probe_markers(body: str) -> str:
     return "\n".join(rewritten)
 
 
+# Back-compat alias for callers that imported the underscore-prefixed
+# private name. Keeping the alias means downstream branches that haven't
+# rebased through the rename don't break.
+_normalize_probe_markers = normalize_probe_markers
+
+
 Row = dict[str, Any]
 
 _DEFAULT_IMAGE_TOKEN = "<image>"
@@ -112,7 +126,7 @@ def _section_to_rows(
         return [{"text": text, "_dlm_section_id": sid, "_dlm_row_tags": tags}]
 
     if section.type is SectionType.INSTRUCTION:
-        body = _normalize_probe_markers(section.content)
+        body = normalize_probe_markers(section.content)
         pairs = parse_instruction_body(body, section_id=sid)
         return [
             {
diff --git a/src/dlm/preference/mine.py b/src/dlm/preference/mine.py
index 44ade09..d764b43 100644
--- a/src/dlm/preference/mine.py
+++ b/src/dlm/preference/mine.py
@@ -17,7 +17,7 @@
 
 from dlm.data.errors import InstructionParseError
 from dlm.data.instruction_parser import parse_instruction_body
-from dlm.data.sections_to_rows import _normalize_probe_markers
+from dlm.data.sections_to_rows import normalize_probe_markers
 from dlm.doc.parser import ParsedDlm
 from dlm.doc.sections import Section, SectionType
 from dlm.preference.judge import PairScore, PreferenceJudge
@@ -266,7 +266,7 @@ def _extract_prompts(
             continue
         try:
             pairs = parse_instruction_body(
-                _normalize_probe_markers(section.content),
+                normalize_probe_markers(section.content),
                 section_id=section.section_id,
             )
         except InstructionParseError as exc:

From 230cb99b5db8af3d54c33ec1bec7a1b7c5792d01 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:23:28 -0400
Subject: [PATCH 38/46] Normalize probe markers in synth + gate instruction
 parsers

---
 src/dlm/cli/commands/synth.py      | 3 ++-
 src/dlm/train/gate/orchestrator.py | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/dlm/cli/commands/synth.py b/src/dlm/cli/commands/synth.py
index 53ced7e..f424092 100644
--- a/src/dlm/cli/commands/synth.py
+++ b/src/dlm/cli/commands/synth.py
@@ -342,9 +342,10 @@ def _synth_prompt_summary(content: str, *, section_id: str) -> str:
     """Best-effort prompt summary for `synth list`."""
     from dlm.data.errors import InstructionParseError
     from dlm.data.instruction_parser import parse_instruction_body
+    from dlm.data.sections_to_rows import normalize_probe_markers
 
     try:
-        pairs = parse_instruction_body(content, section_id=section_id)
+        pairs = parse_instruction_body(normalize_probe_markers(content), section_id=section_id)
     except InstructionParseError:
         return "<unparseable>"
     if not pairs:
diff --git a/src/dlm/train/gate/orchestrator.py b/src/dlm/train/gate/orchestrator.py
index 82747f2..38c5dd2 100644
--- a/src/dlm/train/gate/orchestrator.py
+++ b/src/dlm/train/gate/orchestrator.py
@@ -66,6 +66,7 @@ def probes_from_sections(parsed: ParsedDlm) -> list[GateProbe]:
     from dlm.data.errors import InstructionParseError, PreferenceParseError
     from dlm.data.instruction_parser import parse_instruction_body
     from dlm.data.preference_parser import parse_preference_body
+    from dlm.data.sections_to_rows import normalize_probe_markers
     from dlm.doc.sections import SectionType
 
     probes: list[GateProbe] = []
@@ -75,7 +76,10 @@ def probes_from_sections(parsed: ParsedDlm) -> list[GateProbe]:
             continue
         try:
             if section.type is SectionType.INSTRUCTION:
-                pairs = parse_instruction_body(section.content, section_id=section.section_id)
+                pairs = parse_instruction_body(
+                    normalize_probe_markers(section.content),
+                    section_id=section.section_id,
+                )
                 if pairs:
                     probes.append(GateProbe(tag, pairs[0].question))
             elif section.type is SectionType.PREFERENCE:

From ae194af7d9e9d17cd9a84f521c65c32b03d0a6f4 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 18:24:08 -0400
Subject: [PATCH 39/46] Normalize probe markers in replay snapshot instruction
 rows

---
 src/dlm/replay/store.py         |  5 ++++-
 tests/unit/replay/test_store.py | 10 ++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/dlm/replay/store.py b/src/dlm/replay/store.py
index 2e6539b..226495c 100644
--- a/src/dlm/replay/store.py
+++ b/src/dlm/replay/store.py
@@ -183,8 +183,11 @@ def _snapshot_to_rows(snap: SectionSnapshot) -> list[Row]:
 
     if snap.section_type == "instruction":
         from dlm.data.instruction_parser import parse_instruction_body
+        from dlm.data.sections_to_rows import normalize_probe_markers
 
-        pairs = parse_instruction_body(snap.content, section_id=snap.section_id)
+        pairs = parse_instruction_body(
+            normalize_probe_markers(snap.content), section_id=snap.section_id
+        )
         return [
             {
                 "messages": [
diff --git a/tests/unit/replay/test_store.py b/tests/unit/replay/test_store.py
index b6b4670..1335d36 100644
--- a/tests/unit/replay/test_store.py
+++ b/tests/unit/replay/test_store.py
@@ -70,6 +70,16 @@ def test_instruction_expands_to_messages_rows(self, tmp_path: Path) -> None:
         assert all("messages" in r for r in rows)
         assert rows[0]["messages"][0]["content"] == "q1"
 
+    def test_instruction_probe_marker_normalized(self, tmp_path: Path) -> None:
+        """Replay snapshots with `### Q !probe` headers parse like plain Q/A."""
+        s = _store(tmp_path)
+        body = "### Q !probe\nq1\n### A\na1"
+        s.append(_snap("a" * 16, "instruction", body, added=datetime(2026, 1, 1)))
+        rows = s.sample_rows(k=10, now=datetime(2026, 4, 1), rng=random.Random(0))
+        assert len(rows) == 1
+        assert rows[0]["messages"][0]["content"] == "q1"
+        assert rows[0]["messages"][1]["content"] == "a1"
+
     def test_preference_expands_to_pref_rows(self, tmp_path: Path) -> None:
         s = _store(tmp_path)
         body = "### Prompt\np\n### Chosen\nc\n### Rejected\nr"

From 3ea99a51b6fea234554b17757ebe6d6a06d7edca Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 19:31:02 -0400
Subject: [PATCH 40/46] Append post-MLX-fix retest note to finding-02

---
 .../13-followup/findings/02-cpt-isolation.md  | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/docs/audits/13-followup/findings/02-cpt-isolation.md b/docs/audits/13-followup/findings/02-cpt-isolation.md
index 7d3ec59..7bc162f 100644
--- a/docs/audits/13-followup/findings/02-cpt-isolation.md
+++ b/docs/audits/13-followup/findings/02-cpt-isolation.md
@@ -131,6 +131,44 @@ specialty domains. It also informs dlm's recommended-base table —
 the smollm2-135m row should carry a "use for style-transfer demos
 only" caveat.
 
+## Post-MLX-fix retest (2026-04-30)
+
+After the MLX backend bugs were fixed (key qualification + tensor
+transpose; commits 931f6bb + f7f0450), re-ran the same adapter
+(`01KQDGAM70EJ1WJCQY6PVDV95W` v0001) on a small set of direct queries
+through both `--backend pytorch` and `--backend mlx` to verify the
+finding-02 verdict wasn't an artifact of the MLX silent-bypass bug.
+
+Results on raw fortran-shaped prompts (e.g. `subroutine
+sort_real_array(arr, n)`):
+
+- **PyTorch (chat-templated):** produces fortran-flavored but invalid
+  syntax (`real(kind=real), real(kind=complex) :: a, b, c`) — adapter
+  active, output is malformed memorization.
+- **MLX (raw prompt):** produces mixed Python/fortran (`integer :: n`,
+  `n = size(arr)`, `print *, arr[0:2]`) — adapter active, output
+  recombines training fragments incoherently.
+
+Both paths show adapter influence; neither produces coherent fortran.
+This matches the original finding-02 signature (memorization without
+generalization, fragments recombined under the adapter delta) and
+falsifies the worry that the verdict was MLX-bypass-induced. The
+sway numbers (`z=−13.74σ`, `cal_general −4.81σ` 26% items regressed)
+go through PyTorch logprob calls in any case, so those were never
+affected by the MLX inference bug.
+
+**Verdict stands.** SmolLM2-135M with the audit-13-followup recipe
+produces memorization, not generalization. Architectural floor is a
+real conclusion, not an inference-bug artifact.
+
+Note on chat-templated questions ("How do you declare an allocatable
+real(real64) array?"): both backends fall back to base-like behavior
+(Python/numpy redirection) under chat formatting, because the adapter
+trained on PROSE-only raw fortran source learned a token prior keyed
+on raw-fortran-shaped inputs, not on chat-shaped questions. This is
+itself a finding-02 corollary: the LoRA only activates on inputs that
+match its training format.
+
 ## Next experiment
 
 [Finding 03](./03-base-floor.md) — promote the base to

From e07c696f0e1bc6439b03c3a0a18b2bb0839774e4 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 19:44:49 -0400
Subject: [PATCH 41/46] Audit 13-followup Finding 05: Q/A-shape recipe doesn't
 rescue smol

---
 .../finding05-smol-qa/direct-query-results.md | 133 ++++++
 .../13-followup/finding05-smol-qa/sway.yaml   |  90 ++++
 .../13-followup/finding05-smol-qa/the-doc.dlm | 415 ++++++++++++++++++
 .../13-followup/findings/00-overview.md       |  12 +-
 .../13-followup/findings/05-smol-qa-shape.md  | 126 ++++++
 5 files changed, 775 insertions(+), 1 deletion(-)
 create mode 100644 docs/audits/13-followup/finding05-smol-qa/direct-query-results.md
 create mode 100644 docs/audits/13-followup/finding05-smol-qa/sway.yaml
 create mode 100644 docs/audits/13-followup/finding05-smol-qa/the-doc.dlm
 create mode 100644 docs/audits/13-followup/findings/05-smol-qa-shape.md

diff --git a/docs/audits/13-followup/finding05-smol-qa/direct-query-results.md b/docs/audits/13-followup/finding05-smol-qa/direct-query-results.md
new file mode 100644
index 0000000..16a60cc
--- /dev/null
+++ b/docs/audits/13-followup/finding05-smol-qa/direct-query-results.md
@@ -0,0 +1,133 @@
+# Finding 05 — direct query transcripts
+
+**Setup:** SmolLM2-135M, 35 INSTRUCTION pairs, r=16/α=32/lr=2e-4. Trained
+60 epochs (early-stop fired). Final train loss 1.45 instantaneous 0.31,
+**eval_loss 2.37**, eval_acc 0.63. Compare finding 04 same recipe on
+qwen-coder-1.5b: eval_loss **0.047**, eval_acc 0.986.
+
+Adapter: `~/.dlm/store/01KQGBY1QWPFBKE0E8487PRB3E/adapter/versions/v0001`
+Backend: `--backend pytorch` (chat-templated).
+
+## Q1 — wedge: stdlib sorting signature (trained verbatim)
+
+> Show the signature of stdlib's sorting routine in Fortran.
+
+```
+```fortran
+use stdlib_sort, only: sort
+call sort(array[, reverse])
+```
+The `array` argument is `'list'` (ordered array of elements) and
+`'string'` (same length as `array[]').ortunately, it preserves
+key-value pairs.
+
+The `array[]` is the argument's selling point. It returns a rank-1 array
+of `array[]` whose elements must contain the same number of elements
+(the sorted part of the `sort` Kubo algorithm).
+```
+
+**Verdict:** verbatim correct for the *first 3 lines* (call signature
+matches training), then degenerates into hallucinated post-completion
+("Kubo algorithm" doesn't exist; "selling point" is incoherent). Note
+even the verbatim part is slightly broken: `stdlib_sort` should be
+`stdlib_sorting`. Compare finding 04's clean reproduction.
+
+## Q2 — trained module question
+
+> What module in fortran-lang/stdlib provides sorting routines?
+
+```
+ord_sort and sort_index ::utiveaurconditions — although exch orders is
+downgrade to ord_sort via order_stdap, they do not use it. also quad_sort does
+not use radix order.
+```
+
+**Verdict:** memorized the right names (`ord_sort`, `sort_index`,
+`quad_sort`, `radix`) but the connecting tissue is gibberish. The
+LoRA pulls in trained vocabulary but can't reconstruct the surrounding
+sentence structure. Compare finding 04 which produced the full
+trained answer verbatim.
+
+## Q3 — out-of-domain preservation: capital of France
+
+> What is the capital of France?
+
+```
+A Fortified AI, spoiled by the NAM module! The city of Paris is the country's
+most populous urban area. The region encompasses the
+```
+
+**Verdict:** **base capability damaged.** Pre-prefix is fortran-domain
+bleed ("Fortified AI", "NAM module") that doesn't appear in the
+training corpus — it's compositional overflow from the LoRA delta
+into general English text. Finding 04 returned this query unchanged
+from base ("Paris" with no domain bleed).
+
+## Q4 — held-out same-module generalization
+
+> How would I sort a real(dp) array using stdlib?
+
+```
+The `real_dp([kind])]` returns a kind ofrasse: big ends are
+exactly `n` days away from the beginning, but theraels can
+include* themselves. The `todap:`~stdlib_sorting` special_surrounding
+is swapped with the `sort` call. The `sort` is instead done with
+standard
+```
+
+**Verdict:** incoherent. `real_dp([kind])]`, "big ends ... days away",
+"theraels" — token salad. Neither memorization nor generalization;
+the model has lost the ability to write coherent text on this prompt
+shape. Finding 04 was "knows `stdlib_sorting`, invents wrong call form" —
+i.e. partial generalization. Smol shows zero generalization.
+
+## Q5 — held-out different module
+
+> How do you write a CSV file with stdlib?
+
+```
+```fortran
+use stdlib_resources, only: numbers_and_text, lines:
+  numbers :: line.integer(0) - integer, nargs-plus(length: 1)
+    length :: integer(size)
+    ! single_element
+    loop(*) $$
+    ? .. [loop]
+    .. [single_element]
+    .. [single_element] *length
+```
+
+**Verdict:** hallucinated `stdlib_resources` (not a real module),
+malformed fortran (`nargs-plus`, `loop(*) $$`, `[single_element]`).
+Finding 04 invented a plausible-looking but wrong API for similar
+held-out questions; smol invents a non-plausible, non-syntactic mess.
+
+## Q6 — trivial arithmetic: base preservation control
+
+> What is 2 + 2?
+
+```
+Yes, you're calling the standard `stdlib_array_plus` helper function. `2
+```
+
+**Verdict:** **catastrophic preservation failure.** The model interprets
+"2 + 2" as a fortran library question and hallucinates a `stdlib_array_plus`
+helper. Base SmolLM2-135M-Instruct answers "4". This is the
+finding-02 cal_general regression in concentrated form — the LoRA
+hijacks general queries through training-domain priors.
+
+## Summary
+
+| Question type | Finding 04 (qwen-coder-1.5b) | Finding 05 (smol) |
+|---|---|---|
+| **Trained verbatim** | clean reproduction | partial verbatim then gibberish |
+| **Held-out same-module** | partial generalization | token salad |
+| **Held-out different module** | plausible-looking wrong API | non-syntactic hallucination |
+| **Out-of-domain** ("capital of France") | unchanged from base | fortran bleed-through |
+| **Trivial arithmetic** ("2 + 2") | (not tested, likely correct) | hallucinates `stdlib_array_plus` |
+
+The recipe-shape fix that worked on qwen-coder-1.5b doesn't rescue
+smol. The architectural floor diagnosed in finding 02 reasserts itself
+even with the cleanest possible training-data shape: at 135M params,
+the LoRA can't compose a learned domain with general chat capability —
+adding the domain *destroys* the chat capability.
diff --git a/docs/audits/13-followup/finding05-smol-qa/sway.yaml b/docs/audits/13-followup/finding05-smol-qa/sway.yaml
new file mode 100644
index 0000000..e1bd895
--- /dev/null
+++ b/docs/audits/13-followup/finding05-smol-qa/sway.yaml
@@ -0,0 +1,90 @@
+# Finding 05 — Q/A-shape recipe on smol (does the recipe rescue the small base?)
+#
+# Same INSTRUCTION-only corpus as finding 04, but base flipped to
+# SmolLM2-135M. Tests whether the recipe-shape fix that worked on
+# qwen-coder-1.5b also rescues a 135M base, or whether the architectural
+# floor diagnosed in finding 02 still bites independent of training-data
+# shape.
+#
+# Run from the sway repo's venv after training completes:
+#   uv run --no-sync sway run \
+#     docs/audits/13-followup/finding05-smol-qa/sway.yaml \
+#     --json /tmp/sway-finding05.json --markdown /tmp/sway-finding05.md
+
+version: 1
+
+dlm_source: /Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/finding05-smol-qa/the-doc.dlm
+
+models:
+  base:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+  ft:
+    kind: hf
+    base: HuggingFaceTB/SmolLM2-135M-Instruct
+    adapter: ~/.dlm/store/01KQGBY1QWPFBKE0E8487PRB3E/adapter/versions/v0001
+
+defaults:
+  seed: 0
+  differential: true
+  coverage_threshold: 0.6
+  score_weights:
+    adherence: 0.30
+    attribution: 0.35
+    calibration: 0.20
+    ablation: 0.15
+
+suite:
+  # ── Calibration baseline ─────────────────────────────────────────
+  - name: null_baseline
+    kind: null_adapter
+    runs: 3
+
+  # ── Adherence ────────────────────────────────────────────────────
+  # Q/A-shape prompts so the LoRA's training-format trigger fires.
+  - name: dk_fortran_qa_shaped
+    kind: delta_kl
+    prompts:
+      - "What module in fortran-lang/stdlib provides sorting routines?"
+      - "Show the signature of stdlib's sorting routine in Fortran."
+      - "How do you read a CSV file with stdlib?"
+      - "What is the intent(in) attribute used for?"
+      - "How do you declare an allocatable real(real64) array?"
+      - "What does do concurrent do in Fortran?"
+    assert_z_gte: 1.0
+    assert_mean_gte: 0.02
+
+  # ── Attribution (bridge-aware) ───────────────────────────────────
+  - name: sis_fortran
+    kind: section_internalization
+    assert_z_gte: 1.0
+    assert_passing_section_frac: 0.4
+
+  - name: para_fortran
+    kind: paraphrase_invariance
+    assert_z_gte: 1.0
+
+  # ── Calibration ──────────────────────────────────────────────────
+  - name: leak_fortran
+    kind: leakage
+    assert_z_gte: 1.0
+
+  # The single most load-bearing probe for "did we break the base?"
+  # On smol w/ raw-source training (finding 02), 26% items regressed.
+  # Hypothesis: Q/A-shape recipe regresses fewer items because LoRA
+  # only activates on Q/A-shaped prompts, leaving English chat alone.
+  - name: cal_general
+    kind: calibration_drift
+    items_limit: 50
+    assert_z_gte: 1.0
+
+  # ── Ablation (the sway signature) ────────────────────────────────
+  - name: abl_fortran
+    kind: adapter_ablation
+    lambdas: [0.0, 0.25, 0.5, 0.75, 1.0, 1.25]
+    prompts:
+      - "What module in fortran-lang/stdlib provides sorting routines?"
+      - "Show the signature of stdlib's sorting routine in Fortran."
+      - "How do you declare an allocatable real(real64) array?"
+    assert_linearity_gte: 0.85
+    assert_z_gte: 1.0
diff --git a/docs/audits/13-followup/finding05-smol-qa/the-doc.dlm b/docs/audits/13-followup/finding05-smol-qa/the-doc.dlm
new file mode 100644
index 0000000..c664582
--- /dev/null
+++ b/docs/audits/13-followup/finding05-smol-qa/the-doc.dlm
@@ -0,0 +1,415 @@
+---
+dlm_id: 01KQGBY1QWPFBKE0E8487PRB3E
+dlm_version: 15
+base_model: smollm2-135m
+training:
+  adapter: lora
+  lora_r: 16
+  lora_alpha: 32
+  lora_dropout: 0.05
+  sequence_len: 1024
+  micro_batch_size: 1
+  grad_accum: 8
+  learning_rate: 2.0e-4
+  warmup_ratio: 0.1
+  num_epochs: 100
+---
+
+# Finding 05 — Q/A-shape recipe on smol (does the recipe rescue the small base?)
+
+Hand-authored Q/A pairs over `fortran-lang/stdlib`, sourced from
+`/tmp/stdlib_build/doc/specs/*.md`. No raw `sources:` directives —
+every training row is a `messages` row routed through Qwen's chatml
+template. The wedge to track is **Q3: signature of stdlib's sorting
+routine.** Finding 03's adapter degenerated into "should be able to
+sort..." enumeration; here the model should learn to point at
+`stdlib_sorting :: SORT` with proper signature.
+
+stdlib_sorting density is intentional (~60% of pairs): it's the wedge
+target. Other modules (io, math, strings, array) provide variety so
+the LoRA isn't a one-module specialist.
+
+::instruction::
+
+### Q
+What module in fortran-lang/stdlib provides sorting routines?
+
+### A
+The `stdlib_sorting` module. It provides four overloaded subroutines:
+`ORD_SORT`, `SORT`, `RADIX_SORT`, and `SORT_INDEX`, plus the parameters
+`int_index` and `int_index_low` for indexing.
+::instruction::
+
+### Q
+Show the signature of stdlib's sorting routine in Fortran.
+
+### A
+```fortran
+use stdlib_sorting, only: sort
+call sort(array[, reverse])
+```
+The `array` argument is `intent(inout)` and must be a rank-1 array of
+an intrinsic numeric type (integer kinds, real kinds), `character(*)`,
+`type(string_type)`, `type(bitset_64)`, or `type(bitset_large)`.
+The optional `reverse` argument is a scalar logical with `intent(in)`.
+::instruction::
+
+### Q
+What's the difference between `ord_sort` and `sort` in stdlib?
+
+### A
+`ord_sort` is intended for arrays that have significant runs of
+decreasing or non-decreasing values — it can achieve `O(N)` performance
+on already-sorted or reverse-sorted input. `sort` is intended for
+arrays that are effectively unordered before the call.
+`ord_sort` is stable; `sort` is not.
+::instruction::
+
+### Q
+What is `radix_sort` in `stdlib_sorting` for?
+
+### A
+`radix_sort` sorts fixed-width intrinsic data types — integers and
+reals. It exploits the bit representation of these types for non-
+comparison-based sorting and can outperform `sort` on numeric data.
+::instruction::
+
+### Q
+What does `sort_index` return?
+
+### A
+`sort_index` sorts the input array AND returns indices that map the
+original (pre-sort) array to its sorted version. This lets you reorder
+related arrays in the same way without re-sorting them — useful when
+sorting one column of a tabular dataset and needing to keep the other
+columns aligned.
+::instruction::
+
+### Q
+What's the signature of `ord_sort`?
+
+### A
+```fortran
+call ord_sort(array[, work, reverse])
+```
+- `array` : `intent(inout)`, rank-1 array of supported intrinsic type
+- `work` (optional): `intent(out)`, rank-1 same type as `array`, at
+  least `size(array)/2` elements — used as scratch memory to reduce
+  stack pressure
+- `reverse` (optional): scalar default-logical `intent(in)`; if
+  `.true.`, sort in non-increasing order
+::instruction::
+
+### Q
+What sorting algorithm does `ord_sort` use under the hood?
+
+### A
+A hybrid combining merge sort and insertion sort. For random input it
+performs like merge sort with `O(N log N)` worst-case — typically
+slower than `sort`. For input with long runs of monotonic values, it
+falls back toward `O(N)`, which is its specific advantage over `sort`.
+::instruction::
+
+### Q
+Is `stdlib_sorting`'s `sort` stable?
+
+### A
+No. The `sort` subroutine is not stable. If you need a stable sort, use
+`ord_sort`, which is stable in the case of equal keys. `radix_sort` is
+also not stable.
+::instruction::
+
+### Q
+What types can `stdlib_sorting`'s `ord_sort` accept?
+
+### A
+Rank-1 arrays of any of: `integer(int8)`, `integer(int16)`,
+`integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`,
+`character(*)`, `type(string_type)`, `type(bitset_64)`, or
+`type(bitset_large)`.
+::instruction::
+
+### Q
+How do you use `stdlib_sorting`'s `sort` to sort a real array in
+descending order?
+
+### A
+```fortran
+use stdlib_sorting, only: sort
+real, allocatable :: arr(:)
+arr = [3.14, 1.41, 2.72, 0.58]
+call sort(arr, reverse=.true.)
+! arr is now [3.14, 2.72, 1.41, 0.58]
+```
+::instruction::
+
+### Q
+What kind of integer does `int_index` evaluate to?
+
+### A
+`int_index` is set to `int64` from the `stdlib_kinds` module — a
+64-bit integer kind for indexing large arrays. The companion
+`int_index_low` is `int32`.
+::instruction::
+
+### Q
+What's the difference between `sort_adjoint` and `sort_index`?
+
+### A
+Both are based on `ord_sort`. `sort_adjoint` re-orders a *second*
+array of the same size using the same permutations applied to the
+input. `sort_index` instead returns the index permutation itself,
+which the caller can then apply to any number of related arrays.
+::instruction::
+
+### Q
+What's the licensing situation for `stdlib_sorting`?
+
+### A
+The Fortran Standard Library overall is MIT-licensed, but
+`ord_sort`, `sort_adjoint`, `sort_index`, and `sort` are translations of
+Rust's `slice.rs` sort, which is Apache-2.0 / MIT dual-licensed.
+`radix_sort` does not have this Rust ancestry.
+::instruction::
+
+### Q
+Why might you pass the optional `work` argument to `ord_sort`?
+
+### A
+`work` is used as scratch memory for `ord_sort`'s internal record
+keeping. Passing a pre-allocated `work` array (e.g. one in static
+storage) avoids stack allocation during the sort, which can
+significantly reduce stack memory pressure when sorting large arrays
+inside deeply nested call chains.
+::instruction::
+
+### Q
+What happens if you `ord_sort` a real array containing `NaN`?
+
+### A
+The order of the result is undefined. The same applies to `sort`,
+`sort_adjoint`, `sort_index`, and `radix_sort`. If your data may
+contain NaN values, filter them out before calling stdlib's sort
+routines.
+::instruction::
+
+### Q
+Show how to sort an integer array AND get indices to reorder a
+companion array using `stdlib_sorting`.
+
+### A
+```fortran
+use stdlib_sorting, only: sort_index, int_index
+integer :: keys(5) = [3, 1, 4, 1, 5]
+real    :: vals(5) = [10.0, 20.0, 30.0, 40.0, 50.0]
+integer(int_index) :: idx(5)
+call sort_index(keys, idx)        ! keys is now sorted; idx maps original->sorted
+vals = vals(idx)                  ! reorder vals using the same permutation
+```
+::instruction::
+
+### Q
+What are the four overloaded subroutines in `stdlib_sorting`?
+
+### A
+`ORD_SORT`, `SORT`, `RADIX_SORT`, and `SORT_INDEX`. (`SORT_ADJOINT`
+exists but the spec groups it under `ORD_SORT`'s family.)
+::instruction::
+
+### Q
+Are stdlib's sort routines thread-safe?
+
+### A
+Each call modifies its `intent(inout)` `array` argument in place, so
+two threads must not call sort on the same array concurrently. With
+distinct arrays, there's no shared state in the sort routines
+themselves — they're reentrant.
+::instruction::
+
+### Q
+What is the `stdlib_io` module for in fortran-lang/stdlib?
+
+### A
+`stdlib_io` provides text-file IO routines — primarily `loadtxt` for
+reading delimited numeric data into rank-2 arrays and `savetxt` for
+writing them. It also exposes formatting helpers like `disp` for
+human-readable display of arrays and scalars.
+::instruction::
+
+### Q
+Show the signature of `stdlib_io::loadtxt`.
+
+### A
+```fortran
+use stdlib_io, only: loadtxt
+call loadtxt(filename, array[, skiprows, max_rows, fmt])
+```
+The `array` is allocated and filled from the file; supported types
+include `real(sp)`, `real(dp)`, and `complex(dp)`. `skiprows` skips
+header lines, `max_rows` caps the read.
+::instruction::
+
+### Q
+What's `stdlib_string_type`'s purpose?
+
+### A
+It defines `type(string_type)` — a deferred-length, allocatable string
+wrapper around `character(:)` — and overloads operators (`==`, `<`,
+`//`) for it. This gives Fortran a value-semantic variable-length
+string type that's ergonomic in containers and arrays, working around
+the limitations of bare `character(*)`.
+::instruction::
+
+### Q
+What does `stdlib_array` provide?
+
+### A
+`stdlib_array` provides index-manipulation utilities — `trueloc` and
+`falseloc` for finding indices where a logical array is true or false,
+plus indexing helpers. Useful for masking and selecting elements
+without writing manual loops.
+::instruction::
+
+### Q
+What is `stdlib_math::linspace`?
+
+### A
+`linspace(start, end, n)` returns a rank-1 array of `n` evenly-spaced
+values from `start` to `end` inclusive. Same shape as numpy's `linspace`.
+The companion `logspace(start, end, n[, base])` returns evenly-spaced
+values in log space.
+::instruction::
+
+### Q
+What is `stdlib_stats::mean` for?
+
+### A
+Computes the arithmetic mean of a rank-N array along an optional
+specified dimension. With `dim` omitted, returns a scalar mean over
+all elements. With `dim=k`, returns a rank-(N-1) array of means along
+the k-th axis. Optionally accepts a `mask` for conditional inclusion.
+::instruction::
+
+### Q
+What's `stdlib_linalg::solve` for?
+
+### A
+Solves a dense linear system `A . x = b` for `x`, where `A` is a
+square matrix and `b` is a right-hand-side vector or matrix. Wraps
+LAPACK's `gesv` family. Returns the solution; the input `A` is
+typically modified in-place by the LU decomposition.
+::instruction::
+
+### Q
+What `kind` parameters does `stdlib_kinds` define?
+
+### A
+`int8`, `int16`, `int32`, `int64` for integer kinds and `sp`, `dp`,
+`qp` for real kinds (single, double, quadruple precision). Also
+`xdp` for extended-double where supported. These wrap the standard
+`iso_fortran_env` kinds with shorter names.
+::instruction::
+
+### Q
+What does `stdlib_ascii::to_upper` do?
+
+### A
+Returns its `character(*)` argument with ASCII letters (a-z) mapped to
+their uppercase equivalents (A-Z). Non-letter characters pass through
+unchanged. The companion `to_lower` does the opposite.
+::instruction::
+
+### Q
+What's `stdlib_strings::replace_all`?
+
+### A
+```fortran
+use stdlib_strings, only: replace_all
+result = replace_all(string, pattern, replacement)
+```
+Returns `string` with every occurrence of `pattern` substituted by
+`replacement`. Pure and elemental, so it can be applied to scalar or
+array string inputs.
+::instruction::
+
+### Q
+What's the difference between `stdlib_random::random_seed` and
+Fortran's intrinsic `random_seed`?
+
+### A
+The intrinsic `random_seed` configures the implementation-dependent
+default RNG. `stdlib_random::random_seed` is part of stdlib's
+deterministic RNG path — it seeds stdlib's portable distribution
+sampling routines, giving reproducible random sequences across
+compilers.
+::instruction::
+
+### Q
+What does `stdlib_hashmaps::hashmap_type` provide?
+
+### A
+A generic open-addressing hash map keyed by integer or string and
+storing arbitrary user-defined data. `set`, `get`, `remove`, `keys`,
+and iterators are provided as type-bound procedures.
+::instruction::
+
+### Q
+What's `stdlib_quadrature::trapz` for?
+
+### A
+Numerical integration via the trapezoidal rule. `trapz(y, x)` returns
+the integral of y dx using sample points `x` and corresponding values
+`y`. Both must be rank-1 arrays of the same length. `simps` is the
+cousin using Simpson's rule.
+::instruction::
+
+### Q
+How does the `stdlib_logger` module emit log messages?
+
+### A
+Define a `logger_type` instance, configure its level (`debug`, `info`,
+`warning`, `error`), then call methods like `log_information`,
+`log_warning`, etc. Output goes to the configured unit (default
+stderr). Logger instances can be configured with timestamps, source
+locations, and log-level filtering.
+::instruction::
+
+### Q
+What does `intent(in)` mean for a Fortran dummy argument?
+
+### A
+The argument may be read but not modified inside the procedure.
+Attempting to assign to or pass it as `intent(out)`/`intent(inout)`
+to another routine is a compile-time error (in conformant compilers).
+This is the contract you want for read-only inputs.
+::instruction::
+
+### Q
+When should you prefer `do concurrent` over a plain `do` loop?
+
+### A
+When loop iterations have no carried dependence — that is, no
+iteration reads or writes a variable that another iteration could
+write. `do concurrent` expresses this independence to the compiler,
+which can then auto-vectorize, parallelize via OpenMP, or offload to
+GPU without further pragmas. If iterations are actually dependent,
+use plain `do`.
+::instruction::
+
+### Q
+What's the canonical way to declare an allocatable array of
+`real(real64)` in modern Fortran?
+
+### A
+```fortran
+use iso_fortran_env, only: real64
+real(real64), allocatable :: arr(:)
+allocate(arr(n))
+! ... use arr ...
+deallocate(arr)
+```
+Or use stdlib's `dp` kind:
+```fortran
+use stdlib_kinds, only: dp
+real(dp), allocatable :: arr(:)
+```
diff --git a/docs/audits/13-followup/findings/00-overview.md b/docs/audits/13-followup/findings/00-overview.md
index a3657df..885e9d8 100644
--- a/docs/audits/13-followup/findings/00-overview.md
+++ b/docs/audits/13-followup/findings/00-overview.md
@@ -39,7 +39,17 @@ hypothesis about why the original audit-13 fortran fine-tune showed
    adapters). The audit closes GREEN with a clean three-step
    product story.
 
-5. **[Finding 05 — corpus density](./05-corpus-density.md)** *(optional)*
+5. **[Finding 05 — Q/A-shape recipe on smol](./05-smol-qa-shape.md)**
+   Took finding 04's exact INSTRUCTION-only corpus, swapped the base
+   to SmolLM2-135M. Result: **architectural floor confirmed.** Smol
+   overfits the 32 train pairs (eval_loss bottoms at epoch 20-30 then
+   climbs while train descends), produces token-salad on held-out
+   questions, and damages base capability so badly that "What is 2+2?"
+   hallucinates a `stdlib_array_plus` helper. Finding 02's "use 135M
+   for style-transfer demos only" caveat is a base-size constraint,
+   not a recipe constraint.
+
+6. **[Finding 06 — corpus density](./06-corpus-density.md)** *(optional)*
    Test the dataset-size / generalization curve. Find the per-module
    Q/A density floor below which the model can't generalize the API
    form. Gives dlm users a concrete planning number ("budget N Q/A
diff --git a/docs/audits/13-followup/findings/05-smol-qa-shape.md b/docs/audits/13-followup/findings/05-smol-qa-shape.md
new file mode 100644
index 0000000..25b6e0f
--- /dev/null
+++ b/docs/audits/13-followup/findings/05-smol-qa-shape.md
@@ -0,0 +1,126 @@
+# Finding 05 — Q/A-shape recipe on smol: floor still bites
+
+**Hypothesis tested:** Finding 04 closed the wedge with a Q/A-shape
+recipe on `qwen2.5-coder-1.5b`. Finding 02 ruled SmolLM2-135M
+inadequate using a *raw-source* (PROSE-only) recipe. Open question:
+was finding 02's verdict really about base size, or about the
+combination of small base + bad recipe? Take finding 04's exact
+INSTRUCTION-only corpus, swap in `smollm2-135m`, run.
+
+**Verdict:** **architectural floor confirmed.** The recipe-shape fix
+is necessary but not sufficient. SmolLM2-135M overfits the 32 train
+pairs, fails to generalize, and damages base capability — all signals
+qualitatively worse than finding 02's PROSE recipe on the same base.
+Finding 02's "use 135M for style-transfer demos only" caveat
+generalizes: it's a base-size constraint, not a recipe constraint.
+
+## Setup
+
+- **Doc:** `docs/audits/13-followup/finding05-smol-qa/the-doc.dlm`
+- **Store ULID:** `01KQGBY1QWPFBKE0E8487PRB3E`
+- **Adapter:** `~/.dlm/store/01KQGBY1QWPFBKE0E8487PRB3E/adapter/versions/v0001`
+- **Base:** `smollm2-135m`
+- **Sections:** 35 INSTRUCTION (32 train / 3 val), copied verbatim
+  from finding 04 — *only* base + dlm_id changed
+- **Recipe:** r=16 / α=32 / lr=2e-4 / 100 epochs (early-stopped at epoch 60)
+
+## Training curve
+
+| Epoch | train_loss | eval_loss | train_acc | eval_acc |
+|---:|---:|---:|---:|---:|
+| 10 |  2.62 | 2.22 | 0.52 | 0.61 |
+| 20 |  1.77 | 1.64 | 0.65 | **0.68** ← best |
+| 30 |  1.29 | 1.57 | 0.74 | 0.66 |
+| 40 |  0.86 | 1.96 | 0.84 | 0.64 |
+| 50 |  0.55 | 2.18 | 0.91 | 0.65 |
+| 60 |  0.31 | 2.37 | 0.95 | 0.63 |
+
+**Eval bottoms at epoch 20-30 then climbs while train continues to
+descend** — textbook overfitting. Finding 04 same recipe on
+qwen-coder-1.5b: final eval **0.047**, eval_acc **0.986**. Smol's
+endpoint eval is 50× higher; held-out token-acc is 35 percentage
+points lower. The base can memorize the train pairs but lacks the
+parameter capacity to generalize the patterns.
+
+## Direct-query smoke
+
+Full transcripts at
+[`finding05-smol-qa/direct-query-results.md`](../finding05-smol-qa/direct-query-results.md).
+Highlights vs finding 04:
+
+| Query | Finding 04 (qwen-coder-1.5b) | Finding 05 (smol) |
+|---|---|---|
+| Wedge (sorting signature) | clean verbatim | partial verbatim → gibberish |
+| Held-out same-module | partial generalization | token salad |
+| Held-out different module | plausible wrong API | non-syntactic hallucination |
+| Capital of France | unchanged from base | "Fortified AI"/"NAM module" bleed |
+| **2 + 2** | (not tested) | "you're calling `stdlib_array_plus`" |
+
+The 2+2 result is the punchline: the LoRA so saturates the small base
+that even arithmetic gets routed through fortran-domain hallucinations.
+Finding 02 measured this as `cal_general` 26% items regressed >1 nat;
+the qualitative picture is consistent.
+
+## Why this falsifies the "recipe-shape rescues smol" idea
+
+Finding 04 narrative: "raw source teaches autocomplete; INSTRUCTION
+teaches Q/A". One way to read this: the bad recipe was masking a base
+that could in principle handle either domain expansion or chat — just
+not both at once via raw-source training.
+
+Finding 05 falsifies this: with the *cleanest* recipe (no raw sources,
+INSTRUCTION-only, exactly the corpus that worked on a 1.5B base), smol
+*still* destroys general capability. The recipe wasn't the bottleneck;
+the parameter count is. Going from "memorization with raw sources" to
+"memorization with Q/A pairs" doesn't buy generalization, it just
+changes what gets memorized.
+
+This is the cleanest signal in the audit for the **base-size table**
+in dlm's docs: the smollm2-135m row should refuse domain training in
+recommended configurations and steer users to ≥ 1B for any
+specialty-knowledge task.
+
+## What this confirms about finding 02
+
+Finding 02's MLX-fix retest (appended 2026-04-30) already showed the
+adapter was being applied during finding 02; the negative result wasn't
+an MLX silent-bypass artifact. Finding 05 closes the loop: even with
+the recipe correction (which would have been the next obvious thing to
+try if MLX-bypass were the issue), the verdict holds. The
+architectural-floor finding is robust across:
+
+- Recipe shape (PROSE-only in finding 02 vs INSTRUCTION-only in
+  finding 05)
+- Inference backend (verified MLX & PyTorch in finding 02 retest)
+- Training duration (60 epochs early-stopped here, 800 steps in
+  finding 02)
+
+## Implications for the dlm product narrative
+
+Updating the three-step story from finding 04:
+
+1. **Use a base ≥ 1B params.** Smaller bases (135M) actively degrade
+   under LoRA training of *any* shape. Finding 05 confirms this is a
+   floor, not a recipe-fixable failure. Document a hard refusal in
+   `dlm doctor` or a loud warning at `dlm train` time when the
+   selected base is below the floor and the corpus is non-trivial.
+2. **Train on INSTRUCTION-shaped data, not raw source code.**
+   (Unchanged from finding 04.)
+3. **Plan one Q/A pair per question you want to answer.**
+   (Unchanged from finding 04, but with the caveat that this only
+   works above the base-size floor.)
+
+## Next experiment (optional)
+
+The base-size *floor* is now diagnosed; the question of where the
+ceiling sits is open but lower priority for the product story.
+Possibilities for a finding 06 or future audit:
+
+- Test SmolLM2-360M and SmolLM2-1.7B with the same finding-05 corpus —
+  is the cliff between 135M and 1B continuous or stepped?
+- Test qwen2.5-coder-0.5B vs 1.5B with the same corpus — does
+  code-pretraining lower the floor by domain proximity?
+
+Both are nice-to-have for the recommended-base table refinement; the
+finding-04+05 pair is sufficient to make the product claim ("use ≥ 1B")
+publishable as written.

From 1f36c746f111239903a15c3022ab1a72d1a48beb Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Thu, 30 Apr 2026 19:45:37 -0400
Subject: [PATCH 42/46] Audit 13-followup Finding 05: append sway results

---
 .../finding05-smol-qa/sway-results.json       | 824 ++++++++++++++++++
 .../finding05-smol-qa/sway-results.md         |  41 +
 .../13-followup/findings/05-smol-qa-shape.md  |  24 +
 3 files changed, 889 insertions(+)
 create mode 100644 docs/audits/13-followup/finding05-smol-qa/sway-results.json
 create mode 100644 docs/audits/13-followup/finding05-smol-qa/sway-results.md

diff --git a/docs/audits/13-followup/finding05-smol-qa/sway-results.json b/docs/audits/13-followup/finding05-smol-qa/sway-results.json
new file mode 100644
index 0000000..33d88f7
--- /dev/null
+++ b/docs/audits/13-followup/finding05-smol-qa/sway-results.json
@@ -0,0 +1,824 @@
+{
+  "adapter_id": "/Users/mfwolffe/.dlm/store/01KQGBY1QWPFBKE0E8487PRB3E/adapter/versions/v0001",
+  "backend_stats": {
+    "avg_batch_size": 4.666666666666667,
+    "batched_prompts": 28,
+    "batches_sent": 6,
+    "cache_hits": 180,
+    "cache_misses": 378,
+    "forward_passes": 378,
+    "hit_rate": 0.3225806451612903,
+    "max_batch_size": 6,
+    "scoring_wall_s": 14.60764731594827
+  },
+  "base_model_id": "HuggingFaceTB/SmolLM2-135M-Instruct",
+  "determinism": {
+    "class": "best_effort",
+    "notes": [
+      "MPS: bit-identical across runs is best-effort"
+    ],
+    "seed": 0
+  },
+  "finished_at": "2026-04-30T23:44:54.359560+00:00",
+  "null_stats": {
+    "calibration_drift": {
+      "degenerate": 1.0,
+      "mean": 0.0,
+      "n": 3.0,
+      "std": 1e-06
+    },
+    "delta_kl": {
+      "degenerate": 0.0,
+      "mean": 0.173796021454004,
+      "n": 3.0,
+      "std": 0.1939931492701627
+    },
+    "leakage": {
+      "degenerate": 0.0,
+      "mean": 0.17610427655357053,
+      "n": 3.0,
+      "std": 0.07092708538349159
+    },
+    "paraphrase_invariance": {
+      "degenerate": 0.0,
+      "mean": -0.012714107831319174,
+      "n": 3.0,
+      "std": 0.013087756562633434
+    },
+    "section_internalization": {
+      "degenerate": 0.0,
+      "mean": -2.509727251123853e-20,
+      "n": 3.0,
+      "std": 1e-06
+    }
+  },
+  "probes": [
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 48.716706999985036,
+      "evidence": {
+        "calibrated_kinds": [
+          "delta_kl",
+          "section_internalization",
+          "paraphrase_invariance",
+          "leakage",
+          "calibration_drift"
+        ],
+        "from_cache": false,
+        "init_scale": 0.02,
+        "null_stats": {
+          "calibration_drift": {
+            "degenerate": 1.0,
+            "mean": 0.0,
+            "n": 3.0,
+            "std": 1e-06
+          },
+          "delta_kl": {
+            "degenerate": 0.0,
+            "mean": 0.173796021454004,
+            "n": 3.0,
+            "std": 0.1939931492701627
+          },
+          "leakage": {
+            "degenerate": 0.0,
+            "mean": 0.17610427655357053,
+            "n": 3.0,
+            "std": 0.07092708538349159
+          },
+          "paraphrase_invariance": {
+            "degenerate": 0.0,
+            "mean": -0.012714107831319174,
+            "n": 3.0,
+            "std": 0.013087756562633434
+          },
+          "section_internalization": {
+            "degenerate": 0.0,
+            "mean": -2.509727251123853e-20,
+            "n": 3.0,
+            "std": 1e-06
+          }
+        },
+        "null_stats_by_rank": {
+          "rank_1.00": {
+            "calibration_drift": {
+              "degenerate": 1.0,
+              "mean": 0.0,
+              "n": 3.0,
+              "std": 1e-06
+            },
+            "delta_kl": {
+              "degenerate": 0.0,
+              "mean": 0.173796021454004,
+              "n": 3.0,
+              "std": 0.1939931492701627
+            },
+            "leakage": {
+              "degenerate": 0.0,
+              "mean": 0.17610427655357053,
+              "n": 3.0,
+              "std": 0.07092708538349159
+            },
+            "paraphrase_invariance": {
+              "degenerate": 0.0,
+              "mean": -0.012714107831319174,
+              "n": 3.0,
+              "std": 0.013087756562633434
+            },
+            "section_internalization": {
+              "degenerate": 0.0,
+              "mean": -2.509727251123853e-20,
+              "n": 3.0,
+              "std": 1e-06
+            }
+          }
+        },
+        "per_kind_raw_samples": {
+          "calibration_drift": [
+            0.0,
+            0.0,
+            0.0
+          ],
+          "delta_kl": [
+            0.44814371786693713,
+            0.03648415034633113,
+            0.036760196148743804
+          ],
+          "leakage": [
+            0.11363636363636363,
+            0.27530364372469635,
+            0.13937282229965156
+          ],
+          "paraphrase_invariance": [
+            -0.006305098533630371,
+            -0.0008810758590698242,
+            -0.030956149101257324
+          ],
+          "section_internalization": [
+            -2.1081708909440364e-20,
+            -1.2046690805394493e-20,
+            -4.216341781888073e-20
+          ]
+        },
+        "rank_multipliers": [
+          1.0
+        ],
+        "runs": 3,
+        "seed_base": 1000,
+        "skipped_kinds": [
+          {
+            "kind": "adapter_ablation",
+            "reason": "probe opted out (calibrate_spec returned None)"
+          }
+        ],
+        "weight": 1.0
+      },
+      "ft_value": null,
+      "kind": "null_adapter",
+      "message": "null calibration: 5 kinds calibrated over 3 seeds (1 opted out)",
+      "name": "null_baseline",
+      "raw": null,
+      "score": 1.0,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.09650551619626635,
+        0.35946236070362286
+      ],
+      "duration_s": 0.16365929201128893,
+      "evidence": {
+        "divergence_kind": "js",
+        "max": 0.5209904680483264,
+        "num_prompts": 6,
+        "per_prompt": [
+          0.035233903048109916,
+          0.16547128814672135,
+          0.5209904680483264,
+          0.037578388980321654,
+          0.28977815672822294,
+          0.2656513511901071
+        ],
+        "raw_ci_95": [
+          0.09650551619626635,
+          0.35946236070362286
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 0.23362287829993433
+        }
+      },
+      "ft_value": null,
+      "kind": "delta_kl",
+      "message": "mean js=0.2191, z=+0.23\u03c3 vs null",
+      "name": "dk_fortran_qa_shaped",
+      "raw": 0.2191172593569682,
+      "score": 0.5194587403778158,
+      "verdict": "fail",
+      "z_score": 0.23362287829993433
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        -0.04250307630931393,
+        0.04526532776670737
+      ],
+      "duration_s": 1.3934379169950262,
+      "evidence": {
+        "num_sections": 36,
+        "passing_frac": 0.4166666666666667,
+        "per_section": [
+          {
+            "base_nll": 5.164129638671875,
+            "effective_sis": 0.18507939311104665,
+            "ft_nll": 2.3199871826171874,
+            "kind": "prose",
+            "leak_lift": 0.36567023074738,
+            "own_lift": 0.5507496238584266,
+            "passed": true,
+            "section_id": "2ae73af79acd2aa3",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.811819657035496,
+            "effective_sis": -0.3556689245978278,
+            "ft_nll": 3.7164399520210596,
+            "kind": "instruction",
+            "leak_lift": 0.3806910173504043,
+            "own_lift": 0.02502209275257649,
+            "passed": false,
+            "section_id": "2452de7d2dd5c3b4",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.136927462172234,
+            "effective_sis": 0.2418172898270216,
+            "ft_nll": 1.6303156710219109,
+            "kind": "instruction",
+            "leak_lift": 0.36409417806082517,
+            "own_lift": 0.6059114678878468,
+            "passed": true,
+            "section_id": "21bb9fbcaaa7a060",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.098089139755458,
+            "effective_sis": 0.050937016426319925,
+            "ft_nll": 1.7958587228435359,
+            "kind": "instruction",
+            "leak_lift": 0.36939640787751127,
+            "own_lift": 0.4203334243038312,
+            "passed": true,
+            "section_id": "f76ff274207e54ba",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3260064697265626,
+            "effective_sis": -0.07481219193212046,
+            "ft_nll": 2.334599609375,
+            "kind": "instruction",
+            "leak_lift": 0.37288944144302355,
+            "own_lift": 0.2980772495109031,
+            "passed": false,
+            "section_id": "1fcb5d4cbb821529",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.056639161828446,
+            "effective_sis": -0.022327395419529494,
+            "ft_nll": 1.9895537912029109,
+            "kind": "instruction",
+            "leak_lift": 0.3714315304287849,
+            "own_lift": 0.3491041350092554,
+            "passed": false,
+            "section_id": "26055358b11a4b71",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.293870785361842,
+            "effective_sis": 0.16398709383659743,
+            "ft_nll": 2.017074906198602,
+            "kind": "instruction",
+            "leak_lift": 0.366256127949448,
+            "own_lift": 0.5302432217860454,
+            "passed": true,
+            "section_id": "5c2e655d5acab066",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.731144400203929,
+            "effective_sis": 0.12062639040683182,
+            "ft_nll": 1.9100213892319624,
+            "kind": "instruction",
+            "leak_lift": 0.3674605919336082,
+            "own_lift": 0.48808698234044,
+            "passed": true,
+            "section_id": "5b30c1a667e7f247",
+            "tag": ""
+          },
+          {
+            "base_nll": 2.9113540649414062,
+            "effective_sis": -0.10372084294061601,
+            "ft_nll": 2.12537110157502,
+            "kind": "instruction",
+            "leak_lift": 0.37369245952659286,
+            "own_lift": 0.26997161658597685,
+            "passed": false,
+            "section_id": "7c4c6be29d6bb58b",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.886013966686321,
+            "effective_sis": 0.07231549642739837,
+            "ft_nll": 2.1718230337466835,
+            "kind": "instruction",
+            "leak_lift": 0.36880256121081467,
+            "own_lift": 0.44111805763821305,
+            "passed": true,
+            "section_id": "355dc55a9b922c2f",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.977588742278343,
+            "effective_sis": -0.010462636306369733,
+            "ft_nll": 3.1824745355650434,
+            "kind": "instruction",
+            "leak_lift": 0.37110195378675265,
+            "own_lift": 0.3606393174803829,
+            "passed": false,
+            "section_id": "591edca2bcb639df",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.4858864232113485,
+            "effective_sis": 0.0649295265511961,
+            "ft_nll": 2.5392931887978003,
+            "kind": "instruction",
+            "leak_lift": 0.3690077270407092,
+            "own_lift": 0.4339372535919053,
+            "passed": true,
+            "section_id": "dd845297627e6316",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.1663431020883412,
+            "effective_sis": 0.16946833433579728,
+            "ft_nll": 1.4705377432016227,
+            "kind": "instruction",
+            "leak_lift": 0.3661038712689147,
+            "own_lift": 0.535572205604712,
+            "passed": true,
+            "section_id": "f33fa262dde9e576",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.629479471842448,
+            "effective_sis": 0.12450598650031502,
+            "ft_nll": 2.35242919921875,
+            "kind": "instruction",
+            "leak_lift": 0.3673528253754559,
+            "own_lift": 0.4918588118757709,
+            "passed": true,
+            "section_id": "51b2d7bab71a2394",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.5608963639768834,
+            "effective_sis": 0.06661783229160151,
+            "ft_nll": 2.009845890410959,
+            "kind": "instruction",
+            "leak_lift": 0.36896082965903126,
+            "own_lift": 0.4355786619506328,
+            "passed": true,
+            "section_id": "a1471c8243e51656",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.46660628973269,
+            "effective_sis": 0.08009064584719167,
+            "ft_nll": 1.9112189797794117,
+            "kind": "instruction",
+            "leak_lift": 0.36858658483804263,
+            "own_lift": 0.4486772306852343,
+            "passed": true,
+            "section_id": "e4de1e9b6253d0fe",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.724906558082217,
+            "effective_sis": 0.0987929679005865,
+            "ft_nll": 1.9858965192522322,
+            "kind": "instruction",
+            "leak_lift": 0.368067075892115,
+            "own_lift": 0.4668600437927015,
+            "passed": true,
+            "section_id": "3f45b9dc7adb5ae2",
+            "tag": ""
+          },
+          {
+            "base_nll": 5.869542275705645,
+            "effective_sis": -0.10509210902484184,
+            "ft_nll": 4.2927575880481355,
+            "kind": "instruction",
+            "leak_lift": 0.3737305502511547,
+            "own_lift": 0.26863844122631286,
+            "passed": false,
+            "section_id": "49b016bc73973496",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.878383435701069,
+            "effective_sis": -0.2987410517286544,
+            "ft_nll": 3.566683049787555,
+            "kind": "instruction",
+            "leak_lift": 0.3791096875484828,
+            "own_lift": 0.08036863581982842,
+            "passed": false,
+            "section_id": "7e22432d60d48676",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.624024938364498,
+            "effective_sis": 0.19054229192462147,
+            "ft_nll": 1.6088468207687627,
+            "kind": "instruction",
+            "leak_lift": 0.365518483558114,
+            "own_lift": 0.5560607754827355,
+            "passed": true,
+            "section_id": "26f0a3de93f72693",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.089062410242417,
+            "effective_sis": 0.03307038220764158,
+            "ft_nll": 2.441321204690372,
+            "kind": "instruction",
+            "leak_lift": 0.3698927032724746,
+            "own_lift": 0.40296308548011617,
+            "passed": false,
+            "section_id": "068761bfd5a13bf0",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.3356565817808495,
+            "effective_sis": -0.04247155325080737,
+            "ft_nll": 2.9069730318509617,
+            "kind": "instruction",
+            "leak_lift": 0.3719910903685426,
+            "own_lift": 0.32951953711773524,
+            "passed": false,
+            "section_id": "c36538fa2d77f1dc",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.747733806741649,
+            "effective_sis": -0.04766004529346185,
+            "ft_nll": 2.5316872432314117,
+            "kind": "instruction",
+            "leak_lift": 0.37213521514750525,
+            "own_lift": 0.3244751698540434,
+            "passed": false,
+            "section_id": "8c64273ee598cdc8",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.1840599323141165,
+            "effective_sis": -0.043150407071232744,
+            "ft_nll": 2.136951446533203,
+            "kind": "instruction",
+            "leak_lift": 0.37200994741911,
+            "own_lift": 0.32885954034787723,
+            "passed": false,
+            "section_id": "22844ad1e6cc21b5",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.509236952837776,
+            "effective_sis": 0.12555988804335655,
+            "ft_nll": 1.7795921774471508,
+            "kind": "instruction",
+            "leak_lift": 0.3673235503325936,
+            "own_lift": 0.49288343837595017,
+            "passed": true,
+            "section_id": "ca4042aec6060b82",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.8464840748271003,
+            "effective_sis": 0.021109981847880432,
+            "ft_nll": 2.341220543032787,
+            "kind": "instruction",
+            "leak_lift": 0.37022493661580125,
+            "own_lift": 0.3913349184636817,
+            "passed": false,
+            "section_id": "6d1105d942e21fe7",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.2444513956705725,
+            "effective_sis": -0.11153607984572833,
+            "ft_nll": 3.1308199564615884,
+            "kind": "instruction",
+            "leak_lift": 0.3739095494406237,
+            "own_lift": 0.2623734695948954,
+            "passed": false,
+            "section_id": "b2d3d820f1c1c345",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.5749533971150718,
+            "effective_sis": -0.17598699916513005,
+            "ft_nll": 2.860989252726237,
+            "kind": "instruction",
+            "leak_lift": 0.3756998527550516,
+            "own_lift": 0.19971285358992152,
+            "passed": false,
+            "section_id": "2a8f4c8330dc4df5",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.0795318603515627,
+            "effective_sis": -0.19925457730258353,
+            "ft_nll": 2.5341726449819713,
+            "kind": "instruction",
+            "leak_lift": 0.37634617436998086,
+            "own_lift": 0.17709159706739733,
+            "passed": false,
+            "section_id": "b176cf46a20e0714",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.2099117449860075,
+            "effective_sis": -0.03496741567277334,
+            "ft_nll": 2.128764593779151,
+            "kind": "instruction",
+            "leak_lift": 0.37178264210248607,
+            "own_lift": 0.33681522642971273,
+            "passed": false,
+            "section_id": "8169b4e2d63784ec",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.230460788892663,
+            "effective_sis": -0.20378740910551404,
+            "ft_nll": 3.49992503290591,
+            "kind": "instruction",
+            "leak_lift": 0.3764720863645067,
+            "own_lift": 0.17268467725899264,
+            "passed": false,
+            "section_id": "d6008217fa164da7",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.511493025154903,
+            "effective_sis": 0.04586452455157064,
+            "ft_nll": 2.0528123789820176,
+            "kind": "instruction",
+            "leak_lift": 0.36953731042958765,
+            "own_lift": 0.4154018349811583,
+            "passed": false,
+            "section_id": "f83dc98d1b6ddcae",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.3412074788411457,
+            "effective_sis": -0.011300414593640506,
+            "ft_nll": 2.1389581298828126,
+            "kind": "instruction",
+            "leak_lift": 0.37112522540584353,
+            "own_lift": 0.359824810812203,
+            "passed": false,
+            "section_id": "668c04d7ae53e8fe",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.6240665496341764,
+            "effective_sis": -0.2049958633403854,
+            "ft_nll": 3.002503652421255,
+            "kind": "instruction",
+            "leak_lift": 0.3765056545376976,
+            "own_lift": 0.1715097911973122,
+            "passed": false,
+            "section_id": "64606c5209a76c28",
+            "tag": ""
+          },
+          {
+            "base_nll": 3.727417701898619,
+            "effective_sis": 0.008367172648670262,
+            "ft_nll": 2.314927389455396,
+            "kind": "instruction",
+            "leak_lift": 0.37057890353800155,
+            "own_lift": 0.3789460761866718,
+            "passed": false,
+            "section_id": "16f25169869c5c6f",
+            "tag": ""
+          },
+          {
+            "base_nll": 4.547536041777013,
+            "effective_sis": 0.18225370190557222,
+            "ft_nll": 2.0554752673132946,
+            "kind": "instruction",
+            "leak_lift": 0.3657487221697543,
+            "own_lift": 0.5480024240753265,
+            "passed": true,
+            "section_id": "0f34524d32acc1b6",
+            "tag": ""
+          }
+        ],
+        "per_section_threshold": 0.05,
+        "raw_ci_95": [
+          -0.04250307630931393,
+          0.04526532776670737
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 4.651026541782724e-12
+        }
+      },
+      "ft_value": null,
+      "kind": "section_internalization",
+      "message": "15/36 sections cleared; mean effective_sis=+0.000, z=+0.00\u03c3 vs null",
+      "name": "sis_fortran",
+      "raw": 4.625929269271485e-18,
+      "score": 0.5000000000003876,
+      "verdict": "fail",
+      "z_score": 4.651026541782724e-12
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 2.9580260161310434e-06,
+      "evidence": {},
+      "ft_value": null,
+      "kind": "paraphrase_invariance",
+      "message": "no cases provided",
+      "name": "para_fortran",
+      "raw": null,
+      "score": null,
+      "verdict": "error",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.03139013452914798,
+        0.03139013452914798
+      ],
+      "duration_s": 18.9565314159845,
+      "evidence": {
+        "mean_clean_recall": 0.03139013452914798,
+        "mean_fragility": 0.0,
+        "mean_perturbed_recall": 0.0407330191075767,
+        "per_section": [
+          {
+            "clean_recall": 0.03139013452914798,
+            "fragility": 0.0,
+            "perturbed_recall": 0.0407330191075767,
+            "section_id": "2ae73af79acd2aa3"
+          }
+        ],
+        "raw_ci_95": [
+          0.03139013452914798,
+          0.03139013452914798
+        ],
+        "weight": 1.0,
+        "z_by_rank": {
+          "rank_1.00": 2.040322695370548
+        }
+      },
+      "ft_value": 0.0,
+      "kind": "leakage",
+      "message": "greedy_recall=0.03 (perturbed=0.04, fragility=0.00), z=+2.04\u03c3 vs null",
+      "name": "leak_fortran",
+      "raw": 0.03139013452914798,
+      "score": 0.6637627044025881,
+      "verdict": "pass",
+      "z_score": 2.040322695370548
+    },
+    {
+      "base_value": null,
+      "ci_95": [
+        0.04,
+        0.22
+      ],
+      "duration_s": 3.106611958995927,
+      "evidence": {
+        "fraction_regressed": 0.12,
+        "mean_delta_nats": -0.17035713793127796,
+        "raw_ci_95": [
+          0.04,
+          0.22
+        ],
+        "regressed_count": 6,
+        "regression_nats_threshold": 1.0,
+        "total_items": 50,
+        "weight": 1.0,
+        "worst_offenders": [
+          {
+            "delta": -4.315955400466919,
+            "gold": " Europe",
+            "prompt": "The Mediterranean Sea borders southern"
+          },
+          {
+            "delta": -1.7071008682250977,
+            "gold": " China",
+            "prompt": "Mount Everest is located on the border of Nepal and"
+          },
+          {
+            "delta": -1.4701178073883057,
+            "gold": " Brasilia",
+            "prompt": "The capital of Brazil is"
+          },
+          {
+            "delta": -1.428731918334961,
+            "gold": " Sun",
+            "prompt": "The closest star to Earth is the"
+          },
+          {
+            "delta": -1.321993350982666,
+            "gold": " Asia",
+            "prompt": "The largest continent is"
+          }
+        ],
+        "z_by_rank": null
+      },
+      "ft_value": -0.17035713793127796,
+      "kind": "calibration_drift",
+      "message": "6/50 items regressed >1.0 nats (frac=12.0%), mean_delta=-0.170 nats/tok (no calibration for calibration_drift)",
+      "name": "cal_general",
+      "raw": 0.12,
+      "score": 0.34123809655165915,
+      "verdict": "pass",
+      "z_score": null
+    },
+    {
+      "base_value": null,
+      "ci_95": null,
+      "duration_s": 1.0620087500137743,
+      "evidence": {
+        "lambdas": [
+          0.0,
+          0.25,
+          0.5,
+          0.75,
+          1.0,
+          1.25
+        ],
+        "linearity": 0.9103879862304414,
+        "mean_divergence_per_lambda": [
+          0.0,
+          0.024096238486922095,
+          0.06339058010139965,
+          0.12162927505983301,
+          0.16378600880464075,
+          0.3086516331232216
+        ],
+        "overshoot": 1.8844810700001389,
+        "passed_linearity": true,
+        "passed_overshoot": true,
+        "passed_saturation": false,
+        "saturation_lambda": 1.25,
+        "saturation_reason": "found",
+        "weight": 1.0,
+        "z_by_rank": null
+      },
+      "ft_value": null,
+      "kind": "adapter_ablation",
+      "message": "R\u00b2=0.91, sat_\u03bb=1.25 (out of band), overshoot=1.88 (no calibration for adapter_ablation)",
+      "name": "abl_fortran",
+      "raw": 0.9103879862304414,
+      "score": 0.79,
+      "verdict": "fail",
+      "z_score": null
+    }
+  ],
+  "schema_version": 1,
+  "score": {
+    "band": "partial",
+    "components": {
+      "ablation": 0.79,
+      "adherence": 0.5194587403778158,
+      "attribution": 0.5000000000003876,
+      "baseline": 1.0,
+      "calibration": 0.5025004004771236
+    },
+    "findings": [
+      "dk_fortran_qa_shaped (delta_kl) failed: mean js=0.2191, z=+0.23\u03c3 vs null",
+      "1 probe(s) errored \u2014 see full report for details"
+    ],
+    "overall": 0.5498377022089052,
+    "weights": {
+      "ablation": 0.15,
+      "adherence": 0.3,
+      "attribution": 0.35,
+      "baseline": 0.0,
+      "calibration": 0.2
+    }
+  },
+  "spec_path": "/Users/mfwolffe/GithubOrgs/tenseleyFlow/DocumentLanguageModel/docs/audits/13-followup/finding05-smol-qa/sway.yaml",
+  "started_at": "2026-04-30T23:43:40.737655+00:00",
+  "sway_version": "0.1.0",
+  "wall_seconds": 73.621905
+}
\ No newline at end of file
diff --git a/docs/audits/13-followup/finding05-smol-qa/sway-results.md b/docs/audits/13-followup/finding05-smol-qa/sway-results.md
new file mode 100644
index 0000000..6ddf710
--- /dev/null
+++ b/docs/audits/13-followup/finding05-smol-qa/sway-results.md
@@ -0,0 +1,41 @@
+# sway report
+
+**Overall:** 0.55 (`partial`)  
+**Base:** `HuggingFaceTB/SmolLM2-135M-Instruct`  
+**Adapter:** `adapter/versions/v0001`  
+**Wall:** 73.6s  
+**Determinism:** `best_effort` (seed=0)  
+**Backend:** cache: 180/558 = 32% | batches: 6 (avg=4.7)  
+
+## Components
+
+| category | score | weight | |
+|---|---:|---:|---|
+| adherence | 0.52 | 0.30 |  |
+| attribution | 0.50 | 0.35 |  |
+| calibration | 0.50 | 0.20 |  |
+| ablation | 0.79 | 0.15 |  |
+| baseline | 1.00 | 0.00 | (informational, weight=0) |
+
+## Probes
+
+| name | kind | verdict | score | raw | ci95 | z | duration | note |
+|---|---|---|---:|---:|---:|---:|---:|---|
+| null_baseline | `null_adapter` | pass | 1.00 | — | — | — | 48.7s | null calibration: 5 kinds calibrated over 3 seeds (1 opted out) |
+| dk_fortran_qa_shaped | `delta_kl` | fail | 0.52 | 0.219 | [0.097, 0.359] | +0.23σ | 0.16s | mean js=0.2191, z=+0.23σ vs null |
+| sis_fortran | `section_internalization` | fail | 0.50 | 0.000 | [-0.043, 0.045] | +0.00σ | 1.39s | 15/36 sections cleared; mean effective_sis=+0.000, z=+0.00σ vs null |
+| para_fortran | `paraphrase_invariance` | error | — | — | — | — | 0.00s | no cases provided |
+| leak_fortran | `leakage` | pass | 0.66 | 0.031 | [0.031, 0.031] | +2.04σ | 19.0s | greedy_recall=0.03 (perturbed=0.04, fragility=0.00), z=+2.04σ vs null |
+| cal_general | `calibration_drift` | pass | 0.34 | 0.120 | [0.040, 0.220] | — | 3.11s | 6/50 items regressed >1.0 nats (frac=12.0%), mean_delta=-0.170 nats/tok (no calibration for calibration_drift) |
+| abl_fortran | `adapter_ablation` | fail | 0.79 | 0.910 | — | — | 1.06s | R²=0.91, sat_λ=1.25 (out of band), overshoot=1.88 (no calibration for adapter_ablation) |
+
+## Top findings
+
+- dk_fortran_qa_shaped (delta_kl) failed: mean js=0.2191, z=+0.23σ vs null
+- 1 probe(s) errored — see full report for details
+
+## Degenerate null calibration
+
+1 probe kind(s) ran null_adapter but the resulting baseline was too narrow for z-scoring (std ≈ 0, typically `runs: 1` or coincidentally-matched seeds). Fix: bump `runs:` in the `null_adapter` spec entry. Affected kinds:
+
+- `calibration_drift`
diff --git a/docs/audits/13-followup/findings/05-smol-qa-shape.md b/docs/audits/13-followup/findings/05-smol-qa-shape.md
index 25b6e0f..51ba215 100644
--- a/docs/audits/13-followup/findings/05-smol-qa-shape.md
+++ b/docs/audits/13-followup/findings/05-smol-qa-shape.md
@@ -42,6 +42,30 @@ endpoint eval is 50× higher; held-out token-acc is 35 percentage
 points lower. The base can memorize the train pairs but lacks the
 parameter capacity to generalize the patterns.
 
+## sway results
+
+Composite **0.55 (`partial`)**. Full report at
+[`finding05-smol-qa/sway-results.md`](../finding05-smol-qa/sway-results.md).
+
+| probe | verdict | z | reading |
+|---|---|---:|---|
+| `dk_fortran_qa_shaped` | **FAIL** | +0.23σ | adapter shift on Q/A prompts is noise-level; the Q/A-shape recipe didn't even teach smol to *fire* on Q/A-shaped inputs |
+| `sis_fortran` | **FAIL** | +0.00σ | 15/36 sections cleared; **no per-section internalization signal whatsoever** |
+| `para_fortran` | ERROR | — | sway bridge "no cases provided" — separate sway issue |
+| `leak_fortran` | PASS | +2.04σ | greedy_recall=0.03, fragility=0.00 — textbook memorization fingerprint |
+| `cal_general` | pass* | — | **12% items regressed >1 nat** (was 26% in finding 02); recipe-shape fix mitigates forgetting but doesn't eliminate it |
+| `abl_fortran` | **FAIL** | — | R²=0.91 linearity, but **overshoot=1.88, sat_λ=1.25 out of band** — adapter is in pathological "more is more" territory, not converged at a coherent minimum |
+
+*`cal_general` formally passed because its null-baseline std collapsed
+to zero (1 of the 3 seeds duplicated), but the raw 12% regression is
+the load-bearing signal — that's 6/50 general-competence items broken
+by the LoRA on a model that started at 100%.
+
+The adherence and attribution probes both at noise-level (z ≈ 0) is
+the cleanest signal here: **the adapter doesn't reliably activate on
+the trained input shape** despite 95% train token-acc. That's the
+parameter-capacity bottleneck on display.
+
 ## Direct-query smoke
 
 Full transcripts at

From 88e989b225d911da669fed3187c450cebd6932e2 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Sat, 2 May 2026 23:09:25 -0400
Subject: [PATCH 43/46] Wrap vllm + mlx-serve target export pipelines in
 dlm.export.entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lifts each server target's prepare → smoke → finalize chain out of the
CLI into a typed dispatcher. CLI just builds a Request, calls the
runner, and renders. Smoke failure surfaces as a populated 'smoke'
field with ok=False (and manifest_path=None), so the CLI keeps full
control of exit codes. Dotted import of dlm.export.targets keeps
existing test fixture monkeypatches visible at call time.
---
 src/dlm/cli/commands/export.py |  95 ++++++++++++-------------
 src/dlm/export/entry.py        | 124 +++++++++++++++++++++++++++++++++
 2 files changed, 169 insertions(+), 50 deletions(-)
 create mode 100644 src/dlm/export/entry.py

diff --git a/src/dlm/cli/commands/export.py b/src/dlm/cli/commands/export.py
index 2ef6b41..f160ced 100644
--- a/src/dlm/cli/commands/export.py
+++ b/src/dlm/cli/commands/export.py
@@ -146,6 +146,12 @@ def export_cmd(
         resolve_export_plan,
         run_export,
     )
+    from dlm.export.entry import (
+        MlxServeExportRequest,
+        VllmExportRequest,
+        run_mlx_serve_target_export,
+        run_vllm_target_export,
+    )
     from dlm.export.ollama import (
         OllamaBinaryNotFoundError,
         OllamaCreateError,
@@ -154,14 +160,7 @@ def export_cmd(
         OllamaVersionError,
     )
     from dlm.export.quantize import run_checked
-    from dlm.export.targets import (
-        finalize_mlx_serve_export,
-        finalize_vllm_export,
-        prepare_llama_server_export,
-        prepare_mlx_serve_export,
-        prepare_vllm_export,
-        resolve_target,
-    )
+    from dlm.export.targets import prepare_llama_server_export, resolve_target
     from dlm.store.paths import for_dlm
 
     console = Console(stderr=True)
@@ -407,44 +406,42 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
 
         declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
         try:
-            vllm_result = prepare_vllm_export(
-                store=store,
-                spec=spec,
-                served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
-                training_sequence_len=parsed.frontmatter.training.sequence_len,
-                adapter_name=adapter,
-                adapter_path_override=adapter_path_override,
-                declared_adapter_names=declared_adapter_names,
+            vllm_outcome = run_vllm_target_export(
+                VllmExportRequest(
+                    target=resolved_target,
+                    store=store,
+                    spec=spec,
+                    served_model_name=name or f"dlm-{parsed.frontmatter.dlm_id.lower()}",
+                    training_sequence_len=parsed.frontmatter.training.sequence_len,
+                    adapter_name=adapter,
+                    adapter_path_override=adapter_path_override,
+                    declared_adapter_names=declared_adapter_names,
+                    adapter_mix=mix_entries,
+                    no_smoke=no_smoke,
+                )
             )
         except ExportError as exc:
             console.print(f"[red]export:[/red] {exc}")
             raise typer.Exit(code=1) from exc
 
-        vllm_smoke = None if no_smoke else resolved_target.smoke_test(vllm_result)
-        if vllm_smoke is not None and not vllm_smoke.ok:
+        if vllm_outcome.smoke is not None and not vllm_outcome.smoke.ok:
             console.print(
-                f"[red]smoke:[/red] {vllm_smoke.detail}\n"
+                f"[red]smoke:[/red] {vllm_outcome.smoke.detail}\n"
                 "  re-run with `--no-smoke` to skip the smoke test."
             )
             raise typer.Exit(code=1)
 
-        manifest_path = finalize_vllm_export(
-            store=store,
-            spec=spec,
-            prepared=vllm_result,
-            smoke_output_first_line=None if vllm_smoke is None else vllm_smoke.detail,
-            adapter_name=adapter,
-            adapter_mix=mix_entries,
-        )
+        vllm_result = vllm_outcome.prepared
+        assert vllm_outcome.manifest_path is not None
         console.print(f"[green]exported:[/green] {vllm_result.export_dir}")
         console.print("target:  vllm")
         assert vllm_result.launch_script_path is not None
         assert vllm_result.config_path is not None
         console.print(f"launch:  {vllm_result.launch_script_path.name}")
         console.print(f"config:  {vllm_result.config_path.name}")
-        console.print(f"manifest: {manifest_path.name}")
-        if vllm_smoke is not None and vllm_smoke.detail:
-            console.print(f"smoke:   {vllm_smoke.detail}")
+        console.print(f"manifest: {vllm_outcome.manifest_path.name}")
+        if vllm_outcome.smoke is not None and vllm_outcome.smoke.detail:
+            console.print(f"smoke:   {vllm_outcome.smoke.detail}")
         return
 
     if resolved_target.name == "mlx-serve":
@@ -475,40 +472,38 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
 
         declared_adapter_names = tuple(adapters_declared.keys()) if adapters_declared else None
         try:
-            mlx_serve_result = prepare_mlx_serve_export(
-                store=store,
-                spec=spec,
-                adapter_name=adapter,
-                adapter_path_override=adapter_path_override,
-                declared_adapter_names=declared_adapter_names,
+            mlx_outcome = run_mlx_serve_target_export(
+                MlxServeExportRequest(
+                    target=resolved_target,
+                    store=store,
+                    spec=spec,
+                    adapter_name=adapter,
+                    adapter_path_override=adapter_path_override,
+                    declared_adapter_names=declared_adapter_names,
+                    adapter_mix=mix_entries,
+                    no_smoke=no_smoke,
+                )
             )
         except ExportError as exc:
             console.print(f"[red]export:[/red] {exc}")
             raise typer.Exit(code=1) from exc
 
-        mlx_serve_smoke = None if no_smoke else resolved_target.smoke_test(mlx_serve_result)
-        if mlx_serve_smoke is not None and not mlx_serve_smoke.ok:
+        if mlx_outcome.smoke is not None and not mlx_outcome.smoke.ok:
             console.print(
-                f"[red]smoke:[/red] {mlx_serve_smoke.detail}\n"
+                f"[red]smoke:[/red] {mlx_outcome.smoke.detail}\n"
                 "  re-run with `--no-smoke` to skip the smoke test."
             )
             raise typer.Exit(code=1)
 
-        manifest_path = finalize_mlx_serve_export(
-            store=store,
-            spec=spec,
-            prepared=mlx_serve_result,
-            smoke_output_first_line=None if mlx_serve_smoke is None else mlx_serve_smoke.detail,
-            adapter_name=adapter,
-            adapter_mix=mix_entries,
-        )
+        mlx_serve_result = mlx_outcome.prepared
+        assert mlx_outcome.manifest_path is not None
         console.print(f"[green]exported:[/green] {mlx_serve_result.export_dir}")
         console.print("target:  mlx-serve")
         assert mlx_serve_result.launch_script_path is not None
         console.print(f"launch:  {mlx_serve_result.launch_script_path.name}")
-        console.print(f"manifest: {manifest_path.name}")
-        if mlx_serve_smoke is not None and mlx_serve_smoke.detail:
-            console.print(f"smoke:   {mlx_serve_smoke.detail}")
+        console.print(f"manifest: {mlx_outcome.manifest_path.name}")
+        if mlx_outcome.smoke is not None and mlx_outcome.smoke.detail:
+            console.print(f"smoke:   {mlx_outcome.smoke.detail}")
         return
 
     try:
diff --git a/src/dlm/export/entry.py b/src/dlm/export/entry.py
new file mode 100644
index 0000000..53bc96d
--- /dev/null
+++ b/src/dlm/export/entry.py
@@ -0,0 +1,124 @@
+"""Per-target export orchestration.
+
+Wraps the prepare → smoke → finalize chain for the OpenAI-compat server
+targets (vLLM, MLX-serve) so the CLI doesn't repeat the same plumbing
+for each. Returns typed results the CLI renders; smoke failure surfaces
+as a populated `smoke` field with `ok=False`, leaving `manifest_path`
+unset, so the CLI can decide its own exit code without the dispatcher
+making control-flow choices.
+
+External-module imports are dotted (e.g. `from dlm.export import
+targets as _targets; _targets.prepare_vllm_export(...)`) so test
+fixtures that monkeypatch `dlm.export.targets.<name>` resolve at call
+time.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+from dlm.export import targets as _targets
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from dlm.base_models.schema import BaseModelSpec
+    from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
+    from dlm.store.paths import StorePath
+
+
+@dataclass(frozen=True)
+class VllmExportRequest:
+    """Inputs to `run_vllm_target_export`."""
+
+    target: ExportTarget
+    store: StorePath
+    spec: BaseModelSpec
+    served_model_name: str
+    training_sequence_len: int | None
+    adapter_name: str | None
+    adapter_path_override: Path | None
+    declared_adapter_names: tuple[str, ...] | None
+    adapter_mix: list[tuple[str, float]] | None
+    no_smoke: bool
+
+
+@dataclass(frozen=True)
+class MlxServeExportRequest:
+    """Inputs to `run_mlx_serve_target_export`."""
+
+    target: ExportTarget
+    store: StorePath
+    spec: BaseModelSpec
+    adapter_name: str | None
+    adapter_path_override: Path | None
+    declared_adapter_names: tuple[str, ...] | None
+    adapter_mix: list[tuple[str, float]] | None
+    no_smoke: bool
+
+
+@dataclass(frozen=True)
+class ServerTargetExportResult:
+    """Outcome of a server-target export.
+
+    `manifest_path` is `None` when smoke failed (finalize was skipped);
+    the CLI surfaces a smoke-failure exit in that case. `smoke` is `None`
+    when `--no-smoke` was set.
+    """
+
+    prepared: TargetResult
+    smoke: SmokeResult | None
+    manifest_path: Path | None
+
+
+def run_vllm_target_export(req: VllmExportRequest) -> ServerTargetExportResult:
+    """Stage vLLM artifacts, smoke-test the server, then finalize the manifest."""
+    prepared = _targets.prepare_vllm_export(
+        store=req.store,
+        spec=req.spec,
+        served_model_name=req.served_model_name,
+        training_sequence_len=req.training_sequence_len,
+        adapter_name=req.adapter_name,
+        adapter_path_override=req.adapter_path_override,
+        declared_adapter_names=req.declared_adapter_names,
+    )
+
+    smoke = None if req.no_smoke else req.target.smoke_test(prepared)
+    if smoke is not None and not smoke.ok:
+        return ServerTargetExportResult(prepared=prepared, smoke=smoke, manifest_path=None)
+
+    manifest_path = _targets.finalize_vllm_export(
+        store=req.store,
+        spec=req.spec,
+        prepared=prepared,
+        smoke_output_first_line=None if smoke is None else smoke.detail,
+        adapter_name=req.adapter_name,
+        adapter_mix=req.adapter_mix,
+    )
+    return ServerTargetExportResult(prepared=prepared, smoke=smoke, manifest_path=manifest_path)
+
+
+def run_mlx_serve_target_export(req: MlxServeExportRequest) -> ServerTargetExportResult:
+    """Stage MLX-serve artifacts, smoke-test the server, then finalize the manifest."""
+    prepared = _targets.prepare_mlx_serve_export(
+        store=req.store,
+        spec=req.spec,
+        adapter_name=req.adapter_name,
+        adapter_path_override=req.adapter_path_override,
+        declared_adapter_names=req.declared_adapter_names,
+    )
+
+    smoke = None if req.no_smoke else req.target.smoke_test(prepared)
+    if smoke is not None and not smoke.ok:
+        return ServerTargetExportResult(prepared=prepared, smoke=smoke, manifest_path=None)
+
+    manifest_path = _targets.finalize_mlx_serve_export(
+        store=req.store,
+        spec=req.spec,
+        prepared=prepared,
+        smoke_output_first_line=None if smoke is None else smoke.detail,
+        adapter_name=req.adapter_name,
+        adapter_mix=req.adapter_mix,
+    )
+    return ServerTargetExportResult(prepared=prepared, smoke=smoke, manifest_path=manifest_path)

From 3092c524c74416f0b5754be97ec152bfc3b4b02b Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Sat, 2 May 2026 23:17:43 -0400
Subject: [PATCH 44/46] Wrap llama-server post-export prep + smoke in
 dlm.export.entry

Lifts the adapter-dir resolution + prepare_llama_server_export +
smoke chain out of the CLI's llama-server branch. CLI just builds a
LlamaServerPostExportRequest, calls run_llama_server_post_export, and
renders the typed result. VendoringError + ExportError still propagate
to the CLI for target-specific banner formatting.
---
 src/dlm/cli/commands/export.py | 32 ++++++++++----------
 src/dlm/export/entry.py        | 53 ++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 16 deletions(-)

diff --git a/src/dlm/cli/commands/export.py b/src/dlm/cli/commands/export.py
index f160ced..9ea7cf6 100644
--- a/src/dlm/cli/commands/export.py
+++ b/src/dlm/cli/commands/export.py
@@ -147,8 +147,10 @@ def export_cmd(
         run_export,
     )
     from dlm.export.entry import (
+        LlamaServerPostExportRequest,
         MlxServeExportRequest,
         VllmExportRequest,
+        run_llama_server_post_export,
         run_mlx_serve_target_export,
         run_vllm_target_export,
     )
@@ -160,7 +162,7 @@ def export_cmd(
         OllamaVersionError,
     )
     from dlm.export.quantize import run_checked
-    from dlm.export.targets import prepare_llama_server_export, resolve_target
+    from dlm.export.targets import resolve_target
     from dlm.store.paths import for_dlm
 
     console = Console(stderr=True)
@@ -568,21 +570,18 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         raise typer.Exit(code=1) from exc
 
     if resolved_target.name == "llama-server":
-        adapter_dir = adapter_path_override
-        if adapter_dir is None:
-            if adapter is None:
-                adapter_dir = store.resolve_current_adapter()
-            else:
-                adapter_dir = store.resolve_current_adapter_for(adapter)
-        assert adapter_dir is not None
         try:
-            llama_server_result = prepare_llama_server_export(
-                export_dir=result.export_dir,
-                manifest_path=result.manifest_path,
-                artifacts=result.artifacts,
-                adapter_dir=adapter_dir,
-                spec=spec,
-                training_sequence_len=parsed.frontmatter.training.sequence_len,
+            llama_server_outcome = run_llama_server_post_export(
+                LlamaServerPostExportRequest(
+                    target=resolved_target,
+                    store=store,
+                    spec=spec,
+                    base_export=result,
+                    adapter_name=adapter,
+                    adapter_path_override=adapter_path_override,
+                    training_sequence_len=parsed.frontmatter.training.sequence_len,
+                    no_smoke=no_smoke,
+                )
             )
         except VendoringError as exc:
             console.print(
@@ -594,7 +593,8 @@ def _verbose_runner(cmd: Sequence[str]) -> object:
         except ExportError as exc:
             console.print(f"[red]export:[/red] {exc}")
             raise typer.Exit(code=1) from exc
-        llama_server_smoke = None if no_smoke else resolved_target.smoke_test(llama_server_result)
+        llama_server_result = llama_server_outcome.prepared
+        llama_server_smoke = llama_server_outcome.smoke
         if llama_server_smoke is not None and not llama_server_smoke.ok:
             console.print(
                 f"[red]smoke:[/red] {llama_server_smoke.detail}\n"
diff --git a/src/dlm/export/entry.py b/src/dlm/export/entry.py
index 53bc96d..f6878e8 100644
--- a/src/dlm/export/entry.py
+++ b/src/dlm/export/entry.py
@@ -24,6 +24,7 @@
     from pathlib import Path
 
     from dlm.base_models.schema import BaseModelSpec
+    from dlm.export.runner import ExportResult
     from dlm.export.targets.base import ExportTarget, SmokeResult, TargetResult
     from dlm.store.paths import StorePath
 
@@ -122,3 +123,55 @@ def run_mlx_serve_target_export(req: MlxServeExportRequest) -> ServerTargetExpor
         adapter_mix=req.adapter_mix,
     )
     return ServerTargetExportResult(prepared=prepared, smoke=smoke, manifest_path=manifest_path)
+
+
+@dataclass(frozen=True)
+class LlamaServerPostExportRequest:
+    """Inputs to `run_llama_server_post_export`.
+
+    `base_export` is the `ExportResult` returned by `run_export(target=
+    "llama-server")`; the dispatcher resolves the adapter dir, stages
+    the launch artifacts, and runs the smoke test on top of it.
+    """
+
+    target: ExportTarget
+    store: StorePath
+    spec: BaseModelSpec
+    base_export: ExportResult
+    adapter_name: str | None
+    adapter_path_override: Path | None
+    training_sequence_len: int | None
+    no_smoke: bool
+
+
+@dataclass(frozen=True)
+class LlamaServerPostExportResult:
+    """Outcome of `run_llama_server_post_export`. `smoke` is `None`
+    when `--no-smoke` was set."""
+
+    prepared: TargetResult
+    smoke: SmokeResult | None
+
+
+def run_llama_server_post_export(
+    req: LlamaServerPostExportRequest,
+) -> LlamaServerPostExportResult:
+    """Resolve adapter dir, stage llama-server artifacts, then smoke-test."""
+    adapter_dir = req.adapter_path_override
+    if adapter_dir is None:
+        if req.adapter_name is None:
+            adapter_dir = req.store.resolve_current_adapter()
+        else:
+            adapter_dir = req.store.resolve_current_adapter_for(req.adapter_name)
+    assert adapter_dir is not None
+
+    prepared = _targets.prepare_llama_server_export(
+        export_dir=req.base_export.export_dir,
+        manifest_path=req.base_export.manifest_path,
+        artifacts=req.base_export.artifacts,
+        adapter_dir=adapter_dir,
+        spec=req.spec,
+        training_sequence_len=req.training_sequence_len,
+    )
+    smoke = None if req.no_smoke else req.target.smoke_test(prepared)
+    return LlamaServerPostExportResult(prepared=prepared, smoke=smoke)

From b8d14476e274dc787662a332a3f6df4024cccfe6 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Sun, 3 May 2026 16:54:16 -0400
Subject: [PATCH 45/46] Add direct unit tests for new dispatcher modules

Each new dispatcher module now has a tests/unit/ peer that drives
its branches directly, so the per-package coverage gates (store,
train, inference, export) stay at 100% without depending on CLI
tests' indirect coverage. Modules covered: dlm.inference.dispatch,
dlm.train.dispatch, dlm.store.bootstrap, dlm.store.show,
dlm.export.entry.
---
 tests/unit/export/test_entry.py       | 306 ++++++++++++++++++++
 tests/unit/inference/test_dispatch.py |  69 +++++
 tests/unit/store/test_bootstrap.py    | 131 +++++++++
 tests/unit/store/test_show.py         | 394 ++++++++++++++++++++++++++
 tests/unit/train/test_dispatch.py     | 207 ++++++++++++++
 5 files changed, 1107 insertions(+)
 create mode 100644 tests/unit/export/test_entry.py
 create mode 100644 tests/unit/inference/test_dispatch.py
 create mode 100644 tests/unit/store/test_bootstrap.py
 create mode 100644 tests/unit/store/test_show.py
 create mode 100644 tests/unit/train/test_dispatch.py

diff --git a/tests/unit/export/test_entry.py b/tests/unit/export/test_entry.py
new file mode 100644
index 0000000..5be8a36
--- /dev/null
+++ b/tests/unit/export/test_entry.py
@@ -0,0 +1,306 @@
+"""Direct tests for `dlm.export.entry` (per-target dispatcher)."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from dlm.export.entry import (
+    LlamaServerPostExportRequest,
+    LlamaServerPostExportResult,
+    MlxServeExportRequest,
+    ServerTargetExportResult,
+    VllmExportRequest,
+    run_llama_server_post_export,
+    run_mlx_serve_target_export,
+    run_vllm_target_export,
+)
+
+
+def _make_target(smoke_ok: bool, smoke_detail: str = "smoke ok") -> Any:
+    """A fake ExportTarget with a configurable smoke result."""
+    smoke_calls: list[Any] = []
+
+    def _smoke_test(prepared: Any) -> Any:
+        smoke_calls.append(prepared)
+        return SimpleNamespace(attempted=True, ok=smoke_ok, detail=smoke_detail)
+
+    target = SimpleNamespace(name="vllm", smoke_test=_smoke_test)
+    target.smoke_calls = smoke_calls  # type: ignore[attr-defined]
+    return target
+
+
+def _vllm_request(
+    *,
+    target: Any,
+    store: Any = None,
+    spec: Any = None,
+    no_smoke: bool = False,
+) -> VllmExportRequest:
+    return VllmExportRequest(
+        target=target,
+        store=store or SimpleNamespace(),
+        spec=spec or SimpleNamespace(),
+        served_model_name="dlm-test",
+        training_sequence_len=2048,
+        adapter_name=None,
+        adapter_path_override=None,
+        declared_adapter_names=None,
+        adapter_mix=None,
+        no_smoke=no_smoke,
+    )
+
+
+def _mlx_request(
+    *,
+    target: Any,
+    store: Any = None,
+    spec: Any = None,
+    no_smoke: bool = False,
+) -> MlxServeExportRequest:
+    return MlxServeExportRequest(
+        target=target,
+        store=store or SimpleNamespace(),
+        spec=spec or SimpleNamespace(),
+        adapter_name=None,
+        adapter_path_override=None,
+        declared_adapter_names=None,
+        adapter_mix=None,
+        no_smoke=no_smoke,
+    )
+
+
+def test_run_vllm_target_export_finalizes_on_smoke_ok(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(name="vllm", export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_vllm_export", lambda **kwargs: prepared)
+    finalize_calls: dict[str, object] = {}
+
+    def _fake_finalize(**kwargs: object) -> Path:
+        finalize_calls.update(kwargs)
+        return tmp_path / "manifest.json"
+
+    monkeypatch.setattr("dlm.export.targets.finalize_vllm_export", _fake_finalize)
+
+    target = _make_target(smoke_ok=True)
+    result = run_vllm_target_export(_vllm_request(target=target))
+
+    assert isinstance(result, ServerTargetExportResult)
+    assert result.prepared is prepared
+    assert result.smoke is not None
+    assert result.smoke.ok is True
+    assert result.manifest_path == tmp_path / "manifest.json"
+    assert finalize_calls["smoke_output_first_line"] == "smoke ok"
+
+
+def test_run_vllm_target_export_skips_finalize_on_smoke_failure(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_vllm_export", lambda **kwargs: prepared)
+    finalize_called: list[bool] = []
+    monkeypatch.setattr(
+        "dlm.export.targets.finalize_vllm_export",
+        lambda **kwargs: finalize_called.append(True),
+    )
+
+    target = _make_target(smoke_ok=False, smoke_detail="vllm broke")
+    result = run_vllm_target_export(_vllm_request(target=target))
+
+    assert result.manifest_path is None
+    assert result.smoke is not None
+    assert result.smoke.ok is False
+    assert result.smoke.detail == "vllm broke"
+    assert finalize_called == []
+
+
+def test_run_vllm_target_export_skips_smoke_with_no_smoke(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_vllm_export", lambda **kwargs: prepared)
+
+    finalize_calls: dict[str, object] = {}
+
+    def _fake_finalize(**kwargs: object) -> Path:
+        finalize_calls.update(kwargs)
+        return tmp_path / "manifest.json"
+
+    monkeypatch.setattr("dlm.export.targets.finalize_vllm_export", _fake_finalize)
+
+    target = _make_target(smoke_ok=True)  # ignored when no_smoke=True
+    result = run_vllm_target_export(_vllm_request(target=target, no_smoke=True))
+
+    assert result.smoke is None
+    assert result.manifest_path == tmp_path / "manifest.json"
+    assert finalize_calls["smoke_output_first_line"] is None
+    assert target.smoke_calls == []
+
+
+def test_run_mlx_serve_target_export_finalizes_on_smoke_ok(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_mlx_serve_export", lambda **kwargs: prepared)
+    monkeypatch.setattr(
+        "dlm.export.targets.finalize_mlx_serve_export",
+        lambda **kwargs: tmp_path / "manifest.json",
+    )
+
+    target = _make_target(smoke_ok=True)
+    result = run_mlx_serve_target_export(_mlx_request(target=target))
+
+    assert result.manifest_path == tmp_path / "manifest.json"
+    assert result.smoke is not None
+    assert result.smoke.ok is True
+
+
+def test_run_mlx_serve_target_export_skips_finalize_on_smoke_failure(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_mlx_serve_export", lambda **kwargs: prepared)
+    finalize_called: list[bool] = []
+    monkeypatch.setattr(
+        "dlm.export.targets.finalize_mlx_serve_export",
+        lambda **kwargs: finalize_called.append(True),
+    )
+
+    target = _make_target(smoke_ok=False)
+    result = run_mlx_serve_target_export(_mlx_request(target=target))
+
+    assert result.manifest_path is None
+    assert finalize_called == []
+
+
+def test_run_mlx_serve_target_export_skips_smoke_with_no_smoke(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    prepared = SimpleNamespace(export_dir=tmp_path)
+    monkeypatch.setattr("dlm.export.targets.prepare_mlx_serve_export", lambda **kwargs: prepared)
+    monkeypatch.setattr(
+        "dlm.export.targets.finalize_mlx_serve_export",
+        lambda **kwargs: tmp_path / "manifest.json",
+    )
+
+    target = _make_target(smoke_ok=True)
+    result = run_mlx_serve_target_export(_mlx_request(target=target, no_smoke=True))
+
+    assert result.smoke is None
+    assert target.smoke_calls == []
+
+
+def test_run_llama_server_post_export_uses_path_override_when_given(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    override_dir = tmp_path / "override-adapter"
+    prepare_calls: dict[str, object] = {}
+
+    def _fake_prepare(**kwargs: object) -> object:
+        prepare_calls.update(kwargs)
+        return SimpleNamespace(name="llama-server")
+
+    monkeypatch.setattr("dlm.export.targets.prepare_llama_server_export", _fake_prepare)
+
+    target = _make_target(smoke_ok=True)
+    request = LlamaServerPostExportRequest(
+        target=target,
+        store=SimpleNamespace(),  # type: ignore[arg-type]
+        spec=SimpleNamespace(),  # type: ignore[arg-type]
+        base_export=SimpleNamespace(  # type: ignore[arg-type]
+            export_dir=tmp_path,
+            manifest_path=tmp_path / "m.json",
+            artifacts=[tmp_path / "a"],
+        ),
+        adapter_name=None,
+        adapter_path_override=override_dir,
+        training_sequence_len=1024,
+        no_smoke=False,
+    )
+    result = run_llama_server_post_export(request)
+
+    assert isinstance(result, LlamaServerPostExportResult)
+    assert prepare_calls["adapter_dir"] == override_dir
+    assert result.smoke is not None
+    assert result.smoke.ok is True
+
+
+def test_run_llama_server_post_export_resolves_default_current_adapter(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    resolved_dir = tmp_path / "current"
+    store = SimpleNamespace(
+        resolve_current_adapter=lambda: resolved_dir,
+        resolve_current_adapter_for=lambda name: tmp_path / "named",
+    )
+    prepare_calls: dict[str, object] = {}
+
+    def _fake_prepare(**kwargs: object) -> object:
+        prepare_calls.update(kwargs)
+        return SimpleNamespace()
+
+    monkeypatch.setattr("dlm.export.targets.prepare_llama_server_export", _fake_prepare)
+
+    target = _make_target(smoke_ok=True)
+    request = LlamaServerPostExportRequest(
+        target=target,
+        store=store,  # type: ignore[arg-type]
+        spec=SimpleNamespace(),  # type: ignore[arg-type]
+        base_export=SimpleNamespace(  # type: ignore[arg-type]
+            export_dir=tmp_path, manifest_path=tmp_path / "m.json", artifacts=[]
+        ),
+        adapter_name=None,
+        adapter_path_override=None,
+        training_sequence_len=512,
+        no_smoke=True,
+    )
+    result = run_llama_server_post_export(request)
+
+    assert prepare_calls["adapter_dir"] == resolved_dir
+    assert result.smoke is None
+
+
+def test_run_llama_server_post_export_resolves_named_adapter(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    named_dir = tmp_path / "named-adapter"
+    store = SimpleNamespace(
+        resolve_current_adapter=lambda: tmp_path / "wrong",
+        resolve_current_adapter_for=lambda name: named_dir if name == "extras" else None,
+    )
+    prepare_calls: dict[str, object] = {}
+
+    monkeypatch.setattr(
+        "dlm.export.targets.prepare_llama_server_export",
+        lambda **kwargs: prepare_calls.update(kwargs) or SimpleNamespace(),
+    )
+
+    target = _make_target(smoke_ok=True)
+    request = LlamaServerPostExportRequest(
+        target=target,
+        store=store,  # type: ignore[arg-type]
+        spec=SimpleNamespace(),  # type: ignore[arg-type]
+        base_export=SimpleNamespace(  # type: ignore[arg-type]
+            export_dir=tmp_path, manifest_path=tmp_path / "m.json", artifacts=[]
+        ),
+        adapter_name="extras",
+        adapter_path_override=None,
+        training_sequence_len=None,
+        no_smoke=True,
+    )
+    run_llama_server_post_export(request)
+
+    assert prepare_calls["adapter_dir"] == named_dir
diff --git a/tests/unit/inference/test_dispatch.py b/tests/unit/inference/test_dispatch.py
new file mode 100644
index 0000000..dc01e96
--- /dev/null
+++ b/tests/unit/inference/test_dispatch.py
@@ -0,0 +1,69 @@
+"""Direct tests for `dlm.inference.dispatch:run_prompt`."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from dlm.inference.dispatch import PromptRequest, PromptResult, run_prompt
+
+
+class _FakeBackend:
+    def __init__(self) -> None:
+        self.loaded_with: dict[str, Any] | None = None
+        self.generate_with: dict[str, Any] | None = None
+
+    def load(self, spec: object, store: object, *, adapter_name: str | None = None) -> None:
+        self.loaded_with = {"spec": spec, "store": store, "adapter_name": adapter_name}
+
+    def generate(self, query: str, **kwargs: object) -> str:
+        self.generate_with = {"query": query, **kwargs}
+        return "fake response"
+
+    def unload(self) -> None:
+        pass
+
+
+def test_run_prompt_loads_backend_and_returns_typed_result(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    backend = _FakeBackend()
+    monkeypatch.setattr(
+        "dlm.inference.backends.build_backend",
+        lambda *args, **kwargs: backend,
+    )
+
+    spec_obj = object()
+    caps_obj = object()
+    store_obj = object()
+
+    request = PromptRequest(
+        spec=spec_obj,  # type: ignore[arg-type]
+        capabilities=caps_obj,  # type: ignore[arg-type]
+        store=store_obj,  # type: ignore[arg-type]
+        backend_name="pytorch",
+        query="hello there",
+        max_new_tokens=42,
+        temperature=0.5,
+        top_p=0.9,
+        adapter="my-adapter",
+    )
+
+    result = run_prompt(request)
+
+    assert isinstance(result, PromptResult)
+    assert result.response == "fake response"
+    assert result.backend_name == "pytorch"
+
+    assert backend.loaded_with == {
+        "spec": spec_obj,
+        "store": store_obj,
+        "adapter_name": "my-adapter",
+    }
+    assert backend.generate_with == {
+        "query": "hello there",
+        "max_new_tokens": 42,
+        "temperature": 0.5,
+        "top_p": 0.9,
+    }
diff --git a/tests/unit/store/test_bootstrap.py b/tests/unit/store/test_bootstrap.py
new file mode 100644
index 0000000..da19505
--- /dev/null
+++ b/tests/unit/store/test_bootstrap.py
@@ -0,0 +1,131 @@
+"""Direct tests for `dlm.store.bootstrap:run_init`."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from dlm.store.bootstrap import (
+    InitRequest,
+    InitResult,
+    ScaffoldKind,
+    run_init,
+)
+from dlm.store.manifest import load_manifest
+from dlm.store.paths import for_dlm
+
+_REV = "0123456789abcdef0123456789abcdef01234567"
+
+
+def _spec(key: str = "smollm2-135m") -> Any:
+    return SimpleNamespace(key=key, revision=_REV)
+
+
+def _make_request(
+    tmp_path: Path,
+    *,
+    template_name: str | None = None,
+    scaffold_kind: ScaffoldKind = ScaffoldKind.TEXT,
+    force: bool = False,
+) -> InitRequest:
+    return InitRequest(
+        path=tmp_path / "doc.dlm",
+        spec=_spec(),  # type: ignore[arg-type]
+        acceptance=None,
+        force=force,
+        template_name=template_name,
+        scaffold_kind=scaffold_kind,
+    )
+
+
+def test_run_init_writes_text_scaffold_and_provisions_store(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    home = tmp_path / "home"
+    monkeypatch.setenv("DLM_HOME", str(home))
+
+    request = _make_request(tmp_path)
+    result = run_init(request)
+
+    assert isinstance(result, InitResult)
+    assert result.applied_template is None
+    assert result.dlm_id  # minted ULID
+    assert request.path.exists()
+    body = request.path.read_text(encoding="utf-8")
+    assert "::instruction::" in body
+    assert "::image" not in body
+    assert "::audio" not in body
+    assert f"dlm_id: {result.dlm_id}" in body
+
+    store = for_dlm(result.dlm_id, home=home)
+    manifest = load_manifest(store.manifest)
+    assert manifest.dlm_id == result.dlm_id
+    assert manifest.base_model == "smollm2-135m"
+    assert manifest.license_acceptance is None
+
+
+def test_run_init_writes_vision_scaffold(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("DLM_HOME", str(tmp_path / "home"))
+
+    request = _make_request(tmp_path, scaffold_kind=ScaffoldKind.VISION)
+    run_init(request)
+
+    body = request.path.read_text(encoding="utf-8")
+    assert "::image" in body
+    assert "dlm_version: 10" in body
+
+
+def test_run_init_writes_audio_scaffold(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("DLM_HOME", str(tmp_path / "home"))
+
+    request = _make_request(tmp_path, scaffold_kind=ScaffoldKind.AUDIO)
+    run_init(request)
+
+    body = request.path.read_text(encoding="utf-8")
+    assert "::audio" in body
+    assert "dlm_version: 11" in body
+
+
+def test_run_init_applies_template_via_dispatch(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("DLM_HOME", str(tmp_path / "home"))
+
+    sentinel_template = SimpleNamespace(meta=SimpleNamespace(name="custom", title="Custom"))
+    apply_calls: dict[str, object] = {}
+
+    def _fake_apply(
+        name: str,
+        target: Path,
+        *,
+        force: bool = False,
+        accept_license: bool = False,
+    ) -> object:
+        apply_calls["name"] = name
+        apply_calls["target"] = target
+        apply_calls["force"] = force
+        apply_calls["accept_license"] = accept_license
+        return SimpleNamespace(template=sentinel_template, dlm_id="01ABC123")
+
+    monkeypatch.setattr("dlm.templates.init.apply_template", _fake_apply)
+
+    request = _make_request(tmp_path, template_name="my-template", force=True)
+    result = run_init(request)
+
+    assert result.dlm_id == "01ABC123"
+    assert result.applied_template is not None
+    assert result.applied_template.template is sentinel_template
+    assert apply_calls["name"] == "my-template"
+    assert apply_calls["force"] is True
+    assert apply_calls["accept_license"] is True
diff --git a/tests/unit/store/test_show.py b/tests/unit/store/test_show.py
new file mode 100644
index 0000000..dc82e3b
--- /dev/null
+++ b/tests/unit/store/test_show.py
@@ -0,0 +1,394 @@
+"""Direct tests for `dlm.store.show:gather_store_view` + private helpers."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from dlm.store.show import (
+    StoreView,
+    StoreViewRequest,
+    _summarize_base_security,
+    _summarize_gate,
+    _summarize_preference_mining,
+    _summarize_training_cache,
+    _summarize_training_sources_and_discovered,
+    gather_store_view,
+)
+
+_DLM_ID = "01KPQ9X1000000000000000000"
+_REV = "0123456789abcdef0123456789abcdef01234567"
+
+
+def _write_doc(path: Path, *, body: str = "") -> None:
+    payload = f"---\ndlm_id: {_DLM_ID}\ndlm_version: 14\nbase_model: smollm2-135m\n---\n"
+    path.write_text(payload + body, encoding="utf-8")
+
+
+def _parsed(home: Path, doc: Path) -> Any:
+    from dlm.doc.parser import parse_file
+
+    return parse_file(doc)
+
+
+def test_gather_store_view_returns_uninitialized_when_manifest_missing(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("DLM_HOME", str(tmp_path / "home"))
+    doc = tmp_path / "doc.dlm"
+    _write_doc(doc)
+
+    from dlm.store.paths import for_dlm
+
+    parsed = _parsed(tmp_path / "home", doc)
+    store = for_dlm(parsed.frontmatter.dlm_id)
+
+    view = gather_store_view(StoreViewRequest(parsed=parsed, target_path=doc, store=store))
+
+    assert isinstance(view, StoreView)
+    assert view.inspection is None
+    assert view.training_cache is None
+    assert view.gate is None
+    assert view.preference_mining is None
+    assert view.base_security is None
+    assert view.parsed_dlm_id == _DLM_ID
+    assert view.parsed_base_model == "smollm2-135m"
+    assert view.training_sources is None
+    assert view.discovered_configs == []
+
+
+def test_gather_store_view_populates_inspection_when_manifest_exists(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    home = tmp_path / "home"
+    monkeypatch.setenv("DLM_HOME", str(home))
+    doc = tmp_path / "doc.dlm"
+    _write_doc(doc)
+
+    from dlm.store.manifest import Manifest, save_manifest
+    from dlm.store.paths import for_dlm
+
+    parsed = _parsed(home, doc)
+    store = for_dlm(parsed.frontmatter.dlm_id)
+    store.ensure_layout()
+    save_manifest(
+        store.manifest,
+        Manifest(
+            dlm_id=_DLM_ID,
+            base_model="smollm2-135m",
+            base_model_revision=_REV,
+            source_path=doc.resolve(),
+        ),
+    )
+
+    view = gather_store_view(StoreViewRequest(parsed=parsed, target_path=doc, store=store))
+
+    assert view.inspection is not None
+    assert view.inspection.dlm_id == _DLM_ID
+    assert view.inspection.base_model == "smollm2-135m"
+    assert view.training_cache is None  # tokenized_cache_dir doesn't exist yet
+    assert view.gate is None  # no gate config + no events
+    assert view.preference_mining is None  # no metrics
+    assert view.base_security is not None
+    assert view.base_security["base_model"] == "smollm2-135m"
+
+
+def test_summarize_training_sources_returns_none_without_directives() -> None:
+    parsed = SimpleNamespace(frontmatter=SimpleNamespace(training=SimpleNamespace(sources=[])))
+    sources, discovered = _summarize_training_sources_and_discovered(parsed, Path("/tmp"))
+    assert sources is None
+    assert discovered == []
+
+
+def test_summarize_training_sources_returns_declared_on_expand_failure(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from dlm.directives.errors import DirectiveError
+
+    directive = SimpleNamespace(
+        path="docs",
+        include=("**/*",),
+        exclude=(),
+        max_files=None,
+        max_bytes_per_file=None,
+    )
+    parsed = SimpleNamespace(
+        frontmatter=SimpleNamespace(training=SimpleNamespace(sources=[directive]))
+    )
+
+    def _raise(*args: object, **kwargs: object) -> None:
+        raise DirectiveError("expansion failed")
+
+    monkeypatch.setattr("dlm.store.show._expand_sources", _raise)
+
+    sources, discovered = _summarize_training_sources_and_discovered(parsed, Path("/tmp"))
+    assert sources is not None
+    assert len(sources) == 1
+    assert sources[0]["path"] == "docs"
+    assert "file_count" not in sources[0]
+    assert discovered == []
+
+
+def test_summarize_training_sources_succeeds_with_provenance(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    directive = SimpleNamespace(
+        path="docs",
+        include=("**/*",),
+        exclude=(),
+        max_files=None,
+        max_bytes_per_file=None,
+    )
+    parsed = SimpleNamespace(
+        frontmatter=SimpleNamespace(training=SimpleNamespace(sources=[directive]))
+    )
+    prov = SimpleNamespace(
+        file_count=3,
+        total_bytes=1024,
+        skipped_binary=0,
+        skipped_encoding=0,
+        skipped_over_size=1,
+    )
+    discovered_config = SimpleNamespace(
+        anchor=Path("/anchor"),
+        config=SimpleNamespace(
+            include=("*.py",),
+            exclude=(),
+            exclude_defaults=True,
+            metadata={"k": "v"},
+        ),
+        ignore_rules=["pattern1"],
+    )
+
+    def _ok(*args: object, **kwargs: object) -> object:
+        return SimpleNamespace(provenance=[prov], discovered=[discovered_config])
+
+    monkeypatch.setattr("dlm.store.show._expand_sources", _ok)
+
+    sources, discovered = _summarize_training_sources_and_discovered(parsed, Path("/tmp"))
+    assert sources is not None
+    assert sources[0]["file_count"] == 3
+    assert sources[0]["total_bytes"] == 1024
+    assert len(discovered) == 1
+    assert discovered[0]["anchor"] == "/anchor"
+    assert discovered[0]["has_training_yaml"] is True
+    assert discovered[0]["has_ignore"] is True
+
+
+def test_summarize_training_sources_pads_when_provenance_short(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    d1 = SimpleNamespace(path="a", include=(), exclude=(), max_files=None, max_bytes_per_file=None)
+    d2 = SimpleNamespace(path="b", include=(), exclude=(), max_files=None, max_bytes_per_file=None)
+    parsed = SimpleNamespace(
+        frontmatter=SimpleNamespace(training=SimpleNamespace(sources=[d1, d2]))
+    )
+    short_prov = SimpleNamespace(
+        file_count=1, total_bytes=10, skipped_binary=0, skipped_encoding=0, skipped_over_size=0
+    )
+
+    def _ok(*args: object, **kwargs: object) -> object:
+        # one declared has provenance, the other doesn't (defensive padding)
+        discovered_no_cfg = SimpleNamespace(anchor=Path("/anchor"), config=None, ignore_rules=[])
+        return SimpleNamespace(provenance=[short_prov], discovered=[discovered_no_cfg])
+
+    monkeypatch.setattr("dlm.store.show._expand_sources", _ok)
+
+    sources, discovered = _summarize_training_sources_and_discovered(parsed, Path("/tmp"))
+    assert sources is not None
+    assert len(sources) == 2
+    assert sources[0]["file_count"] == 1
+    assert "file_count" not in sources[1]  # padded with declared-only
+    assert discovered[0]["has_training_yaml"] is False
+    assert discovered[0]["exclude_defaults"] is True
+
+
+def test_summarize_training_cache_none_when_dir_missing(tmp_path: Path) -> None:
+    assert _summarize_training_cache(tmp_path / "missing", tmp_path) is None
+
+
+def test_summarize_training_cache_with_no_last_run(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    cache_dir = tmp_path / "cache"
+    cache_dir.mkdir()
+
+    monkeypatch.setattr(
+        "dlm.directives.cache.TokenizedCache.open",
+        classmethod(lambda cls, _path: SimpleNamespace(entry_count=5, total_bytes=2048)),
+    )
+    monkeypatch.setattr("dlm.metrics.queries.latest_tokenization", lambda root: None)
+
+    snap = _summarize_training_cache(cache_dir, tmp_path)
+    assert snap is not None
+    assert snap["entry_count"] == 5
+    assert snap["bytes"] == 2048
+    assert snap["last_run_hit_rate"] is None
+    assert snap["last_run_id"] is None
+
+
+def test_summarize_training_cache_with_last_run(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    cache_dir = tmp_path / "cache"
+    cache_dir.mkdir()
+
+    monkeypatch.setattr(
+        "dlm.directives.cache.TokenizedCache.open",
+        classmethod(lambda cls, _path: SimpleNamespace(entry_count=3, total_bytes=512)),
+    )
+    monkeypatch.setattr(
+        "dlm.metrics.queries.latest_tokenization",
+        lambda root: SimpleNamespace(hit_rate=0.75, run_id=42),
+    )
+
+    snap = _summarize_training_cache(cache_dir, tmp_path)
+    assert snap is not None
+    assert snap["last_run_hit_rate"] == 0.75
+    assert snap["last_run_id"] == 42
+
+
+def test_summarize_gate_none_when_no_cfg_no_events(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    store = SimpleNamespace(root=tmp_path)
+    monkeypatch.setattr(
+        "dlm.train.gate.paths.gate_config_path", lambda s: tmp_path / "missing.json"
+    )
+    monkeypatch.setattr("dlm.metrics.queries.latest_gate_events", lambda root: [])
+
+    assert _summarize_gate(store) is None  # type: ignore[arg-type]
+
+
+def test_summarize_gate_diverged_when_no_cfg_but_diverged_events(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    store = SimpleNamespace(root=tmp_path)
+    event = SimpleNamespace(
+        adapter_name="adapter-a",
+        mean_weight=0.5,
+        sample_count=10,
+        mode="diverged",
+        run_id=99,
+    )
+    monkeypatch.setattr(
+        "dlm.train.gate.paths.gate_config_path", lambda s: tmp_path / "missing.json"
+    )
+    monkeypatch.setattr("dlm.metrics.queries.latest_gate_events", lambda root: [event])
+
+    snap = _summarize_gate(store)  # type: ignore[arg-type]
+    assert snap is not None
+    assert snap["mode"] == "diverged"
+    assert snap["last_run_id"] == 99
+    assert snap["per_adapter"][0]["adapter_name"] == "adapter-a"
+
+
+def test_summarize_gate_with_cfg_and_events(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    cfg_path = tmp_path / "gate_config.json"
+    cfg_path.write_text(
+        json.dumps(
+            {"mode": "trained", "adapter_names": ["a", "b"], "input_dim": 32, "hidden_proj_dim": 16}
+        ),
+        encoding="utf-8",
+    )
+    store = SimpleNamespace(root=tmp_path)
+    event = SimpleNamespace(
+        adapter_name="a", mean_weight=0.6, sample_count=5, mode="active", run_id=7
+    )
+
+    monkeypatch.setattr("dlm.train.gate.paths.gate_config_path", lambda s: cfg_path)
+    monkeypatch.setattr("dlm.metrics.queries.latest_gate_events", lambda root: [event])
+
+    snap = _summarize_gate(store)  # type: ignore[arg-type]
+    assert snap is not None
+    assert snap["mode"] == "trained"
+    assert snap["last_run_id"] == 7
+    assert snap["per_adapter"][0]["adapter_name"] == "a"
+
+
+def test_summarize_gate_with_cfg_no_events(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    cfg_path = tmp_path / "gate_config.json"
+    cfg_path.write_text(
+        json.dumps(
+            {"mode": "trained", "adapter_names": ["a", "b"], "input_dim": 32, "hidden_proj_dim": 16}
+        ),
+        encoding="utf-8",
+    )
+    store = SimpleNamespace(root=tmp_path)
+
+    monkeypatch.setattr("dlm.train.gate.paths.gate_config_path", lambda s: cfg_path)
+    monkeypatch.setattr("dlm.metrics.queries.latest_gate_events", lambda root: [])
+
+    snap = _summarize_gate(store)  # type: ignore[arg-type]
+    assert snap is not None
+    assert snap["last_run_id"] is None
+    assert snap["per_adapter"] == [{"adapter_name": "a"}, {"adapter_name": "b"}]
+
+
+def test_summarize_preference_mining_none_when_no_totals(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr("dlm.metrics.queries.preference_mining_totals", lambda root: None)
+    assert _summarize_preference_mining(tmp_path) is None
+
+
+def test_summarize_preference_mining_with_totals(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    totals = SimpleNamespace(
+        run_count=2, event_count=5, total_mined_pairs=10, total_skipped_prompts=3
+    )
+    last = SimpleNamespace(run_id=42)
+    monkeypatch.setattr("dlm.metrics.queries.preference_mining_totals", lambda root: totals)
+    monkeypatch.setattr("dlm.metrics.queries.latest_preference_mining", lambda root: last)
+    monkeypatch.setattr(
+        "dlm.metrics.queries.preference_mining_for_run", lambda root, run_id: ["row1", "row2"]
+    )
+    monkeypatch.setattr(
+        "dlm.metrics.queries.preference_mining_to_dict", lambda rows: [{"key": "value"}]
+    )
+
+    snap = _summarize_preference_mining(tmp_path)
+    assert snap is not None
+    assert snap["run_count"] == 2
+    assert snap["last_run_id"] == 42
+    assert snap["last_run_event_count"] == 2
+    assert snap["last_event"] == {"key": "value"}
+
+
+def test_summarize_base_security_returns_dict_for_known_base() -> None:
+    snap = _summarize_base_security("smollm2-135m")
+    assert snap is not None
+    assert snap["base_model"] == "smollm2-135m"
+    assert "trust_remote_code" in snap
+
+
+def test_summarize_base_security_returns_none_when_resolve_fails(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from dlm.base_models.errors import UnknownBaseModelError
+
+    def _raise(*args: object, **kwargs: object) -> None:
+        raise UnknownBaseModelError("nope", known_keys=())
+
+    monkeypatch.setattr("dlm.base_models.resolve", _raise)
+
+    assert _summarize_base_security("definitely-not-real") is None
diff --git a/tests/unit/train/test_dispatch.py b/tests/unit/train/test_dispatch.py
new file mode 100644
index 0000000..d9fdccf
--- /dev/null
+++ b/tests/unit/train/test_dispatch.py
@@ -0,0 +1,207 @@
+"""Direct tests for `dlm.train.dispatch:run_train`."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+import pytest
+
+from dlm.train.dispatch import (
+    NoViableTrainingPlanError,
+    TrainRequest,
+    TrainResult,
+    run_train,
+)
+
+
+def _fake_parsed(*, dlm_id: str = "01KPQ9X1000000000000000000") -> Any:
+    """Minimal ParsedDlm shape: frontmatter.training.sequence_len + dlm_id."""
+    training = SimpleNamespace(sequence_len=1024)
+    frontmatter = SimpleNamespace(
+        dlm_id=dlm_id,
+        training=training,
+        base_model="smollm2-135m",
+    )
+    return SimpleNamespace(frontmatter=frontmatter)
+
+
+def _fake_spec() -> Any:
+    """Minimal BaseModelSpec shape: params, effective_context_length, key, revision."""
+    return SimpleNamespace(
+        params=135_000_000,
+        effective_context_length=4096,
+        key="smollm2-135m",
+        revision="0123456789abcdef",
+    )
+
+
+def _make_store(tmp_path: Path, *, manifest_exists: bool = True) -> Any:
+    """Fake StorePath: ensure_layout no-op, manifest path optionally exists."""
+    layout_calls: list[bool] = []
+    manifest_path = tmp_path / "manifest.json"
+    if manifest_exists:
+        manifest_path.write_text("{}", encoding="utf-8")
+
+    class _Store:
+        manifest = manifest_path
+
+        def ensure_layout(self) -> None:
+            layout_calls.append(True)
+
+    store = _Store()
+    store.layout_calls = layout_calls  # type: ignore[attr-defined]
+    return store
+
+
+def _make_request(
+    tmp_path: Path,
+    *,
+    parsed: Any | None = None,
+    spec: Any | None = None,
+    store: Any | None = None,
+) -> TrainRequest:
+    return TrainRequest(
+        parsed=parsed or _fake_parsed(),  # type: ignore[arg-type]
+        target_path=tmp_path / "doc.dlm",
+        spec=spec or _fake_spec(),  # type: ignore[arg-type]
+        store=store or _make_store(tmp_path),  # type: ignore[arg-type]
+        phase="all",
+        mode="fresh",
+        seed=42,
+        max_steps=None,
+        lock_mode="default",
+        world_size=1,
+        strict_metrics=False,
+        include_auto_mined=True,
+    )
+
+
+def test_run_train_returns_typed_result_on_happy_path(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    plan_obj = SimpleNamespace(name="fake-plan")
+    caps_obj = SimpleNamespace()
+    monkeypatch.setattr(
+        "dlm.hardware.doctor",
+        lambda **kwargs: SimpleNamespace(plan=plan_obj, capabilities=caps_obj),
+    )
+    captured: dict[str, object] = {}
+
+    def _fake_run_phases(*args: object, **kwargs: object) -> list[object]:
+        captured["args"] = args
+        captured["kwargs"] = kwargs
+        return ["pr1", "pr2"]
+
+    monkeypatch.setattr(
+        "dlm.train.preference.phase_orchestrator.run_phases",
+        _fake_run_phases,
+    )
+
+    request = _make_request(tmp_path)
+    result = run_train(request)
+
+    assert isinstance(result, TrainResult)
+    assert result.plan is plan_obj
+    assert result.phase_results == ["pr1", "pr2"]
+    assert request.store.layout_calls == [True]
+    assert captured["kwargs"]["capabilities"] is caps_obj
+    assert captured["kwargs"]["world_size"] == 1
+    assert captured["kwargs"]["lock_mode"] == "default"
+
+
+def test_run_train_raises_no_viable_plan_when_doctor_returns_none(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        "dlm.hardware.doctor",
+        lambda **kwargs: SimpleNamespace(plan=None, capabilities=SimpleNamespace()),
+    )
+
+    request = _make_request(tmp_path)
+    with pytest.raises(NoViableTrainingPlanError, match="no viable training plan"):
+        run_train(request)
+
+
+def test_run_train_provisions_manifest_when_missing_for_non_gated_spec(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        "dlm.hardware.doctor",
+        lambda **kwargs: SimpleNamespace(plan=SimpleNamespace(), capabilities=SimpleNamespace()),
+    )
+    monkeypatch.setattr(
+        "dlm.train.preference.phase_orchestrator.run_phases",
+        lambda *args, **kwargs: [],
+    )
+    monkeypatch.setattr("dlm.base_models.is_gated", lambda spec: False)
+
+    saved: dict[str, object] = {}
+
+    def _fake_save(path: object, manifest: object) -> None:
+        saved["path"] = path
+        saved["manifest"] = manifest
+
+    monkeypatch.setattr("dlm.store.manifest.save_manifest", _fake_save)
+
+    store = _make_store(tmp_path, manifest_exists=False)
+    request = _make_request(tmp_path, store=store)
+    run_train(request)
+
+    assert saved["path"] == store.manifest
+    assert saved["manifest"].license_acceptance is None  # type: ignore[union-attr]
+    assert saved["manifest"].base_model == "smollm2-135m"  # type: ignore[union-attr]
+
+
+def test_run_train_provisions_manifest_with_acceptance_for_gated_spec(
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        "dlm.hardware.doctor",
+        lambda **kwargs: SimpleNamespace(plan=SimpleNamespace(), capabilities=SimpleNamespace()),
+    )
+    monkeypatch.setattr(
+        "dlm.train.preference.phase_orchestrator.run_phases",
+        lambda *args, **kwargs: [],
+    )
+    monkeypatch.setattr("dlm.base_models.is_gated", lambda spec: True)
+
+    from datetime import datetime
+
+    from dlm.base_models.license import LicenseAcceptance
+
+    sentinel_acceptance = LicenseAcceptance(
+        accepted_at=datetime(2026, 5, 1),
+        license_url="https://example.test/lic",
+        license_spdx="apache-2.0",
+        via="cli_flag",
+    )
+    require_calls: dict[str, object] = {}
+
+    def _fake_require(spec: object, *, accept_license: bool, via: str) -> LicenseAcceptance:
+        require_calls["spec"] = spec
+        require_calls["accept_license"] = accept_license
+        require_calls["via"] = via
+        return sentinel_acceptance
+
+    monkeypatch.setattr("dlm.base_models.license.require_acceptance", _fake_require)
+
+    saved: dict[str, object] = {}
+
+    def _fake_save(path: object, manifest: object) -> None:
+        saved["manifest"] = manifest
+
+    monkeypatch.setattr("dlm.store.manifest.save_manifest", _fake_save)
+
+    store = _make_store(tmp_path, manifest_exists=False)
+    request = _make_request(tmp_path, store=store)
+    run_train(request)
+
+    assert require_calls["accept_license"] is True
+    assert require_calls["via"] == "cli_flag"
+    assert saved["manifest"].license_acceptance is sentinel_acceptance  # type: ignore[union-attr]

From d4b1b6c05c3ef45c7a890c4d2543ffea6866fe86 Mon Sep 17 00:00:00 2001
From: mfwolffe <wolffemf@dukes.jmu.edu>
Date: Sun, 3 May 2026 22:41:54 -0400
Subject: [PATCH 46/46] Repoint integration-test build_judge patches to
 canonical module path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The preference dispatcher uses dotted import 'from dlm.preference import
judge as _judge_mod; _judge_mod.build_judge(...)'. Tests must patch
'dlm.preference.judge.build_judge' (canonical) for late attribute lookup
to see the patch — patches on the package re-export 'dlm.preference.build_judge'
are invisible to the dispatcher. Caught by Ubuntu CI on PR #18.
---
 tests/integration/cli/test_preference_cli.py    | 2 +-
 tests/integration/preference/test_mine_cycle.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/cli/test_preference_cli.py b/tests/integration/cli/test_preference_cli.py
index abca054..0f945fe 100644
--- a/tests/integration/cli/test_preference_cli.py
+++ b/tests/integration/cli/test_preference_cli.py
@@ -127,7 +127,7 @@ def _patch_mining_runtime(monkeypatch: pytest.MonkeyPatch) -> None:
         lambda *args, **kwargs: _FakeBackend({"What is DGEMM?": ["bad answer", "good answer"]}),
     )
     monkeypatch.setattr(
-        "dlm.preference.build_judge",
+        "dlm.preference.judge.build_judge",
         lambda *args, **kwargs: _FakeJudge(),
     )
 
diff --git a/tests/integration/preference/test_mine_cycle.py b/tests/integration/preference/test_mine_cycle.py
index ed4d39c..17acec7 100644
--- a/tests/integration/preference/test_mine_cycle.py
+++ b/tests/integration/preference/test_mine_cycle.py
@@ -143,7 +143,7 @@ def _patch_mining(monkeypatch: pytest.MonkeyPatch) -> None:
         lambda *args, **kwargs: _FakeMiningBackend(_MINE_RESPONSES),
     )
     monkeypatch.setattr(
-        "dlm.preference.build_judge",
+        "dlm.preference.judge.build_judge",
         lambda *args, **kwargs: _TerseJudge(),
     )