tenseleyFlow · mfwolffe · May 3, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,16 @@ the project targets [Semantic Versioning](https://semver.org/).
 
 ## [Unreleased]
 
+### Changed
+
+- **`dlm metrics` is now a subcommand group with an explicit `show`.**
+  The previous shape — a callback that took `<path>` plus a `watch`
+  subcommand — caused click to error with "Missing argument 'PATH'"
+  whenever an option followed the positional (`dlm metrics PATH
+  --run-id 1`). The fix makes the call explicit:
+  `dlm metrics show PATH [options]`. `dlm metrics watch PATH` is
+  unchanged. Update scripts that called the old form.
+
 ### Fixed
 
 - **`gguf_arch` preflight probe was silently false-negative on every

diff --git a/README.md b/README.md
@@ -234,7 +234,7 @@ dlm push mydoc.dlm --to hf:org/name
 ```sh
 dlm doctor
 dlm show mydoc.dlm --json
-dlm metrics mydoc.dlm
+dlm metrics show mydoc.dlm
 ```
 
 ## Supported Platforms

diff --git a/docs/cli/reference.md b/docs/cli/reference.md
@@ -148,8 +148,8 @@ cancels generation or input. Session history persists at
 Query the per-store SQLite metrics DB (Sprint 26).
 
 ```
-dlm metrics <path> [--json|--csv] [--run-id N] [--phase PHASE] [--since WINDOW] [--limit N]
-dlm metrics <path> watch [--poll-seconds N]
+dlm metrics show <path> [--json|--csv] [--run-id N] [--phase PHASE] [--since WINDOW] [--limit N]
+dlm metrics watch <path> [--poll-seconds N]
 ```
 
 | Option | Default | Notes |
@@ -161,7 +161,7 @@ dlm metrics <path> watch [--poll-seconds N]
 | `--since` | None | Time window (`24h`, `7d`, `30m`, `10s`). |
 | `--limit N` | 20 | Cap the number of runs returned. |
 
-`dlm metrics <path> watch` polls the DB and tails new step/eval rows as
+`dlm metrics watch <path>` polls the DB and tails new step/eval rows as
 they arrive. See the [metrics cookbook](../cookbook/metrics.md) for
 the full flow + optional TensorBoard / W&B sinks (`uv sync --extra
 observability`).

diff --git a/docs/cookbook/directive-cache.md b/docs/cookbook/directive-cache.md
@@ -160,7 +160,7 @@ dlm show /path/to/doc.dlm --json | jq .training_cache
 The metrics DB keeps a row per run:
 
 ```bash
-dlm metrics /path/to/doc.dlm --json | jq '.runs[0].tokenization'
+dlm metrics show /path/to/doc.dlm --json | jq '.runs[0].tokenization'
 ```
 
 Fields on the event: `total_sections`, `cache_hits`, `cache_misses`,

diff --git a/docs/cookbook/metrics.md b/docs/cookbook/metrics.md
@@ -18,12 +18,12 @@ are available behind the `observability` extra.
 
 Writes are best-effort: a metrics failure never takes down training.
 
-## `dlm metrics <path>`
+## `dlm metrics show <path>`
 
 Default view lists the most-recent runs:
 
 ```bash
-$ dlm metrics mydoc.dlm
+$ dlm metrics show mydoc.dlm
 Runs: 3
   run_id=3  phase=sft  seed=42  status=ok  started=2026-04-20T17:12:04Z
   run_id=2  phase=sft  seed=42  status=ok  started=2026-04-20T16:58:11Z
@@ -114,4 +114,4 @@ DELETE FROM runs  WHERE run_id NOT IN (SELECT run_id FROM runs ORDER BY run_id D
 VACUUM;
 ```
 
-A built-in `dlm metrics prune` is on the backlog.
+A built-in `dlm metrics prune` subcommand is on the backlog.
diff --git a/docs/cookbook/reward-model-integration.md b/docs/cookbook/reward-model-integration.md
@@ -106,7 +106,7 @@ uv run dlm train mydoc.dlm --phase preference
 Then inspect:
 
 ```sh
-uv run dlm metrics mydoc.dlm --run-id 7 --json
+uv run dlm metrics show mydoc.dlm --run-id 7 --json
 uv run dlm prompt mydoc.dlm "..." 
 ```
 

diff --git a/docs/cookbook/self-improving-loop.md b/docs/cookbook/self-improving-loop.md
@@ -110,7 +110,7 @@ without deleting anything from the file.
 Use these two commands to see what happened:
 
 ```sh
-uv run dlm metrics release-notes.dlm --run-id 7 --json
+uv run dlm metrics show release-notes.dlm --run-id 7 --json
 uv run dlm show release-notes.dlm --json
 ```
 

diff --git a/src/dlm/cli/app.py b/src/dlm/cli/app.py
@@ -137,14 +137,20 @@ def _root(
 _preference_app.command("list")(commands.preference_list_cmd)
 app.add_typer(_preference_app, name="preference")
 
-# `dlm metrics <path>` + `dlm metrics watch <path>` as a subcommand
-# group. Typer nests naturally via an Annotated sub-app.
+# `dlm metrics show <path>` + `dlm metrics watch <path>` as a
+# subcommand group. The previous shape — a callback that took the
+# positional `path` plus a `watch` subcommand — broke with
+# "Missing argument 'PATH'" when an option came after the positional
+# (`dlm metrics PATH --run-id 1`), because click can't disambiguate
+# a positional-then-option from a subcommand-then-args inside the
+# same group (Audit 13, M13.3). The explicit `show` subcommand removes
+# the ambiguity. User-facing impact: `dlm metrics PATH` must now be
+# written `dlm metrics show PATH` — flagged in CHANGELOG.
 _metrics_app = typer.Typer(
     help="Query the per-store metrics database.",
     no_args_is_help=True,
-    invoke_without_command=True,
 )
-_metrics_app.callback(invoke_without_command=True)(commands.metrics_cmd)
+_metrics_app.command("show")(commands.metrics_cmd)
 _metrics_app.command("watch")(commands.metrics_watch_cmd)
 app.add_typer(_metrics_app, name="metrics")
 

diff --git a/tests/integration/cli/test_m1_cli_surface.py b/tests/integration/cli/test_m1_cli_surface.py
@@ -43,9 +43,13 @@ def test_metrics_on_untrained_returns_cleanly(self, tmp_path: Path) -> None:
         _write_minimal_dlm(doc)
 
         runner = CliRunner()
+        # `dlm metrics show <path>` is the M13.3 form. The bare
+        # `dlm metrics <path>` shape was the source of the
+        # "Missing argument 'PATH'" parser bug; the explicit `show`
+        # subcommand is its replacement.
         result = runner.invoke(
             app,
-            ["--home", str(tmp_path / "home"), "metrics", str(doc)],
+            ["--home", str(tmp_path / "home"), "metrics", "show", str(doc)],
         )
         assert result.exit_code == 0, result.output
         # An untrained .dlm has no runs — the CLI prints an empty

diff --git a/tests/integration/cli/test_reference_doc_parity.py b/tests/integration/cli/test_reference_doc_parity.py
@@ -66,9 +66,15 @@ def test_reference_doc_covers_audio_and_verify_surface() -> None:
 
 
 def test_reference_doc_uses_actual_metrics_watch_order() -> None:
+    """M13.3 restructured ``metrics`` as a subcommand group: ``show``
+    and ``watch`` are siblings now. The reference doc should show
+    ``watch`` as a subcommand of ``metrics``, not as a positional after
+    ``<path>``."""
     section = _section("metrics")
-    assert "dlm metrics <path> watch [--poll-seconds N]" in section
-    assert "dlm metrics watch <path>" not in section
+    assert "dlm metrics watch <path>" in section
+    assert "dlm metrics <path> watch" not in section
+    # ``show`` is the explicit drill-down sibling of ``watch``.
+    assert "dlm metrics show <path>" in section
 
 
 def test_reference_doc_covers_export_target_surface() -> None:

diff --git a/tests/unit/cli/test_cli_metrics.py b/tests/unit/cli/test_cli_metrics.py
@@ -0,0 +1,69 @@
+"""CLI shape tests for ``dlm metrics`` (M13.3).
+
+The audit hit "Missing argument 'PATH'" on
+``dlm metrics PATH --run-id 1``. Click's group dispatch couldn't
+disambiguate a positional-then-option from a subcommand-then-args
+when the group had both a callback positional AND a registered
+subcommand. The fix made ``show`` explicit. These tests assert the
+new shape and that both arg orders parse without "Missing argument".
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+from typer.testing import CliRunner
+
+
+@pytest.fixture
+def cli_app() -> Any:
+    from dlm.cli.app import app
+
+    return app
+
+
+def test_metrics_help_lists_show_and_watch_subcommands(cli_app: Any) -> None:
+    result = CliRunner().invoke(cli_app, ["metrics", "--help"])
+    assert result.exit_code == 0
+    out = (result.stdout or "") + (result.stderr or "")
+    import re
+
+    plain = re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", out)
+    assert "show" in plain
+    assert "watch" in plain
+
+
+def test_metrics_show_with_option_after_positional_parses(
+    cli_app: Any,
+) -> None:
+    """Audit 13 M13.3 regression: ``--run-id`` after the positional now
+    parses (it errored before with 'Missing argument PATH'). The actual
+    file doesn't exist so we expect a downstream error, but we must NOT
+    see the old parser error."""
+    result = CliRunner().invoke(
+        cli_app, ["metrics", "show", "/nonexistent/path.dlm", "--run-id", "1"]
+    )
+    combined = (result.stdout or "") + (result.stderr or "")
+    assert "Missing argument" not in combined
+    # Any non-zero exit is fine (file not found / parse error / etc.);
+    # the point is only that the old parser error is gone.
+    assert result.exit_code != 0
+
+
+def test_metrics_show_with_option_before_positional_parses(
+    cli_app: Any,
+) -> None:
+    result = CliRunner().invoke(
+        cli_app, ["metrics", "show", "--run-id", "1", "/nonexistent/path.dlm"]
+    )
+    combined = (result.stdout or "") + (result.stderr or "")
+    assert "Missing argument" not in combined
+    assert result.exit_code != 0
+
+
+def test_metrics_watch_subcommand_unchanged(cli_app: Any) -> None:
+    """``dlm metrics watch <path>`` was already unambiguous and still
+    parses — the restructure didn't break it."""
+    result = CliRunner().invoke(cli_app, ["metrics", "watch", "--help"])
+    assert result.exit_code == 0