From 66eedb0977eb4f7ed6551e09e25da0e5de2ed801 Mon Sep 17 00:00:00 2001 From: mfwolffe Date: Wed, 29 Apr 2026 11:40:49 -0400 Subject: [PATCH 1/5] Make 'dlm metrics' a subcommand group (M13.3) --- src/dlm/cli/app.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/dlm/cli/app.py b/src/dlm/cli/app.py index 11ebd06e..1d4cb466 100644 --- a/src/dlm/cli/app.py +++ b/src/dlm/cli/app.py @@ -137,14 +137,20 @@ def _root( _preference_app.command("list")(commands.preference_list_cmd) app.add_typer(_preference_app, name="preference") -# `dlm metrics ` + `dlm metrics watch ` as a subcommand -# group. Typer nests naturally via an Annotated sub-app. +# `dlm metrics show ` + `dlm metrics watch ` as a +# subcommand group. The previous shape — a callback that took the +# positional `path` plus a `watch` subcommand — broke with +# "Missing argument 'PATH'" when an option came after the positional +# (`dlm metrics PATH --run-id 1`), because click can't disambiguate +# a positional-then-option from a subcommand-then-args inside the +# same group. Audit 13 M13.3. The explicit `show` subcommand removes +# the ambiguity. Run-time impact: `dlm metrics PATH` now needs to be +# `dlm metrics show PATH` — flagged in CHANGELOG. _metrics_app = typer.Typer( help="Query the per-store metrics database.", no_args_is_help=True, - invoke_without_command=True, ) -_metrics_app.callback(invoke_without_command=True)(commands.metrics_cmd) +_metrics_app.command("show")(commands.metrics_cmd) _metrics_app.command("watch")(commands.metrics_watch_cmd) app.add_typer(_metrics_app, name="metrics") From e821f16fae99e849dfb2e6add2450a806f4663e1 Mon Sep 17 00:00:00 2001 From: mfwolffe Date: Wed, 29 Apr 2026 11:40:50 -0400 Subject: [PATCH 2/5] Test dlm metrics show parses both arg orders --- tests/unit/cli/test_cli_metrics.py | 69 ++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/unit/cli/test_cli_metrics.py diff --git a/tests/unit/cli/test_cli_metrics.py b/tests/unit/cli/test_cli_metrics.py new file mode 100644 index 00000000..0156a68d --- /dev/null +++ b/tests/unit/cli/test_cli_metrics.py @@ -0,0 +1,69 @@ +"""CLI shape tests for ``dlm metrics`` (M13.3). + +The audit hit "Missing argument 'PATH'" on +``dlm metrics PATH --run-id 1``. Click's group dispatch couldn't +disambiguate a positional-then-option from a subcommand-then-args +when the group had both a callback positional AND a registered +subcommand. The fix made ``show`` explicit. These tests assert the +new shape and that both arg orders parse without "Missing argument". +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from typer.testing import CliRunner + + +@pytest.fixture +def cli_app() -> Any: + from dlm.cli.app import app + + return app + + +def test_metrics_help_lists_show_and_watch_subcommands(cli_app: Any) -> None: + result = CliRunner().invoke(cli_app, ["metrics", "--help"]) + assert result.exit_code == 0 + out = (result.stdout or "") + (result.stderr or "") + import re + + plain = re.sub(r"\x1b\[[0-9;]*[A-Za-z]", "", out) + assert "show" in plain + assert "watch" in plain + + +def test_metrics_show_with_option_after_positional_parses( + cli_app: Any, +) -> None: + """Audit 13 M13.3 regression: ``--run-id`` after the positional now + parses (it errored before with 'Missing argument PATH'). The actual + file doesn't exist so we expect a downstream error, but we must NOT + see the old parser error.""" + result = CliRunner().invoke( + cli_app, ["metrics", "show", "/nonexistent/path.dlm", "--run-id", "1"] + ) + combined = (result.stdout or "") + (result.stderr or "") + assert "Missing argument" not in combined + # Allow any non-zero exit (file not found / parse error / etc.) but + # surface a useful message — not a typer Usage block. + assert result.exit_code != 0 + + +def test_metrics_show_with_option_before_positional_parses( + cli_app: Any, +) -> None: + result = CliRunner().invoke( + cli_app, ["metrics", "show", "--run-id", "1", "/nonexistent/path.dlm"] + ) + combined = (result.stdout or "") + (result.stderr or "") + assert "Missing argument" not in combined + assert result.exit_code != 0 + + +def test_metrics_watch_subcommand_unchanged(cli_app: Any) -> None: + """``dlm metrics watch `` was already unambiguous and still + parses — the restructure didn't break it.""" + result = CliRunner().invoke(cli_app, ["metrics", "watch", "--help"]) + assert result.exit_code == 0 From c0db619db37917020ce58b2a95d2001ae236d9b3 Mon Sep 17 00:00:00 2001 From: mfwolffe Date: Wed, 29 Apr 2026 11:40:50 -0400 Subject: [PATCH 3/5] Update docs to use 'dlm metrics show ' --- README.md | 2 +- docs/cli/reference.md | 6 +++--- docs/cookbook/directive-cache.md | 2 +- docs/cookbook/metrics.md | 6 +++--- docs/cookbook/reward-model-integration.md | 2 +- docs/cookbook/self-improving-loop.md | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 3ee98ee0..0a99c2c5 100644 --- a/README.md +++ b/README.md @@ -234,7 +234,7 @@ dlm push mydoc.dlm --to hf:org/name ```sh dlm doctor dlm show mydoc.dlm --json -dlm metrics mydoc.dlm +dlm metrics show mydoc.dlm ``` ## Supported Platforms diff --git a/docs/cli/reference.md b/docs/cli/reference.md index 3905a4fd..71bf2572 100644 --- a/docs/cli/reference.md +++ b/docs/cli/reference.md @@ -148,8 +148,8 @@ cancels generation or input. Session history persists at Query the per-store SQLite metrics DB (Sprint 26). ``` -dlm metrics [--json|--csv] [--run-id N] [--phase PHASE] [--since WINDOW] [--limit N] -dlm metrics watch [--poll-seconds N] +dlm metrics show [--json|--csv] [--run-id N] [--phase PHASE] [--since WINDOW] [--limit N] +dlm metrics watch [--poll-seconds N] ``` | Option | Default | Notes | @@ -161,7 +161,7 @@ dlm metrics watch [--poll-seconds N] | `--since` | None | Time window (`24h`, `7d`, `30m`, `10s`). | | `--limit N` | 20 | Cap the number of runs returned. | -`dlm metrics watch` polls the DB and tails new step/eval rows as +`dlm metrics watch ` polls the DB and tails new step/eval rows as they arrive. See the [metrics cookbook](../cookbook/metrics.md) for the full flow + optional TensorBoard / W&B sinks (`uv sync --extra observability`). diff --git a/docs/cookbook/directive-cache.md b/docs/cookbook/directive-cache.md index 0fc3eb15..1e20641b 100644 --- a/docs/cookbook/directive-cache.md +++ b/docs/cookbook/directive-cache.md @@ -160,7 +160,7 @@ dlm show /path/to/doc.dlm --json | jq .training_cache The metrics DB keeps a row per run: ```bash -dlm metrics /path/to/doc.dlm --json | jq '.runs[0].tokenization' +dlm metrics show /path/to/doc.dlm --json | jq '.runs[0].tokenization' ``` Fields on the event: `total_sections`, `cache_hits`, `cache_misses`, diff --git a/docs/cookbook/metrics.md b/docs/cookbook/metrics.md index db044bc4..19fc8030 100644 --- a/docs/cookbook/metrics.md +++ b/docs/cookbook/metrics.md @@ -18,12 +18,12 @@ are available behind the `observability` extra. Writes are best-effort: a metrics failure never takes down training. -## `dlm metrics ` +## `dlm metrics show ` Default view lists the most-recent runs: ```bash -$ dlm metrics mydoc.dlm +$ dlm metrics show mydoc.dlm Runs: 3 run_id=3 phase=sft seed=42 status=ok started=2026-04-20T17:12:04Z run_id=2 phase=sft seed=42 status=ok started=2026-04-20T16:58:11Z @@ -114,4 +114,4 @@ DELETE FROM runs WHERE run_id NOT IN (SELECT run_id FROM runs ORDER BY run_id D VACUUM; ``` -A built-in `dlm metrics prune` is on the backlog. +A built-in `dlm metrics show prune` is on the backlog. diff --git a/docs/cookbook/reward-model-integration.md b/docs/cookbook/reward-model-integration.md index 0abcfb28..b7268ccb 100644 --- a/docs/cookbook/reward-model-integration.md +++ b/docs/cookbook/reward-model-integration.md @@ -106,7 +106,7 @@ uv run dlm train mydoc.dlm --phase preference Then inspect: ```sh -uv run dlm metrics mydoc.dlm --run-id 7 --json +uv run dlm metrics show mydoc.dlm --run-id 7 --json uv run dlm prompt mydoc.dlm "..." ``` diff --git a/docs/cookbook/self-improving-loop.md b/docs/cookbook/self-improving-loop.md index 41407248..cadefd44 100644 --- a/docs/cookbook/self-improving-loop.md +++ b/docs/cookbook/self-improving-loop.md @@ -110,7 +110,7 @@ without deleting anything from the file. Use these two commands to see what happened: ```sh -uv run dlm metrics release-notes.dlm --run-id 7 --json +uv run dlm metrics show release-notes.dlm --run-id 7 --json uv run dlm show release-notes.dlm --json ``` From cbaa4bc32aeffcfd4c494ea3f8dfe69c9ffe3f50 Mon Sep 17 00:00:00 2001 From: mfwolffe Date: Wed, 29 Apr 2026 11:40:50 -0400 Subject: [PATCH 4/5] CHANGELOG: dlm metrics show subcommand --- CHANGELOG.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7ecb478..fd323572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,16 @@ the project targets [Semantic Versioning](https://semver.org/). ## [Unreleased] +### Changed + +- **`dlm metrics` is now a subcommand group with an explicit `show`.** + The previous shape — a callback that took `` plus a `watch` + subcommand — caused click to error with "Missing argument 'PATH'" + whenever an option followed the positional (`dlm metrics PATH + --run-id 1`). The fix makes the call explicit: + `dlm metrics show PATH [options]`. `dlm metrics watch PATH` is + unchanged. Update scripts that called the old form. + ### Fixed - **`gguf_arch` preflight probe was silently false-negative on every From 6ef0d34b0c28b7aa987ec61a72f95cc022353bb4 Mon Sep 17 00:00:00 2001 From: mfwolffe Date: Wed, 29 Apr 2026 11:58:53 -0400 Subject: [PATCH 5/5] Update CLI integration tests for 'dlm metrics show' subcommand --- tests/integration/cli/test_m1_cli_surface.py | 6 +++++- tests/integration/cli/test_reference_doc_parity.py | 10 ++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integration/cli/test_m1_cli_surface.py b/tests/integration/cli/test_m1_cli_surface.py index ec9f882d..65c103f4 100644 --- a/tests/integration/cli/test_m1_cli_surface.py +++ b/tests/integration/cli/test_m1_cli_surface.py @@ -43,9 +43,13 @@ def test_metrics_on_untrained_returns_cleanly(self, tmp_path: Path) -> None: _write_minimal_dlm(doc) runner = CliRunner() + # `dlm metrics show ` is the M13.3 form. The bare + # `dlm metrics ` shape was the source of the + # "Missing argument 'PATH'" parser bug; the explicit `show` + # subcommand is its replacement. result = runner.invoke( app, - ["--home", str(tmp_path / "home"), "metrics", str(doc)], + ["--home", str(tmp_path / "home"), "metrics", "show", str(doc)], ) assert result.exit_code == 0, result.output # An untrained .dlm has no runs — the CLI prints an empty diff --git a/tests/integration/cli/test_reference_doc_parity.py b/tests/integration/cli/test_reference_doc_parity.py index 88ed49d9..6ffae45f 100644 --- a/tests/integration/cli/test_reference_doc_parity.py +++ b/tests/integration/cli/test_reference_doc_parity.py @@ -66,9 +66,15 @@ def test_reference_doc_covers_audio_and_verify_surface() -> None: def test_reference_doc_uses_actual_metrics_watch_order() -> None: + """M13.3 restructured ``metrics`` as a subcommand group: ``show`` + and ``watch`` are siblings now. The reference doc should show + ``watch`` as a subcommand of ``metrics``, not as a positional after + ````.""" section = _section("metrics") - assert "dlm metrics watch [--poll-seconds N]" in section - assert "dlm metrics watch " not in section + assert "dlm metrics watch " in section + assert "dlm metrics watch" not in section + # ``show`` is the explicit drill-down sibling of ``watch``. + assert "dlm metrics show " in section def test_reference_doc_covers_export_target_surface() -> None: