diff --git a/src/dlm/directives/expand.py b/src/dlm/directives/expand.py
index d2ed3c92..a85e1869 100644
--- a/src/dlm/directives/expand.py
+++ b/src/dlm/directives/expand.py
@@ -210,7 +210,7 @@ def _expand_one(
 
     for file_path in _iter_candidates(resolved_root):
         if directive.max_files is not None and _section_cap_reached(sections, directive.max_files):
-            _LOG.info(
+            _LOG.debug(
                 "directive: hit max_files=%d for %s; truncating deterministically",
                 directive.max_files,
                 directive.path,
@@ -238,7 +238,7 @@ def _expand_one(
             continue
 
         if directive.max_bytes_per_file is not None and size > directive.max_bytes_per_file:
-            _LOG.info(
+            _LOG.debug(
                 "directive: %s (%d bytes) exceeds max_bytes_per_file=%d; skipping",
                 file_path,
                 size,
@@ -252,7 +252,7 @@ def _expand_one(
         # section carries only the path + blob sha.
         if file_path.suffix.lower() in _IMAGE_EXTENSIONS:
             if blob_store is None:
-                _LOG.info(
+                _LOG.debug(
                     "directive: %s is an image but no blob_store supplied; skipping",
                     file_path,
                 )
@@ -283,7 +283,7 @@ def _expand_one(
         # (has .txt) and "reference" audio (no .txt) side by side.
         if file_path.suffix.lower() in _AUDIO_EXTENSIONS:
             if blob_store is None:
-                _LOG.info(
+                _LOG.debug(
                     "directive: %s is audio but no blob_store supplied; skipping",
                     file_path,
                 )
@@ -291,7 +291,7 @@ def _expand_one(
                 continue
             transcript = _read_audio_transcript(file_path)
             if transcript is None:
-                _LOG.info(
+                _LOG.debug(
                     "directive: %s has no %s sidecar; skipping "
                     "(audio without transcript has no training signal)",
                     file_path,
@@ -322,7 +322,7 @@ def _expand_one(
             continue
 
         if is_probably_binary(raw):
-            _LOG.info("directive: %s looks binary (NUL in first KiB); skipping", file_path)
+            _LOG.debug("directive: %s looks binary (NUL in first KiB); skipping", file_path)
             skipped_binary += 1
             continue
 
diff --git a/tests/unit/directives/test_expand.py b/tests/unit/directives/test_expand.py
index 16f496cf..214e54ad 100644
--- a/tests/unit/directives/test_expand.py
+++ b/tests/unit/directives/test_expand.py
@@ -108,6 +108,47 @@ def test_max_bytes_per_file_skips_oversize(tmp_path: Path) -> None:
     assert prov.skipped_over_size == 1
 
 
+def test_per_file_skip_logs_are_debug_not_info(
+    tmp_path: Path, caplog: pytest.LogCaptureFixture
+) -> None:
+    """Per-file skip messages are logged at DEBUG, never at INFO or above.
+
+    Audit 13 M13.5: per-file skip messages were emitted at INFO,
+    spamming 243 lines to stderr on every ``dlm show`` of a 2k-file
+    corpus. They're now DEBUG so the default-level log stream stays
+    clean while ``--verbose``/``LOG_LEVEL=DEBUG`` still surfaces them
+    for diagnosis. The summary count remains in provenance.
+    """
+    import logging
+
+    src = tmp_path / "src"
+    src.mkdir()
+    for n in range(5):
+        (src / f"big{n}.py").write_text("x" * 100)
+    body = " sources:\n - path: src\n include: ['**/*.py']\n max_bytes_per_file: 10\n"
+    parsed, _ = _make_parsed(body, tmp_path)
+
+    # Capture from DEBUG up: with an INFO threshold the skip messages are
+    # filtered out before reaching caplog, so the test would keep passing
+    # even if the log calls were removed entirely.
+    with caplog.at_level(logging.DEBUG, logger="dlm.directives.expand"):
+        result = expand_sources(parsed, base_path=tmp_path)  # type: ignore[arg-type]
+
+    assert result.provenance[0].skipped_over_size == 5
+    info_records = [r for r in caplog.records if r.levelno >= logging.INFO]
+    assert info_records == [], (
+        f"per-file skip should not emit INFO records, got: {[r.message for r in info_records]}"
+    )
+    debug_skips = [
+        r
+        for r in caplog.records
+        if r.levelno == logging.DEBUG and "exceeds max_bytes_per_file" in r.getMessage()
+    ]
+    assert len(debug_skips) == 5, (
+        f"expected one DEBUG skip record per oversize file, got: {len(debug_skips)}"
+    )
+
+
 def test_binary_file_skipped(tmp_path: Path) -> None:
     src = tmp_path / "src"
     src.mkdir()