Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/dlm/directives/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def _expand_one(

for file_path in _iter_candidates(resolved_root):
if directive.max_files is not None and _section_cap_reached(sections, directive.max_files):
_LOG.info(
_LOG.debug(
"directive: hit max_files=%d for %s; truncating deterministically",
directive.max_files,
directive.path,
Expand Down Expand Up @@ -238,7 +238,7 @@ def _expand_one(
continue

if directive.max_bytes_per_file is not None and size > directive.max_bytes_per_file:
_LOG.info(
_LOG.debug(
"directive: %s (%d bytes) exceeds max_bytes_per_file=%d; skipping",
file_path,
size,
Expand All @@ -252,7 +252,7 @@ def _expand_one(
# section carries only the path + blob sha.
if file_path.suffix.lower() in _IMAGE_EXTENSIONS:
if blob_store is None:
_LOG.info(
_LOG.debug(
"directive: %s is an image but no blob_store supplied; skipping",
file_path,
)
Expand Down Expand Up @@ -283,15 +283,15 @@ def _expand_one(
# (has .txt) and "reference" audio (no .txt) side by side.
if file_path.suffix.lower() in _AUDIO_EXTENSIONS:
if blob_store is None:
_LOG.info(
_LOG.debug(
"directive: %s is audio but no blob_store supplied; skipping",
file_path,
)
skipped_audio_no_store += 1
continue
transcript = _read_audio_transcript(file_path)
if transcript is None:
_LOG.info(
_LOG.debug(
"directive: %s has no %s sidecar; skipping "
"(audio without transcript has no training signal)",
file_path,
Expand Down Expand Up @@ -322,7 +322,7 @@ def _expand_one(
continue

if is_probably_binary(raw):
_LOG.info("directive: %s looks binary (NUL in first KiB); skipping", file_path)
_LOG.debug("directive: %s looks binary (NUL in first KiB); skipping", file_path)
skipped_binary += 1
continue

Expand Down
27 changes: 27 additions & 0 deletions tests/unit/directives/test_expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,33 @@ def test_max_bytes_per_file_skips_oversize(tmp_path: Path) -> None:
assert prov.skipped_over_size == 1


def test_per_file_skip_logs_are_debug_not_info(
    tmp_path: Path, caplog: pytest.LogCaptureFixture
) -> None:
    """Per-file skip notices must be logged below INFO.

    Audit 13 M13.5: each skipped file used to produce an INFO record,
    which spammed 243 lines to stderr on every ``dlm show`` of a
    2k-file corpus. The messages are now DEBUG, so the default-level
    log stream stays clean while ``--verbose``/``LOG_LEVEL=DEBUG``
    still surfaces them for diagnosis. The summary count remains
    available in provenance, which this test also checks.
    """
    import logging

    # Five files of 100 bytes each, all above the 10-byte cap set below.
    corpus_dir = tmp_path / "src"
    corpus_dir.mkdir()
    oversize_payload = "x" * 100
    for index in range(5):
        corpus_dir.joinpath(f"big{index}.py").write_text(oversize_payload)

    directive_body = " sources:\n - path: src\n include: ['**/*.py']\n max_bytes_per_file: 10\n"
    parsed, _ = _make_parsed(directive_body, tmp_path)

    # Capture at INFO so DEBUG-level skip messages are filtered out.
    with caplog.at_level(logging.INFO, logger="dlm.directives.expand"):
        result = expand_sources(parsed, base_path=tmp_path)  # type: ignore[arg-type]

    # The per-directive summary still records every oversize skip.
    assert result.provenance[0].skipped_over_size == 5

    # Nothing at INFO or above may have been emitted for the skips.
    noisy_records = []
    for record in caplog.records:
        if record.levelno >= logging.INFO:
            noisy_records.append(record)
    assert noisy_records == [], (
        f"per-file skip should not emit INFO records, got: {[r.message for r in noisy_records]}"
    )


def test_binary_file_skipped(tmp_path: Path) -> None:
src = tmp_path / "src"
src.mkdir()
Expand Down
Loading