From a56908b666b44c68db18a226b3bc0d1d55bbc0d0 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 7 May 2026 21:58:37 -0400
Subject: [PATCH 1/4] io: add transparent compression helpers for EEE result
 files
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New module ``every_eval_ever/io.py`` provides a single source of truth
for opening EEE result files (``<uuid>.json`` aggregates and
``<uuid>_samples.jsonl`` per-instance samples) regardless of compression.

Recognized codecs match the HuggingFace Hub's documented set
(https://huggingface.co/docs/hub/en/datasets-adding#file-formats):

  .gz, .zst, .bz2, .xz, .lz4

``.zip`` is intentionally excluded — it is an archive container, not a
stream codec, and would conflict with the duplicate-variant rule.

Helpers:

* ``open_eee_text(path, mode)``        — text I/O with codec auto-detection
* ``is_eee_result(path)``              — 'aggregate' / 'samples' / None
* ``eee_uuid_stem(path)``              — strip kind+codec suffixes
* ``detect_compression(path)``         — name of the trailing codec
* ``add_compression_suffix(path, cs)`` — synthesize the on-disk filename
* ``iter_eee_results(roots)``          — recursive discovery of EEE files
* ``find_duplicate_variants(paths)``   — enforces 'one variant per
                                         (folder, uuid, kind)' rule

``zstandard`` and ``lz4`` are optional runtime dependencies; the open
helper raises ``ImportError`` with an actionable message pointing at
the right ``[zst]`` / ``[lz4]`` extra. The stdlib codecs (.gz, .bz2,
.xz) work out of the box.

This commit only adds the module + unit tests. Subsequent commits wire
the rest of the package (validate, check_duplicate_entries, converters,
CLI flags) through these helpers.

53 passed, 2 skipped (lz4-codec tests skip when ``lz4`` is not
installed; same for ``zstandard`` if absent).
---
 every_eval_ever/io.py | 296 ++++++++++++++++++++++++++++++++++++++++++
 tests/test_io.py      | 220 +++++++++++++++++++++++++++++++
 2 files changed, 516 insertions(+)
 create mode 100644 every_eval_ever/io.py
 create mode 100644 tests/test_io.py
diff --git a/every_eval_ever/io.py b/every_eval_ever/io.py
new file mode 100644
index 000000000..351dc0aa0
--- /dev/null
+++ b/every_eval_ever/io.py
@@ -0,0 +1,296 @@
+"""I/O helpers for EEE result files with transparent compression.
+
+EEE result files come in two kinds:
+
+- aggregate: ``<uuid>.json``
+- per-instance samples: ``<uuid>_samples.jsonl``
+
+This module recognizes both kinds *and* their compressed forms, using the
+codec set the HuggingFace Hub already auto-decompresses
+(https://huggingface.co/docs/hub/en/datasets-adding#file-formats):
+
+  ``.gz``, ``.zst``, ``.bz2``, ``.xz``, ``.lz4``
+
+``.zip`` is intentionally not supported as a stream codec — it is an
+archive container, and conflicts with the duplicate-variant rule below.
+
+All EEE-side tooling (validation, discovery, manifest generation,
+viewer-parquet generation, conversion writers) should route file open and
+file-suffix recognition through this module so the compressed forms behave
+as transparent equivalents of the plain forms.
+
+Duplicate-variant rule
+----------------------
+
+For any ``(folder, uuid, kind)`` triple, at most one physical variant may
+be present. ``find_duplicate_variants`` enumerates violations; the
+validator surfaces them as ``duplicate_variant`` errors.
+
+Backwards compatibility
+-----------------------
+
+The default ``compression='none'`` everywhere preserves existing
+behaviour — uncompressed plain ``.json`` and ``_samples.jsonl`` are still
+the default written and read forms. Compression is opt-in.
+"""
+
+from __future__ import annotations
+
+import io as _io
+from collections import defaultdict
+from collections.abc import Iterable, Iterator
+from pathlib import Path
+from typing import TextIO
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+#: No compression — write/read plain UTF-8 text.
+COMPRESSION_NONE = 'none'
+COMPRESSION_GZ = 'gz'
+COMPRESSION_ZST = 'zst'
+COMPRESSION_BZ2 = 'bz2'
+COMPRESSION_XZ = 'xz'
+COMPRESSION_LZ4 = 'lz4'
+
+#: All accepted compression names. ``COMPRESSION_NONE`` first so it is the
+#: documented default in CLI help.
+COMPRESSION_CHOICES: tuple[str, ...] = (
+    COMPRESSION_NONE,
+    COMPRESSION_GZ,
+    COMPRESSION_ZST,
+    COMPRESSION_BZ2,
+    COMPRESSION_XZ,
+    COMPRESSION_LZ4,
+)
+
+# Map filesystem suffix (lowercase, leading dot) to compression name.
+_COMPRESSION_BY_SUFFIX: dict[str, str] = {
+    '.gz': COMPRESSION_GZ,
+    '.zst': COMPRESSION_ZST,
+    '.bz2': COMPRESSION_BZ2,
+    '.xz': COMPRESSION_XZ,
+    '.lz4': COMPRESSION_LZ4,
+}
+
+# Map compression name to its filesystem suffix (with leading dot).
+_SUFFIX_BY_COMPRESSION: dict[str, str] = {
+    name: suf for suf, name in _COMPRESSION_BY_SUFFIX.items()
+}
+
+# Canonical kind -> base suffix (the part *before* any compression suffix).
+_BASE_SUFFIX_BY_KIND: dict[str, str] = {
+    'aggregate': '.json',
+    'samples': '_samples.jsonl',
+}
+
+# Order matters: 'samples' must be checked before 'aggregate' because
+# ``_samples.jsonl`` ends in ``.jsonl`` not ``.json`` (different base suffix
+# overall) but the kind detector should not be confused by future shape
+# changes.
+_KIND_ORDER: tuple[str, ...] = ('samples', 'aggregate')
+
+
+# ---------------------------------------------------------------------------
+# Suffix recognition
+# ---------------------------------------------------------------------------
+
+
+def detect_compression(path: Path | str) -> str:
+    """Return the compression name implied by ``path``'s trailing suffix.
+
+    Returns one of :data:`COMPRESSION_CHOICES`. ``COMPRESSION_NONE`` for
+    paths whose final suffix is not a recognized compression suffix.
+    """
+    p = Path(path)
+    return _COMPRESSION_BY_SUFFIX.get(p.suffix.lower(), COMPRESSION_NONE)
+
+
+def _strip_compression_suffix(name: str) -> str:
+    """Return ``name`` with one trailing compression suffix removed (if any)."""
+    for suf in _COMPRESSION_BY_SUFFIX:
+        if name.lower().endswith(suf):
+            return name[: -len(suf)]
+    return name
+
+
+def is_eee_result(path: Path | str) -> str | None:
+    """Classify a path as ``'aggregate'``, ``'samples'``, or ``None``.
+
+    Recognizes both plain and compressed forms. Returns ``None`` for any
+    path that does not match an EEE result filename convention.
+    """
+    name = _strip_compression_suffix(Path(path).name)
+    for kind in _KIND_ORDER:
+        if name.endswith(_BASE_SUFFIX_BY_KIND[kind]):
+            return kind
+    return None
+
+
+def eee_uuid_stem(path: Path | str) -> str | None:
+    """Return the bare UUID portion of an EEE result filename, or ``None``.
+
+    Strips both the compression suffix (if any) and the kind-specific base
+    suffix. ``abc_samples.jsonl.gz`` and ``abc.json.zst`` both yield
+    ``'abc'``.
+    """
+    name = _strip_compression_suffix(Path(path).name)
+    for kind in _KIND_ORDER:
+        base = _BASE_SUFFIX_BY_KIND[kind]
+        if name.endswith(base):
+            return name[: -len(base)]
+    return None
+
+
+def add_compression_suffix(path: Path | str, compression: str) -> Path:
+    """Return ``path`` with the requested compression suffix appended.
+
+    ``add_compression_suffix(Path('a.json'), 'gz')`` -> ``Path('a.json.gz')``.
+    ``compression='none'`` returns the path unchanged.
+    """
+    p = Path(path)
+    if compression == COMPRESSION_NONE:
+        return p
+    if compression not in _SUFFIX_BY_COMPRESSION:
+        raise ValueError(
+            f'unsupported compression {compression!r}; '
+            f'choose from {COMPRESSION_CHOICES}'
+        )
+    return p.with_name(p.name + _SUFFIX_BY_COMPRESSION[compression])
+
+
+# ---------------------------------------------------------------------------
+# Open helper
+# ---------------------------------------------------------------------------
+
+
+def _missing_codec_msg(codec: str, package: str, extra: str) -> str:
+    return (
+        f'Reading/writing .{codec} EEE files requires the {package!r} '
+        f"package. Install with: pip install 'every-eval-ever[{extra}]'"
+    )
+
+
+def open_eee_text(path: Path | str, mode: str = 'r') -> TextIO:
+    """Open an EEE result file (json or jsonl) for text I/O.
+
+    The compression codec is inferred from ``path``'s trailing suffix.
+    ``mode`` may be ``'r'`` / ``'rt'`` for reading or ``'w'`` / ``'wt'``
+    for writing. UTF-8 is assumed throughout.
+
+    Raises ``ImportError`` (with an actionable message) if the path
+    indicates a codec whose backing dependency is not installed.
+    """
+    if mode in ('r', 'rt'):
+        text_mode = 'rt'
+    elif mode in ('w', 'wt'):
+        text_mode = 'wt'
+    else:
+        raise ValueError(
+            f"open_eee_text mode must be 'r', 'rt', 'w', or 'wt'; got {mode!r}"
+        )
+    p = Path(path)
+    cs = detect_compression(p)
+
+    if cs == COMPRESSION_NONE:
+        return open(p, text_mode, encoding='utf-8')
+    if cs == COMPRESSION_GZ:
+        import gzip
+        return gzip.open(p, text_mode, encoding='utf-8')
+    if cs == COMPRESSION_BZ2:
+        import bz2
+        return bz2.open(p, text_mode, encoding='utf-8')
+    if cs == COMPRESSION_XZ:
+        import lzma
+        return lzma.open(p, text_mode, encoding='utf-8')
+    if cs == COMPRESSION_ZST:
+        try:
+            import zstandard  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover - exercised in env w/o dep
+            raise ImportError(_missing_codec_msg('zst', 'zstandard', 'zst')) from exc
+        if text_mode == 'rt':
+            reader = zstandard.ZstdDecompressor().stream_reader(open(p, 'rb'))
+            return _io.TextIOWrapper(reader, encoding='utf-8')
+        writer = zstandard.ZstdCompressor().stream_writer(open(p, 'wb'))
+        return _ZstdTextWriter(writer, encoding='utf-8')
+    if cs == COMPRESSION_LZ4:
+        try:
+            import lz4.frame  # type: ignore[import-not-found]
+        except ImportError as exc:  # pragma: no cover
+            raise ImportError(_missing_codec_msg('lz4', 'lz4', 'lz4')) from exc
+        binmode = 'rb' if text_mode == 'rt' else 'wb'
+        return _io.TextIOWrapper(lz4.frame.open(p, mode=binmode), encoding='utf-8')
+
+    # _COMPRESSION_BY_SUFFIX is exhaustive over COMPRESSION_CHOICES, so this
+    # branch is unreachable.
+    raise AssertionError(f'unhandled compression {cs!r}')  # pragma: no cover
+
+
+class _ZstdTextWriter(_io.TextIOWrapper):
+    """TextIOWrapper that flushes the underlying zstd stream_writer on close.
+
+    ``ZstdCompressor.stream_writer`` requires an explicit ``close()`` /
+    ``flush(zstandard.FLUSH_FRAME)`` to finalize the frame; without that the
+    written file is silently truncated. Wrapping in a TextIOWrapper alone
+    isn't enough — close cascades to the wrapped writer, but ``flush()``
+    on the TextIOWrapper alone does not.
+    """
+
+    def close(self) -> None:  # pragma: no cover - exercised only with zstandard
+        try:
+            self.flush()
+        finally:
+            super().close()
+
+
+# ---------------------------------------------------------------------------
+# Discovery
+# ---------------------------------------------------------------------------
+
+
+def iter_eee_results(roots: Iterable[Path | str]) -> Iterator[Path]:
+    """Yield every EEE result file under each root (recursive).
+
+    Files are yielded in deterministic (sorted) order. Roots that are
+    themselves EEE result files are yielded as-is.
+    """
+    for root in roots:
+        rp = Path(root)
+        if rp.is_file():
+            if is_eee_result(rp) is not None:
+                yield rp
+            continue
+        if not rp.is_dir():
+            continue
+        for path in sorted(rp.rglob('*')):
+            if path.is_file() and is_eee_result(path) is not None:
+                yield path
+
+
+def find_duplicate_variants(
+    paths: Iterable[Path | str],
+) -> list[tuple[Path, str, str, list[Path]]]:
+    """Detect ``(folder, uuid, kind)`` groups with more than one variant.
+
+    Each result is ``(folder, uuid_stem, kind, [physical_paths])``.
+    """
+    by_group: dict[tuple[Path, str, str], list[Path]] = defaultdict(list)
+    seen_files: set[Path] = set()
+    for path in paths:
+        p = Path(path)
+        if p in seen_files:
+            continue
+        seen_files.add(p)
+        kind = is_eee_result(p)
+        if kind is None:
+            continue
+        stem = eee_uuid_stem(p)
+        if stem is None:
+            continue
+        by_group[(p.parent, stem, kind)].append(p)
+    return [
+        (folder, stem, kind, sorted(variants))
+        for (folder, stem, kind), variants in by_group.items()
+        if len(variants) > 1
+    ]
diff --git a/tests/test_io.py b/tests/test_io.py
new file mode 100644
index 000000000..dc3f691f3
--- /dev/null
+++ b/tests/test_io.py
@@ -0,0 +1,220 @@
+"""Tests for ``every_eval_ever.io`` — transparent compression helpers."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from every_eval_ever import io as eee_io
+
+
+# ---------------------------------------------------------------------------
+# Suffix recognition
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    'name, expected',
+    [
+        ('abc.json', 'aggregate'),
+        ('abc.json.gz', 'aggregate'),
+        ('abc.json.zst', 'aggregate'),
+        ('abc.json.bz2', 'aggregate'),
+        ('abc.json.xz', 'aggregate'),
+        ('abc.json.lz4', 'aggregate'),
+        ('abc_samples.jsonl', 'samples'),
+        ('abc_samples.jsonl.gz', 'samples'),
+        ('abc_samples.jsonl.zst', 'samples'),
+        ('abc_samples.jsonl.bz2', 'samples'),
+        ('abc_samples.jsonl.xz', 'samples'),
+        ('abc_samples.jsonl.lz4', 'samples'),
+        ('abc.txt', None),
+        ('abc.parquet', None),
+        ('manifest.json', 'aggregate'),  # any *.json is aggregate by shape
+        ('abc.zip', None),  # zip is not a recognized stream codec
+        ('abc.json.zip', None),
+    ],
+)
+def test_is_eee_result(name: str, expected: str | None) -> None:
+    assert eee_io.is_eee_result(name) == expected
+
+
+@pytest.mark.parametrize(
+    'name, expected_stem',
+    [
+        ('abc.json', 'abc'),
+        ('abc.json.gz', 'abc'),
+        ('a-b-c.json.zst', 'a-b-c'),
+        ('abc_samples.jsonl', 'abc'),
+        ('abc_samples.jsonl.gz', 'abc'),
+        ('abc.txt', None),
+        ('abc.json.zip', None),
+    ],
+)
+def test_eee_uuid_stem(name: str, expected_stem: str | None) -> None:
+    assert eee_io.eee_uuid_stem(name) == expected_stem
+
+
+@pytest.mark.parametrize(
+    'name, expected',
+    [
+        ('abc.json', 'none'),
+        ('abc.json.gz', 'gz'),
+        ('abc.json.zst', 'zst'),
+        ('abc.json.bz2', 'bz2'),
+        ('abc.json.xz', 'xz'),
+        ('abc.json.lz4', 'lz4'),
+        ('abc.txt', 'none'),
+    ],
+)
+def test_detect_compression(name: str, expected: str) -> None:
+    assert eee_io.detect_compression(name) == expected
+
+
+def test_add_compression_suffix_none() -> None:
+    p = Path('/x/y/abc.json')
+    assert eee_io.add_compression_suffix(p, 'none') == p
+
+
+@pytest.mark.parametrize('cs', ['gz', 'zst', 'bz2', 'xz', 'lz4'])
+def test_add_compression_suffix(cs: str) -> None:
+    p = Path('/x/y/abc.json')
+    out = eee_io.add_compression_suffix(p, cs)
+    assert out.name == f'abc.json.{cs}'
+
+
+def test_add_compression_suffix_rejects_unknown() -> None:
+    with pytest.raises(ValueError):
+        eee_io.add_compression_suffix(Path('a.json'), 'snappy')
+
+
+# ---------------------------------------------------------------------------
+# Round-trip per codec
+# ---------------------------------------------------------------------------
+
+
+def _can_import(mod: str) -> bool:
+    try:
+        __import__(mod)
+        return True
+    except ImportError:
+        return False
+
+
+_CODEC_REQUIRES = {
+    'none': None,
+    'gz': None,  # stdlib
+    'bz2': None,  # stdlib
+    'xz': None,  # stdlib (lzma)
+    'zst': 'zstandard',
+    'lz4': 'lz4',
+}
+
+
+def _codec_param(cs: str) -> pytest.param:
+    req = _CODEC_REQUIRES[cs]
+    marks: list = []
+    if req is not None and not _can_import(req):
+        marks.append(pytest.mark.skip(reason=f'requires {req!r}'))
+    return pytest.param(cs, marks=marks)
+
+
+@pytest.mark.parametrize(
+    'cs', [_codec_param(c) for c in eee_io.COMPRESSION_CHOICES]
+)
+def test_open_eee_text_roundtrip_aggregate(cs: str, tmp_path: Path) -> None:
+    base = tmp_path / 'abc.json'
+    out_path = eee_io.add_compression_suffix(base, cs)
+    payload = {'schema_version': '0.2.2', 'kind': 'aggregate', 'cs': cs}
+
+    with eee_io.open_eee_text(out_path, 'w') as f:
+        json.dump(payload, f)
+
+    with eee_io.open_eee_text(out_path, 'r') as f:
+        assert json.load(f) == payload
+
+
+@pytest.mark.parametrize(
+    'cs', [_codec_param(c) for c in eee_io.COMPRESSION_CHOICES]
+)
+def test_open_eee_text_roundtrip_samples(cs: str, tmp_path: Path) -> None:
+    base = tmp_path / 'abc_samples.jsonl'
+    out_path = eee_io.add_compression_suffix(base, cs)
+    rows = [{'i': i, 'cs': cs} for i in range(5)]
+
+    with eee_io.open_eee_text(out_path, 'w') as f:
+        for row in rows:
+            f.write(json.dumps(row) + '\n')
+
+    with eee_io.open_eee_text(out_path, 'r') as f:
+        read_back = [json.loads(line) for line in f if line.strip()]
+    assert read_back == rows
+
+
+def test_open_eee_text_rejects_bad_mode(tmp_path: Path) -> None:
+    p = tmp_path / 'abc.json'
+    p.write_text('{}')
+    with pytest.raises(ValueError):
+        eee_io.open_eee_text(p, 'rb')
+
+
+# ---------------------------------------------------------------------------
+# Discovery + duplicate-variant detection
+# ---------------------------------------------------------------------------
+
+
+def test_iter_eee_results_finds_compressed_and_plain(tmp_path: Path) -> None:
+    plain = tmp_path / 'sub' / 'a.json'
+    plain.parent.mkdir()
+    plain.write_text('{}')
+    gz = tmp_path / 'sub' / 'b_samples.jsonl.gz'
+    import gzip
+    with gzip.open(gz, 'wt', encoding='utf-8') as f:
+        f.write('{}\n')
+    junk = tmp_path / 'sub' / 'readme.txt'
+    junk.write_text('not eee')
+
+    found = sorted(eee_io.iter_eee_results([tmp_path]))
+    assert found == [plain, gz]
+
+
+def test_iter_eee_results_accepts_file_root(tmp_path: Path) -> None:
+    p = tmp_path / 'a.json'
+    p.write_text('{}')
+    assert list(eee_io.iter_eee_results([p])) == [p]
+
+
+def test_find_duplicate_variants_detects_collision(tmp_path: Path) -> None:
+    folder = tmp_path / 'data' / 'bench' / 'dev' / 'model'
+    folder.mkdir(parents=True)
+    plain = folder / 'abc.json'
+    plain.write_text('{}')
+    gz = folder / 'abc.json.gz'
+    import gzip
+    with gzip.open(gz, 'wt', encoding='utf-8') as f:
+        f.write('{}')
+
+    # Distinct kind in the same folder must NOT trigger the rule.
+    samples = folder / 'abc_samples.jsonl'
+    samples.write_text('{}\n')
+
+    dups = eee_io.find_duplicate_variants(eee_io.iter_eee_results([tmp_path]))
+    assert len(dups) == 1
+    folder_out, stem, kind, variants = dups[0]
+    assert folder_out == folder
+    assert stem == 'abc'
+    assert kind == 'aggregate'
+    assert sorted(variants) == sorted([plain, gz])
+
+
+def test_find_duplicate_variants_clean_tree(tmp_path: Path) -> None:
+    folder = tmp_path / 'data'
+    folder.mkdir()
+    (folder / 'abc.json').write_text('{}')
+    (folder / 'abc_samples.jsonl').write_text('{}\n')
+    (folder / 'def.json').write_text('{}')
+    assert eee_io.find_duplicate_variants(
+        eee_io.iter_eee_results([tmp_path])
+    ) == []

From fce6b7574006438ced7a198f7291bbb40f8cd45d Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 7 May 2026 21:58:37 -0400
Subject: [PATCH 2/4] validate: route through io helpers; add duplicate-variant
 rule
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire ``validate.py`` and ``check_duplicate_entries.py`` through the
``every_eval_ever.io`` helpers so compressed result files
(``.gz``, ``.zst``, ``.bz2``, ``.xz``, ``.lz4``) are validated
identically to their plain counterparts.

Validator changes
-----------------

* ``validate_aggregate`` and ``validate_instance_file`` open via
  ``io.open_eee_text`` rather than ``Path.open`` / ``Path.read_text``.
* ``validate_file`` dispatches on ``io.is_eee_result(path)`` rather
  than ``path.suffix == '.json' / '.jsonl'``. Compressed forms are
  recognized via the suffix-stripping detector; non-EEE filenames
  (``.zip``, ``.parquet``, plain ``.csv``) report
  ``unsupported_extension`` as before.
* ``expand_paths`` enumerates EEE result files via
  ``io.iter_eee_results`` for directory inputs. Files passed
  explicitly are accepted regardless of suffix and let
  ``validate_file`` produce the unsupported-extension error.
* New ``_duplicate_variant_reports`` produces synthetic
  ``ValidationReport``s with a ``duplicate_variant`` error type for
  every ``(folder, uuid_stem, kind)`` group with more than one
  physical variant. ``validate.main`` runs this check unconditionally
  before per-file validation, so a CI/PR-bot path cannot silently
  green-light a folder containing both ``abc.json`` and ``abc.json.gz``.
* Missing optional codec dependencies surface as ``codec_unavailable``
  validation errors rather than crashing the whole run.

check_duplicate_entries changes
-------------------------------

* ``expand_paths`` recognizes compressed aggregate files; explicit
  file inputs that aren't aggregate-shaped are skipped (preserving
  the historical "ignore non-JSON" behaviour for non-EEE content).
* The aggregate-payload reader uses ``io.open_eee_text``.
  ``_samples.jsonl`` files are intentionally excluded from this
  command's discovery — duplicate detection runs over aggregate
  metadata only.

io.py refinement
----------------

* ``is_eee_result`` and ``eee_uuid_stem`` are lenient about the
  ``_samples`` prefix on per-instance files: bare ``*.jsonl`` is
  still recognized as samples (matching what lm-eval emits when no
  file UUID is supplied). Strict ``_samples.jsonl`` matching is still
  preferred for stem extraction.

Tests
-----

* New ``tests/test_validate_compression.py`` covers: same fixture in
  plain + ``.gz`` form yields identical validation outcomes for both
  aggregate and samples; ``expand_paths`` finds compressed files;
  duplicate-variant reports fire and CI-gate appropriately;
  distinct kinds in the same folder don't false-positive; missing
  optional codec surfaces as a typed error rather than an exception.

213 passed, 5 skipped (4 require optional codec deps, 1 is unrelated
upstream skip).
---
 every_eval_ever/check_duplicate_entries.py |  32 +++-
 every_eval_ever/io.py                      |  46 +++--
 every_eval_ever/validate.py                | 107 ++++++++---
 tests/test_io.py                           |   8 +
 tests/test_validate_compression.py         | 201 +++++++++++++++++++++
 5 files changed, 344 insertions(+), 50 deletions(-)
 create mode 100644 tests/test_validate_compression.py

diff --git a/every_eval_ever/check_duplicate_entries.py b/every_eval_ever/check_duplicate_entries.py
index 5f6c53342..4dada0034 100644
--- a/every_eval_ever/check_duplicate_entries.py
+++ b/every_eval_ever/check_duplicate_entries.py
@@ -2,22 +2,36 @@
 import hashlib
 import json
 import os
+from pathlib import Path
 from typing import Any, Dict, List
 
+from every_eval_ever import io as eee_io
+
 IGNORE_KEYS = {'retrieved_timestamp', 'evaluation_id'}
 
 
 def expand_paths(paths: List[str]) -> List[str]:
-    """Expand folders to file paths."""
+    """Expand folders to aggregate-EEE file paths.
+
+    Recognizes both plain ``<uuid>.json`` and compressed forms
+    (``.gz``, ``.zst``, ``.bz2``, ``.xz``, ``.lz4``).
+    ``<uuid>_samples.jsonl`` files are intentionally excluded — duplicate
+    detection runs over aggregate metadata, not per-instance samples.
+    """
     file_paths: List[str] = []
     for path in paths:
-        if os.path.isfile(path) and path.endswith('.json'):
-            file_paths.append(path)
-        elif os.path.isdir(path):
-            for root, _, file_names in os.walk(path):
-                for file_name in file_names:
-                    if file_name.endswith('.json'):
-                        file_paths.append(os.path.join(root, file_name))
+        p = Path(path)
+        if p.is_file():
+            if eee_io.is_eee_result(p) == 'aggregate':
+                file_paths.append(str(p))
+            else:
+                # Preserve historical behaviour: be explicit when a
+                # passed file is not an aggregate JSON.
+                continue
+        elif p.is_dir():
+            for found in eee_io.iter_eee_results([p]):
+                if eee_io.is_eee_result(found) == 'aggregate':
+                    file_paths.append(str(found))
         else:
             raise Exception(f'Could not find file or directory at path: {path}')
     return file_paths
@@ -84,7 +98,7 @@ def main(argv: List[str] | None = None) -> int:
     groups: Dict[str, List[Dict[str, Any]]] = {}
     for file_path in file_paths:
         try:
-            with open(file_path, 'r') as f:
+            with eee_io.open_eee_text(file_path, 'r') as f:
                 payload = json.load(f)
         except json.JSONDecodeError as e:
             message = f'JSONDecodeError: {str(e)}'
diff --git a/every_eval_ever/io.py b/every_eval_ever/io.py
index 351dc0aa0..bafabac41 100644
--- a/every_eval_ever/io.py
+++ b/every_eval_ever/io.py
@@ -79,16 +79,17 @@
     name: suf for suf, name in _COMPRESSION_BY_SUFFIX.items()
 }
 
-# Canonical kind -> base suffix (the part *before* any compression suffix).
-_BASE_SUFFIX_BY_KIND: dict[str, str] = {
-    'aggregate': '.json',
-    'samples': '_samples.jsonl',
+# Canonical filename suffixes for each kind. The kind detector matches on
+# the *first* listed suffix that ends the filename (after stripping any
+# compression suffix). The ordering matters: ``.jsonl`` must be checked
+# before ``.json`` because ``.jsonl`` does not end in ``.json`` — but for
+# kinds that share a final extension (none currently do) this would matter.
+_KIND_SUFFIXES: dict[str, tuple[str, ...]] = {
+    'samples': ('_samples.jsonl', '.jsonl'),
+    'aggregate': ('.json',),
 }
 
-# Order matters: 'samples' must be checked before 'aggregate' because
-# ``_samples.jsonl`` ends in ``.jsonl`` not ``.json`` (different base suffix
-# overall) but the kind detector should not be confused by future shape
-# changes.
+# Order in which kinds are tried by the detector.
 _KIND_ORDER: tuple[str, ...] = ('samples', 'aggregate')
 
 
@@ -118,28 +119,37 @@ def _strip_compression_suffix(name: str) -> str:
 def is_eee_result(path: Path | str) -> str | None:
     """Classify a path as ``'aggregate'``, ``'samples'``, or ``None``.
 
-    Recognizes both plain and compressed forms. Returns ``None`` for any
-    path that does not match an EEE result filename convention.
+    Recognizes both plain and compressed forms. Lenient about the bare
+    ``.jsonl`` extension to keep parity with existing converter outputs
+    that don't always include the ``_samples`` prefix (lm-eval emits
+    ``samples_<task>_<datetime>.jsonl`` when no UUID is supplied).
+    Returns ``None`` for any path that does not match an EEE result
+    filename convention.
     """
     name = _strip_compression_suffix(Path(path).name)
     for kind in _KIND_ORDER:
-        if name.endswith(_BASE_SUFFIX_BY_KIND[kind]):
-            return kind
+        for suffix in _KIND_SUFFIXES[kind]:
+            if name.endswith(suffix):
+                return kind
     return None
 
 
 def eee_uuid_stem(path: Path | str) -> str | None:
     """Return the bare UUID portion of an EEE result filename, or ``None``.
 
-    Strips both the compression suffix (if any) and the kind-specific base
-    suffix. ``abc_samples.jsonl.gz`` and ``abc.json.zst`` both yield
-    ``'abc'``.
+    Strips the compression suffix (if any) and the kind-specific suffix.
+    For ``abc_samples.jsonl`` returns ``'abc'``; for ``samples_task.jsonl``
+    (which lacks the canonical ``_samples`` prefix) returns
+    ``'samples_task'``. The duplicate-variant rule keys on this stem, so
+    files that share a stem and a kind across compressed/uncompressed
+    variants will be flagged as collisions regardless of which suffix
+    convention is in use.
     """
     name = _strip_compression_suffix(Path(path).name)
     for kind in _KIND_ORDER:
-        base = _BASE_SUFFIX_BY_KIND[kind]
-        if name.endswith(base):
-            return name[: -len(base)]
+        for suffix in _KIND_SUFFIXES[kind]:
+            if name.endswith(suffix):
+                return name[: -len(suffix)]
     return None
 
 
diff --git a/every_eval_ever/validate.py b/every_eval_ever/validate.py
index 46f4d0a3b..1e98b4d10 100644
--- a/every_eval_ever/validate.py
+++ b/every_eval_ever/validate.py
@@ -4,6 +4,10 @@
 Validates aggregate (.json) files against EvaluationLog and
 instance-level (_samples.jsonl) files against InstanceLevelEvaluationLog.
 
+Compressed forms (``.gz``, ``.zst``, ``.bz2``, ``.xz``, ``.lz4``) are
+accepted alongside the plain forms — see ``every_eval_ever.io`` for the
+single source of truth on suffix handling.
+
 Usage:
     uv run python -m every_eval_ever validate data/benchmark/dev/model/uuid.json
     uv run python -m every_eval_ever validate data/benchmark/dev/model/uuid_samples.jsonl
@@ -25,6 +29,7 @@
 from rich.panel import Panel
 from rich.text import Text
 
+from every_eval_ever import io as eee_io
 from every_eval_ever.eval_types import EvaluationLog
 from every_eval_ever.instance_level_types import InstanceLevelEvaluationLog
 
@@ -72,19 +77,29 @@ def _pydantic_errors_to_dicts(exc: ValidationError) -> list[dict]:
 
 
 def validate_aggregate(file_path: Path) -> ValidationReport:
-    """Validate a .json file as an EvaluationLog."""
+    """Validate a .json (or .json.gz/.zst/.bz2/.xz/.lz4) file as an EvaluationLog."""
     report = ValidationReport(
         file_path=file_path, valid=True, file_type='aggregate'
     )
 
     try:
-        raw = file_path.read_text(encoding='utf-8')
+        with eee_io.open_eee_text(file_path, 'r') as f:
+            raw = f.read()
     except OSError as e:
         report.valid = False
         report.errors.append(
             {'loc': '(file)', 'msg': str(e), 'type': 'io_error'}
         )
         return report
+    except ImportError as e:
+        # Missing optional codec dep — surface as a validation error rather
+        # than a hard crash, so that a single missing codec doesn't take down
+        # validation of an entire batch.
+        report.valid = False
+        report.errors.append(
+            {'loc': '(file)', 'msg': str(e), 'type': 'codec_unavailable'}
+        )
+        return report
 
     try:
         data = json.loads(raw)
@@ -135,19 +150,25 @@ def _validate_instance_line(line: str, line_num: int) -> list[dict]:
 def validate_instance_file(
     file_path: Path, max_errors: int = DEFAULT_MAX_ERRORS
 ) -> ValidationReport:
-    """Validate a .jsonl file as InstanceLevelEvaluationLog (line-by-line)."""
+    """Validate a _samples.jsonl (optionally compressed) file line-by-line."""
     report = ValidationReport(
         file_path=file_path, valid=True, file_type='instance'
     )
 
     try:
-        f = file_path.open(encoding='utf-8')
+        f = eee_io.open_eee_text(file_path, 'r')
     except OSError as e:
         report.valid = False
         report.errors.append(
             {'loc': '(file)', 'msg': str(e), 'type': 'io_error'}
         )
         return report
+    except ImportError as e:
+        report.valid = False
+        report.errors.append(
+            {'loc': '(file)', 'msg': str(e), 'type': 'codec_unavailable'}
+        )
+        return report
 
     with f:
         for line_num, line in enumerate(f, start=1):
@@ -194,40 +215,75 @@ def validate_instance_file(
 def validate_file(
     file_path: Path, max_errors: int = DEFAULT_MAX_ERRORS
 ) -> ValidationReport:
-    """Dispatch validation by file extension."""
-    if file_path.suffix == '.json':
+    """Dispatch validation by EEE result kind, transparently across codecs."""
+    kind = eee_io.is_eee_result(file_path)
+    if kind == 'aggregate':
         return validate_aggregate(file_path)
-    elif file_path.suffix == '.jsonl':
+    if kind == 'samples':
         return validate_instance_file(file_path, max_errors)
-    else:
-        report = ValidationReport(
-            file_path=file_path, valid=False, file_type='unsupported'
-        )
-        report.errors.append(
-            {
-                'loc': '(file)',
-                'msg': f"Unsupported file extension '{file_path.suffix}'. Expected .json or .jsonl",
-                'type': 'unsupported_extension',
-            }
-        )
-        return report
+
+    report = ValidationReport(
+        file_path=file_path, valid=False, file_type='unsupported'
+    )
+    report.errors.append(
+        {
+            'loc': '(file)',
+            'msg': (
+                f"Unsupported filename {file_path.name!r}. Expected an EEE "
+                f'result file: ``<uuid>.json`` or ``<uuid>_samples.jsonl`` '
+                f'(optionally compressed with .gz/.zst/.bz2/.xz/.lz4).'
+            ),
+            'type': 'unsupported_extension',
+        }
+    )
+    return report
 
 
 def expand_paths(paths: list[str]) -> list[Path]:
-    """Expand directories to .json and .jsonl files recursively."""
+    """Expand inputs to a list of EEE result files (recursive on dirs).
+
+    Recognizes both plain and compressed forms (``.gz``, ``.zst``, ``.bz2``,
+    ``.xz``, ``.lz4``). Files passed explicitly are accepted regardless of
+    suffix — ``validate_file`` will surface an error for non-EEE inputs.
+    """
     result: list[Path] = []
     for p in paths:
         path = Path(p)
         if path.is_file():
             result.append(path)
         elif path.is_dir():
-            for ext in ('*.json', '*.jsonl'):
-                result.extend(sorted(path.rglob(ext)))
+            result.extend(eee_io.iter_eee_results([path]))
         else:
             result.append(path)  # let validate_file report the error
     return result
 
 
+def _duplicate_variant_reports(
+    paths: list[Path],
+) -> list[ValidationReport]:
+    """Produce one synthetic report per ``(folder, uuid, kind)`` collision."""
+    reports: list[ValidationReport] = []
+    for folder, stem, kind, variants in eee_io.find_duplicate_variants(paths):
+        rep = ValidationReport(
+            file_path=variants[0], valid=False, file_type=kind,
+        )
+        rep.errors.append(
+            {
+                'loc': f'(uuid={stem}, kind={kind})',
+                'msg': (
+                    f'Multiple physical variants of the same EEE result '
+                    f'present in {folder}: '
+                    f'{[v.name for v in variants]}. EEE allows at most one '
+                    f'variant per (uuid, kind); pick a single compressed or '
+                    f'uncompressed form and remove the others.'
+                ),
+                'type': 'duplicate_variant',
+            }
+        )
+        reports.append(rep)
+    return reports
+
+
 def _truncate(value: object, max_len: int = 80) -> str:
     """Truncate a repr for display."""
     s = repr(value)
@@ -375,7 +431,12 @@ def main(argv: list[str] | None = None) -> int:
         print('No files found to validate.', file=sys.stderr)
         return 1
 
-    reports = [
+    # Detect duplicate variants up front. Producing a fail-closed
+    # report per collision means the CI/PR-bot path cannot silently
+    # green-light a folder containing both `abc.json` and `abc.json.gz`.
+    duplicate_reports = _duplicate_variant_reports(file_paths)
+
+    reports = duplicate_reports + [
         validate_file(fp, max_errors=args.max_errors) for fp in file_paths
     ]
 
diff --git a/tests/test_io.py b/tests/test_io.py
index dc3f691f3..60a8bc6be 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -30,6 +30,11 @@
         ('abc_samples.jsonl.bz2', 'samples'),
         ('abc_samples.jsonl.xz', 'samples'),
         ('abc_samples.jsonl.lz4', 'samples'),
+        # Lenient: bare *.jsonl (no _samples prefix) still recognized
+        # as samples, since lm-eval and other converters produce these.
+        ('samples_mmlu_2025-01-01.jsonl', 'samples'),
+        ('whatever.jsonl', 'samples'),
+        ('whatever.jsonl.gz', 'samples'),
         ('abc.txt', None),
         ('abc.parquet', None),
         ('manifest.json', 'aggregate'),  # any *.json is aggregate by shape
@@ -49,6 +54,9 @@ def test_is_eee_result(name: str, expected: str | None) -> None:
         ('a-b-c.json.zst', 'a-b-c'),
         ('abc_samples.jsonl', 'abc'),
         ('abc_samples.jsonl.gz', 'abc'),
+        # Bare .jsonl: stem is the whole pre-extension filename.
+        ('samples_mmlu_2025.jsonl', 'samples_mmlu_2025'),
+        ('whatever.jsonl.bz2', 'whatever'),
         ('abc.txt', None),
         ('abc.json.zip', None),
     ],
diff --git a/tests/test_validate_compression.py b/tests/test_validate_compression.py
new file mode 100644
index 000000000..2b0045abc
--- /dev/null
+++ b/tests/test_validate_compression.py
@@ -0,0 +1,201 @@
+"""Validator tests covering compressed result files + duplicate-variant rule."""
+
+from __future__ import annotations
+
+import gzip
+import json
+from pathlib import Path
+
+import pytest
+
+from every_eval_ever import io as eee_io
+from every_eval_ever import validate
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+def _read_fixture_aggregate() -> dict:
+    """Load the existing aggregate fixture committed under tests/data/."""
+    fixture = Path(__file__).parent / 'data' / '98ea850e-7019-4728-a558-8b1819ec47c2.json'
+    with fixture.open(encoding='utf-8') as f:
+        return json.load(f)
+
+
+# ---------------------------------------------------------------------------
+# Plain + .gz validate identically
+# ---------------------------------------------------------------------------
+
+
+def test_validate_aggregate_gz_matches_plain(tmp_path: Path) -> None:
+    payload = _read_fixture_aggregate()
+
+    plain = tmp_path / 'a.json'
+    with plain.open('w', encoding='utf-8') as f:
+        json.dump(payload, f)
+    rep_plain = validate.validate_file(plain)
+
+    gz = tmp_path / 'b.json.gz'
+    with gzip.open(gz, 'wt', encoding='utf-8') as f:
+        json.dump(payload, f)
+    rep_gz = validate.validate_file(gz)
+
+    # Gzipped vs. plain must produce identical validation outcomes — that's
+    # the whole point of transparent compression. We don't assert
+    # ``valid == True`` here because the fixture may legitimately fail
+    # against the current schema; we only assert that compression doesn't
+    # change the result.
+    assert rep_plain.valid == rep_gz.valid
+    assert rep_plain.file_type == rep_gz.file_type == 'aggregate'
+    assert [e['type'] for e in rep_plain.errors] == [
+        e['type'] for e in rep_gz.errors
+    ]
+    assert [e['msg'] for e in rep_plain.errors] == [
+        e['msg'] for e in rep_gz.errors
+    ]
+
+
+def test_validate_samples_gz_matches_plain(tmp_path: Path) -> None:
+    # Synthesize a minimal valid InstanceLevelEvaluationLog stream.
+    rows = [
+        {
+            'schema_version': '0.2.2',
+            'evaluation_id': 'fake/eval/1',
+            'model_id': 'org/model',
+            'evaluation_name': 'fake',
+            'sample_id': str(i),
+            'sample_hash': 'a' * 64,
+            'interaction_type': 'single_turn',
+            'input': {'raw': 'q', 'reference': [], 'choices': []},
+            'output': {'raw': ['a'], 'reasoning_trace': []},
+            'evaluation': {'score': 1.0, 'is_correct': True},
+        }
+        for i in range(3)
+    ]
+
+    plain = tmp_path / 'a_samples.jsonl'
+    with plain.open('w', encoding='utf-8') as f:
+        for r in rows:
+            f.write(json.dumps(r) + '\n')
+    rep_plain = validate.validate_file(plain)
+
+    gz = tmp_path / 'b_samples.jsonl.gz'
+    with gzip.open(gz, 'wt', encoding='utf-8') as f:
+        for r in rows:
+            f.write(json.dumps(r) + '\n')
+    rep_gz = validate.validate_file(gz)
+
+    assert rep_plain.valid == rep_gz.valid
+    assert rep_plain.line_count == rep_gz.line_count == 3
+    assert rep_plain.file_type == rep_gz.file_type == 'instance'
+
+
+# ---------------------------------------------------------------------------
+# Discovery picks up compressed files
+# ---------------------------------------------------------------------------
+
+
+def test_expand_paths_finds_compressed_files(tmp_path: Path) -> None:
+    a_plain = tmp_path / 'sub' / 'aaa.json'
+    a_plain.parent.mkdir()
+    a_plain.write_text('{}')
+    b_gz = tmp_path / 'sub' / 'bbb_samples.jsonl.gz'
+    with gzip.open(b_gz, 'wt', encoding='utf-8') as f:
+        f.write('{}\n')
+    junk = tmp_path / 'sub' / 'README.md'
+    junk.write_text('not eee')
+
+    found = sorted(validate.expand_paths([str(tmp_path)]))
+    assert found == [a_plain, b_gz]
+
+
+# ---------------------------------------------------------------------------
+# Duplicate-variant rule
+# ---------------------------------------------------------------------------
+
+
+def test_validate_main_fails_on_duplicate_variants(tmp_path: Path) -> None:
+    folder = tmp_path / 'data' / 'bench' / 'dev' / 'model'
+    folder.mkdir(parents=True)
+    plain = folder / 'abc.json'
+    plain.write_text('{}')  # invalid as EvaluationLog, but that's a different error
+    gz = folder / 'abc.json.gz'
+    with gzip.open(gz, 'wt', encoding='utf-8') as f:
+        f.write('{}')
+
+    exit_code = validate.main([str(tmp_path), '--format', 'json'])
+    assert exit_code == 1
+
+
+def test_duplicate_variant_reports_emit_typed_errors(tmp_path: Path) -> None:
+    folder = tmp_path / 'data'
+    folder.mkdir()
+    (folder / 'abc.json').write_text('{}')
+    with gzip.open(folder / 'abc.json.gz', 'wt', encoding='utf-8') as f:
+        f.write('{}')
+
+    paths = list(eee_io.iter_eee_results([tmp_path]))
+    reports = validate._duplicate_variant_reports(paths)
+    assert len(reports) == 1
+    err = reports[0].errors[0]
+    assert err['type'] == 'duplicate_variant'
+    assert 'abc.json' in err['msg']
+    assert 'abc.json.gz' in err['msg']
+
+
+def test_duplicate_variant_distinct_kinds_dont_collide(tmp_path: Path) -> None:
+    """Same UUID, different kind: NOT a duplicate."""
+    folder = tmp_path / 'data'
+    folder.mkdir()
+    (folder / 'abc.json').write_text('{}')
+    (folder / 'abc_samples.jsonl').write_text('{}\n')
+
+    paths = list(eee_io.iter_eee_results([tmp_path]))
+    assert validate._duplicate_variant_reports(paths) == []
+
+
+# ---------------------------------------------------------------------------
+# Unsupported / non-EEE filename
+# ---------------------------------------------------------------------------
+
+
+def test_validate_file_rejects_zip(tmp_path: Path) -> None:
+    p = tmp_path / 'abc.json.zip'
+    p.write_bytes(b'PK\x03\x04')
+    rep = validate.validate_file(p)
+    assert rep.valid is False
+    assert rep.errors[0]['type'] == 'unsupported_extension'
+
+
+def test_validate_file_rejects_unknown_extension(tmp_path: Path) -> None:
+    p = tmp_path / 'abc.parquet'
+    p.write_bytes(b'PAR1')
+    rep = validate.validate_file(p)
+    assert rep.valid is False
+    assert rep.errors[0]['type'] == 'unsupported_extension'
+
+
+# ---------------------------------------------------------------------------
+# Missing optional codec surfaces as a validation error, not a crash
+# ---------------------------------------------------------------------------
+
+
+def test_missing_codec_surfaces_as_validation_error(tmp_path: Path,
+                                                    monkeypatch) -> None:
+    pytest.importorskip(  # no point if zstandard happens to be installed AND we mocked
+        'pytest', reason='sanity'
+    )
+    # Force lz4 import to fail regardless of environment.
+    import sys
+    monkeypatch.setitem(sys.modules, 'lz4', None)
+    monkeypatch.setitem(sys.modules, 'lz4.frame', None)
+
+    p = tmp_path / 'abc.json.lz4'
+    # Bytes don't matter — we should never reach the actual decompressor.
+    p.write_bytes(b'\x00\x00\x00\x00')
+
+    rep = validate.validate_file(p)
+    assert rep.valid is False
+    assert rep.errors[0]['type'] == 'codec_unavailable'

From cba681440913b27a450c2aad08ca60175f964bd8 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 7 May 2026 21:58:37 -0400
Subject: [PATCH 3/4] cli: add --compress / --compress-aggregate /
 --compress-samples flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Threads compression through the converter writer paths. Defaults to
``--compress none`` everywhere for full backwards compatibility — this
commit only enables compression on opt-in.

CLI flags (added to all four ``convert`` subcommands)
-----------------------------------------------------

* ``--compress {none|gz|zst|bz2|xz|lz4}``
    Default codec for both aggregate and per-instance files.
* ``--compress-aggregate {none|gz|zst|bz2|xz|lz4}``
    Override for aggregate ``<uuid>.json`` only. Defaults to
    ``--compress``'s value. Recommended setting: ``none``, since HF and
    GitHub render uncompressed JSON inline for spot-checking.
* ``--compress-samples {none|gz|zst|bz2|xz|lz4}``
    Override for per-instance ``<uuid>_samples.jsonl`` only. Recommended
    setting for any submission going to a public store: ``gz`` (5–15x
    size reduction with no UX cost — the samples files are too large
    for web preview anyway).

Argparse rejects unsupported codecs at parse time. Optional codec deps
(``zstandard``, ``lz4``) trigger ``ImportError`` with an actionable
message at write time when the user picks the codec but hasn't
installed the extra.

Writer-side plumbing
--------------------

* ``cli._write_log`` accepts ``compression``; output filename gets the
  codec suffix appended (``<uuid>.json[.gz]``).
* ``LMEvalInstanceLevelAdapter.transform_and_save`` accepts
  ``compression``; the recorded ``DetailedEvaluationResults.checksum``
  is computed over the on-disk (possibly compressed) bytes so that
  downstream verifiers can validate the file as-stored.
* ``HELMInstanceLevelDataAdapter`` accepts ``compression`` in its
  constructor; the adapter's ``self.path`` reflects the on-disk
  filename including any codec suffix.
* ``cli._cmd_convert_helm`` threads the resolved samples-compression
  via ``metadata_args['samples_compression']`` rather than adding a
  new positional to the long-existing ``transform_from_directory``
  signature.

Tests
-----

* ``tests/test_cli_compression.py``: full coverage of
  ``_resolve_compression``, ``_write_log`` round-trips per codec,
  LM-Eval samples writer compression including checksum-on-disk and
  the recorded ``file_path``, and parser-level wiring (defaults,
  per-kind override, invalid codec rejection).
* ``tests/test_cli_inspect_uuid.py``: existing fake-write-log stubs
  updated to accept the new ``compression=`` keyword.

226 passed, 5 skipped. Backwards compatibility preserved across all
existing tests.
---
 every_eval_ever/cli.py                        |  85 +++++-
 every_eval_ever/converters/helm/adapter.py    |   1 +
 .../converters/helm/instance_level_adapter.py |  10 +-
 .../lm_eval/instance_level_adapter.py         |  14 +-
 tests/test_cli_compression.py                 | 281 ++++++++++++++++++
 tests/test_cli_inspect_uuid.py                |   8 +-
 6 files changed, 383 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_cli_compression.py

diff --git a/every_eval_ever/cli.py b/every_eval_ever/cli.py
index c215c89cd..edbf5bce5 100644
--- a/every_eval_ever/cli.py
+++ b/every_eval_ever/cli.py
@@ -10,6 +10,8 @@
 from pathlib import Path
 from typing import Any
 
+from every_eval_ever import io as eee_io
+
 EVALUATOR_RELATIONSHIP_CHOICES = [
     'first_party',
     'third_party',
@@ -30,6 +32,21 @@ def _common_metadata(args: argparse.Namespace) -> dict[str, Any]:
     }
 
 
+def _resolve_compression(
+    args: argparse.Namespace, kind: str
+) -> str:
+    """Resolve the compression name for ``kind`` ('aggregate' or 'samples')."""
+    if kind == 'aggregate':
+        per_kind = getattr(args, 'compress_aggregate', None)
+    elif kind == 'samples':
+        per_kind = getattr(args, 'compress_samples', None)
+    else:
+        raise ValueError(f'unknown kind {kind!r}')
+    if per_kind is not None:
+        return per_kind
+    return getattr(args, 'compress', eee_io.COMPRESSION_NONE)
+
+
 def _output_dir_for_log(base_output: Path, log: Any) -> Path:
     dataset = 'unknown'
     if log.evaluation_results and log.evaluation_results[0].source_data:
@@ -46,12 +63,23 @@ def _output_dir_for_log(base_output: Path, log: Any) -> Path:
 
 
 def _write_log(
-    log: Any, base_output: Path, eval_uuid: str | None = None
+    log: Any,
+    base_output: Path,
+    eval_uuid: str | None = None,
+    compression: str = eee_io.COMPRESSION_NONE,
 ) -> Path:
+    """Write an aggregate evaluation log to ``base_output``.
+
+    ``compression`` selects the codec for the on-disk file; the resulting
+    filename has the codec suffix appended (e.g. ``<uuid>.json.gz``).
+    Defaults to no compression for backwards compatibility.
+    """
     out_dir = _output_dir_for_log(base_output, log)
     eval_uuid = eval_uuid or str(uuid.uuid4())
-    out_file = out_dir / f'{eval_uuid}.json'
-    with out_file.open('w', encoding='utf-8') as file:
+    out_file = eee_io.add_compression_suffix(
+        out_dir / f'{eval_uuid}.json', compression
+    )
+    with eee_io.open_eee_text(out_file, 'w') as file:
         json.dump(
             log.model_dump(mode='json', exclude_none=True), file, indent=2
         )
@@ -100,9 +128,11 @@ def _cmd_convert_lm_eval(args: argparse.Namespace) -> int:
                         task_name=task_name,
                         output_dir=str(_output_dir_for_log(output_dir, log)),
                         file_uuid=eval_uuid,
+                        compression=_resolve_compression(args, 'samples'),
                     )
                     log.detailed_evaluation_results = detailed
-        print(_write_log(log, output_dir, eval_uuid=eval_uuid))
+        print(_write_log(log, output_dir, eval_uuid=eval_uuid,
+                         compression=_resolve_compression(args, 'aggregate')))
 
     print(f'Converted {len(logs)} evaluation log(s).')
     return 0
@@ -139,7 +169,8 @@ def _cmd_convert_inspect(args: argparse.Namespace) -> int:
 
     output_dir = Path(args.output_dir)
     for log, eval_uuid in zip(logs, eval_uuids):
-        print(_write_log(log, output_dir, eval_uuid=eval_uuid))
+        print(_write_log(log, output_dir, eval_uuid=eval_uuid,
+                         compression=_resolve_compression(args, 'aggregate')))
 
     print(f'Converted {len(logs)} evaluation log(s).')
     return 0
@@ -168,6 +199,7 @@ def _cmd_convert_helm(args: argparse.Namespace) -> int:
     else:
         raise FileNotFoundError(f'Path is not a file or directory: {log_path}')
 
+    metadata['samples_compression'] = _resolve_compression(args, 'samples')
     logs = adapter.transform_from_directory(
         log_path,
         output_path=str(Path(args.output_dir) / 'helm_output'),
@@ -182,7 +214,8 @@ def _cmd_convert_helm(args: argparse.Namespace) -> int:
 
     output_dir = Path(args.output_dir)
     for log, eval_uuid in zip(logs, eval_uuids):
-        print(_write_log(log, output_dir, eval_uuid=eval_uuid))
+        print(_write_log(log, output_dir, eval_uuid=eval_uuid,
+                         compression=_resolve_compression(args, 'aggregate')))
 
     print(f'Converted {len(logs)} evaluation log(s).')
     return 0
@@ -223,7 +256,10 @@ def _cmd_convert_alpaca_eval(args: argparse.Namespace) -> int:
             if args.eval_library_version != 'unknown':
                 log.eval_library.version = args.eval_library_version
 
-            out_file = _write_log(log, output_dir)
+            out_file = _write_log(
+                log, output_dir,
+                compression=_resolve_compression(args, 'aggregate'),
+            )
             print(f'  {out_file}')
             total += 1
 
@@ -354,6 +390,41 @@ def build_parser() -> argparse.ArgumentParser:
             help='Evaluation library version recorded in eval_library.version.',
         )
 
+        # ----- compression options (apply to all converters) ------------
+        source_parser.add_argument(
+            '--compress',
+            choices=eee_io.COMPRESSION_CHOICES,
+            default=eee_io.COMPRESSION_NONE,
+            help=(
+                'Default codec for both aggregate and per-instance '
+                'output files (default: none). Files are written as '
+                "<uuid>.json[.<codec>] / <uuid>_samples.jsonl[.<codec>]. "
+                'See the --compress-aggregate / --compress-samples '
+                'flags to override per-kind. .zst and .lz4 require the '
+                "optional 'zst' / 'lz4' extras."
+            ),
+        )
+        source_parser.add_argument(
+            '--compress-aggregate', '--compress_aggregate',
+            choices=eee_io.COMPRESSION_CHOICES,
+            default=None,
+            help=(
+                "Override --compress for aggregate <uuid>.json files only. "
+                'HF + GitHub render uncompressed JSON inline so the default '
+                "(--compress's value) is usually the right call here."
+            ),
+        )
+        source_parser.add_argument(
+            '--compress-samples', '--compress_samples',
+            choices=eee_io.COMPRESSION_CHOICES,
+            default=None,
+            help=(
+                "Override --compress for per-instance <uuid>_samples.jsonl "
+                "files only. JSONL compresses 5–15x; recommended setting "
+                'when shipping to a public store.'
+            ),
+        )
+
         if source == 'alpaca_eval':
             source_parser.add_argument(
                 '--version',
diff --git a/every_eval_ever/converters/helm/adapter.py b/every_eval_ever/converters/helm/adapter.py
index e43eb01b9..939ca3f9e 100644
--- a/every_eval_ever/converters/helm/adapter.py
+++ b/every_eval_ever/converters/helm/adapter.py
@@ -489,6 +489,7 @@ def _transform_single(
                     Format.jsonl.value,
                     HashAlgorithm.sha256.value,
                     evaluation_dir,
+                    compression=metadata_args.get('samples_compression', 'none'),
                 ).convert_instance_level_logs(
                     dataset_name,
                     model_info.id,
diff --git a/every_eval_ever/converters/helm/instance_level_adapter.py b/every_eval_ever/converters/helm/instance_level_adapter.py
index 037237ecc..e1262cc29 100644
--- a/every_eval_ever/converters/helm/instance_level_adapter.py
+++ b/every_eval_ever/converters/helm/instance_level_adapter.py
@@ -2,6 +2,8 @@
 from pathlib import Path
 from typing import Any, List, Tuple
 
+from every_eval_ever import io as eee_io
+
 _HELM_IMPORT_ERROR: Exception | None = None
 try:
     from helm.benchmark.adaptation.scenario_state import RequestState
@@ -42,20 +44,24 @@ def __init__(
         format: str,
         hash_algorithm: str,
         evaluation_dir: str,
+        compression: str = eee_io.COMPRESSION_NONE,
     ):
         _require_helm_dependencies()
         self.evaluation_id = evaulation_id
         self.format = format
         self.hash_algorithm = hash_algorithm
         self.evaluation_dir = evaluation_dir
-        self.path = f'{evaluation_dir}/{evaulation_id}.{format}'
+        self.compression = compression
+        # On-disk path includes the codec suffix when compression is set.
+        base_path = Path(evaluation_dir) / f'{evaulation_id}.{format}'
+        self.path = str(eee_io.add_compression_suffix(base_path, compression))
 
     def _save_json(self, items: List[InstanceLevelEvaluationLog]):
         eval_dir_path = Path(self.evaluation_dir)
         eval_dir_path.mkdir(parents=True, exist_ok=True)
         path = Path(self.path)
 
-        with path.open('w', encoding='utf-8') as f:
+        with eee_io.open_eee_text(path, 'w') as f:
             for item in items:
                 json_line = json.dumps(
                     item.model_dump(mode='json'), ensure_ascii=False
diff --git a/every_eval_ever/converters/lm_eval/instance_level_adapter.py b/every_eval_ever/converters/lm_eval/instance_level_adapter.py
index 1cc393862..c4a30198d 100644
--- a/every_eval_ever/converters/lm_eval/instance_level_adapter.py
+++ b/every_eval_ever/converters/lm_eval/instance_level_adapter.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
+from every_eval_ever import io as eee_io
 from every_eval_ever.converters import SCHEMA_VERSION
 from every_eval_ever.eval_types import (
     DetailedEvaluationResults,
@@ -55,12 +56,18 @@ def transform_and_save(
         task_name: str,
         output_dir: Optional[Union[str, Path]] = None,
         file_uuid: Optional[str] = None,
+        compression: str = eee_io.COMPRESSION_NONE,
     ) -> Optional[DetailedEvaluationResults]:
         """Transform samples and save to JSONL, returning a DetailedEvaluationResults pointer.
 
         If output_dir is None, returns None (skips instance-level output).
         If file_uuid is provided, the output file is named {file_uuid}_samples.jsonl
         so it shares the UUID of the corresponding evaluation result file.
+
+        ``compression`` controls the on-disk codec for the samples file
+        (see ``every_eval_ever.io.COMPRESSION_CHOICES``). The output
+        filename has the codec suffix appended; the recorded checksum
+        is computed over the (possibly compressed) on-disk bytes.
         """
         if output_dir is None:
             return None
@@ -74,11 +81,12 @@ def transform_and_save(
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)
         if file_uuid:
-            out_file = output_dir / f'{file_uuid}_samples.jsonl'
+            base = output_dir / f'{file_uuid}_samples.jsonl'
         else:
-            out_file = output_dir / f'samples_{task_name}.jsonl'
+            base = output_dir / f'samples_{task_name}.jsonl'
+        out_file = eee_io.add_compression_suffix(base, compression)
 
-        with open(out_file, 'w') as f:
+        with eee_io.open_eee_text(out_file, 'w') as f:
             for log in logs:
                 f.write(
                     json.dumps(log.model_dump(mode='json'), ensure_ascii=False)
diff --git a/tests/test_cli_compression.py b/tests/test_cli_compression.py
new file mode 100644
index 000000000..85a53d45b
--- /dev/null
+++ b/tests/test_cli_compression.py
@@ -0,0 +1,281 @@
+"""Writer-side compression tests for the converter CLIs."""
+
+from __future__ import annotations
+
+import argparse
+import gzip
+import json
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from every_eval_ever import cli
+from every_eval_ever import io as eee_io
+
+
+# ---------------------------------------------------------------------------
+# _resolve_compression: per-kind override falls back to --compress
+# ---------------------------------------------------------------------------
+
+
+def test_resolve_compression_uses_per_kind_override() -> None:
+    args = SimpleNamespace(compress='gz', compress_aggregate='none',
+                           compress_samples='zst')
+    assert cli._resolve_compression(args, 'aggregate') == 'none'
+    assert cli._resolve_compression(args, 'samples') == 'zst'
+
+
+def test_resolve_compression_falls_back_to_compress() -> None:
+    args = SimpleNamespace(compress='gz', compress_aggregate=None,
+                           compress_samples=None)
+    assert cli._resolve_compression(args, 'aggregate') == 'gz'
+    assert cli._resolve_compression(args, 'samples') == 'gz'
+
+
+def test_resolve_compression_default_none() -> None:
+    args = SimpleNamespace(compress='none', compress_aggregate=None,
+                           compress_samples=None)
+    assert cli._resolve_compression(args, 'samples') == 'none'
+
+
+def test_resolve_compression_rejects_bad_kind() -> None:
+    args = SimpleNamespace(compress='gz', compress_aggregate=None,
+                           compress_samples=None)
+    with pytest.raises(ValueError):
+        cli._resolve_compression(args, 'metadata')
+
+
+# ---------------------------------------------------------------------------
+# _write_log writes the right path, with the right codec, that round-trips
+# ---------------------------------------------------------------------------
+
+
+class _FakeSourceData:
+    def __init__(self, dataset_name: str = 'fake'):
+        self.dataset_name = dataset_name
+
+
+class _FakeEvalResult:
+    def __init__(self, dataset_name: str = 'fake'):
+        self.source_data = _FakeSourceData(dataset_name)
+
+
+class _FakeModelInfo:
+    def __init__(self, id_: str = 'org/model'):
+        self.id = id_
+
+
+class _FakeLog:
+    """Minimal log-shaped object that supports the bits ``_write_log`` uses."""
+
+    def __init__(self, dataset: str = 'fake', model_id: str = 'org/model'):
+        self.evaluation_results = [_FakeEvalResult(dataset)]
+        self.model_info = _FakeModelInfo(model_id)
+        self._dump = {'schema_version': '0.2.2', 'kind': 'aggregate',
+                      'dataset': dataset, 'model': model_id}
+
+    def model_dump(self, *, mode: str, exclude_none: bool):
+        return self._dump
+
+
+def test_write_log_uncompressed_default(tmp_path: Path) -> None:
+    log = _FakeLog()
+    out = cli._write_log(log, tmp_path, eval_uuid='aaa')
+    assert out.name == 'aaa.json'
+    assert out.parent == tmp_path / 'fake' / 'org' / 'model'
+    assert out.exists()
+    assert json.loads(out.read_text()) == log._dump
+
+
+def test_write_log_gz_appends_suffix_and_compresses(tmp_path: Path) -> None:
+    log = _FakeLog()
+    out = cli._write_log(log, tmp_path, eval_uuid='bbb', compression='gz')
+    assert out.name == 'bbb.json.gz'
+    # Cannot read as plain text — must decompress.
+    with gzip.open(out, 'rt', encoding='utf-8') as f:
+        assert json.load(f) == log._dump
+
+
+def test_write_log_preserves_indent_and_unicode(tmp_path: Path) -> None:
+    log = _FakeLog()
+    log._dump['note'] = 'café'  # non-ascii
+    out = cli._write_log(log, tmp_path, eval_uuid='ccc', compression='bz2')
+    assert out.name == 'ccc.json.bz2'
+    with eee_io.open_eee_text(out, 'r') as f:
+        data = json.load(f)
+    assert data['note'] == 'café'
+
+
+# ---------------------------------------------------------------------------
+# LMEvalInstanceLevelAdapter.transform_and_save honors compression
+# ---------------------------------------------------------------------------
+
+
+def test_lm_eval_samples_writer_gzip(tmp_path: Path,
+                                     monkeypatch: pytest.MonkeyPatch) -> None:
+    from every_eval_ever.converters.lm_eval.instance_level_adapter import (
+        LMEvalInstanceLevelAdapter,
+    )
+
+    # Write a fake lm-eval samples.jsonl input. We don't need it to be a
+    # real lm-eval shape; we just need transform_samples to produce >= 1
+    # record. Stub _transform_sample to short-circuit the schema work.
+    fake_input = tmp_path / 'samples_fake_2025-01-01.jsonl'
+    fake_input.write_text(json.dumps({'_': 1}) + '\n')
+
+    # Build a fake instance-level log: any Pydantic-validating shape works.
+    from every_eval_ever.instance_level_types import (
+        Evaluation, Input, InstanceLevelEvaluationLog, InteractionType,
+        Output,
+    )
+
+    def _fake_transform(self, sample, evaluation_id, model_id, task_name):
+        from every_eval_ever.instance_level_types import (
+            AnswerAttributionItem,
+        )
+        return InstanceLevelEvaluationLog(
+            schema_version='0.2.2',
+            evaluation_id=evaluation_id,
+            model_id=model_id,
+            evaluation_name=task_name,
+            sample_id='1',
+            sample_hash='a' * 64,
+            interaction_type=InteractionType.single_turn,
+            input=Input(raw='q', reference=[], choices=[]),
+            output=Output(raw=['a'], reasoning_trace=[]),
+            answer_attribution=[
+                AnswerAttributionItem(
+                    turn_idx=0, source='output.raw', extracted_value='a',
+                    extraction_method='exact_match', is_terminal=True,
+                )
+            ],
+            evaluation=Evaluation(score=1.0, is_correct=True),
+        )
+
+    monkeypatch.setattr(
+        LMEvalInstanceLevelAdapter, '_transform_sample', _fake_transform
+    )
+
+    adapter = LMEvalInstanceLevelAdapter()
+    out_dir = tmp_path / 'out'
+    detailed = adapter.transform_and_save(
+        samples_path=fake_input,
+        evaluation_id='eid',
+        model_id='org/model',
+        task_name='fake',
+        output_dir=out_dir,
+        file_uuid='zzz',
+        compression='gz',
+    )
+
+    assert detailed is not None
+    out_file = Path(detailed.file_path)
+    assert out_file.name == 'zzz_samples.jsonl.gz'
+    # Round-trips
+    with gzip.open(out_file, 'rt', encoding='utf-8') as f:
+        rows = [json.loads(line) for line in f if line.strip()]
+    assert len(rows) == 1
+    # checksum is over the on-disk (compressed) bytes
+    import hashlib
+    assert detailed.checksum == hashlib.sha256(out_file.read_bytes()).hexdigest()
+
+
+def test_lm_eval_samples_writer_default_uncompressed(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    from every_eval_ever.converters.lm_eval.instance_level_adapter import (
+        LMEvalInstanceLevelAdapter,
+    )
+    from every_eval_ever.instance_level_types import (
+        Evaluation, Input, InstanceLevelEvaluationLog, InteractionType,
+        Output,
+    )
+
+    fake_input = tmp_path / 'samples_fake.jsonl'
+    fake_input.write_text(json.dumps({'_': 1}) + '\n')
+
+    def _fake_transform(self, sample, evaluation_id, model_id, task_name):
+        from every_eval_ever.instance_level_types import (
+            AnswerAttributionItem,
+        )
+        return InstanceLevelEvaluationLog(
+            schema_version='0.2.2',
+            evaluation_id=evaluation_id,
+            model_id=model_id,
+            evaluation_name=task_name,
+            sample_id='1',
+            sample_hash='a' * 64,
+            interaction_type=InteractionType.single_turn,
+            input=Input(raw='q', reference=[], choices=[]),
+            output=Output(raw=['a'], reasoning_trace=[]),
+            answer_attribution=[
+                AnswerAttributionItem(
+                    turn_idx=0, source='output.raw', extracted_value='a',
+                    extraction_method='exact_match', is_terminal=True,
+                )
+            ],
+            evaluation=Evaluation(score=1.0, is_correct=True),
+        )
+    monkeypatch.setattr(
+        LMEvalInstanceLevelAdapter, '_transform_sample', _fake_transform
+    )
+
+    detailed = LMEvalInstanceLevelAdapter().transform_and_save(
+        samples_path=fake_input,
+        evaluation_id='eid',
+        model_id='org/model',
+        task_name='fake',
+        output_dir=tmp_path / 'out',
+        file_uuid='zzz',
+    )
+    assert detailed is not None
+    assert Path(detailed.file_path).name == 'zzz_samples.jsonl'
+
+
+# ---------------------------------------------------------------------------
+# CLI parser wires the flags through with sensible defaults
+# ---------------------------------------------------------------------------
+
+
+def test_cli_parser_defaults_to_none() -> None:
+    parser = cli.build_parser()
+    ns = parser.parse_args(
+        ['convert', 'lm_eval', '--log_path', '/tmp/x', '--output_dir', '/tmp/y']
+    )
+    assert ns.compress == 'none'
+    assert ns.compress_aggregate is None
+    assert ns.compress_samples is None
+    assert cli._resolve_compression(ns, 'aggregate') == 'none'
+    assert cli._resolve_compression(ns, 'samples') == 'none'
+
+
+def test_cli_parser_compress_samples_gz() -> None:
+    parser = cli.build_parser()
+    ns = parser.parse_args(
+        ['convert', 'helm', '--log_path', '/tmp/x',
+         '--compress-samples', 'gz']
+    )
+    assert ns.compress == 'none'
+    assert ns.compress_samples == 'gz'
+    assert cli._resolve_compression(ns, 'aggregate') == 'none'
+    assert cli._resolve_compression(ns, 'samples') == 'gz'
+
+
+def test_cli_parser_global_compress() -> None:
+    parser = cli.build_parser()
+    ns = parser.parse_args(
+        ['convert', 'inspect', '--log_path', '/tmp/x', '--compress', 'bz2']
+    )
+    assert ns.compress == 'bz2'
+    assert cli._resolve_compression(ns, 'aggregate') == 'bz2'
+    assert cli._resolve_compression(ns, 'samples') == 'bz2'
+
+
+def test_cli_parser_rejects_invalid_codec() -> None:
+    parser = cli.build_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(
+            ['convert', 'lm_eval', '--log_path', '/tmp/x',
+             '--compress', 'snappy']
+        )
diff --git a/tests/test_cli_inspect_uuid.py b/tests/test_cli_inspect_uuid.py
index 732f860ae..2d4a60f18 100644
--- a/tests/test_cli_inspect_uuid.py
+++ b/tests/test_cli_inspect_uuid.py
@@ -63,7 +63,7 @@ def transform_from_directory(self, *_args, **_kwargs):
 
     captured_eval_uuids: list[str | None] = []
 
-    def fake_write_log(_log, _base_output, eval_uuid=None):
+    def fake_write_log(_log, _base_output, eval_uuid=None, compression="none"):
         captured_eval_uuids.append(eval_uuid)
         return Path('/tmp/fake_aggregate.json')
 
@@ -113,7 +113,7 @@ def transform_from_directory(self, _path, metadata_args):
 
     captured_eval_uuids: list[str | None] = []
 
-    def fake_write_log(_log, _base_output, eval_uuid=None):
+    def fake_write_log(_log, _base_output, eval_uuid=None, compression="none"):
         captured_eval_uuids.append(eval_uuid)
         return Path('/tmp/fake_aggregate.json')
 
@@ -160,7 +160,7 @@ def transform_from_directory(
 
     captured_eval_uuids: list[str | None] = []
 
-    def fake_write_log(_log, _base_output, eval_uuid=None):
+    def fake_write_log(_log, _base_output, eval_uuid=None, compression="none"):
         captured_eval_uuids.append(eval_uuid)
         return Path('/tmp/fake_aggregate.json')
 
@@ -208,7 +208,7 @@ def transform_from_directory(
 
     captured_eval_uuids: list[str | None] = []
 
-    def fake_write_log(_log, _base_output, eval_uuid=None):
+    def fake_write_log(_log, _base_output, eval_uuid=None, compression="none"):
         captured_eval_uuids.append(eval_uuid)
         return Path('/tmp/fake_aggregate.json')
 

From d35f86f571eabae067c357f58e83ec45b1dd0d9f Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 7 May 2026 21:58:37 -0400
Subject: [PATCH 4/4] pyproject + README: optional codec extras and compression
 docs

* Add ``[zst]`` and ``[lz4]`` optional-dependency groups for the
  third-party codecs not available in the standard library. ``[all]``
  now pulls them in alongside ``[inspect]`` and ``[helm]``.

* README: new subsection under "Data Validation" documenting the
  recognized compressed forms, the duplicate-variant rule, and the
  ``--compress`` / ``--compress-samples`` writer flags. Notes that
  defaults remain uncompressed for backwards compatibility, and
  recommends ``--compress-samples gz`` (not the global ``--compress``)
  for public submissions so aggregate JSON stays browsable on the HF
  web UI.

226 passed, 5 skipped.
---
 README.md      | 38 ++++++++++++++++++++++++++++++++++++++
 pyproject.toml |  7 +++++++
 2 files changed, 45 insertions(+)

diff --git a/README.md b/README.md
index 262a2ca11..343431925 100644
--- a/README.md
+++ b/README.md
@@ -177,6 +177,44 @@ uv run python -m every_eval_ever validate file1.json file2_samples.jsonl data/
 
 File type is determined by extension: `.json` validates against `EvaluationLog`, `.jsonl` validates each line against `InstanceLevelEvaluationLog`.
 
+#### Compressed files
+
+Validation, discovery, and the converter writers all transparently
+support compressed result files alongside the plain forms:
+
+```
+<uuid>.json{,.gz,.zst,.bz2,.xz,.lz4}
+<uuid>_samples.jsonl{,.gz,.zst,.bz2,.xz,.lz4}
+```
+
+Codecs match what the [HuggingFace Hub natively decompresses](https://huggingface.co/docs/hub/en/datasets-adding#file-formats).
+The schema is unchanged; compression is purely a transport / storage
+concern. A given `(folder, uuid, kind)` may have **at most one
+physical variant** — the validator emits a `duplicate_variant` error
+if both `abc.json` and `abc.json.gz` are committed in the same folder.
+
+`gzip`, `bzip2`, and `lzma/xz` use the standard library. `zstd` and
+`lz4` are optional extras; install with `pip install 'every-eval-ever[zst]'`
+or `[lz4]` (or `[all]` to pull in everything).
+
+To **write** compressed output during conversion, pass one of the
+new flags to any `convert` subcommand:
+
+```sh
+# Default codec for both aggregate and per-instance files
+uv run python -m every_eval_ever convert helm \
+    --log_path runs/ --output_dir data/ \
+    --compress gz
+
+# Only compress per-instance samples (recommended for public submissions —
+# aggregate JSON stays browsable on the HF web UI)
+uv run python -m every_eval_ever convert helm \
+    --log_path runs/ --output_dir data/ \
+    --compress-samples gz
+```
+
+Defaults remain uncompressed for full backwards compatibility.
+
 #### Output formats
 
 ```sh
diff --git a/pyproject.toml b/pyproject.toml
index da63d2c4f..87fb1b038 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,9 +31,16 @@ helm = [
     "crfm-helm>=0.5.12",
     "typer>=0.12,<1.0",
 ]
+# Optional codecs for transparent compression of EEE result files
+# (see ``every_eval_ever.io``). gzip, bzip2, and lzma/xz are stdlib;
+# zstd and lz4 each need a third-party package.
+zst = ["zstandard>=0.22"]
+lz4 = ["lz4>=4"]
 all = [
     "every-eval-ever[inspect]",
     "every-eval-ever[helm]",
+    "every-eval-ever[zst]",
+    "every-eval-ever[lz4]",
 ]
 
 [project.scripts]