diff --git a/pyproject.toml b/pyproject.toml
index 834cab0d..a55a2e4e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -120,6 +120,7 @@ docs = [
 ]
 dev = [
     "codecov",
+    "hypothesis>=6.100",
     "mypy",
     "pandas-stubs",
     "pooch",
@@ -330,6 +331,7 @@ markers = [
     "network: marks test requiring internet (select with '--run-network-tests')",
     "slow: marks test as slow (select with '--run-slow-tests')",
     "minio: marks test requiring docker and minio (select with '--run-minio-tests')",
+    "hypothesis_tests: property-based tests using hypothesis (slower; exclude with '-m \"not hypothesis_tests\"')",
 ]
 filterwarnings = [
     "ignore:Numcodecs codecs are not in the Zarr version 3 specification*:UserWarning:numcodecs"
diff --git a/virtualizarr/tests/test_parsers/_fill_value_common.py b/virtualizarr/tests/test_parsers/_fill_value_common.py
new file mode 100644
index 00000000..f9687c15
--- /dev/null
+++ b/virtualizarr/tests/test_parsers/_fill_value_common.py
@@ -0,0 +1,372 @@
+"""Shared infrastructure for parser fill-value equivalence test modules.
+
+Each parser has its own equivalence test module (`test_hdf_fill_value_equivalence.py`,
+`test_zarr_fill_value_equivalence.py`, ...). The format-agnostic pieces live
+here so adding parser N+1 means writing only the parser-specific bits:
+
+- a `_write_<format>` file writer
+- an `open_observed` and `open_reference` pair of callables
+- format-specific dtype / fill-value strategies
+- a `_DatasetSpec`-shaped dataclass with any format-specific extra fields
+- the test classes themselves
+
+The shared infrastructure here covers: the `_UNSET` sentinel, hypothesis
+profile registration, the module-level `pytestmark` items, two-layer
+equivalence assertion helpers, and a generic numeric-dtype strategy.
+
+Test philosophy: strategies generate the full parameter range
+`docs/custom_parsers.md` says the parser should support. Assertions are
+plain `assert_identical` — no `xfail`, no `pytest.raises` for
+known-broken combinations. Failures are TODO items. Suite green =
+parser stack matches spec.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any, Callable
+
+import numpy as np
+import pytest
+import xarray as xr
+from hypothesis import settings
+from hypothesis import strategies as st
+from hypothesis.extra import numpy as npst
+
+# ---------------------------------------------------------------------------
+# Sentinel for "this parameter was not set" — `None` is a meaningful value
+# in several places (e.g. zarr's `fill_value=None` means "use default"), so
+# we need a third state.
+# ---------------------------------------------------------------------------
+
+
+class _Unset:
+    """Sentinel for parameters that should be omitted entirely.
+
+    None is a meaningful value for most fill/encoding knobs, so we need a
+    third state. Used as the default value on parser-specific
+    `_DatasetSpec` dataclasses so the test writer can pass `_UNSET` to
+    mean "don't set this attribute at all".
+    """
+
+    _instance = None
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __repr__(self):  # pragma: no cover
+        return "_UNSET"
+
+
+_UNSET = _Unset()
+
+
+# ---------------------------------------------------------------------------
+# Hypothesis profile registration and the module-level pytestmark items
+# that every fill-value equivalence module should set.
+# ---------------------------------------------------------------------------
+
+
+def register_hypothesis_profiles() -> None:
+    """Register `ci` (max_examples=10) and `thorough` (max_examples=50)
+    profiles and load whichever `VIRTUALIZARR_HYPOTHESIS_PROFILE` names
+    (default: `thorough`).
+
+    Idempotent — safe to call multiple times. Each fill-value equivalence
+    test module calls this at import time. `register_profile` overwrites
+    existing profiles of the same name, so re-calling is harmless.
+    """
+    settings.register_profile("ci", deadline=None, max_examples=10)
+    settings.register_profile("thorough", deadline=None, max_examples=50)
+    settings.load_profile(os.environ.get("VIRTUALIZARR_HYPOTHESIS_PROFILE", "thorough"))
+
+
+def base_pytestmark() -> list:
+    """Return the module-level pytestmark list every fill-value equivalence
+    module uses: the `hypothesis_tests` marker plus a filter for zarr's
+    "this dtype has no Zarr V3 spec" warnings.
+
+    Returns a fresh list each call so callers can extend it without
+    mutating shared state.
+    """
+    return [
+        pytest.mark.hypothesis_tests,
+        # Several test cases deliberately exercise dtypes like S* that
+        # don't yet have a Zarr V3 spec; zarr-python emits an
+        # informational `UnstableSpecificationWarning` on every read. We
+        # want those dtypes covered, so silence the warning module-locally.
+        pytest.mark.filterwarnings("ignore::zarr.errors.UnstableSpecificationWarning"),
+    ]
+
+
+# ---------------------------------------------------------------------------
+# Two-layer equivalence assertion helpers.
+#
+# Each parser test module provides `open_observed` and `open_reference`
+# callbacks that take (filepath, *, decode_cf) and return an xr.Dataset.
+# The helpers below run both opens, clear the legitimately-different
+# `encoding` dicts, and assert dataset identity.
+#
+# Attribution: when one or both opens fail, the helpers produce specific
+# error messages so the developer can tell whether the failure is
+# virtualizarr-specific or downstream-shared.
+# ---------------------------------------------------------------------------
+
+
+OpenCallable = Callable[..., xr.Dataset]
+
+
+class BothEnginesFailedIdenticallyError(AssertionError):
+    """Both engines raised matching exceptions when opening the test file.
+
+    Still a spec failure (the parser stack doesn't satisfy what
+    `docs/custom_parsers.md` says should work), but **not a
+    virtualizarr-specific bug** — the fix likely lives downstream
+    (xarray's `FillValueCoder`, zarr-python, etc.). The exception
+    message includes the signature so the developer can attribute the
+    fix correctly.
+    """
+
+
+def _exception_signature(e: BaseException) -> str:
+    """Stable string signature for comparing exceptions.
+
+    Truncates the message to avoid noise from path differences and
+    long tracebacks. Two exceptions are considered "matching" when
+    their signatures are equal.
+    """
+    msg = str(e)
+    # Trim — many xarray/zarr errors include long type repr lines.
+    if len(msg) > 200:
+        msg = msg[:200]
+    return f"{type(e).__name__}: {msg}"
+
+
+_OPEN_OK = "ok"
+_OPEN_FAIL = "fail"
+
+
+def _open_with_capture(
+    open_fn: OpenCallable, filepath: str, *, decode_cf: bool
+) -> tuple[str, Any]:
+    """Try `open_fn(filepath, decode_cf=decode_cf)`; return either
+    (`_OPEN_OK`, dataset) or (`_OPEN_FAIL`, exception).
+    """
+    try:
+        return (_OPEN_OK, open_fn(filepath, decode_cf=decode_cf))
+    except BaseException as e:  # pragma: no cover — exhaustive catch
+        return (_OPEN_FAIL, e)
+
+
+def _assert_pair_identical(
+    filepath: str,
+    *,
+    decode_cf: bool,
+    layer_name: str,
+    open_observed: OpenCallable,
+    open_reference: OpenCallable,
+) -> None:
+    """Open both engines at the given decode layer and assert they agree.
+
+    Four outcome shapes, each producing a distinct failure mode:
+
+    - both succeed → standard `assert_identical` comparison.
+    - both fail with matching exception signatures →
+      `BothEnginesFailedIdenticallyError` (downstream-shared bug).
+    - both fail differently → plain `AssertionError` with both sigs.
+    - one fails, one succeeds → `AssertionError` naming which side
+      failed.
+    """
+    obs_status, obs = _open_with_capture(open_observed, filepath, decode_cf=decode_cf)
+    ref_status, ref = _open_with_capture(open_reference, filepath, decode_cf=decode_cf)
+
+    # Case: both failed.
+    if obs_status == _OPEN_FAIL and ref_status == _OPEN_FAIL:
+        obs_sig = _exception_signature(obs)
+        ref_sig = _exception_signature(ref)
+        if obs_sig == ref_sig:
+            raise BothEnginesFailedIdenticallyError(
+                f"[{layer_name}] both engines failed identically — "
+                f"likely a downstream-shared issue, not a virtualizarr-"
+                f"specific bug.\n  {obs_sig}"
+            ) from obs
+        raise AssertionError(
+            f"[{layer_name}] both engines failed but differently — "
+            f"unexpected divergence.\n"
+            f"  observed (virtualizarr): {obs_sig}\n"
+            f"  reference:               {ref_sig}"
+        ) from obs
+
+    # Case: only one failed.
+    if obs_status == _OPEN_FAIL:
+        ref.close()
+        raise AssertionError(
+            f"[{layer_name}] observed (virtualizarr) failed; reference "
+            f"succeeded — likely a virtualizarr-specific bug.\n"
+            f"  observed: {_exception_signature(obs)}"
+        ) from obs
+    if ref_status == _OPEN_FAIL:
+        obs.close()
+        raise AssertionError(
+            f"[{layer_name}] reference failed; observed (virtualizarr) "
+            f"succeeded — unexpected (virtualizarr accepts what the "
+            f"reference engine doesn't).\n"
+            f"  reference: {_exception_signature(ref)}"
+        ) from ref
+
+    # Case: both succeeded — compare.
+    try:
+        # Encoding dicts legitimately differ between engines (HDF5 chunk
+        # / filter encoding vs Zarr codec encoding), so clear them
+        # before comparing. Attribute and value comparison stays strict.
+        for v in obs.variables:
+            obs[v].encoding.clear()
+        for v in ref.variables:
+            ref[v].encoding.clear()
+        xr.testing.assert_identical(obs.load(), ref.load())
+    finally:
+        obs.close()
+        ref.close()
+
+
+def assert_decoded_data_identical(
+    filepath: str,
+    *,
+    open_observed: OpenCallable,
+    open_reference: OpenCallable,
+) -> None:
+    """Assert the two engines produce identical datasets *after* xarray's
+    CF decoding (mask_and_scale, _FillValue masking, scale_factor/add_offset,
+    _Unsigned reinterpretation).
+
+    User-facing correctness check: does the data look the same when
+    consumed via either engine?
+    """
+    _assert_pair_identical(
+        filepath,
+        decode_cf=True,
+        layer_name="decoded-data",
+        open_observed=open_observed,
+        open_reference=open_reference,
+    )
+
+
+def assert_raw_attributes_identical(
+    filepath: str,
+    *,
+    open_observed: OpenCallable,
+    open_reference: OpenCallable,
+) -> None:
+    """Assert the two engines produce identical raw datasets with CF decoding
+    *off*.
+
+    Exposes the metadata layer directly: any attribute the parser drops,
+    adds, or transforms (e.g. `_FillValue` encoding mismatches, lost
+    `_Unsigned`, missing `missing_value`) shows up here even though the
+    decoded-data helper would mask it via CF decoding.
+    """
+    _assert_pair_identical(
+        filepath,
+        decode_cf=False,
+        layer_name="raw-attributes",
+        open_observed=open_observed,
+        open_reference=open_reference,
+    )
+
+
+def assert_equivalent(
+    filepath: str,
+    *,
+    open_observed: OpenCallable,
+    open_reference: OpenCallable,
+) -> None:
+    """Convenience wrapper: run both raw-attribute and decoded-data assertions."""
+    assert_raw_attributes_identical(
+        filepath, open_observed=open_observed, open_reference=open_reference
+    )
+    assert_decoded_data_identical(
+        filepath, open_observed=open_observed, open_reference=open_reference
+    )
+
+
+# ---------------------------------------------------------------------------
+# Generic numeric-dtype strategies shared across parser modules.
+#
+# Each parser module adds format-specific extensions (HDF adds vlen-string
+# and S*, TIFF restricts to GDAL-supported dtypes, etc.).
+# ---------------------------------------------------------------------------
+
+
+def base_numeric_dtype_strategy() -> st.SearchStrategy[np.dtype]:
+    """Sample one of the numeric dtypes every format supports: bool,
+    signed/unsigned int, float, complex. Excludes string and structured
+    dtypes — those are too format-specific to share.
+    """
+    return st.sampled_from(
+        [
+            np.dtype("?"),  # bool
+            np.dtype("i1"),
+            np.dtype("i2"),
+            np.dtype("i4"),
+            np.dtype("i8"),
+            np.dtype("u1"),
+            np.dtype("u2"),
+            np.dtype("u4"),
+            np.dtype("u8"),
+            np.dtype("f4"),
+            np.dtype("f8"),
+            np.dtype("c8"),
+            np.dtype("c16"),
+        ]
+    )
+
+
+def value_in_dtype_strategy(dtype: np.dtype) -> st.SearchStrategy[Any]:
+    """Return a strategy producing a value compatible with `dtype`.
+
+    Used for both `dataset_fillvalue` and `_FillValue` draws. Doing the
+    dtype-aware draw at strategy-build time (not via `assume()`) keeps
+    shrinking efficient.
+
+    Handles the universally-supported kinds (`b`, `i`, `u`, `f`, `c`, `S`).
+    Parser modules that exercise additional kinds (e.g. `O` for
+    h5py.string_dtype()) extend this in their own helper.
+    """
+    if dtype.kind == "b":
+        return st.booleans()
+    if dtype.kind in "iu":
+        info = np.iinfo(dtype)
+        return st.integers(min_value=int(info.min), max_value=int(info.max))
+    if dtype.kind == "f":
+        return st.floats(
+            width=64 if dtype.itemsize == 8 else 32,
+            allow_nan=True,
+            allow_infinity=False,
+        )
+    if dtype.kind == "c":
+        component = st.floats(
+            width=32 if dtype.itemsize == 8 else 64,
+            allow_nan=False,
+            allow_infinity=False,
+        )
+        return st.builds(complex, component, component)
+    if dtype.kind == "S":
+        return st.binary(min_size=0, max_size=dtype.itemsize)
+    raise ValueError(
+        f"No fill-value strategy for dtype {dtype!r}; extend the parser "
+        f"module's own strategy if you need this dtype."
+    )
+
+
+def data_in_dtype_strategy(
+    dtype: np.dtype, shape: tuple[int, ...]
+) -> st.SearchStrategy[np.ndarray]:
+    """Generate a numpy array of the given dtype and shape for the dtypes
+    handled by `value_in_dtype_strategy`.
+
+    Parser modules whose strategies introduce additional kinds (e.g. vlen
+    strings) should branch in their own data-strategy helper.
+    """
+    return npst.arrays(dtype=dtype, shape=shape)
diff --git a/virtualizarr/tests/test_parsers/test_hdf/test_hdf_fill_value_equivalence.py b/virtualizarr/tests/test_parsers/test_hdf/test_hdf_fill_value_equivalence.py
new file mode 100644
index 00000000..3a88bbf1
--- /dev/null
+++ b/virtualizarr/tests/test_parsers/test_hdf/test_hdf_fill_value_equivalence.py
@@ -0,0 +1,895 @@
+"""Property-based equivalence tests for the HDF5 parser's fill-value handling.
+
+Each test writes an HDF5 file via h5py, opens it both via xarray+h5netcdf and
+via xarray+virtualizarr (manifest_store_from_hdf_url), and asserts the two
+loaded datasets are identical at two layers:
+
+  1. raw attribute layer (decode_cf=False) — exposes metadata bugs that CF
+     decoding would otherwise mask (e.g. dropped attributes, wrong fill-value
+     encoding, lost _Unsigned).
+  2. decoded data layer (decode_cf=True) — the user-facing correctness
+     check (mask_and_scale, _FillValue masking, scale_factor/add_offset,
+     _Unsigned reinterpretation).
+
+See docs/custom_parsers.md ("Fill values" and "Packing and scaling" sections)
+for the rules these tests exercise.
+
+The strategies in TestBasicEquivalence / TestPackedEquivalence /
+TestUnsignedEquivalence generate the *full* parameter range that
+`docs/custom_parsers.md` says the parser should support. Assertions are
+plain `_assert_h5netcdf_virtualizarr_identical` (raw-attribute +
+decoded-data) — no `xfail`, no `pytest.raises` adapter for cases that
+don't work today.
+
+This is intentional: the suite functions as an executable specification.
+Failures are TODO items. Suite green = HDFParser stack matches spec.
+
+Compound/structured dtypes can't be generated by the uniform-dtype
+basic strategy and are covered by curated cases in `TestCompoundDtype`.
+
+Shared infrastructure (sentinel, hypothesis profile registration,
+equivalence assertion helpers, generic numeric-dtype strategy) lives in
+`virtualizarr/tests/test_parsers/_fill_value_common.py` so other
+parsers can reuse it without copy-paste.
+
+CI throttling:
+- VIRTUALIZARR_HYPOTHESIS_PROFILE=ci      -> max_examples=10  (faster)
+- VIRTUALIZARR_HYPOTHESIS_PROFILE=thorough -> max_examples=50  (default)
+- pytest -m "not hypothesis_tests"        -> skip this module entirely
+"""
+
+from __future__ import annotations
+
+import string
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import h5py
+import numpy as np
+import pytest
+import xarray as xr
+from hypothesis import HealthCheck, example, given, settings
+from hypothesis import strategies as st
+from hypothesis.extra import numpy as npst
+
+from virtualizarr.tests import requires_hdf5plugin, requires_imagecodecs
+from virtualizarr.tests.test_parsers._fill_value_common import (
+    _UNSET,
+    assert_equivalent,
+    base_numeric_dtype_strategy,
+    base_pytestmark,
+    register_hypothesis_profiles,
+)
+from virtualizarr.tests.test_parsers._fill_value_common import (
+    data_in_dtype_strategy as _base_data_in_dtype_strategy,
+)
+from virtualizarr.tests.test_parsers._fill_value_common import (
+    value_in_dtype_strategy as _base_value_in_dtype_strategy,
+)
+from virtualizarr.tests.utils import manifest_store_from_hdf_url
+
+pytestmark = base_pytestmark()
+register_hypothesis_profiles()
+
+
+@dataclass
+class _DatasetSpec:
+    """A self-contained description of one HDF5 file to write for testing.
+
+    Every field passed to _write_hdf5 by tests should land in this dataclass
+    so failures shrink to a single repr-able value.
+    """
+
+    dtype: np.dtype
+    data: np.ndarray
+    dataset_fillvalue: Any = _UNSET
+    fill_value_attr: Any = _UNSET
+    scale_factor: float | None = None
+    add_offset: float | None = None
+    unsigned: bool = False
+    chunks: tuple[int, ...] | None = None
+    # Free-form attributes written verbatim to the dataset. Used to cover
+    # CF attributes that xarray passes through without special processing
+    # (missing_value, valid_range, valid_min, valid_max). The raw-attrs
+    # invariant catches any drop automatically.
+    extra_attrs: dict[str, Any] | None = None
+
+
+def _write_hdf5(
+    filepath: Path | str,
+    *,
+    dtype: np.dtype,
+    data: np.ndarray,
+    dataset_fillvalue: Any = _UNSET,
+    fill_value_attr: Any = _UNSET,
+    scale_factor: float | None = None,
+    add_offset: float | None = None,
+    unsigned: bool = False,
+    chunks: tuple[int, ...] | None = None,
+    extra_attrs: dict[str, Any] | None = None,
+) -> str:
+    """Write a single-variable HDF5 file with the requested fill-value shape.
+
+    Returns the file:// URL for use with manifest_store_from_hdf_url.
+    """
+    filepath = str(filepath)
+
+    create_kwargs: dict[str, Any] = {"name": "data", "data": data, "dtype": dtype}
+    if chunks is not None:
+        create_kwargs["chunks"] = chunks
+    if dataset_fillvalue is not _UNSET:
+        create_kwargs["fillvalue"] = dataset_fillvalue
+
+    with h5py.File(filepath, "w") as f:
+        dset = f.create_dataset(**create_kwargs)
+        if fill_value_attr is not _UNSET:
+            dset.attrs["_FillValue"] = fill_value_attr
+        if scale_factor is not None:
+            dset.attrs["scale_factor"] = scale_factor
+        if add_offset is not None:
+            dset.attrs["add_offset"] = add_offset
+        if unsigned:
+            dset.attrs["_Unsigned"] = "true"
+        if extra_attrs is not None:
+            for k, v in extra_attrs.items():
+                dset.attrs[k] = v
+        # Attach a real dim scale to each axis so xarray+h5netcdf and the
+        # HDF parser both produce named "dim_<i>" dims instead of
+        # phony_dim_0 — keeps equivalence assertions about fill values,
+        # not dim naming.
+        for axis, size in enumerate(data.shape):
+            scale_name = f"dim_{axis}"
+            dim = f.create_dataset(scale_name, data=np.arange(size))
+            dim.make_scale(scale_name)
+            dset.dims[axis].attach_scale(dim)
+
+    return f"file://{filepath}"
+
+
+def _open_virtualizarr(filepath: str, *, decode_cf: bool) -> xr.Dataset:
+    """Open `filepath` via virtualizarr's HDFParser (manifest store) + xarray's
+    zarr backend. Plugged into the shared equivalence helpers as the
+    `open_observed` callback.
+    """
+    manifest_store = manifest_store_from_hdf_url(f"file://{filepath}")
+    return xr.open_dataset(
+        manifest_store,
+        engine="zarr",
+        consolidated=False,
+        zarr_format=3,
+        decode_cf=decode_cf,
+    )
+
+
+def _open_h5netcdf(filepath: str, *, decode_cf: bool) -> xr.Dataset:
+    """Open `filepath` via xarray+h5netcdf. The reference engine for the HDF
+    equivalence sweep — what a user expects to see when reading the file
+    directly without virtualizarr.
+    """
+    return xr.open_dataset(filepath, engine="h5netcdf", decode_cf=decode_cf)
+
+
+def _assert_h5netcdf_virtualizarr_identical(filepath: str) -> None:
+    """Run both raw-attribute and decoded-data invariants against the HDF
+    parser. Thin wrapper around the shared helper that binds the engines.
+    """
+    assert_equivalent(
+        filepath,
+        open_observed=_open_virtualizarr,
+        open_reference=_open_h5netcdf,
+    )
+
+
+def test_module_loads():
+    """Smoke test: confirms the module imports, the marker registers,
+    and hypothesis profiles loaded without error.
+    """
+    assert settings().max_examples in (10, 50)
+
+
+def test_write_hdf5_basic_roundtrip(tmp_path):
+    """_write_hdf5 writes data, dataset.fillvalue, _FillValue attr,
+    and dim scale that h5py can read back."""
+    url = _write_hdf5(
+        tmp_path / "data.nc",
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0], dtype="f8"),
+        dataset_fillvalue=-9999.0,
+        fill_value_attr=-9999.0,
+        chunks=(2,),
+    )
+    assert url.startswith("file://")
+
+    with h5py.File(url.removeprefix("file://"), "r") as f:
+        dset = f["data"]
+        np.testing.assert_array_equal(dset[:], [1.0, 2.0, 3.0])
+        assert dset.fillvalue == -9999.0
+        assert dset.attrs["_FillValue"] == -9999.0
+        assert dset.chunks == (2,)
+        assert len(dset.dims[0]) == 1
+        assert dset.dims[0][0].name.lstrip("/") == "dim_0"
+
+
+def test_write_hdf5_unset_skips_fillvalue(tmp_path):
+    """When dataset_fillvalue=_UNSET, h5py uses its dtype default;
+    when fill_value_attr=_UNSET, no _FillValue attribute is written.
+    """
+    url = _write_hdf5(
+        tmp_path / "data.nc",
+        dtype=np.dtype("i4"),
+        data=np.array([1, 2, 3], dtype="i4"),
+        # both fill axes left UNSET
+    )
+    with h5py.File(url.removeprefix("file://"), "r") as f:
+        assert "_FillValue" not in f["data"].attrs
+
+
+def test_equivalence_helper_passes_on_simple_file(tmp_path):
+    """A trivial float64 file with no fill values must round-trip
+    identically through both paths."""
+    url = _write_hdf5(
+        tmp_path / "simple.nc",
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0, 4.0], dtype="f8"),
+    )
+    _assert_h5netcdf_virtualizarr_identical(url.removeprefix("file://"))
+
+
+# A small enum-label map reused for all enum-dtype samples; concrete values
+# don't matter for the property test, only that the dtype carries enum
+# metadata h5py preserves.
+_ENUM_LABELS = {"NONE": 0, "LOW": 1, "MED": 2, "HIGH": 3}
+
+
+def _h5py_compatible_dtype_strategy() -> st.SearchStrategy[np.dtype]:
+    """Sample one of the dtypes h5py can store in a dataset.
+
+    Extends `base_numeric_dtype_strategy()` (bool/int*/uint*/float*/complex*)
+    with HDF5-specific kinds:
+
+    - fixed-length bytes (`|S*`, numpy kind `S`)
+    - vlen utf-8 string (`h5py.string_dtype()`, numpy kind `O`)
+    - vlen ASCII string (`h5py.string_dtype(encoding="ascii")`, kind `O`)
+    - vlen array of a numeric base (`h5py.vlen_dtype(base)`, kind `O`,
+      each element is its own variable-length numpy array)
+    - enum (`h5py.enum_dtype({...}, basetype=...)`, integer kind with
+      label metadata that doesn't survive the JSON metadata round-trip)
+
+    All currently expose at least one form of downstream breakage —
+    that's intentional: the suite *highlights* breakages rather than
+    hides them. The new attribution categories distinguish parser-side
+    failures from upstream / downstream-shared ones.
+
+    Excluded:
+
+    - Structured/compound dtypes — can't be parametrised by a single
+      `np.dtype`; covered by curated `TestCompoundDtype` cases.
+    - Object/region reference dtypes (`h5py.ref_dtype`,
+      `h5py.regionref_dtype`) — require a multi-dataset graph
+      (referrer + referent), not suitable for the single-dataset
+      writer template. Worth a separate curated module if real bugs
+      surface.
+    - Opaque dtype (`np.dtype(("V", n))`) — rare; not worth the
+      strategy weight today.
+    """
+    return st.one_of(
+        base_numeric_dtype_strategy(),
+        st.sampled_from(
+            [
+                np.dtype("S1"),
+                np.dtype("S5"),
+                np.dtype("S20"),
+                h5py.string_dtype(),  # vlen utf-8 (kind 'O')
+                h5py.string_dtype(encoding="ascii"),  # vlen ASCII (kind 'O')
+                h5py.vlen_dtype(np.dtype("i4")),  # vlen array of int32
+                h5py.vlen_dtype(np.dtype("f8")),  # vlen array of float64
+                h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i1")),
+                h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i4")),
+            ]
+        ),
+    )
+
+
+def _value_in_dtype_strategy(dtype: np.dtype) -> st.SearchStrategy[Any]:
+    """Return a strategy producing a value compatible with `dtype`.
+
+    Delegates to `base_value_in_dtype_strategy` for the universally-supported
+    kinds and adds HDF-specific branches:
+
+    - vlen utf-8 string: any unicode text
+    - vlen ASCII string: only ASCII codepoints (h5py rejects non-ASCII
+      bytes when the dtype declares ASCII encoding)
+    - enum: an integer from the enum's label set (h5py would accept any
+      int of the base dtype, but values outside the label set don't
+      round-trip semantically; we constrain for realism)
+    """
+    str_info = h5py.check_string_dtype(dtype)
+    if str_info is not None:
+        if str_info.encoding == "ascii":
+            return st.text(
+                alphabet=string.ascii_letters + string.digits + " ",
+                min_size=0,
+                max_size=20,
+            )
+        return st.text(min_size=0, max_size=20)
+    # Enum dtypes look numeric (kind 'i'/'u') with metadata; if metadata
+    # carries an enum label-map, constrain values to those labels.
+    enum_map = h5py.check_enum_dtype(dtype)
+    if enum_map is not None:
+        return st.sampled_from(sorted(set(enum_map.values())))
+    return _base_value_in_dtype_strategy(dtype)
+
+
+def _data_in_dtype_strategy(
+    dtype: np.dtype, shape: tuple[int, ...]
+) -> st.SearchStrategy[np.ndarray]:
+    """Generate a numpy array of the given dtype and shape.
+
+    Branches per HDF5-specific dtype because `hypothesis.extra.numpy.arrays`
+    doesn't handle h5py's vlen / enum dtypes.
+
+    - vlen string (utf-8 or ASCII): build a flat list of strings and
+      reshape into the requested shape.
+    - vlen array: each element is its own length-varying numpy array
+      of the vlen base type.
+    - enum: integer storage with values drawn from the label set, then
+      reshape.
+    - everything else: delegate to the shared numeric data strategy.
+    """
+    str_info = h5py.check_string_dtype(dtype)
+    if str_info is not None:
+        n = int(np.prod(shape)) if len(shape) else 1
+        if str_info.encoding == "ascii":
+            text_strategy = st.text(
+                alphabet=string.ascii_letters + string.digits + " ",
+                min_size=0,
+                max_size=20,
+            )
+        else:
+            text_strategy = st.text(min_size=0, max_size=20)
+        return st.lists(text_strategy, min_size=n, max_size=n).map(
+            lambda xs: np.array(xs, dtype=dtype).reshape(shape)
+        )
+
+    vlen_base = h5py.check_vlen_dtype(dtype)
+    if vlen_base is not None:
+        # Each element of the outer array is itself a length-varying
+        # numpy array of `vlen_base`. Build flat then reshape.
+        n = int(np.prod(shape)) if len(shape) else 1
+
+        @st.composite
+        def _vlen_array_data(draw):
+            lengths = draw(
+                st.lists(
+                    st.integers(min_value=0, max_value=4),
+                    min_size=n,
+                    max_size=n,
+                )
+            )
+            arr = np.empty(n, dtype=dtype)
+            for i, length in enumerate(lengths):
+                arr[i] = draw(npst.arrays(dtype=vlen_base, shape=(length,)))
+            return arr.reshape(shape)
+
+        return _vlen_array_data()
+
+    enum_map = h5py.check_enum_dtype(dtype)
+    if enum_map is not None:
+        # Storage is the integer base; constrain values to the label set
+        # so the data is semantically meaningful for an enum.
+        labels = sorted(set(enum_map.values()))
+        n = int(np.prod(shape)) if len(shape) else 1
+        return st.lists(st.sampled_from(labels), min_size=n, max_size=n).map(
+            lambda xs: np.array(xs, dtype=dtype).reshape(shape)
+        )
+
+    return _base_data_in_dtype_strategy(dtype, shape)
+
+
+@st.composite
+def _basic_dataset_strategy(draw) -> _DatasetSpec:
+    dtype = draw(_h5py_compatible_dtype_strategy())
+    # 1- to 3-D shapes with modest sides; covers axis-naming, multi-chunk,
+    # and chunk-vs-shape edge cases without blowing up data volume.
+    shape = draw(npst.array_shapes(min_dims=1, max_dims=3, min_side=1, max_side=8))
+    chunks_choice = draw(st.booleans())
+    chunks = (
+        tuple(draw(st.integers(min_value=1, max_value=size)) for size in shape)
+        if chunks_choice
+        else None
+    )
+    data = draw(_data_in_dtype_strategy(dtype, shape))
+
+    # Vlen-array dtypes don't have a meaningful array-level fill value
+    # (h5py's default is an empty inner array). Skip the fill-value
+    # sweep for them; data-shape coverage alone is the signal here.
+    if h5py.check_vlen_dtype(dtype) is not None:
+        return _DatasetSpec(dtype=dtype, data=data, chunks=chunks)
+
+    fill_in_dtype = draw(_value_in_dtype_strategy(dtype))
+    dataset_fillvalue = draw(st.sampled_from([_UNSET, fill_in_dtype]))
+    fill_value_attr = draw(st.sampled_from([_UNSET, fill_in_dtype]))
+    return _DatasetSpec(
+        dtype=dtype,
+        data=data,
+        dataset_fillvalue=dataset_fillvalue,
+        fill_value_attr=fill_value_attr,
+        chunks=chunks,
+    )
+
+
+def _spec_to_url(spec: _DatasetSpec, tmp_path: Path) -> str:
+    """Translate a _DatasetSpec into a written file and return its path
+    (without file:// prefix, for use with the equivalence helper)."""
+    url = _write_hdf5(
+        tmp_path / "data.nc",
+        dtype=spec.dtype,
+        data=spec.data,
+        dataset_fillvalue=spec.dataset_fillvalue,
+        fill_value_attr=spec.fill_value_attr,
+        scale_factor=spec.scale_factor,
+        add_offset=spec.add_offset,
+        unsigned=spec.unsigned,
+        chunks=spec.chunks,
+        extra_attrs=spec.extra_attrs,
+    )
+    return url.removeprefix("file://")
+
+
+# Curated examples — each forces a specific dtype/fill combination we want
+# guaranteed coverage of, separate from the random strategy draws.
+_BASIC_EXAMPLES = [
+    # NaN float _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0], dtype="f8"),
+        fill_value_attr=np.nan,
+    ),
+    # uint8 with both dataset.fillvalue and matching _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("u1"),
+        data=np.array([1, 2, 3], dtype="u1"),
+        dataset_fillvalue=255,
+        fill_value_attr=255,
+    ),
+    # bool _FillValue=True
+    _DatasetSpec(
+        dtype=np.dtype("?"),
+        data=np.array([True, False, True], dtype="?"),
+        fill_value_attr=True,
+    ),
+    # vlen utf-8 with empty-string sentinel _FillValue
+    _DatasetSpec(
+        dtype=h5py.string_dtype(),
+        data=np.array(["a", "b"], dtype=h5py.string_dtype()),
+        fill_value_attr="",
+    ),
+    # vlen utf-8 with a real sentinel value
+    _DatasetSpec(
+        dtype=h5py.string_dtype(),
+        data=np.array(["a", "b"], dtype=h5py.string_dtype()),
+        fill_value_attr="MISSING",
+    ),
+    # vlen utf-8 with no _FillValue at all
+    _DatasetSpec(
+        dtype=h5py.string_dtype(),
+        data=np.array(["a", "b"], dtype=h5py.string_dtype()),
+    ),
+    # S10 with empty-bytes sentinel _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("S10"),
+        data=np.array([b"hello", b"world"], dtype="S10"),
+        fill_value_attr=b"",
+    ),
+    # S5 with a real sentinel _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("S5"),
+        data=np.array([b"a", b"b"], dtype="S5"),
+        fill_value_attr=b"x",
+    ),
+    # ASCII vlen string with an explicit ASCII sentinel
+    _DatasetSpec(
+        dtype=h5py.string_dtype(encoding="ascii"),
+        data=np.array(["alpha", "beta"], dtype=h5py.string_dtype(encoding="ascii")),
+        fill_value_attr="MISSING",
+    ),
+    # int8-backed enum with fill = one of the labels (LOW=1)
+    _DatasetSpec(
+        dtype=h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i1")),
+        data=np.array(
+            [0, 1, 2, 3], dtype=h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i1"))
+        ),
+        fill_value_attr=np.int8(1),
+    ),
+    # int32-backed enum with no fill
+    _DatasetSpec(
+        dtype=h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i4")),
+        data=np.array(
+            [0, 2], dtype=h5py.enum_dtype(_ENUM_LABELS, basetype=np.dtype("i4"))
+        ),
+    ),
+    # vlen array of int32, no fill (vlen-array dtypes don't have one)
+    _DatasetSpec(
+        dtype=h5py.vlen_dtype(np.dtype("i4")),
+        data=np.array(
+            [
+                np.array([1, 2, 3], dtype="i4"),
+                np.array([], dtype="i4"),
+                np.array([4], dtype="i4"),
+            ],
+            dtype=h5py.vlen_dtype(np.dtype("i4")),
+        ),
+    ),
+    # vlen array of float64, no fill
+    _DatasetSpec(
+        dtype=h5py.vlen_dtype(np.dtype("f8")),
+        data=np.array(
+            [
+                np.array([1.0, 2.5], dtype="f8"),
+                np.array([np.nan], dtype="f8"),
+            ],
+            dtype=h5py.vlen_dtype(np.dtype("f8")),
+        ),
+    ),
+]
+
+
+_BASIC_EXAMPLE_IDS = [
+    "float64-nan-fill",
+    "uint8-with-fill",
+    "bool-fill",
+    "vlen-empty-fill",
+    "vlen-sentinel-fill",
+    "vlen-no-fill",
+    "S10-empty-fill",
+    "S5-sentinel-fill",
+    "ascii-vlen-sentinel-fill",
+    "enum-i1-with-fill",
+    "enum-i4-no-fill",
+    "vlen-array-i4",
+    "vlen-array-f8",
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestBasicEquivalence:
+    """Equivalence sweep across the dtype × fill matrix.
+
+    The random-draw strategy generates the *full* parameter range that
+    `docs/custom_parsers.md` says the parser should support — including
+    string-kind dtypes with explicit `_FillValue`, vlen-utf-8 with and
+    without fill, etc.
+
+    Curated examples and random draws are **separate test methods** so
+    pytest-xdist can parallelise the curated cases and a single failing
+    example doesn't shadow the others. Each curated example shows as
+    its own test ID (`test_equivalence_curated[<name>]`); random draws
+    aggregate under `test_equivalence_random`.
+
+    The assertion is plain `_assert_h5netcdf_virtualizarr_identical`.
+    Cases the parser/stack doesn't handle correctly today **will fail**;
+    those failures are the to-fix list. Suite green = fixes complete.
+    """
+
+    @pytest.mark.parametrize("spec", _BASIC_EXAMPLES, ids=_BASIC_EXAMPLE_IDS)
+    def test_equivalence_curated(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+    # tmp_path is function-scoped, but each hypothesis example writes the
+    # same `data.nc` and reads it before the next example runs. The
+    # overwrite-in-place is intentional.
+    @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
+    @given(spec=_basic_dataset_strategy())
+    def test_equivalence_random(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+
+# String-dtype known failures are now covered by _BASIC_EXAMPLES (curated
+# entries) + random draws (via _h5py_compatible_dtype_strategy) inside
+# TestBasicEquivalence. The `_expected_failure_for(spec)` predicate
+# asserts the precise failure mode, so a change there is detected.
+# No separate test class needed.
+
+
+@st.composite
+def _packed_dataset_strategy(draw) -> _DatasetSpec:
+    """Packed-data scenarios: integer storage + scale_factor + add_offset
+    + _FillValue in the packed (encoded) domain.
+
+    Per docs/custom_parsers.md "Fill values in packed data": the
+    _FillValue must be in the packed domain; xarray applies CFMaskCoder
+    before CFScaleOffsetCoder on decode.
+    """
+    dtype = draw(
+        st.sampled_from(
+            [
+                np.dtype("i2"),
+                np.dtype("i4"),
+                np.dtype("u2"),
+                np.dtype("u4"),
+            ]
+        )
+    )
+    scale_factor = draw(
+        st.floats(min_value=1e-3, max_value=1e3, allow_nan=False, allow_infinity=False)
+    )
+    add_offset = draw(
+        st.floats(min_value=-1e3, max_value=1e3, allow_nan=False, allow_infinity=False)
+    )
+    shape = draw(npst.array_shapes(min_dims=1, max_dims=3, min_side=1, max_side=8))
+    data = draw(npst.arrays(dtype=dtype, shape=shape))
+    fill_value_attr = draw(_value_in_dtype_strategy(dtype))
+    return _DatasetSpec(
+        dtype=dtype,
+        data=data,
+        fill_value_attr=fill_value_attr,
+        scale_factor=scale_factor,
+        add_offset=add_offset,
+    )
+
+
+_PACKED_EXAMPLES = [
+    # int16 packed: scale=0.01, offset=0, _FillValue=-9999 (canonical CF case)
+    _DatasetSpec(
+        dtype=np.dtype("i2"),
+        data=np.array([100, 200, 300], dtype="i2"),
+        fill_value_attr=np.int16(-9999),
+        scale_factor=0.01,
+        add_offset=0.0,
+    ),
+    # uint16 packed: scale=0.1, offset=1000, _FillValue=0
+    _DatasetSpec(
+        dtype=np.dtype("u2"),
+        data=np.array([10, 20, 30], dtype="u2"),
+        fill_value_attr=np.uint16(0),
+        scale_factor=0.1,
+        add_offset=1000.0,
+    ),
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestPackedEquivalence:
+    @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
+    @given(spec=_packed_dataset_strategy())
+    @example(spec=_PACKED_EXAMPLES[0])
+    @example(spec=_PACKED_EXAMPLES[1])
+    def test_equivalence(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+
+@st.composite
+def _unsigned_dataset_strategy(draw) -> _DatasetSpec:
+    """Signed-integer storage with _Unsigned="true" attribute.
+
+    Per docs/custom_parsers.md "_Unsigned": fill value is interpreted in
+    the unsigned domain; e.g. _FillValue=-1 stored as int16 corresponds
+    to 65535 when reinterpreted as uint16.
+    """
+    dtype = draw(
+        st.sampled_from(
+            [
+                np.dtype("i1"),
+                np.dtype("i2"),
+                np.dtype("i4"),
+                np.dtype("i8"),
+            ]
+        )
+    )
+    shape = draw(npst.array_shapes(min_dims=1, max_dims=3, min_side=1, max_side=8))
+    data = draw(npst.arrays(dtype=dtype, shape=shape))
+    fill_value_attr = draw(_value_in_dtype_strategy(dtype))
+    return _DatasetSpec(
+        dtype=dtype,
+        data=data,
+        fill_value_attr=fill_value_attr,
+        unsigned=True,
+    )
+
+
+_UNSIGNED_EXAMPLES = [
+    # int16 stored, reinterpreted as uint16; _FillValue=-1 -> 65535
+    _DatasetSpec(
+        dtype=np.dtype("i2"),
+        data=np.array([1, 2, 3], dtype="i2"),
+        fill_value_attr=np.int16(-1),
+        unsigned=True,
+    ),
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestUnsignedEquivalence:
+    @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
+    @given(spec=_unsigned_dataset_strategy())
+    @example(spec=_UNSIGNED_EXAMPLES[0])
+    def test_equivalence(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+
+# Compound/structured dtypes don't fit the orthogonal axes of the basic /
+# packed / _Unsigned strategies (no integer/float kind, no scale_factor
+# semantics, no signed-vs-unsigned reinterpretation). They're also too
+# combinatorially varied to draw from a hypothesis strategy without heavy
+# `assume()` filtering. A handful of curated examples is enough.
+_COMPOUND_DTYPES_AND_FILLS = [
+    (
+        np.dtype([("a", "i4"), ("b", "f8")]),
+        np.array([(1, 1.0), (2, 2.0), (3, 3.0)], dtype=[("a", "i4"), ("b", "f8")]),
+        np.array((0, 0.0), dtype=[("a", "i4"), ("b", "f8")]),
+    ),
+    (
+        np.dtype([("x", "u2"), ("y", "u2"), ("z", "u2")]),
+        np.array(
+            [(10, 20, 30), (40, 50, 60)],
+            dtype=[("x", "u2"), ("y", "u2"), ("z", "u2")],
+        ),
+        np.array(
+            (65535, 65535, 65535),
+            dtype=[("x", "u2"), ("y", "u2"), ("z", "u2")],
+        ),
+    ),
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestCompoundDtype:
+    """Compound/structured-dtype datasets with _FillValue.
+
+    Compound dtypes can't be generated by the uniform-dtype basic
+    strategy (no single `np.dtype` to sample); they're covered here as
+    curated parametrized cases. Assertion is plain equivalence — any
+    failure is a real issue to fix.
+    """
+
+    @pytest.mark.parametrize(
+        "dtype,data,fill_value_attr",
+        _COMPOUND_DTYPES_AND_FILLS,
+        ids=["two-field", "three-field"],
+    )
+    def test_equivalence(self, dtype, data, fill_value_attr, tmp_path):
+        spec = _DatasetSpec(dtype=dtype, data=data, fill_value_attr=fill_value_attr)
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+
+# Cross-axis interactions: the basic / packed / _Unsigned classes are
+# deliberately orthogonal so each hypothesis strategy stays clean, but
+# real files combine multiple features. These curated examples cover the
+# combinations most likely to harbor latent bugs: packed + chunked + fill,
+# _Unsigned + chunked + fill, multi-dim + packed + fill, multi-dim + _Unsigned.
+_COMBINED_EXAMPLES = [
+    # packed + chunked
+    _DatasetSpec(
+        dtype=np.dtype("i2"),
+        data=np.arange(100, dtype="i2"),
+        fill_value_attr=np.int16(-9999),
+        scale_factor=0.01,
+        add_offset=0.0,
+        chunks=(10,),
+    ),
+    # _Unsigned + chunked + explicit dataset.fillvalue
+    _DatasetSpec(
+        dtype=np.dtype("i2"),
+        data=np.arange(64, dtype="i2"),
+        dataset_fillvalue=np.int16(-1),
+        fill_value_attr=np.int16(-1),
+        unsigned=True,
+        chunks=(8,),
+    ),
+    # 2D packed + chunked
+    _DatasetSpec(
+        dtype=np.dtype("i4"),
+        data=np.arange(24, dtype="i4").reshape(4, 6),
+        fill_value_attr=np.int32(-32768),
+        scale_factor=0.001,
+        add_offset=-100.0,
+        chunks=(2, 3),
+    ),
+    # 3D _Unsigned
+    _DatasetSpec(
+        dtype=np.dtype("i1"),
+        data=np.arange(8, dtype="i1").reshape(2, 2, 2),
+        fill_value_attr=np.int8(-1),
+        unsigned=True,
+    ),
+    # packed + dataset.fillvalue in packed domain matching _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("u2"),
+        data=np.array([10, 20, 30], dtype="u2"),
+        dataset_fillvalue=np.uint16(65535),
+        fill_value_attr=np.uint16(65535),
+        scale_factor=0.1,
+        add_offset=0.0,
+    ),
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestCombinedEquivalence:
+    @pytest.mark.parametrize(
+        "spec",
+        _COMBINED_EXAMPLES,
+        ids=[
+            "packed+chunked",
+            "unsigned+chunked+fill",
+            "2D+packed+chunked",
+            "3D+unsigned",
+            "packed+matching-fill",
+        ],
+    )
+    def test_equivalence(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
+
+
+# Extra-CF-attribute scenarios: xarray does not specially process these
+# attributes; the parser MUST round-trip them as-is. The raw-attrs
+# invariant (decode_cf=False) is what surfaces a drop or transformation
+# directly; the decoded-data invariant covers the user-facing path.
+_EXTRA_ATTR_EXAMPLES = [
+    # missing_value alongside _FillValue
+    _DatasetSpec(
+        dtype=np.dtype("f4"),
+        data=np.array([1.0, 2.0, 3.0], dtype="f4"),
+        fill_value_attr=np.float32(-9999.0),
+        extra_attrs={"missing_value": np.float32(-9998.0)},
+    ),
+    # valid_range as a 2-element array
+    _DatasetSpec(
+        dtype=np.dtype("i2"),
+        data=np.array([10, 20, 30], dtype="i2"),
+        extra_attrs={"valid_range": np.array([0, 100], dtype="i2")},
+    ),
+    # valid_min / valid_max as separate scalars
+    _DatasetSpec(
+        dtype=np.dtype("f8"),
+        data=np.array([0.5, 1.0, 1.5], dtype="f8"),
+        extra_attrs={"valid_min": 0.0, "valid_max": 2.0},
+    ),
+    # missing_value + valid_range together
+    _DatasetSpec(
+        dtype=np.dtype("i4"),
+        data=np.array([1, 2, 3], dtype="i4"),
+        fill_value_attr=np.int32(-1),
+        extra_attrs={
+            "missing_value": np.int32(-2),
+            "valid_range": np.array([0, 1000], dtype="i4"),
+        },
+    ),
+]
+
+
+@requires_hdf5plugin
+@requires_imagecodecs
+class TestExtraAttributes:
+    """Pass-through of missing_value / valid_range / valid_min / valid_max.
+
+    xarray does not specially process these CF attributes — the parser
+    must preserve them verbatim. The raw-attrs assertion catches any drop
+    or transformation directly.
+    """
+
+    @pytest.mark.parametrize(
+        "spec",
+        _EXTRA_ATTR_EXAMPLES,
+        ids=[
+            "missing_value+fill",
+            "valid_range",
+            "valid_min+valid_max",
+            "missing_value+valid_range",
+        ],
+    )
+    def test_equivalence(self, spec, tmp_path):
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_h5netcdf_virtualizarr_identical(filepath)
diff --git a/virtualizarr/tests/test_parsers/test_zarr_fill_value_equivalence.py b/virtualizarr/tests/test_parsers/test_zarr_fill_value_equivalence.py
new file mode 100644
index 00000000..cb48a301
--- /dev/null
+++ b/virtualizarr/tests/test_parsers/test_zarr_fill_value_equivalence.py
@@ -0,0 +1,410 @@
+"""Property-based equivalence tests for the ZarrParser's fill-value handling.
+
+Each test writes a Zarr v3 store via zarr-python, then opens it two ways:
+
+  1. `xr.open_zarr(path)` directly (reference) — the user's "no virtualizarr"
+     baseline.
+  2. virtualizarr's `ZarrParser` → `ManifestStore` → `xr.open_dataset(...,
+     engine="zarr")` (observed) — the under-test path.
+
+Both go through xarray's zarr backend ultimately; we're asserting that
+virtualizarr's manifest-construction layer is transparent. Failures
+indicate the parser is either dropping/transforming metadata or
+mis-resolving fill values during manifest construction.
+
+Test philosophy mirrors the HDF equivalence module: strategies generate
+the full parameter range that should work; assertions are plain (no
+`xfail`, no `pytest.raises`). Failures are TODO items.
+
+Concrete issue coverage:
+
+- **#811** (`ZarrParser get_metadata fails for LocalStore Zarr with
+  dtype uint8`): a curated example with `dtype=uint8` and no explicit
+  fill_value exercises the default-fill-lookup path that crashed in
+  the bug report.
+- **Cross-parser fill-value consistency:** the random strategy
+  generates every numeric dtype with various fill-value shapes,
+  surfacing any divergence from xarray's direct view.
+
+Shared infrastructure (sentinel, hypothesis profiles, equivalence
+helpers, generic dtype strategies) lives in
+`virtualizarr/tests/test_parsers/_fill_value_common.py`.
+
+CI throttling:
+- VIRTUALIZARR_HYPOTHESIS_PROFILE=ci      -> max_examples=10  (faster)
+- VIRTUALIZARR_HYPOTHESIS_PROFILE=thorough -> max_examples=50  (default)
+- pytest -m "not hypothesis_tests"        -> skip this module entirely
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pytest
+import xarray as xr
+import zarr
+from hypothesis import HealthCheck, given, settings
+from hypothesis import strategies as st
+from hypothesis.extra import numpy as npst
+from obspec_utils.registry import ObjectStoreRegistry
+from obstore.store import LocalStore
+
+from virtualizarr.parsers import ZarrParser
+from virtualizarr.tests import requires_arro3
+from virtualizarr.tests.test_parsers._fill_value_common import (
+    _UNSET,
+    assert_equivalent,
+    base_numeric_dtype_strategy,
+    base_pytestmark,
+    data_in_dtype_strategy,
+    register_hypothesis_profiles,
+    value_in_dtype_strategy,
+)
+
+pytestmark = base_pytestmark() + [requires_arro3]
+register_hypothesis_profiles()
+
+
+# ---------------------------------------------------------------------------
+# _DatasetSpec — Zarr-flavoured. Compared to the HDF one this is much
+# simpler: no h5py `dataset.fillvalue` / explicit-vs-default distinction
+# (zarr v3 always has a fill_value), no _Unsigned, no scale_factor/add_offset
+# in the metadata (those would be plain attributes if present).
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class _DatasetSpec:
+    """A self-contained description of one Zarr v3 store to write for testing.
+
+    Fields:
+        dtype: the array's numeric dtype.
+        data: the data to write.
+        fill_value: zarr's storage-level `fill_value`. `_UNSET` means "let
+            zarr-python pick the dtype default" (which is what triggers
+            #811 for uint8).
+        fill_value_attr: the CF `_FillValue` attribute on the array. Set
+            independently of `fill_value` — they're allowed to disagree
+            in CF.
+        chunks: explicit chunk shape; None means single-chunk.
+        extra_attrs: free-form CF-style attributes to round-trip
+            (`missing_value`, `valid_range`, etc.).
+    """
+
+    dtype: np.dtype
+    data: np.ndarray
+    fill_value: Any = _UNSET
+    fill_value_attr: Any = _UNSET
+    chunks: tuple[int, ...] | None = None
+    extra_attrs: dict[str, Any] | None = None
+
+
+def _write_zarr(
+    filepath: Path | str,
+    *,
+    dtype: np.dtype,
+    data: np.ndarray,
+    fill_value: Any = _UNSET,
+    fill_value_attr: Any = _UNSET,
+    chunks: tuple[int, ...] | None = None,
+    extra_attrs: dict[str, Any] | None = None,
+) -> str:
+    """Write a single-variable Zarr v3 store with the requested shape.
+
+    Returns the file:// URL for use with the ZarrParser. Coordinate
+    variables (one per dim) are written too so xarray's zarr backend
+    builds a proper Dataset with named dims rather than anonymous ones.
+    """
+    filepath = str(filepath)
+    store = zarr.storage.LocalStore(filepath)
+    root = zarr.group(store=store, zarr_format=3)
+
+    dimension_names = tuple(f"dim_{i}" for i in range(data.ndim))
+    chunk_shape = chunks if chunks is not None else data.shape
+
+    # zarr-python's `create_array` requires a fill_value. Passing
+    # `fill_value=None` would explicitly write null; to exercise the
+    # "user didn't specify" code path (issue #811) we want to omit the
+    # kwarg entirely.
+    arr_kwargs: dict[str, Any] = {
+        "name": "data",
+        "shape": data.shape,
+        "dtype": dtype,
+        "chunks": chunk_shape,
+        "dimension_names": dimension_names,
+    }
+    if fill_value is not _UNSET:
+        arr_kwargs["fill_value"] = fill_value
+    arr = root.create_array(**arr_kwargs)
+    arr[...] = data
+
+    # zarr v3 attributes are stored in JSON, which doesn't accept numpy
+    # scalars. Real users storing CF-style fill values write Python
+    # primitives (int / float / str / list), so coerce here — this isn't
+    # a fidelity loss for the equivalence test, just matches the user
+    # contract. (How virtualizarr's parsers handle numpy-scalar attrs
+    # they read from h5py / etc. is a separate concern — see Issue #715.)
+    def _to_json_safe(v: Any) -> Any:
+        if isinstance(v, np.generic):
+            return v.item()
+        if isinstance(v, np.ndarray):
+            return v.tolist()
+        return v
+
+    if fill_value_attr is not _UNSET:
+        arr.attrs["_FillValue"] = _to_json_safe(fill_value_attr)
+    if extra_attrs is not None:
+        for k, v in extra_attrs.items():
+            arr.attrs[k] = _to_json_safe(v)
+
+    # Write coordinate arrays for each dim so xarray sees a proper
+    # named-dim Dataset rather than just a bare DataArray.
+    for axis, (dim_name, size) in enumerate(zip(dimension_names, data.shape)):
+        coord = root.create_array(
+            name=dim_name,
+            shape=(size,),
+            dtype="i8",
+            chunks=(size,),
+            dimension_names=(dim_name,),
+        )
+        coord[:] = np.arange(size)
+
+    return f"file://{filepath}"
+
+
+# ---------------------------------------------------------------------------
+# Open callbacks: virtualizarr-via-ZarrParser (observed) vs xarray-direct
+# (reference). Both ultimately go through xarray's zarr backend; the
+# difference is whether virtualizarr's manifest layer sits in between.
+# ---------------------------------------------------------------------------
+
+
+def _open_virtualizarr(filepath: str, *, decode_cf: bool) -> xr.Dataset:
+    """Open `filepath` via virtualizarr's ZarrParser. Plugged into the
+    shared equivalence helpers as the `open_observed` callback.
+    """
+    url = f"file://{filepath}"
+    store = LocalStore(prefix=filepath)
+    registry = ObjectStoreRegistry({url: store})
+    parser = ZarrParser()
+    manifest_store = parser(url=url, registry=registry)
+    return xr.open_dataset(
+        manifest_store,
+        engine="zarr",
+        consolidated=False,
+        zarr_format=3,
+        decode_cf=decode_cf,
+    )
+
+
+def _open_zarr_direct(filepath: str, *, decode_cf: bool) -> xr.Dataset:
+    """Open `filepath` directly via `xr.open_zarr` — no virtualizarr. The
+    reference for the equivalence sweep.
+    """
+    return xr.open_dataset(
+        filepath,
+        engine="zarr",
+        consolidated=False,
+        zarr_format=3,
+        decode_cf=decode_cf,
+    )
+
+
+def _assert_zarrparser_xarray_identical(filepath: str) -> None:
+    """Run both raw-attribute and decoded-data invariants against the Zarr
+    parser. Thin wrapper that binds the engines.
+    """
+    assert_equivalent(
+        filepath,
+        open_observed=_open_virtualizarr,
+        open_reference=_open_zarr_direct,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Sanity tests — confirm the writer and equivalence helper work on a
+# trivially-trivial file before any hypothesis runs.
+# ---------------------------------------------------------------------------
+
+
+def test_write_zarr_basic_roundtrip(tmp_path):
+    """`_write_zarr` produces a Zarr store with the expected shape, dtype,
+    and fill_value that zarr-python can read back."""
+    url = _write_zarr(
+        tmp_path / "data.zarr",
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0], dtype="f8"),
+        fill_value=-9999.0,
+        chunks=(2,),
+    )
+    assert url.startswith("file://")
+
+    arr = zarr.open_array(
+        store=zarr.storage.LocalStore(url.removeprefix("file://")),
+        path="data",
+        mode="r",
+    )
+    np.testing.assert_array_equal(arr[:], [1.0, 2.0, 3.0])
+    assert arr.metadata.fill_value == -9999.0
+
+
+def test_equivalence_helper_passes_on_simple_zarr(tmp_path):
+    """A trivial float64 store should round-trip identically through both
+    open paths."""
+    url = _write_zarr(
+        tmp_path / "simple.zarr",
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0, 4.0], dtype="f8"),
+        fill_value=0.0,
+    )
+    _assert_zarrparser_xarray_identical(url.removeprefix("file://"))
+
+
+# ---------------------------------------------------------------------------
+# Random-draw strategy and basic equivalence sweep.
+#
+# Strategy generates the full numeric dtype matrix from
+# base_numeric_dtype_strategy() (bool / signed int / unsigned int / float /
+# complex) plus several shapes / fill-value patterns. Failures from
+# random draws are real signals to investigate; the curated `@example`s
+# pin specific bug-report cases (#811).
+# ---------------------------------------------------------------------------
+
+
+@st.composite
+def _basic_zarr_dataset_strategy(draw) -> _DatasetSpec:
+    dtype = draw(base_numeric_dtype_strategy())
+    shape = draw(npst.array_shapes(min_dims=1, max_dims=3, min_side=1, max_side=8))
+    chunks_choice = draw(st.booleans())
+    chunks = (
+        tuple(draw(st.integers(min_value=1, max_value=size)) for size in shape)
+        if chunks_choice
+        else None
+    )
+    data = draw(data_in_dtype_strategy(dtype, shape))
+    fill_in_dtype = draw(value_in_dtype_strategy(dtype))
+    # zarr v3 requires fill_value to be of the array dtype (or None for
+    # numeric "default zero"). We sample three options: omit entirely
+    # (triggers default-fill path — #811), set to a typed value, or set
+    # to a typed value and *also* expose it as a CF _FillValue attr.
+    fill_value = draw(st.sampled_from([_UNSET, fill_in_dtype]))
+    fill_value_attr = draw(st.sampled_from([_UNSET, fill_in_dtype]))
+    return _DatasetSpec(
+        dtype=dtype,
+        data=data,
+        fill_value=fill_value,
+        fill_value_attr=fill_value_attr,
+        chunks=chunks,
+    )
+
+
+def _spec_to_url(spec: _DatasetSpec, tmp_path: Path) -> str:
+    """Translate a `_DatasetSpec` to a written Zarr store and return its
+    bare path (no file:// prefix), for use with the equivalence helper.
+    """
+    url = _write_zarr(
+        tmp_path / "data.zarr",
+        dtype=spec.dtype,
+        data=spec.data,
+        fill_value=spec.fill_value,
+        fill_value_attr=spec.fill_value_attr,
+        chunks=spec.chunks,
+        extra_attrs=spec.extra_attrs,
+    )
+    return url.removeprefix("file://")
+
+
+# Curated examples. Each pins a specific bug-report case or scenario we
+# want guaranteed coverage of, separate from the random draws.
+_BASIC_EXAMPLES = [
+    # Issue #811: uint8 with no explicit fill_value — exercises the
+    # default-fill-lookup path that crashed `get_metadata` in the bug
+    # report. The Zarr v3 default for uint8 is 0; both engines should
+    # agree.
+    _DatasetSpec(
+        dtype=np.dtype("u1"),
+        data=np.array([1, 2, 3], dtype="u1"),
+    ),
+    # uint8 *with* an explicit fill_value — same dtype, but bypassing
+    # the default-lookup path. If only the default-lookup branch is
+    # broken, this passes while the no-fill case fails — useful signal.
+    _DatasetSpec(
+        dtype=np.dtype("u1"),
+        data=np.array([1, 2, 3], dtype="u1"),
+        fill_value=np.uint8(255),
+    ),
+    # NaN-valued _FillValue on float64. Tests NaN equality semantics
+    # through the parser.
+    _DatasetSpec(
+        dtype=np.dtype("f8"),
+        data=np.array([1.0, 2.0, 3.0], dtype="f8"),
+        fill_value=np.nan,
+        fill_value_attr=np.nan,
+    ),
+    # int32 with both fill_value and matching CF _FillValue.
+    _DatasetSpec(
+        dtype=np.dtype("i4"),
+        data=np.array([10, 20, 30], dtype="i4"),
+        fill_value=np.int32(-9999),
+        fill_value_attr=np.int32(-9999),
+    ),
+    # bool array with explicit fill_value=True.
+    _DatasetSpec(
+        dtype=np.dtype("?"),
+        data=np.array([True, False, True], dtype="?"),
+        fill_value=True,
+    ),
+]
+
+
+_BASIC_EXAMPLE_IDS = [
+    "uint8-no-fill",  # #811 reproducer
+    "uint8-with-fill",
+    "float64-nan-fill",
+    "int32-matching-fill",
+    "bool-fill",
+]
+
+
+def _ensure_clean_zarr_path(tmp_path: Path) -> None:
+    """Zarr's LocalStore needs an empty directory each draw. Tear down
+    any previous `data.zarr` so the writer starts clean.
+    """
+    store_path = tmp_path / "data.zarr"
+    if store_path.exists():
+        import shutil
+
+        shutil.rmtree(store_path)
+
+
+@requires_arro3
+class TestBasicEquivalence:
+    """Equivalence sweep across the Zarr numeric dtype × fill matrix.
+
+    Curated examples and random draws are split into separate test
+    methods so pytest-xdist can parallelise the curated cases and a
+    single failing example doesn't shadow the others.
+
+    Assertion is plain `_assert_zarrparser_xarray_identical`. Cases the
+    parser/stack doesn't handle correctly today **will fail** — those
+    failures are the to-fix list.
+    """
+
+    @pytest.mark.parametrize("spec", _BASIC_EXAMPLES, ids=_BASIC_EXAMPLE_IDS)
+    def test_equivalence_curated(self, spec, tmp_path):
+        _ensure_clean_zarr_path(tmp_path)
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_zarrparser_xarray_identical(filepath)
+
+    # tmp_path is function-scoped, but each hypothesis example writes the
+    # same `data.zarr` and reads it before the next example runs. The
+    # overwrite-in-place is intentional.
+    @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
+    @given(spec=_basic_zarr_dataset_strategy())
+    def test_equivalence_random(self, spec, tmp_path):
+        _ensure_clean_zarr_path(tmp_path)
+        filepath = _spec_to_url(spec, tmp_path)
+        _assert_zarrparser_xarray_identical(filepath)
diff --git a/virtualizarr/tests/test_parsers/test_zarr_spec_compliance.py b/virtualizarr/tests/test_parsers/test_zarr_spec_compliance.py
new file mode 100644
index 00000000..e4ed5d74
--- /dev/null
+++ b/virtualizarr/tests/test_parsers/test_zarr_spec_compliance.py
@@ -0,0 +1,268 @@
+"""Zarr-spec compliance tests for the ZarrParser — xarray NOT in the loop.
+
+Asserts that virtualizarr's ZarrParser produces ManifestStores whose
+metadata matches what zarr-python reads from the same source store.
+**xarray is deliberately absent from the assertion path** — the
+"reference" is zarr-python itself.
+
+Why this exists alongside `test_zarr_fill_value_equivalence.py`:
+
+The equivalence test compares `virtualizarr+xarray.open_zarr` against
+`xarray.open_zarr` direct. When both engines hit xarray's
+`FillValueCoder.decode` on a value the coder can't handle (e.g.
+`_FillValue=NaN` as a JSON-native number), *both* sides fail with the
+same exception — and the test correctly reports it as
+`BothEnginesFailedIdenticallyError`. But that "failure" isn't a
+virtualizarr bug: the parser is producing zarr-spec-compliant metadata,
+and xarray's coder is what can't handle it.
+
+The root cause is that xarray's `_FillValue` encoding uses HDF5-style
+base64 even when writing to Zarr (whose metadata is JSON-native). The
+fix lives upstream in xarray (tracked at pydata/xarray#11332) — or in
+a future zarr-native nodata convention (zarr-specs#351,
+zarr-extensions#33).
+
+By testing against zarr-python directly, this module:
+
+1. Asserts virtualizarr correctness independent of xarray's quirks.
+2. Stays green even when the equivalence module is red on the same
+   spec — surfacing that the gap is upstream, not in virtualizarr.
+3. Establishes the "virtualizarr produces zarr-spec-compliant
+   manifests" contract as the primary correctness check.
+
+Tests here are deliberately simple: open via parser, open via
+zarr-python, compare metadata fields directly. No hypothesis sweeps
+(the equivalence module covers the dtype × fill matrix); these are
+property-asserting curated cases.
+
+Shared infra (pytestmark, hypothesis profile) is imported from
+`_fill_value_common.py` for consistency.
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import numpy as np
+import zarr
+from obspec_utils.registry import ObjectStoreRegistry
+from obstore.store import LocalStore
+
+from virtualizarr.parsers import ZarrParser
+from virtualizarr.tests import requires_arro3
+from virtualizarr.tests.test_parsers._fill_value_common import (
+    _UNSET,
+    base_pytestmark,
+    register_hypothesis_profiles,
+)
+
+pytestmark = base_pytestmark() + [requires_arro3]
+register_hypothesis_profiles()
+
+
+# ---------------------------------------------------------------------------
+# Writer + opener helpers — minimal, since we're not sweeping over a matrix.
+# ---------------------------------------------------------------------------
+
+
+def _write_zarr_simple(
+    filepath,
+    *,
+    shape: tuple[int, ...],
+    dtype: Any,
+    fill_value: Any = _UNSET,
+    attrs: dict[str, Any] | None = None,
+) -> str:
+    """Write a single-variable Zarr v3 store; return its file:// URL.
+
+    Coordinate arrays are created for each dim so the store is
+    well-formed.
+    """
+    store = zarr.storage.LocalStore(str(filepath))
+    root = zarr.group(store=store, zarr_format=3)
+    dimension_names = tuple(f"dim_{i}" for i in range(len(shape)))
+
+    arr_kwargs: dict[str, Any] = {
+        "name": "data",
+        "shape": shape,
+        "dtype": dtype,
+        "chunks": shape,
+        "dimension_names": dimension_names,
+    }
+    if fill_value is not _UNSET:
+        arr_kwargs["fill_value"] = fill_value
+    arr = root.create_array(**arr_kwargs)
+    arr[...] = np.zeros(shape, dtype=dtype)
+
+    if attrs:
+        for k, v in attrs.items():
+            arr.attrs[k] = v
+
+    for i, size in enumerate(shape):
+        coord = root.create_array(
+            name=dimension_names[i],
+            shape=(size,),
+            dtype="i8",
+            chunks=(size,),
+            dimension_names=(dimension_names[i],),
+        )
+        coord[:] = np.arange(size)
+
+    return f"file://{filepath}"
+
+
+def _open_manifest(url: str):
+    """Open the store via virtualizarr's ZarrParser; return a ManifestStore."""
+    bare = url.removeprefix("file://")
+    store = LocalStore(prefix=bare)
+    registry = ObjectStoreRegistry({url: store})
+    return ZarrParser()(url=url, registry=registry)
+
+
+def _open_reference(url: str) -> zarr.Group:
+    """Open the store directly with zarr-python."""
+    return zarr.open_group(url.removeprefix("file://"), zarr_format=3, mode="r")
+
+
+def _manifest_data_metadata(ms):
+    """Reach into the ManifestStore for the `data` array's metadata.
+
+    `_group` is the (currently underscore-prefixed) accessor used by
+    existing tests in `test_zarr.py` and `test_hdf.py`. If/when a
+    public accessor lands, update here in one place.
+    """
+    return ms._group.arrays["data"].metadata
+
+
+# ---------------------------------------------------------------------------
+# Tests — each asserts one spec-compliance property.
+# ---------------------------------------------------------------------------
+
+
+@requires_arro3
+class TestZarrSpecCompliance:
+    """Virtualizarr's ZarrParser produces metadata that matches what
+    zarr-python reads from the same source. No xarray involvement.
+    """
+
+    def test_dtype_preserved(self, tmp_path):
+        url = _write_zarr_simple(tmp_path / "data.zarr", shape=(3,), dtype="f8")
+        ms_meta = _manifest_data_metadata(_open_manifest(url))
+        ref_meta = _open_reference(url)["data"].metadata
+        assert ms_meta.data_type == ref_meta.data_type
+
+    def test_shape_and_chunks_preserved(self, tmp_path):
+        url = _write_zarr_simple(tmp_path / "data.zarr", shape=(4, 6), dtype="i4")
+        ms_meta = _manifest_data_metadata(_open_manifest(url))
+        ref_meta = _open_reference(url)["data"].metadata
+        assert ms_meta.shape == ref_meta.shape
+        assert ms_meta.chunk_grid == ref_meta.chunk_grid
+
+    def test_default_fill_value_matches_zarr_python(self, tmp_path):
+        """When no explicit fill_value is set, the manifest store's
+        `fill_value` should match whatever zarr-python defaults to
+        (typically 0 for numeric dtypes).
+
+        Also asserts the original #811 path: uint8 with no explicit
+        fill_value doesn't crash and produces sensible metadata.
+        """
+        url = _write_zarr_simple(tmp_path / "data.zarr", shape=(3,), dtype="u1")
+        ms_meta = _manifest_data_metadata(_open_manifest(url))
+        ref_meta = _open_reference(url)["data"].metadata
+        assert ms_meta.fill_value == ref_meta.fill_value
+
+    def test_nan_fill_value_preserved(self, tmp_path):
+        """`fill_value=NaN` on a float dtype round-trips: zarr-python and
+        virtualizarr's manifest agree (both store NaN; xarray's
+        ability to decode it is a separate question, out of scope here).
+        """
+        url = _write_zarr_simple(
+            tmp_path / "data.zarr",
+            shape=(3,),
+            dtype="f8",
+            fill_value=float("nan"),
+        )
+        ms_fv = _manifest_data_metadata(_open_manifest(url)).fill_value
+        ref_fv = _open_reference(url)["data"].metadata.fill_value
+        assert np.isnan(ms_fv) and np.isnan(ref_fv)
+
+    def test_fill_value_attr_is_json_native(self, tmp_path):
+        """A `_FillValue` attribute stored as a plain JSON number stays a
+        plain JSON number in the manifest — virtualizarr doesn't
+        re-encode it into HDF5-style base64 just because xarray's
+        `FillValueCoder` would.
+
+        This is the test that distinguishes virtualizarr's contract from
+        xarray's: zarr metadata is JSON; scalar attributes should use
+        JSON-native types.
+        """
+        url = _write_zarr_simple(
+            tmp_path / "data.zarr",
+            shape=(3,),
+            dtype="f4",
+            attrs={"_FillValue": 0.0},
+        )
+        attrs = _manifest_data_metadata(_open_manifest(url)).attributes
+        assert "_FillValue" in attrs
+        assert attrs["_FillValue"] == 0.0
+        assert not isinstance(attrs["_FillValue"], (bytes, bytearray, np.ndarray)), (
+            f"_FillValue should be a JSON-native scalar, got {type(attrs['_FillValue']).__name__}"
+        )
+
+    def test_attrs_match_zarr_python(self, tmp_path):
+        """Free-form CF-style attributes (missing_value, valid_range,
+        etc.) round-trip with the same JSON shape zarr-python sees.
+        """
+        url = _write_zarr_simple(
+            tmp_path / "data.zarr",
+            shape=(3,),
+            dtype="f4",
+            attrs={
+                "_FillValue": 0.0,
+                "valid_min": -10.0,
+                "valid_max": 10.0,
+                "missing_value": -9999.0,
+            },
+        )
+        ms_attrs = dict(_manifest_data_metadata(_open_manifest(url)).attributes)
+        ref_attrs = dict(_open_reference(url)["data"].attrs)
+        # Filter out any zarr-internal book-keeping keys (none today,
+        # but defensive — these tests shouldn't break on future zarr
+        # versions adding metadata).
+        for k in ("_ARRAY_DIMENSIONS",):
+            ms_attrs.pop(k, None)
+            ref_attrs.pop(k, None)
+        assert ms_attrs == ref_attrs
+
+
+class TestZarrParserVsXarrayContrast:
+    """One-test demonstration of the framing shift: a `_FillValue` value
+    that virtualizarr produces spec-compliantly but xarray's
+    `FillValueCoder.decode` can't consume.
+
+    Green test here + red test in `test_zarr_fill_value_equivalence.py`
+    on the same fixture proves: the gap is upstream of virtualizarr,
+    not in the parser.
+    """
+
+    def test_nan_fill_value_attr_zarr_compliant_but_xarray_breaks(self, tmp_path):
+        """`_FillValue=NaN` as a JSON-native value: virtualizarr's
+        ManifestStore preserves it correctly (assertion below passes).
+        xarray's `FillValueCoder.decode` on the same store raises
+        `TypeError: Failed to decode fill_value: expected str or bytes
+        for dtype float64, got float` — see the equivalence module's
+        `test_equivalence_curated[float64-nan-fill]` failure. The two
+        outcomes together establish the upstream attribution.
+        """
+        url = _write_zarr_simple(
+            tmp_path / "data.zarr",
+            shape=(3,),
+            dtype="f8",
+            fill_value=float("nan"),
+            attrs={"_FillValue": float("nan")},
+        )
+        attrs = _manifest_data_metadata(_open_manifest(url)).attributes
+        assert "_FillValue" in attrs
+        # Plain Python float, not base64-encoded bytes.
+        assert isinstance(attrs["_FillValue"], float)
+        assert np.isnan(attrs["_FillValue"])