diff --git a/docs/about/releases.md b/docs/about/releases.md index 26c9cd7f..31f671e7 100644 --- a/docs/about/releases.md +++ b/docs/about/releases.md @@ -3,6 +3,7 @@ ## v2.7.1 (unreleased) ### New Features +- Added [VirtualiZarrDatasetAccessor.nrefs][virtualizarr.accessor.VirtualiZarrDatasetAccessor.nrefs] — a method that returns the total number of virtual chunk references in the dataset, ignoring non-virtual variables. Closes #573. ### Breaking changes diff --git a/docs/api/virtualizarr.md b/docs/api/virtualizarr.md index d7cb28af..36125eb7 100644 --- a/docs/api/virtualizarr.md +++ b/docs/api/virtualizarr.md @@ -16,6 +16,8 @@ Users can use xarray for every step apart from reading and serializing virtual r ::: virtualizarr.accessor.VirtualiZarrDatasetAccessor.nbytes +::: virtualizarr.accessor.VirtualiZarrDatasetAccessor.nrefs + ## Renaming paths ::: virtualizarr.accessor.VirtualiZarrDatasetAccessor.rename_paths diff --git a/virtualizarr/accessor.py b/virtualizarr/accessor.py index 5a63e733..5f51833f 100644 --- a/virtualizarr/accessor.py +++ b/virtualizarr/accessor.py @@ -285,6 +285,27 @@ def nbytes(self) -> int: for var in self.ds.variables.values() ) + def nrefs(self) -> int: + """Count the total number of virtual chunk references in the dataset. + + Ignores non-virtual variables (i.e. those not backed by a ManifestArray). + + Returns + ------- + int + Total number of virtual references across all virtual variables. + + Examples + -------- + >>> vds.vz.nrefs() # doctest: +SKIP + 42 + """ + return sum( + len(var.data.manifest) + for var in self.ds.variables.values() + if isinstance(var.data, ManifestArray) + ) + @xr.register_dataset_accessor("vz") class VirtualiZarrDatasetAccessor(_VirtualiZarrDatasetAccessor): diff --git a/virtualizarr/tests/test_xarray.py b/virtualizarr/tests/test_xarray.py index 05ccca23..1c882bf6 100644 --- a/virtualizarr/tests/test_xarray.py +++ b/virtualizarr/tests/test_xarray.py @@ -1163,3 +1163,25 @@ def test_isel_iterative_append_simulation(self, array_v3_metadata): # every original chunk should have been visited exactly once assert sorted(seen_refs) == [0, 100, 200, 300] + + +@requires_hdf5plugin +@requires_imagecodecs +def test_nrefs(simple_netcdf4, local_registry): + parser = HDFParser() + with open_virtual_dataset( + url=simple_netcdf4, + registry=local_registry, + parser=parser, + ) as vds: + # simple_netcdf4 has one virtual variable 'foo' with a single chunk + assert vds.vz.nrefs() == 1 + + with open_virtual_dataset( + url=simple_netcdf4, + registry=local_registry, + parser=parser, + loadable_variables=["foo"], + ) as vds: + # when the only variable is loadable (non-virtual), nrefs should be 0 + assert vds.vz.nrefs() == 0