From bad5ad0af7d2f622c592b76024ee9c29cc4e42a4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:00:05 -0500 Subject: [PATCH 1/8] feat: populate per-asset dataStandard for BIDS, HED, NWB, and OME/NGFF - BIDSDatasetDescriptionAsset.get_metadata(): always sets BIDS standard (with BIDSVersion), adds HED when HEDVersion present in JSON, warns on read failure - NWBAsset.get_metadata(): sets NWB standard - ZarrBIDSAsset.get_metadata(): sets OME/NGFF for .ome.zarr assets - Guard for older dandischema without dataStandard on BareAsset; RuntimeError if dandischema >= 0.12.2 lacks it - Register ai_generated pytest marker in tox.ini Requires dandischema with per-asset dataStandard support (0.12.2+). Works silently with older dandischema. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 8 ++++- dandi/files/bids.py | 64 +++++++++++++++++++++++++++++++++++++-- dandi/tests/test_files.py | 63 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 15bc616c4..1ba0bb7b6 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -18,7 +18,7 @@ from dandischema.digests.dandietag import DandiETag from dandischema.models import BareAsset, CommonModel from dandischema.models import Dandiset as DandisetMeta -from dandischema.models import get_schema_version +from dandischema.models import StandardsType, get_schema_version, nwb_standard from packaging.version import Version from pydantic import ValidationError from pydantic_core import ErrorDetails @@ -504,6 +504,12 @@ def get_metadata( else: raise metadata.path = self.path + if "dataStandard" in BareAsset.model_fields: + nwb = StandardsType(**nwb_standard) + if metadata.dataStandard is None: + metadata.dataStandard = [nwb] + elif nwb not in metadata.dataStandard: + metadata.dataStandard.append(nwb) return metadata # TODO: @validate_cache.memoize_path diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 7e43163ec..17573d5e9 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -3,12 +3,22 @@ from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime +import json from pathlib import Path from threading import Lock import weakref -from dandischema.models import BareAsset +from dandischema import __version__ as dandischema_version +from dandischema.models import ( + BareAsset, + StandardsType, + bids_standard, + hed_standard, + ome_ngff_standard, +) +from packaging.version import Version +import dandi from dandi.bids_validator_deno import bids_validate from .bases import GenericAsset, LocalFileAsset, NWBAsset @@ -23,9 +33,36 @@ ValidationResult, ) +lgr = dandi.get_logger() + BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) +_HAS_DATA_STANDARD = "dataStandard" in BareAsset.model_fields +if not _HAS_DATA_STANDARD and Version(dandischema_version) >= Version("0.12.2"): + raise RuntimeError( + f"dandischema {dandischema_version} should have " + f"'dataStandard' field on BareAsset" + ) + + +def _add_standard( + metadata: BareAsset, + standard_dict: dict, + version: str | None = None, +) -> None: + """Add a data standard to asset metadata if the field is available.""" + if not _HAS_DATA_STANDARD: + return + kwargs = dict(standard_dict) + if version and "version" in StandardsType.model_fields: + kwargs["version"] = version + standard = StandardsType(**kwargs) + if metadata.dataStandard is None: + metadata.dataStandard = [standard] + elif standard not in metadata.dataStandard: + metadata.dataStandard.append(standard) + @dataclass class BIDSDatasetDescriptionAsset(LocalFileAsset): @@ -192,7 +229,28 @@ def get_validation_errors( assert self._dataset_errors is not None return self._dataset_errors.copy() - # get_metadata(): inherit use of default metadata from LocalFileAsset + def get_metadata( + self, + digest: Digest | None = None, + ignore_errors: bool = True, + ) -> BareAsset: + metadata = super().get_metadata(digest=digest, ignore_errors=ignore_errors) + try: + with open(self.filepath) as f: + desc = json.load(f) + except (OSError, json.JSONDecodeError) as e: + lgr.warning("Failed to read %s: %s", self.filepath, e) + _add_standard(metadata, bids_standard) + return metadata + _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) + if hed_version := desc.get("HEDVersion"): + # HEDVersion can be a string or list; use first element as version + if isinstance(hed_version, list): + version = hed_version[0] if hed_version else None + else: + version = hed_version + _add_standard(metadata, hed_standard, version=version) + return metadata @dataclass @@ -312,6 +370,8 @@ def get_metadata( add_common_metadata(metadata, self.filepath, start_time, end_time, digest) metadata.path = self.path metadata.encodingFormat = ZARR_MIME_TYPE + if Path(self.path).suffixes == [".ome", ".zarr"]: + _add_standard(metadata, ome_ngff_standard) return metadata diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index cef6c8ac3..fcc51d268 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -608,3 +608,66 @@ def test_validate_invalid_zarr3(path: str, expected_result_ids: set[str]) -> Non result_ids = {r.id for r in zf.get_validation_errors()} assert result_ids == expected_result_ids + + +@pytest.mark.ai_generated +class TestBIDSDatasetDescriptionDataStandard: + """Tests for per-asset dataStandard population from dataset_description.json""" + + @staticmethod + def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: + import json + + dd_path = tmp_path / "dataset_description.json" + dd_path.write_text(json.dumps(content)) + return BIDSDatasetDescriptionAsset( + filepath=dd_path, + path="dataset_description.json", + dandiset_path=tmp_path, + ) + + @staticmethod + def _standard_names(metadata): # type: ignore[no-untyped-def] + from dandischema.models import BareAsset + + if "dataStandard" not in BareAsset.model_fields: + pytest.skip("dandischema too old, no dataStandard on BareAsset") + return [s.name for s in (metadata.dataStandard or [])] + + def test_bids_always_set(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Brain Imaging Data Structure (BIDS)" in names + + def test_hed_detected_when_hedversion_present(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0", "HEDVersion": "8.2.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" in names + assert "Brain Imaging Data Structure (BIDS)" in names + + def test_hed_not_detected_when_hedversion_absent(self, tmp_path: Path) -> None: + asset = self._make_bids_dd( + tmp_path, + {"Name": "Test", "BIDSVersion": "1.9.0"}, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" not in names + + def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: + """HEDVersion can be a list of strings per BIDS spec.""" + asset = self._make_bids_dd( + tmp_path, + { + "Name": "Test", + "BIDSVersion": "1.9.0", + "HEDVersion": ["8.2.0", "sc:1.0.0"], + }, + ) + names = self._standard_names(asset.get_metadata()) + assert "Hierarchical Event Descriptors (HED)" in names From 9d5ea6432433f8dfec2a082fd69742f8a2f5ba21 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:09:37 -0500 Subject: [PATCH 2/8] feat: extract NWB extensions and HED library schemas into dataStandard - Add get_nwb_extensions() to pynwb_utils that reads h5file["specifications"] to discover ndx-* namespaces and their versions (filtering out core/hdmf) - NWBAsset.get_metadata() populates StandardsType.extensions with ndx-* extensions found in the NWB file - BIDSDatasetDescriptionAsset.get_metadata() extracts HED library schemas from list-valued HEDVersion (e.g. ["8.2.0", "sc:1.0.0"]) into extensions - Add tests for both NWB extension extraction and HED library schema parsing Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 23 +++++++++++++++++- dandi/files/bids.py | 29 ++++++++++++++++++++-- dandi/pynwb_utils.py | 43 +++++++++++++++++++++++++++++++++ dandi/tests/test_files.py | 30 +++++++++++++++++++++++ dandi/tests/test_pynwb_utils.py | 39 ++++++++++++++++++++++++++++++ 5 files changed, 161 insertions(+), 3 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 1ba0bb7b6..48e49bcdd 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -505,7 +505,28 @@ def get_metadata( raise metadata.path = self.path if "dataStandard" in BareAsset.model_fields: - nwb = StandardsType(**nwb_standard) + kwargs: dict[str, Any] = dict(nwb_standard) + # Populate NWB extensions (ndx-*) if the schema supports it + if "extensions" in StandardsType.model_fields: + from dandi.pynwb_utils import get_nwb_extensions + + try: + nwb_exts = get_nwb_extensions(self.filepath) + except Exception: + lgr.debug( + "Failed to extract NWB extensions from %s", + self.filepath, + exc_info=True, + ) + nwb_exts = {} + if nwb_exts: + kwargs["extensions"] = [ + StandardsType(name=name, version=ver).model_dump( + mode="json", exclude_none=True + ) + for name, ver in sorted(nwb_exts.items()) + ] + nwb = StandardsType(**kwargs) if metadata.dataStandard is None: metadata.dataStandard = [nwb] elif nwb not in metadata.dataStandard: diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 17573d5e9..1e8319acf 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -244,12 +244,37 @@ def get_metadata( return metadata _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) if hed_version := desc.get("HEDVersion"): - # HEDVersion can be a string or list; use first element as version + # HEDVersion can be a string or list. + # List form: ["8.2.0", "sc:1.0.0"] where first element is base + # HED version and subsequent "prefix:version" entries are library + # schemas recorded as extensions. if isinstance(hed_version, list): version = hed_version[0] if hed_version else None + library_entries = hed_version[1:] else: version = hed_version - _add_standard(metadata, hed_standard, version=version) + library_entries = [] + kwargs: dict = dict(hed_standard) + if version and "version" in StandardsType.model_fields: + kwargs["version"] = version + if ( + library_entries + and "extensions" in StandardsType.model_fields + ): + extensions = [] + for entry in library_entries: + # Format is "prefix:version" (e.g. "sc:1.0.0") + if ":" in str(entry): + lib_name, lib_ver = str(entry).split(":", 1) + else: + lib_name, lib_ver = str(entry), None + ext = StandardsType(name=lib_name, version=lib_ver) + extensions.append( + ext.model_dump(mode="json", exclude_none=True) + ) + if extensions: + kwargs["extensions"] = extensions + _add_standard(metadata, kwargs) return metadata diff --git a/dandi/pynwb_utils.py b/dandi/pynwb_utils.py index 7ffe552a8..ff41bbe6b 100644 --- a/dandi/pynwb_utils.py +++ b/dandi/pynwb_utils.py @@ -152,6 +152,49 @@ def _sanitize(v: Any) -> str: return None +# Namespaces bundled with NWB/HDMF core — not extensions +_NWB_CORE_NAMESPACES = frozenset({"core", "hdmf-common", "hdmf-experimental"}) + + +def get_nwb_extensions(filepath: str | Path | Readable) -> dict[str, str]: + """Return NWB extensions embedded in an HDF5 file. + + Reads the ``specifications`` group of an NWB HDF5 file and returns a + mapping of extension namespace names to their latest embedded version, + excluding core NWB/HDMF namespaces. + + Parameters + ---------- + filepath + Path to an NWB ``.nwb`` HDF5 file, or a :class:`Readable`. + + Returns + ------- + dict[str, str] + ``{namespace_name: latest_version}`` for each non-core namespace + found in the file's ``specifications`` group. Empty dict if no + extensions are present or the group does not exist. + """ + extensions: dict[str, str] = {} + with open_readable(filepath) as fp, h5py.File(fp, "r") as h5file: + specs = h5file.get("specifications") + if specs is None: + return extensions + for name in specs: + if name in _NWB_CORE_NAMESPACES: + continue + ns_group = specs[name] + if not isinstance(ns_group, h5py.Group): + continue + try: + sorted_versions = sorted(ns_group, key=Version) + if sorted_versions: + extensions[name] = sorted_versions[-1] + except Exception: + lgr.debug("Failed to parse versions for NWB extension %s", name) + return extensions + + def get_neurodata_types_to_modalities_map() -> dict[str, str]: """Return a dict to map neurodata types known to pynwb to "modalities" diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index fcc51d268..00e4bad4e 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -671,3 +671,33 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: ) names = self._standard_names(asset.get_metadata()) assert "Hierarchical Event Descriptors (HED)" in names + + def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: + """HED library schemas in list HEDVersion populate extensions.""" + from dandischema.models import BareAsset, StandardsType + + if "extensions" not in StandardsType.model_fields: + pytest.skip("dandischema too old, no extensions on StandardsType") + if "dataStandard" not in BareAsset.model_fields: + pytest.skip("dandischema too old, no dataStandard on BareAsset") + asset = self._make_bids_dd( + tmp_path, + { + "Name": "Test", + "BIDSVersion": "1.9.0", + "HEDVersion": ["8.2.0", "sc:1.0.0", "lang:1.1.0"], + }, + ) + metadata = asset.get_metadata() + hed_standards = [ + s for s in (metadata.dataStandard or []) + if s.name == "Hierarchical Event Descriptors (HED)" + ] + assert len(hed_standards) == 1 + hed = hed_standards[0] + assert hed.version == "8.2.0" + assert hed.extensions is not None + ext_names = {e.name for e in hed.extensions} + assert ext_names == {"sc", "lang"} + ext_map = {e.name: e.version for e in hed.extensions} + assert ext_map == {"sc": "1.0.0", "lang": "1.1.0"} diff --git a/dandi/tests/test_pynwb_utils.py b/dandi/tests/test_pynwb_utils.py index 0de33d555..d47f294f7 100644 --- a/dandi/tests/test_pynwb_utils.py +++ b/dandi/tests/test_pynwb_utils.py @@ -103,3 +103,42 @@ def test_nwb_has_external_links(tmp_path): assert not nwb_has_external_links(filename1) assert nwb_has_external_links(filename4) + + +def test_get_nwb_extensions(tmp_path: Path) -> None: + """Test extraction of NWB extensions from HDF5 specifications group.""" + import h5py + + from ..pynwb_utils import get_nwb_extensions + + h5path = tmp_path / "test.nwb" + with h5py.File(h5path, "w") as f: + specs = f.create_group("specifications") + # Core namespaces should be excluded + core_grp = specs.create_group("core") + core_grp.create_group("2.7.0") + hdmf_grp = specs.create_group("hdmf-common") + hdmf_grp.create_group("1.8.0") + # An extension namespace should be included, latest version used + ndx_ecog = specs.create_group("ndx-ecog") + ndx_ecog.create_group("0.1.0") + ndx_ecog.create_group("0.2.0") + # Another extension + ndx_events = specs.create_group("ndx-events") + ndx_events.create_group("0.3.0") + + result = get_nwb_extensions(h5path) + assert result == {"ndx-ecog": "0.2.0", "ndx-events": "0.3.0"} + + +def test_get_nwb_extensions_no_specs(tmp_path: Path) -> None: + """No specifications group returns empty dict.""" + import h5py + + from ..pynwb_utils import get_nwb_extensions + + h5path = tmp_path / "test.nwb" + with h5py.File(h5path, "w") as f: + f.attrs["nwb_version"] = "2.7.0" + + assert get_nwb_extensions(h5path) == {} From 1cd1d6a03af50e6b4e31f74f63387ea8ffed72c4 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:23:10 -0500 Subject: [PATCH 3/8] chore: git ignore uv.lock --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d0f2d8d8f..a2de93108 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ sandbox/ venv/ venvs/ .DS_Store +uv.lock From 7dfde133a56ec32167fb5b1e045e096268cce567 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:29:30 -0500 Subject: [PATCH 4/8] refactor: centralize dandischema compat guard as _SCHEMA_BAREASSET_HAS_DATASTANDARD Single bool in bases.py guards all dataStandard/version/extensions features that ship together in dandischema >= 0.12.2, replacing scattered "field in Model.model_fields" checks. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 51 ++++++++++++++++++++++++--------------- dandi/files/bids.py | 24 +++++------------- dandi/tests/test_files.py | 10 +++----- 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 48e49bcdd..b926bd8c7 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -21,6 +21,20 @@ from dandischema.models import StandardsType, get_schema_version, nwb_standard from packaging.version import Version from pydantic import ValidationError + +# True when the installed dandischema exposes the per-asset dataStandard field +# and related StandardsType enhancements (version, extensions). All these +# fields ship together starting with dandischema 0.12.2. +# TODO: remove this guard (and all branches that check it) once the minimum +# required dandischema version is >= 0.12.2. +_SCHEMA_BAREASSET_HAS_DATASTANDARD = "dataStandard" in BareAsset.model_fields +if not _SCHEMA_BAREASSET_HAS_DATASTANDARD and Version( + dandischema.__version__ +) >= Version("0.12.2"): + raise RuntimeError( + f"dandischema {dandischema.__version__} should have " + f"'dataStandard' field on BareAsset" + ) from pydantic_core import ErrorDetails import requests @@ -504,28 +518,27 @@ def get_metadata( else: raise metadata.path = self.path - if "dataStandard" in BareAsset.model_fields: + if _SCHEMA_BAREASSET_HAS_DATASTANDARD: kwargs: dict[str, Any] = dict(nwb_standard) - # Populate NWB extensions (ndx-*) if the schema supports it - if "extensions" in StandardsType.model_fields: - from dandi.pynwb_utils import get_nwb_extensions + # Populate NWB extensions (ndx-*) from the h5 specifications group + from dandi.pynwb_utils import get_nwb_extensions - try: - nwb_exts = get_nwb_extensions(self.filepath) - except Exception: - lgr.debug( - "Failed to extract NWB extensions from %s", - self.filepath, - exc_info=True, + try: + nwb_exts = get_nwb_extensions(self.filepath) + except Exception: + lgr.debug( + "Failed to extract NWB extensions from %s", + self.filepath, + exc_info=True, + ) + nwb_exts = {} + if nwb_exts: + kwargs["extensions"] = [ + StandardsType(name=name, version=ver).model_dump( + mode="json", exclude_none=True ) - nwb_exts = {} - if nwb_exts: - kwargs["extensions"] = [ - StandardsType(name=name, version=ver).model_dump( - mode="json", exclude_none=True - ) - for name, ver in sorted(nwb_exts.items()) - ] + for name, ver in sorted(nwb_exts.items()) + ] nwb = StandardsType(**kwargs) if metadata.dataStandard is None: metadata.dataStandard = [nwb] diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 1e8319acf..30a8f3baf 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -8,7 +8,6 @@ from threading import Lock import weakref -from dandischema import __version__ as dandischema_version from dandischema.models import ( BareAsset, StandardsType, @@ -16,12 +15,11 @@ hed_standard, ome_ngff_standard, ) -from packaging.version import Version import dandi from dandi.bids_validator_deno import bids_validate -from .bases import GenericAsset, LocalFileAsset, NWBAsset +from .bases import GenericAsset, LocalFileAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD from .zarr import ZarrAsset from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..metadata.core import add_common_metadata, prepare_metadata @@ -38,24 +36,17 @@ BIDS_ASSET_ERRORS = ("BIDS.NON_BIDS_PATH_PLACEHOLDER",) BIDS_DATASET_ERRORS = ("BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",) -_HAS_DATA_STANDARD = "dataStandard" in BareAsset.model_fields -if not _HAS_DATA_STANDARD and Version(dandischema_version) >= Version("0.12.2"): - raise RuntimeError( - f"dandischema {dandischema_version} should have " - f"'dataStandard' field on BareAsset" - ) - def _add_standard( - metadata: BareAsset, + metadata, # type: ignore[no-untyped-def] standard_dict: dict, version: str | None = None, ) -> None: """Add a data standard to asset metadata if the field is available.""" - if not _HAS_DATA_STANDARD: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: return kwargs = dict(standard_dict) - if version and "version" in StandardsType.model_fields: + if version: kwargs["version"] = version standard = StandardsType(**kwargs) if metadata.dataStandard is None: @@ -255,12 +246,9 @@ def get_metadata( version = hed_version library_entries = [] kwargs: dict = dict(hed_standard) - if version and "version" in StandardsType.model_fields: + if version: kwargs["version"] = version - if ( - library_entries - and "extensions" in StandardsType.model_fields - ): + if library_entries: extensions = [] for entry in library_entries: # Format is "prefix:version" (e.g. "sc:1.0.0") diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 00e4bad4e..c8bbcae08 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -628,9 +628,9 @@ def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: @staticmethod def _standard_names(metadata): # type: ignore[no-untyped-def] - from dandischema.models import BareAsset + from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if "dataStandard" not in BareAsset.model_fields: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") return [s.name for s in (metadata.dataStandard or [])] @@ -674,11 +674,9 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: """HED library schemas in list HEDVersion populate extensions.""" - from dandischema.models import BareAsset, StandardsType + from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if "extensions" not in StandardsType.model_fields: - pytest.skip("dandischema too old, no extensions on StandardsType") - if "dataStandard" not in BareAsset.model_fields: + if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") asset = self._make_bids_dd( tmp_path, From dcb03b4d7eb5b7db0d0a09d16f73402a21397b56 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 21:39:57 -0500 Subject: [PATCH 5/8] refactor: move imports to top level, document import policy in CLAUDE.md Move json, h5py, get_nwb_extensions, and _SCHEMA_BAREASSET_HAS_DATASTANDARD imports to module top level in tests. Keep pynwb_utils import deferred in bases.py (heavy transitive deps: h5py/pynwb/hdmf/numpy) per existing convention. Add import guidance to CLAUDE.md. Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 5 +++++ dandi/files/bases.py | 2 +- dandi/tests/test_files.py | 8 ++------ dandi/tests/test_pynwb_utils.py | 11 ++--------- 4 files changed, 10 insertions(+), 16 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f3e1abe16..20eb8869c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,6 +31,11 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co - Prefer specific exceptions over generic ones - For CLI, use click library patterns - Imports organized: stdlib, third-party, local (alphabetical within groups) +- **Imports must be at the top of the file** — do NOT place imports inside + functions or methods unless there is a concrete reason (circular dependency, + or heavy transitive imports like `pynwb`/`h5py`/`nwbinspector` that would + slow down module load for unrelated code paths). When deferring an import + for weight, add the comment `# Avoid heavy import by importing within function:`. ## Documentation - Keep docstrings updated when changing function signatures diff --git a/dandi/files/bases.py b/dandi/files/bases.py index b926bd8c7..6ade4d09b 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -520,7 +520,7 @@ def get_metadata( metadata.path = self.path if _SCHEMA_BAREASSET_HAS_DATASTANDARD: kwargs: dict[str, Any] = dict(nwb_standard) - # Populate NWB extensions (ndx-*) from the h5 specifications group + # Avoid heavy import by importing within function: from dandi.pynwb_utils import get_nwb_extensions try: diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index c8bbcae08..b25d86ffe 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from operator import attrgetter import os from pathlib import Path @@ -16,6 +17,7 @@ from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..dandiapi import AssetType, RemoteZarrAsset from ..exceptions import UnknownAssetError +from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD from ..files import ( BIDSDatasetDescriptionAsset, DandisetMetadataFile, @@ -616,8 +618,6 @@ class TestBIDSDatasetDescriptionDataStandard: @staticmethod def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: - import json - dd_path = tmp_path / "dataset_description.json" dd_path.write_text(json.dumps(content)) return BIDSDatasetDescriptionAsset( @@ -628,8 +628,6 @@ def _make_bids_dd(tmp_path: Path, content: dict) -> BIDSDatasetDescriptionAsset: @staticmethod def _standard_names(metadata): # type: ignore[no-untyped-def] - from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") return [s.name for s in (metadata.dataStandard or [])] @@ -674,8 +672,6 @@ def test_hed_detected_with_list_hedversion(self, tmp_path: Path) -> None: def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: """HED library schemas in list HEDVersion populate extensions.""" - from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD - if not _SCHEMA_BAREASSET_HAS_DATASTANDARD: pytest.skip("dandischema too old, no dataStandard on BareAsset") asset = self._make_bids_dd( diff --git a/dandi/tests/test_pynwb_utils.py b/dandi/tests/test_pynwb_utils.py index d47f294f7..47bac6fa4 100644 --- a/dandi/tests/test_pynwb_utils.py +++ b/dandi/tests/test_pynwb_utils.py @@ -6,10 +6,11 @@ import re from typing import Any, NoReturn +import h5py import numpy as np from pynwb import NWBHDF5IO, NWBFile, TimeSeries -from ..pynwb_utils import _sanitize_nwb_version, nwb_has_external_links +from ..pynwb_utils import _sanitize_nwb_version, get_nwb_extensions, nwb_has_external_links def test_pynwb_io(simple1_nwb: Path) -> None: @@ -107,10 +108,6 @@ def test_nwb_has_external_links(tmp_path): def test_get_nwb_extensions(tmp_path: Path) -> None: """Test extraction of NWB extensions from HDF5 specifications group.""" - import h5py - - from ..pynwb_utils import get_nwb_extensions - h5path = tmp_path / "test.nwb" with h5py.File(h5path, "w") as f: specs = f.create_group("specifications") @@ -133,10 +130,6 @@ def test_get_nwb_extensions(tmp_path: Path) -> None: def test_get_nwb_extensions_no_specs(tmp_path: Path) -> None: """No specifications group returns empty dict.""" - import h5py - - from ..pynwb_utils import get_nwb_extensions - h5path = tmp_path / "test.nwb" with h5py.File(h5path, "w") as f: f.attrs["nwb_version"] = "2.7.0" From 7170ec32776dee3f2942149151d57171fd8fa90e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 18 Feb 2026 22:34:11 -0500 Subject: [PATCH 6/8] fix: guard hed_standard import for compat with released dandischema hed_standard does not exist in dandischema < 0.12.2, so gate the import on _SCHEMA_BAREASSET_HAS_DATASTANDARD (all new symbols ship together). HED detection is skipped when unavailable. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bids.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 30a8f3baf..e9507aac8 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -12,7 +12,6 @@ BareAsset, StandardsType, bids_standard, - hed_standard, ome_ngff_standard, ) @@ -20,6 +19,11 @@ from dandi.bids_validator_deno import bids_validate from .bases import GenericAsset, LocalFileAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD + +if _SCHEMA_BAREASSET_HAS_DATASTANDARD: + from dandischema.models import hed_standard +else: + hed_standard = None # type: ignore[assignment] from .zarr import ZarrAsset from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..metadata.core import add_common_metadata, prepare_metadata @@ -234,7 +238,7 @@ def get_metadata( _add_standard(metadata, bids_standard) return metadata _add_standard(metadata, bids_standard, version=desc.get("BIDSVersion")) - if hed_version := desc.get("HEDVersion"): + if hed_standard and (hed_version := desc.get("HEDVersion")): # HEDVersion can be a string or list. # List form: ["8.2.0", "sc:1.0.0"] where first element is base # HED version and subsequent "prefix:version" entries are library From 4f7d15478d8da2eb67e1f4d4a21b948f6772c4cc Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Tue, 3 Mar 2026 15:07:06 -0500 Subject: [PATCH 7/8] fix: resolve lint (E402) and typing errors for dandischema compat Move _SCHEMA_BAREASSET_HAS_DATASTANDARD guard block after all imports to fix E402. Use getattr/setattr for dataStandard access and type: ignore[call-arg] for StandardsType(version=...) to satisfy mypy against released dandischema that lacks these fields. Co-Authored-By: Claude Opus 4.6 (1M context) --- dandi/files/bases.py | 40 ++++++++++++++++++++------------------- dandi/files/bids.py | 27 +++++++++++++++----------- dandi/tests/test_files.py | 6 ++++-- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 6ade4d09b..9f32806ef 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -21,20 +21,6 @@ from dandischema.models import StandardsType, get_schema_version, nwb_standard from packaging.version import Version from pydantic import ValidationError - -# True when the installed dandischema exposes the per-asset dataStandard field -# and related StandardsType enhancements (version, extensions). All these -# fields ship together starting with dandischema 0.12.2. -# TODO: remove this guard (and all branches that check it) once the minimum -# required dandischema version is >= 0.12.2. -_SCHEMA_BAREASSET_HAS_DATASTANDARD = "dataStandard" in BareAsset.model_fields -if not _SCHEMA_BAREASSET_HAS_DATASTANDARD and Version( - dandischema.__version__ -) >= Version("0.12.2"): - raise RuntimeError( - f"dandischema {dandischema.__version__} should have " - f"'dataStandard' field on BareAsset" - ) from pydantic_core import ErrorDetails import requests @@ -55,6 +41,20 @@ Validator, ) +# True when the installed dandischema exposes the per-asset dataStandard field +# and related StandardsType enhancements (version, extensions). All these +# fields ship together starting with dandischema 0.12.2. +# TODO: remove this guard (and all branches that check it) once the minimum +# required dandischema version is >= 0.12.2. +_SCHEMA_BAREASSET_HAS_DATASTANDARD = "dataStandard" in BareAsset.model_fields +if not _SCHEMA_BAREASSET_HAS_DATASTANDARD and Version( + dandischema.__version__ +) >= Version("0.12.2"): + raise RuntimeError( + f"dandischema {dandischema.__version__} should have " + f"'dataStandard' field on BareAsset" + ) + lgr = dandi.get_logger() # TODO -- should come from schema. This is just a simplistic example for now @@ -534,16 +534,18 @@ def get_metadata( nwb_exts = {} if nwb_exts: kwargs["extensions"] = [ - StandardsType(name=name, version=ver).model_dump( + StandardsType(name=name, version=ver).model_dump( # type: ignore[call-arg] mode="json", exclude_none=True ) for name, ver in sorted(nwb_exts.items()) ] nwb = StandardsType(**kwargs) - if metadata.dataStandard is None: - metadata.dataStandard = [nwb] - elif nwb not in metadata.dataStandard: - metadata.dataStandard.append(nwb) + # TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2 + cur = getattr(metadata, "dataStandard", None) + if cur is None: + setattr(metadata, "dataStandard", [nwb]) + elif nwb not in cur: + cur.append(nwb) return metadata # TODO: @validate_cache.memoize_path diff --git a/dandi/files/bids.py b/dandi/files/bids.py index e9507aac8..1a8ba33ee 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -18,10 +18,15 @@ import dandi from dandi.bids_validator_deno import bids_validate -from .bases import GenericAsset, LocalFileAsset, NWBAsset, _SCHEMA_BAREASSET_HAS_DATASTANDARD +from .bases import ( + _SCHEMA_BAREASSET_HAS_DATASTANDARD, + GenericAsset, + LocalFileAsset, + NWBAsset, +) if _SCHEMA_BAREASSET_HAS_DATASTANDARD: - from dandischema.models import hed_standard + from dandischema.models import hed_standard # type: ignore[attr-defined] else: hed_standard = None # type: ignore[assignment] from .zarr import ZarrAsset @@ -42,7 +47,7 @@ def _add_standard( - metadata, # type: ignore[no-untyped-def] + metadata: BareAsset, standard_dict: dict, version: str | None = None, ) -> None: @@ -53,10 +58,12 @@ def _add_standard( if version: kwargs["version"] = version standard = StandardsType(**kwargs) - if metadata.dataStandard is None: - metadata.dataStandard = [standard] - elif standard not in metadata.dataStandard: - metadata.dataStandard.append(standard) + # TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2 + cur = getattr(metadata, "dataStandard", None) + if cur is None: + setattr(metadata, "dataStandard", [standard]) + elif standard not in cur: + cur.append(standard) @dataclass @@ -260,10 +267,8 @@ def get_metadata( lib_name, lib_ver = str(entry).split(":", 1) else: lib_name, lib_ver = str(entry), None - ext = StandardsType(name=lib_name, version=lib_ver) - extensions.append( - ext.model_dump(mode="json", exclude_none=True) - ) + ext = StandardsType(name=lib_name, version=lib_ver) # type: ignore[call-arg] + extensions.append(ext.model_dump(mode="json", exclude_none=True)) if extensions: kwargs["extensions"] = extensions _add_standard(metadata, kwargs) diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index b25d86ffe..6bb08bcea 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -17,7 +17,6 @@ from ..consts import ZARR_MIME_TYPE, dandiset_metadata_file from ..dandiapi import AssetType, RemoteZarrAsset from ..exceptions import UnknownAssetError -from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD from ..files import ( BIDSDatasetDescriptionAsset, DandisetMetadataFile, @@ -31,6 +30,7 @@ dandi_file, find_dandi_files, ) +from ..files.bases import _SCHEMA_BAREASSET_HAS_DATASTANDARD lgr = get_logger() @@ -684,7 +684,9 @@ def test_hed_library_schemas_as_extensions(self, tmp_path: Path) -> None: ) metadata = asset.get_metadata() hed_standards = [ - s for s in (metadata.dataStandard or []) + # TODO: use metadata.dataStandard directly once min dandischema >= 0.12.2 + s + for s in (getattr(metadata, "dataStandard", None) or []) if s.name == "Hierarchical Event Descriptors (HED)" ] assert len(hed_standards) == 1 From 279c297920a57c1eeb1bc6a5cc51104f09e17ec6 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Tue, 3 Mar 2026 15:22:30 -0500 Subject: [PATCH 8/8] Extract NWB spec version into dataStandard Use the existing get_nwb_version() to populate the version field on the NWB StandardsType entry so per-asset metadata reports the NWB spec version (e.g. 2.9.0) alongside extensions. Co-Authored-By: Claude Opus 4.6 --- dandi/files/bases.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dandi/files/bases.py b/dandi/files/bases.py index 9f32806ef..44304f78d 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -521,8 +521,10 @@ def get_metadata( if _SCHEMA_BAREASSET_HAS_DATASTANDARD: kwargs: dict[str, Any] = dict(nwb_standard) # Avoid heavy import by importing within function: - from dandi.pynwb_utils import get_nwb_extensions + from dandi.pynwb_utils import get_nwb_extensions, get_nwb_version + if nwb_ver := get_nwb_version(self.filepath): + kwargs["version"] = nwb_ver try: nwb_exts = get_nwb_extensions(self.filepath) except Exception: