From 996d7f4c7a4140ac9021938c86d5f28b556c4470 Mon Sep 17 00:00:00 2001 From: Sejal Date: Sat, 7 Mar 2026 08:04:17 +0000 Subject: [PATCH 1/3] Fix #775: handle empty datasets in get_type with informative error --- src/hdmf/validate/validator.py | 8 ++++++++ tests/unit/validator_tests/test_validate.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index c774a8bf1..440358834 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -175,6 +175,14 @@ def _get_type_from_dtype_attr(data: Any, builder_dtype: list | None) -> tuple[st def get_type(data, builder_dtype=None): """Return a tuple of (the string representation of the type, the format of the string data) for the given data.""" + if data is None: + return None, None + # Check for empty data safely (Fix for Issue #775) + try: + if hasattr(data, "__len__") and len(data) == 0: + raise EmptyArrayError("Dataset is empty; cannot determine type.") + except TypeError: + pass # String data if isinstance(data, str): return 'utf', get_string_format(data) diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index 8cdfb953e..f29db2bff 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -4,6 +4,8 @@ import h5py import numpy as np +import pytest +from hdmf.validate.validator import get_type, EmptyArrayError from dateutil.tz import tzlocal from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, TypeMap, BuildManager from hdmf.spec import (GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, @@ -1907,3 +1909,9 @@ def test_isodatetime_no_time_component_fails(self): # This confirms it fails because it lacks the 'T' and timezone self.assertEqual(len(result), 1) self.assertIsInstance(result[0], Error) + +def test_get_type_empty_data(): + """Test that get_type handles empty data with an informative error (Issue #775).""" + assert get_type(None) == (None, None) + with pytest.raises(EmptyArrayError, match="Dataset is empty; cannot determine type."): + get_type([]) \ No newline at end of file From b403b9c9f11c9415e12956477b7c318fc6229131 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 7 Mar 2026 08:05:15 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/unit/validator_tests/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index f29db2bff..cbb6915c1 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -1914,4 +1914,4 @@ def test_get_type_empty_data(): """Test that get_type handles empty data with an informative error (Issue #775).""" assert get_type(None) == (None, None) with pytest.raises(EmptyArrayError, match="Dataset is empty; cannot determine type."): - get_type([]) \ No newline at end of file + get_type([]) From 30ce93eb6a48049345227082db717c9027f4301e Mon Sep 17 00:00:00 2001 From: Sejal Date: Sun, 8 Mar 2026 04:40:47 +0000 Subject: [PATCH 3/3] refactor: reduce McCabe complexity in get_type --- src/hdmf/validate/validator.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index 440358834..625207186 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -177,12 +177,9 @@ def get_type(data, builder_dtype=None): """Return a tuple of (the string representation of the type, the format of the string data) for the given data.""" if data is None: return None, None - # Check for empty data safely (Fix for Issue #775) - try: - if hasattr(data, "__len__") and len(data) == 0: - raise EmptyArrayError("Dataset is empty; cannot determine type.") - except TypeError: - pass + # Check for empty data safely + if hasattr(data, "__len__") and getattr(data, 'shape', (1,)) != () and len(data) == 0: + raise EmptyArrayError("Dataset is empty; cannot determine type.") # String data if isinstance(data, str): return 'utf', get_string_format(data)