From 1d149ee7c900b65c86adc62843fa9a1b024ea7a9 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 23 May 2025 07:53:01 -0600 Subject: [PATCH 01/12] Add complex number rejection to convert_dtype and corresponding tests --- src/hdmf/build/objectmapper.py | 21 +++++++++++++++- tests/unit/build_tests/test_convert_dtype.py | 26 ++++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index e683e60bf..1cace373d 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -185,7 +185,7 @@ def no_convert(cls, obj_type): cls.__no_convert.add(obj_type) @classmethod - def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 + def convert_dtype(cls, spec, value, spec_dtype=None) -> tuple: # noqa: C901 """ Convert values to the specified dtype. For example, if a literal int is passed in to a field that is specified as a unsigned integer, this function @@ -276,11 +276,30 @@ def __check_convert_numeric(cls, value_type): np.issubdtype(value_dtype, np.integer)): raise ValueError("Cannot convert from %s to 'numeric' specification dtype." % value_type) + @classmethod + def __check_for_complex_numbers(cls, value): + """ + Check if a value contains complex numbers and raise a ValueError if found. + """ + if isinstance(value, complex): + raise ValueError("Complex numbers are not supported") + + # Check numpy array + if isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.complexfloating): + raise ValueError("Complex numbers are not supported") + + # Check list/tuple elements + if isinstance(value, (list, tuple)): + for item in value: + cls.__check_for_complex_numbers(item) + @classmethod def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 """ Check edge cases in converting data to a dtype """ + # Check for complex numbers first + cls.__check_for_complex_numbers(value) if value is None: # Data is missing. Determine dtype from spec dt = spec_dtype diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 8f30386d8..377b85a58 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -569,3 +569,29 @@ def test_isodate_spec(self): self.assertEqual(ret, b'2020-11-10') self.assertIs(type(ret), bytes) self.assertEqual(ret_dtype, 'ascii') + + def test_complex_number_rejection(self): + """Test that complex numbers are properly rejected.""" + spec = DatasetSpec('an example dataset', 'float64', name='data') + + # Test single complex number + with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): + ObjectMapper.convert_dtype(spec, 1 + 2j) + + # Test complex numpy array + with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): + ObjectMapper.convert_dtype(spec, np.array([1 + 2j, 3 + 4j])) + + # Test list containing complex numbers + with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): + ObjectMapper.convert_dtype(spec, [1.0, 2 + 3j, 4.0]) + + # Test that real numbers still work + ret, ret_dtype = ObjectMapper.convert_dtype(spec, np.array([1.0, 2.0, 3.0])) + self.assertIsInstance(ret, np.ndarray) + self.assertEqual(ret_dtype, np.float64) + + # Test that regular Python float still works + ret, ret_dtype = ObjectMapper.convert_dtype(spec, 3.14) + self.assertIsInstance(ret, np.float64) + self.assertEqual(ret_dtype, np.float64) From 2730c8d5f5ce4512ff6277e033c1894fc31e1485 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 23 May 2025 13:56:38 +0000 Subject: [PATCH 02/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/hdmf/build/objectmapper.py | 6 +++--- tests/unit/build_tests/test_convert_dtype.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 1cace373d..03396e6ac 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -283,16 +283,16 @@ def __check_for_complex_numbers(cls, value): """ if isinstance(value, complex): raise ValueError("Complex numbers are not supported") - + # Check numpy array if isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.complexfloating): raise ValueError("Complex numbers are not supported") - + # Check list/tuple elements if isinstance(value, (list, tuple)): for item in value: cls.__check_for_complex_numbers(item) - + @classmethod def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 """ diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 377b85a58..6bd796d2e 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -569,28 +569,28 @@ def test_isodate_spec(self): self.assertEqual(ret, b'2020-11-10') self.assertIs(type(ret), bytes) self.assertEqual(ret_dtype, 'ascii') - + def test_complex_number_rejection(self): """Test that complex numbers are properly rejected.""" spec = DatasetSpec('an example dataset', 'float64', name='data') - + # Test single complex number with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): ObjectMapper.convert_dtype(spec, 1 + 2j) - + # Test complex numpy array with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): ObjectMapper.convert_dtype(spec, np.array([1 + 2j, 3 + 4j])) - + # Test list containing complex numbers with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): ObjectMapper.convert_dtype(spec, [1.0, 2 + 3j, 4.0]) - + # Test that real numbers still work ret, ret_dtype = ObjectMapper.convert_dtype(spec, np.array([1.0, 2.0, 3.0])) self.assertIsInstance(ret, np.ndarray) self.assertEqual(ret_dtype, np.float64) - + # Test that regular Python float still works ret, ret_dtype = ObjectMapper.convert_dtype(spec, 3.14) self.assertIsInstance(ret, np.float64) From 0200b3db90c2d531817da5fb35827f260e6dab8e Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Fri, 23 May 2025 07:57:24 -0600 Subject: [PATCH 03/12] Add to changelog --- CHANGELOG.md | 1 + .../build_tests/test_complex_protection.py | 45 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 tests/unit/build_tests/test_complex_protection.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 128c69262..b1e56a814 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ ### Fixed - Fixed `get_data_shape` returning the wrong shape for lists of Data objects. @rly [#1270](https://github.com/hdmf-dev/hdmf/pull/1270) +- Added protection against complex numbers in arrays and other data types. @bdichter [#1279](https://github.com/hdmf-dev/hdmf/pull/1279) ## HDMF 4.0.0 (January 22, 2025) diff --git a/tests/unit/build_tests/test_complex_protection.py b/tests/unit/build_tests/test_complex_protection.py new file mode 100644 index 000000000..135c585fc --- /dev/null +++ b/tests/unit/build_tests/test_complex_protection.py @@ -0,0 +1,45 @@ +import unittest +import numpy as np + +from hdmf.build.objectmapper import ObjectMapper +from hdmf.spec import DatasetSpec +from hdmf.testing import TestCase + + +class TestComplexProtection(TestCase): + """Test that complex numbers are properly rejected.""" + + def setUp(self): + self.spec = DatasetSpec('an example dataset', 'float64', name='data') + + def test_single_complex_number(self): + """Test that a single complex number is rejected.""" + with self.assertRaises(ValueError) as cm: + ObjectMapper.convert_dtype(self.spec, 1 + 2j) + self.assertEqual(str(cm.exception), "Complex numbers are not supported") + + def test_complex_array(self): + """Test that an array of complex numbers is rejected.""" + with self.assertRaises(ValueError) as cm: + ObjectMapper.convert_dtype(self.spec, np.array([1 + 2j, 3 + 4j])) + self.assertEqual(str(cm.exception), "Complex numbers are not supported") + + def test_complex_in_list(self): + """Test that a list containing complex numbers is rejected.""" + with self.assertRaises(ValueError) as cm: + ObjectMapper.convert_dtype(self.spec, [1.0, 2 + 3j, 4.0]) + self.assertEqual(str(cm.exception), "Complex numbers are not supported") + + def test_real_array(self): + """Test that a real array is not rejected.""" + ret, ret_dtype = ObjectMapper.convert_dtype(self.spec, np.array([1.0, 2.0, 3.0])) + self.assertIsInstance(ret, np.ndarray) + + def test_real_number(self): + """Test that a real number is not rejected.""" + ret, ret_dtype = ObjectMapper.convert_dtype(self.spec, 3.14) + self.assertIsInstance(ret, np.float64) + + +if __name__ == '__main__': + unittest.main() From caf0183fcdbd3121ede649d74f17356556c6c280 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 11:59:58 -0400 Subject: [PATCH 04/12] Add object deduplication support via soft links Add `deduplicate_objects` parameter to HDF5IO, HDMFIO, and BuildManager classes to enable deduplication of identical container objects by creating soft links. This feature is enabled by default and helps reduce file size and improve performance when dealing with repeated objects. --- src/hdmf/backends/hdf5/h5tools.py | 12 +- src/hdmf/backends/io.py | 7 +- src/hdmf/build/manager.py | 20 ++- .../build_tests/test_deduplicate_objects.py | 148 ++++++++++++++++++ 4 files changed, 178 insertions(+), 9 deletions(-) create mode 100644 tests/unit/build_tests/test_deduplicate_objects.py diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index e1873dd5d..f1ec18fda 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -69,14 +69,16 @@ def can_read(path): 'default': None }, {'name': 'herd_path', 'type': str, - 'doc': 'The path to read/write the HERD file', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None}, + {'name': 'deduplicate_objects', 'type': bool, + 'doc': 'whether to deduplicate identical container objects by creating soft links', 'default': True}) def __init__(self, **kwargs): """Open an HDF5 file for IO. """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver, aws_region, herd_path = popargs('path', 'manager', 'mode', + path, manager, mode, comm, file_obj, driver, aws_region, herd_path, deduplicate_objects = popargs('path', 'manager', 'mode', 'comm', 'file', 'driver', - 'aws_region', 'herd_path', + 'aws_region', 'herd_path', 'deduplicate_objects', kwargs) self.__open_links = [] # keep track of other files opened from links in this file @@ -93,9 +95,9 @@ def __init__(self, **kwargs): raise UnsupportedOperation(msg) if manager is None: - manager = BuildManager(TypeMap(NamespaceCatalog())) + manager = BuildManager(TypeMap(NamespaceCatalog()), deduplicate_objects=deduplicate_objects) elif isinstance(manager, TypeMap): - manager = BuildManager(manager) + manager = BuildManager(manager, deduplicate_objects=deduplicate_objects) self.__driver = driver self.__aws_region = aws_region self.__comm = comm diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 86fd25b26..1af3decc9 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -22,9 +22,11 @@ def can_read(path): {"name": "source", "type": (str, Path), "doc": "the source of container being built i.e. file path", 'default': None}, {'name': 'herd_path', 'type': str, - 'doc': 'The path to read/write the HERD file', 'default': None},) + 'doc': 'The path to read/write the HERD file', 'default': None}, + {'name': 'deduplicate_objects', 'type': bool, + 'doc': 'whether to deduplicate identical container objects by creating soft links', 'default': True}) def __init__(self, **kwargs): - manager, source, herd_path = getargs('manager', 'source', 'herd_path', kwargs) + manager, source, herd_path, deduplicate_objects = getargs('manager', 'source', 'herd_path', 'deduplicate_objects', kwargs) if isinstance(source, Path): source = source.resolve() elif (isinstance(source, str) and @@ -38,6 +40,7 @@ def __init__(self, **kwargs): self.__source = source self.herd_path = herd_path self.herd = None + self.__deduplicate_objects = deduplicate_objects self.open() @property diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index bc586013c..f7791f2ea 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -86,13 +86,19 @@ class BuildManager: A class for managing builds of AbstractContainers """ - def __init__(self, type_map): + def __init__(self, type_map, deduplicate_objects=True): + """ + Args: + type_map (TypeMap): the TypeMap to use for mapping container classes to specifications + deduplicate_objects (bool): whether to deduplicate identical container objects by creating soft links + """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) self.__builders = dict() self.__containers = dict() self.__active_builders = set() self.__type_map = type_map self.__ref_queue = deque() # a queue of the ReferenceBuilders that need to be added + self.__deduplicate_objects = deduplicate_objects @property def namespace_catalog(self): @@ -102,6 +108,11 @@ def namespace_catalog(self): def type_map(self): return self.__type_map + @property + def deduplicate_objects(self): + """Whether to deduplicate identical container objects by creating soft links.""" + return self.__deduplicate_objects + @docval({"name": "object", "type": (BaseBuilder, AbstractContainer), "doc": "the container or builder to get a proxy for"}, {"name": "source", "type": str, @@ -260,8 +271,13 @@ def clear_cache(self): @docval({"name": "container", "type": AbstractContainer, "doc": "the container to get the builder for"}) def get_builder(self, **kwargs): - """Return the prebuilt builder for the given container or None if it does not exist.""" + """Return the prebuilt builder for the given container or None if it does not exist. + + If deduplicate_objects is False, this will always return None to force creation of new builders. + """ container = getargs('container', kwargs) + if not self.__deduplicate_objects: + return None container_id = self.__conthash__(container) result = self.__builders.get(container_id) return result diff --git a/tests/unit/build_tests/test_deduplicate_objects.py b/tests/unit/build_tests/test_deduplicate_objects.py new file mode 100644 index 000000000..a95917614 --- /dev/null +++ b/tests/unit/build_tests/test_deduplicate_objects.py @@ -0,0 +1,148 @@ +"""Tests for the deduplicate_objects functionality in BuildManager""" + +from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, ObjectMapper +from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog +from hdmf.testing import TestCase +from hdmf.container import Data + +from tests.unit.helpers.utils import Foo, CORE_NAMESPACE + + +class FooMapper(ObjectMapper): + """Maps nested 'attr2' attribute on dataset 'my_data' to Foo.attr2 in constructor and attribute map""" + + def __init__(self, spec): + super().__init__(spec) + my_data_spec = spec.get_dataset('my_data') + self.map_spec('attr2', my_data_spec.get_attribute('attr2')) + + +class TestBuildManagerDeduplication(TestCase): + """Test BuildManager deduplication functionality""" + + def setUp(self): + self.foo_spec = GroupSpec( + doc='A test group specification with a data type', + data_type_def='Foo', + datasets=[ + DatasetSpec( + doc='an example dataset', + dtype='int', + name='my_data', + attributes=[ + AttributeSpec( + name='attr2', + doc='an example integer attribute', + dtype='int' + ) + ] + ) + ], + attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')] + ) + + self.spec_catalog = SpecCatalog() + self.spec_catalog.register_spec(self.foo_spec, 'test.yaml') + self.namespace = SpecNamespace( + 'a test namespace', + CORE_NAMESPACE, + [{'source': 'test.yaml'}], + version='0.1.0', + catalog=self.spec_catalog) + self.namespace_catalog = NamespaceCatalog() + self.namespace_catalog.add_namespace(CORE_NAMESPACE, self.namespace) + self.type_map = TypeMap(self.namespace_catalog) + self.type_map.register_container_type(CORE_NAMESPACE, 'Foo', Foo) + self.type_map.register_map(Foo, FooMapper) + + def test_default_deduplicate_objects_true(self): + """Test that deduplicate_objects defaults to True""" + manager = BuildManager(self.type_map) + self.assertTrue(manager.deduplicate_objects) + + def test_deduplicate_objects_explicit_true(self): + """Test that deduplicate_objects can be explicitly set to True""" + manager = BuildManager(self.type_map, deduplicate_objects=True) + self.assertTrue(manager.deduplicate_objects) + + def test_deduplicate_objects_explicit_false(self): + """Test that deduplicate_objects can be explicitly set to False""" + manager = BuildManager(self.type_map, deduplicate_objects=False) + self.assertFalse(manager.deduplicate_objects) + + def test_get_builder_with_deduplication_enabled(self): + """Test that get_builder returns cached builder when deduplication is enabled""" + manager = BuildManager(self.type_map, deduplicate_objects=True) + + # Create a simple Data container + container = Data(name="test_data", data=[1, 2, 3]) + + # Create and cache a builder + builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) + manager.prebuilt(container, builder) + + # get_builder should return the cached builder + cached_builder = manager.get_builder(container) + self.assertIs(cached_builder, builder) + + def test_get_builder_with_deduplication_disabled(self): + """Test that get_builder returns None when deduplication is disabled""" + manager = BuildManager(self.type_map, deduplicate_objects=False) + + # Create a simple Data container + container = Data(name="test_data", data=[1, 2, 3]) + + # Create and cache a builder + builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) + manager.prebuilt(container, builder) + + # get_builder should return None when deduplication is disabled + cached_builder = manager.get_builder(container) + self.assertIsNone(cached_builder) + + def test_build_memoization_with_deduplication_enabled(self): + """Test that repeated builds return same builder when deduplication is enabled""" + manager = BuildManager(self.type_map, deduplicate_objects=True) + + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) + + # Build twice - should get same builder + builder1 = manager.build(container_inst) + builder2 = manager.build(container_inst) + + self.assertIs(builder1, builder2) + + def test_build_no_memoization_with_deduplication_disabled(self): + """Test that repeated builds create new builders when deduplication is disabled""" + manager = BuildManager(self.type_map, deduplicate_objects=False) + + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) + + # Build twice - should get different builders + builder1 = manager.build(container_inst) + builder2 = manager.build(container_inst) + + self.assertIsNot(builder1, builder2) + # But they should have the same content + self.assertDictEqual(builder1, builder2) + + def test_clear_cache_behavior(self): + """Test that clear_cache works regardless of deduplication setting""" + # Test with deduplication enabled + manager_true = BuildManager(self.type_map, deduplicate_objects=True) + container = Data(name="test_data", data=[1, 2, 3]) + builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) + manager_true.prebuilt(container, builder) + + self.assertIs(manager_true.get_builder(container), builder) + manager_true.clear_cache() + self.assertIsNone(manager_true.get_builder(container)) + + # Test with deduplication disabled + manager_false = BuildManager(self.type_map, deduplicate_objects=False) + manager_false.prebuilt(container, builder) + + # Should return None even before clearing cache + self.assertIsNone(manager_false.get_builder(container)) + manager_false.clear_cache() + self.assertIsNone(manager_false.get_builder(container)) From 80ddda584ddc167519f1be42009c7fea8acaf794 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:01:01 -0400 Subject: [PATCH 05/12] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1e56a814..128c69262 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,6 @@ ### Fixed - Fixed `get_data_shape` returning the wrong shape for lists of Data objects. @rly [#1270](https://github.com/hdmf-dev/hdmf/pull/1270) -- Added protection against complex numbers in arrays and other data types. @bdichter [#1279](https://github.com/hdmf-dev/hdmf/pull/1279) ## HDMF 4.0.0 (January 22, 2025) From 191d9c3cee0a5a2316f7dbf6cf7b133a8794fdb4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 14 Aug 2025 16:01:31 +0000 Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/hdmf/build/manager.py | 2 +- .../build_tests/test_deduplicate_objects.py | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/hdmf/build/manager.py b/src/hdmf/build/manager.py index f7791f2ea..b00f953a1 100644 --- a/src/hdmf/build/manager.py +++ b/src/hdmf/build/manager.py @@ -272,7 +272,7 @@ def clear_cache(self): @docval({"name": "container", "type": AbstractContainer, "doc": "the container to get the builder for"}) def get_builder(self, **kwargs): """Return the prebuilt builder for the given container or None if it does not exist. - + If deduplicate_objects is False, this will always return None to force creation of new builders. """ container = getargs('container', kwargs) diff --git a/tests/unit/build_tests/test_deduplicate_objects.py b/tests/unit/build_tests/test_deduplicate_objects.py index a95917614..f2384b613 100644 --- a/tests/unit/build_tests/test_deduplicate_objects.py +++ b/tests/unit/build_tests/test_deduplicate_objects.py @@ -73,14 +73,14 @@ def test_deduplicate_objects_explicit_false(self): def test_get_builder_with_deduplication_enabled(self): """Test that get_builder returns cached builder when deduplication is enabled""" manager = BuildManager(self.type_map, deduplicate_objects=True) - + # Create a simple Data container container = Data(name="test_data", data=[1, 2, 3]) - + # Create and cache a builder builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) manager.prebuilt(container, builder) - + # get_builder should return the cached builder cached_builder = manager.get_builder(container) self.assertIs(cached_builder, builder) @@ -88,14 +88,14 @@ def test_get_builder_with_deduplication_enabled(self): def test_get_builder_with_deduplication_disabled(self): """Test that get_builder returns None when deduplication is disabled""" manager = BuildManager(self.type_map, deduplicate_objects=False) - + # Create a simple Data container container = Data(name="test_data", data=[1, 2, 3]) - + # Create and cache a builder builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) manager.prebuilt(container, builder) - + # get_builder should return None when deduplication is disabled cached_builder = manager.get_builder(container) self.assertIsNone(cached_builder) @@ -103,25 +103,25 @@ def test_get_builder_with_deduplication_disabled(self): def test_build_memoization_with_deduplication_enabled(self): """Test that repeated builds return same builder when deduplication is enabled""" manager = BuildManager(self.type_map, deduplicate_objects=True) - + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) - + # Build twice - should get same builder builder1 = manager.build(container_inst) builder2 = manager.build(container_inst) - + self.assertIs(builder1, builder2) def test_build_no_memoization_with_deduplication_disabled(self): """Test that repeated builds create new builders when deduplication is disabled""" manager = BuildManager(self.type_map, deduplicate_objects=False) - + container_inst = Foo('my_foo', list(range(10)), 'value1', 10) - + # Build twice - should get different builders builder1 = manager.build(container_inst) builder2 = manager.build(container_inst) - + self.assertIsNot(builder1, builder2) # But they should have the same content self.assertDictEqual(builder1, builder2) @@ -133,15 +133,15 @@ def test_clear_cache_behavior(self): container = Data(name="test_data", data=[1, 2, 3]) builder = DatasetBuilder(name="test_data", data=[1, 2, 3]) manager_true.prebuilt(container, builder) - + self.assertIs(manager_true.get_builder(container), builder) manager_true.clear_cache() self.assertIsNone(manager_true.get_builder(container)) - + # Test with deduplication disabled manager_false = BuildManager(self.type_map, deduplicate_objects=False) manager_false.prebuilt(container, builder) - + # Should return None even before clearing cache self.assertIsNone(manager_false.get_builder(container)) manager_false.clear_cache() From 27047e126d00e3551b93d74f4a4ce5fa0ddfa327 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:01:37 -0400 Subject: [PATCH 07/12] Update src/hdmf/build/objectmapper.py --- src/hdmf/build/objectmapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 03396e6ac..5d9e3f8ef 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -185,7 +185,7 @@ def no_convert(cls, obj_type): cls.__no_convert.add(obj_type) @classmethod - def convert_dtype(cls, spec, value, spec_dtype=None) -> tuple: # noqa: C901 + def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 """ Convert values to the specified dtype. For example, if a literal int is passed in to a field that is specified as a unsigned integer, this function From fa552f1695d757bcdfa82fcce74090fd1de53350 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:01:58 -0400 Subject: [PATCH 08/12] Update src/hdmf/build/objectmapper.py --- src/hdmf/build/objectmapper.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 5d9e3f8ef..fe81a4c76 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -276,23 +276,6 @@ def __check_convert_numeric(cls, value_type): np.issubdtype(value_dtype, np.integer)): raise ValueError("Cannot convert from %s to 'numeric' specification dtype." % value_type) - @classmethod - def __check_for_complex_numbers(cls, value): - """ - Check if a value contains complex numbers and raise a ValueError if found. - """ - if isinstance(value, complex): - raise ValueError("Complex numbers are not supported") - - # Check numpy array - if isinstance(value, np.ndarray) and np.issubdtype(value.dtype, np.complexfloating): - raise ValueError("Complex numbers are not supported") - - # Check list/tuple elements - if isinstance(value, (list, tuple)): - for item in value: - cls.__check_for_complex_numbers(item) - @classmethod def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 """ From c7aa697a79c13bba2b5479cd70cd42aa32b19c7f Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:02:29 -0400 Subject: [PATCH 09/12] Update src/hdmf/build/objectmapper.py --- src/hdmf/build/objectmapper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index fe81a4c76..e683e60bf 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -281,8 +281,6 @@ def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 """ Check edge cases in converting data to a dtype """ - # Check for complex numbers first - cls.__check_for_complex_numbers(value) if value is None: # Data is missing. Determine dtype from spec dt = spec_dtype From 4528570005e235dbb273e3651b47ca2dc605263f Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:02:50 -0400 Subject: [PATCH 10/12] Update tests/unit/build_tests/test_convert_dtype.py --- tests/unit/build_tests/test_convert_dtype.py | 26 -------------------- 1 file changed, 26 deletions(-) diff --git a/tests/unit/build_tests/test_convert_dtype.py b/tests/unit/build_tests/test_convert_dtype.py index 6bd796d2e..8f30386d8 100644 --- a/tests/unit/build_tests/test_convert_dtype.py +++ b/tests/unit/build_tests/test_convert_dtype.py @@ -569,29 +569,3 @@ def test_isodate_spec(self): self.assertEqual(ret, b'2020-11-10') self.assertIs(type(ret), bytes) self.assertEqual(ret_dtype, 'ascii') - - def test_complex_number_rejection(self): - """Test that complex numbers are properly rejected.""" - spec = DatasetSpec('an example dataset', 'float64', name='data') - - # Test single complex number - with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): - ObjectMapper.convert_dtype(spec, 1 + 2j) - - # Test complex numpy array - with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): - ObjectMapper.convert_dtype(spec, np.array([1 + 2j, 3 + 4j])) - - # Test list containing complex numbers - with self.assertRaisesWith(ValueError, "Complex numbers are not supported"): - ObjectMapper.convert_dtype(spec, [1.0, 2 + 3j, 4.0]) - - # Test that real numbers still work - ret, ret_dtype = ObjectMapper.convert_dtype(spec, np.array([1.0, 2.0, 3.0])) - self.assertIsInstance(ret, np.ndarray) - self.assertEqual(ret_dtype, np.float64) - - # Test that regular Python float still works - ret, ret_dtype = ObjectMapper.convert_dtype(spec, 3.14) - self.assertIsInstance(ret, np.float64) - self.assertEqual(ret_dtype, np.float64) From fa3a54ec0608b4f327ed00f6d22fd7863ce68932 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:03:11 -0400 Subject: [PATCH 11/12] Delete tests/unit/build_tests/test_complex_protection.py --- .../build_tests/test_complex_protection.py | 45 ------------------- 1 file changed, 45 deletions(-) delete mode 100644 tests/unit/build_tests/test_complex_protection.py diff --git a/tests/unit/build_tests/test_complex_protection.py b/tests/unit/build_tests/test_complex_protection.py deleted file mode 100644 index 135c585fc..000000000 --- a/tests/unit/build_tests/test_complex_protection.py +++ /dev/null @@ -1,45 +0,0 @@ -import unittest -import numpy as np - -from hdmf.build.objectmapper import ObjectMapper -from hdmf.spec import DatasetSpec -from hdmf.testing import TestCase - - -class TestComplexProtection(TestCase): - """Test that complex numbers are properly rejected.""" - - def setUp(self): - self.spec = DatasetSpec('an example dataset', 'float64', name='data') - - def test_single_complex_number(self): - """Test that a single complex number is rejected.""" - with self.assertRaises(ValueError) as cm: - ObjectMapper.convert_dtype(self.spec, 1 + 2j) - self.assertEqual(str(cm.exception), "Complex numbers are not supported") - - def test_complex_array(self): - """Test that an array of complex numbers is rejected.""" - with self.assertRaises(ValueError) as cm: - ObjectMapper.convert_dtype(self.spec, np.array([1 + 2j, 3 + 4j])) - self.assertEqual(str(cm.exception), "Complex numbers are not supported") - - def test_complex_in_list(self): - """Test that a list containing complex numbers is rejected.""" - with self.assertRaises(ValueError) as cm: - ObjectMapper.convert_dtype(self.spec, [1.0, 2 + 3j, 4.0]) - self.assertEqual(str(cm.exception), "Complex numbers are not supported") - - def test_real_array(self): - """Test that a real array is not rejected.""" - ret, ret_dtype = ObjectMapper.convert_dtype(self.spec, np.array([1.0, 2.0, 3.0])) - self.assertIsInstance(ret, np.ndarray) - - def test_real_number(self): - """Test that a real number is not rejected.""" - ret, ret_dtype = ObjectMapper.convert_dtype(self.spec, 3.14) - self.assertIsInstance(ret, np.float64) - - -if __name__ == '__main__': - unittest.main() From bf23774fc6957d823ace7ecf4ffdd2d1cdd6d5f1 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 14 Aug 2025 12:19:53 -0400 Subject: [PATCH 12/12] ruff fixes --- src/hdmf/backends/hdf5/h5tools.py | 8 ++++---- src/hdmf/backends/io.py | 4 +++- tests/unit/build_tests/test_deduplicate_objects.py | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index f1ec18fda..a9a1c43c4 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -76,10 +76,10 @@ def __init__(self, **kwargs): """Open an HDF5 file for IO. """ self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) - path, manager, mode, comm, file_obj, driver, aws_region, herd_path, deduplicate_objects = popargs('path', 'manager', 'mode', - 'comm', 'file', 'driver', - 'aws_region', 'herd_path', 'deduplicate_objects', - kwargs) + path, manager, mode, comm, file_obj, driver, aws_region, herd_path, deduplicate_objects = popargs( + 'path', 'manager', 'mode', 'comm', 'file', 'driver', 'aws_region', 'herd_path', + 'deduplicate_objects', kwargs + ) self.__open_links = [] # keep track of other files opened from links in this file self.__file = None # This will be set below, but set to None first in case an error occurs and we need to close diff --git a/src/hdmf/backends/io.py b/src/hdmf/backends/io.py index 1af3decc9..95a181e64 100644 --- a/src/hdmf/backends/io.py +++ b/src/hdmf/backends/io.py @@ -26,7 +26,9 @@ def can_read(path): {'name': 'deduplicate_objects', 'type': bool, 'doc': 'whether to deduplicate identical container objects by creating soft links', 'default': True}) def __init__(self, **kwargs): - manager, source, herd_path, deduplicate_objects = getargs('manager', 'source', 'herd_path', 'deduplicate_objects', kwargs) + manager, source, herd_path, deduplicate_objects = getargs( + 'manager', 'source', 'herd_path', 'deduplicate_objects', kwargs + ) if isinstance(source, Path): source = source.resolve() elif (isinstance(source, str) and diff --git a/tests/unit/build_tests/test_deduplicate_objects.py b/tests/unit/build_tests/test_deduplicate_objects.py index f2384b613..bba7819fb 100644 --- a/tests/unit/build_tests/test_deduplicate_objects.py +++ b/tests/unit/build_tests/test_deduplicate_objects.py @@ -1,6 +1,6 @@ """Tests for the deduplicate_objects functionality in BuildManager""" -from hdmf.build import GroupBuilder, DatasetBuilder, BuildManager, TypeMap, ObjectMapper +from hdmf.build import DatasetBuilder, BuildManager, TypeMap, ObjectMapper from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog from hdmf.testing import TestCase from hdmf.container import Data