diff --git a/requirements.txt b/requirements.txt index 35328e0c8..7210077c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ numpy==1.17.2 scipy==1.3.1 pandas==0.25.1 ruamel.yaml==0.16.5 +xarray==0.13.0 diff --git a/src/hdmf/__init__.py b/src/hdmf/__init__.py index da66e59df..5e78ebb19 100644 --- a/src/hdmf/__init__.py +++ b/src/hdmf/__init__.py @@ -1,6 +1,6 @@ from . import query # noqa: F401 from .container import Container, Data, DataRegion -from .utils import docval, getargs +from .utils import docval, getargs, popargs from .region import ListSlicer from .backends.hdf5.h5_utils import H5RegionSlicer, H5Dataset diff --git a/src/hdmf/backends/hdf5/h5tools.py b/src/hdmf/backends/hdf5/h5tools.py index 283752cdd..9f825b6e6 100644 --- a/src/hdmf/backends/hdf5/h5tools.py +++ b/src/hdmf/backends/hdf5/h5tools.py @@ -4,6 +4,7 @@ from functools import partial from h5py import File, Group, Dataset, special_dtype, SoftLink, ExternalLink, Reference, RegionReference, check_dtype import warnings +import json from ...container import Container from ...utils import docval, getargs, popargs, call_docval_func, get_data_shape @@ -26,6 +27,9 @@ H5_REF = special_dtype(ref=Reference) H5_REGREF = special_dtype(ref=RegionReference) +DIMS_ATTR = 'dimensions' +COORDS_ATTR = 'coordinates' + class HDF5IO(HDMFIO): @@ -404,12 +408,20 @@ def __read_group(self, h5obj, name=None, ignore=set()): ret.written = True return ret - def __read_dataset(self, h5obj, name=None): + def __read_dataset(self, h5obj, name=None): # noqa: C901 kwargs = { "attributes": self.__read_attrs(h5obj), "dtype": h5obj.dtype, "maxshape": h5obj.maxshape } + if DIMS_ATTR in kwargs['attributes']: + dims = tuple(json.loads(kwargs['attributes'].pop(DIMS_ATTR))) + if dims: + kwargs['dims'] = dims + if COORDS_ATTR in kwargs['attributes']: + coords = json.loads(kwargs['attributes'].pop(COORDS_ATTR)) + if coords: + kwargs['coords'] = coords for key, val in kwargs['attributes'].items(): if isinstance(val, bytes): 
kwargs['attributes'][key] = val.decode('UTF-8') @@ -743,6 +755,10 @@ def write_dataset(self, **kwargs): else: options['io_settings'] = {} attributes = builder.attributes + if builder.dims: + attributes[DIMS_ATTR] = json.dumps(builder.dims) + if builder.coords: + attributes[COORDS_ATTR] = json.dumps(builder.coords) options['dtype'] = builder.dtype dset = None link = None diff --git a/src/hdmf/build/__init__.py b/src/hdmf/build/__init__.py index dd3333085..c99442bb3 100644 --- a/src/hdmf/build/__init__.py +++ b/src/hdmf/build/__init__.py @@ -4,8 +4,11 @@ from .builders import ReferenceBuilder from .builders import RegionBuilder from .builders import LinkBuilder +from .builders import CoordBuilder from .objectmapper import ObjectMapper - +from .objectmapper import BuildError +from .objectmapper import ConstructError +from .objectmapper import ConvertError from .map import BuildManager from .map import TypeMap diff --git a/src/hdmf/build/builders.py b/src/hdmf/build/builders.py index 06de5830e..3c83830ed 100644 --- a/src/hdmf/build/builders.py +++ b/src/hdmf/build/builders.py @@ -6,17 +6,19 @@ from abc import ABCMeta import warnings from collections.abc import Iterable +from collections import namedtuple from datetime import datetime -from ..utils import docval, getargs, popargs, call_docval_func, fmt_docval_args +from ..utils import docval, getargs, popargs, fmt_docval_args, get_docval class Builder(dict, metaclass=ABCMeta): + ''' Abstract class used to represent an object within a hierarchy. ''' - @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, - {'name': 'parent', 'type': 'Builder', 'doc': 'the parent builder of this Builder', 'default': None}, + @docval({'name': 'name', 'type': str, 'doc': 'the name of the Builder'}, + {'name': 'parent', 'type': 'Builder', 'doc': 'the parent Builder of this Builder', 'default': None}, {'name': 'source', 'type': str, - 'doc': 'the source of the data in this builder e.g. 
file name', 'default': None}) + 'doc': 'the source of the data in this Builder, e.g., file name', 'default': None}) def __init__(self, **kwargs): name, parent, source = getargs('name', 'parent', 'source', kwargs) super().__init__() @@ -32,9 +34,7 @@ def __init__(self, **kwargs): @property def path(self): - """ - Get the path of this Builder - """ + ''' The path of this Builder ''' s = list() c = self while c is not None: @@ -50,7 +50,7 @@ def written(self): @written.setter def written(self, s): if self.__written and not s: - raise ValueError("cannot change written to not written") + raise AttributeError('Cannot change written to not written') self.__written = s @property @@ -65,10 +65,9 @@ def source(self): @source.setter def source(self, s): - if self.__source is None: - self.__source = s - else: - raise ValueError('Cannot reset source once it is specified') + if self.__source is not None: + raise AttributeError('Cannot reset source once it is specified') + self.__source = s @property def parent(self): @@ -77,27 +76,28 @@ def parent(self): @parent.setter def parent(self, p): - if self.__parent is None: - self.__parent = p - if self.__source is None: - self.source = p.source - else: - raise ValueError('Cannot reset parent once it is specified') + if self.__parent is not None: + raise AttributeError('Cannot reset parent once it is specified') + self.__parent = p + if self.__source is None: + self.source = p.source def __repr__(self): - ret = "%s %s %s" % (self.path, self.__class__.__name__, super().__repr__()) + dict_repr = super().__repr__() + ret = "%s %s %s" % (self.name, self.__class__.__name__, dict_repr) return ret class BaseBuilder(Builder): - __attribute = 'attributes' + ''' A builder that contains a location and a dictionary of attributes ''' - @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, - {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this group', + __attribute = 'attributes' # key for 
attributes dictionary in this dictionary + + @docval({'name': 'name', 'type': str, 'doc': 'the name of the Builder'}, + {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this Builder', 'default': dict()}, - {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'the parent builder of this Builder', 'default': None}, - {'name': 'source', 'type': str, - 'doc': 'the source of the data represented in this Builder', 'default': None}) + {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'the parent Builder of this Builder', 'default': None}, + *get_docval(Builder.__init__, 'source')) def __init__(self, **kwargs): name, attributes, parent, source = getargs('name', 'attributes', 'parent', 'source', kwargs) super().__init__(name, parent, source) @@ -108,9 +108,7 @@ def __init__(self, **kwargs): @property def location(self): - """ - The location of this Builder in its source - """ + ''' The location of this Builder in its source ''' return self.__location @location.setter @@ -125,57 +123,58 @@ def attributes(self): @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute'}, {'name': 'value', 'type': None, 'doc': 'the attribute value'}) def set_attribute(self, **kwargs): - ''' Set an attribute for this group. 
''' + ''' Set an attribute for this Builder ''' name, value = getargs('name', 'value', kwargs) - super().__getitem__(BaseBuilder.__attribute)[name] = value - # self.obj_type[name] = BaseBuilder.__attribute + self.attributes[name] = value - @docval({'name': 'builder', 'type': 'BaseBuilder', 'doc': 'the BaseBuilder to merge attributes from '}) + @docval({'name': 'builder', 'type': 'BaseBuilder', 'doc': 'the BaseBuilder to merge attributes from'}) def deep_update(self, **kwargs): - ''' Merge attributes from the given BaseBuilder into this builder ''' + ''' Merge attributes from the given BaseBuilder into this Builder ''' builder = kwargs['builder'] - # merge attributes - for name, value in super(BaseBuilder, builder).__getitem__(BaseBuilder.__attribute).items(): + for name, value in builder.attributes.items(): self.set_attribute(name, value) class GroupBuilder(BaseBuilder): + ''' + This class is a dictionary that holds the contents of a Container rearranged based on the ObjectMapper for that + Container into subgroups, datasets, attributes, and links. On write, the backend code writes the contents of these + builders to the backend. On read, the backend code creates builders to hold data that has been read from the + backend. 
+ ''' + + # keys for child dictionaries/lists in this dictionary __link = 'links' __group = 'groups' __dataset = 'datasets' - __attribute = 'attributes' + __attribute = 'attributes' # matches BaseBuilder.__attribute @docval({'name': 'name', 'type': str, 'doc': 'the name of the group'}, - {'name': 'groups', 'type': (dict, list), 'doc': 'a dictionary of subgroups to create in this group', - 'default': dict()}, - {'name': 'datasets', 'type': (dict, list), 'doc': 'a dictionary of datasets to create in this group', - 'default': dict()}, + {'name': 'groups', 'type': (dict, list), 'doc': 'a dictionary/list of subgroups to create in this group', + 'default': list()}, + {'name': 'datasets', 'type': (dict, list), 'doc': 'a dictionary/list of datasets to create in this group', + 'default': list()}, {'name': 'attributes', 'type': dict, 'doc': 'a dictionary of attributes to create in this group', 'default': dict()}, - {'name': 'links', 'type': (dict, list), 'doc': 'a dictionary of links to create in this group', - 'default': dict()}, - {'name': 'parent', 'type': 'GroupBuilder', 'doc': 'the parent builder of this Builder', 'default': None}, - {'name': 'source', 'type': str, - 'doc': 'the source of the data represented in this Builder', 'default': None}) + {'name': 'links', 'type': (dict, list), 'doc': 'a dictionary/list of links to create in this group', + 'default': list()}, + *get_docval(BaseBuilder.__init__, 'parent', 'source')) def __init__(self, **kwargs): - ''' - Create a GroupBuilder object - ''' name, groups, datasets, links, attributes, parent, source = getargs( 'name', 'groups', 'datasets', 'links', 'attributes', 'parent', 'source', kwargs) + # convert groups, datasets, links to lists based on dict values if dict is given (i.e., ignore dict keys) groups = self.__to_list(groups) datasets = self.__to_list(datasets) links = self.__to_list(links) self.obj_type = dict() - super().__init__(name, attributes, parent, source) + super().__init__(name, attributes, parent, source) 
# superclass handles attributes super().__setitem__(GroupBuilder.__group, dict()) super().__setitem__(GroupBuilder.__dataset, dict()) super().__setitem__(GroupBuilder.__link, dict()) - self.__name = name for group in groups: self.set_group(group) for dataset in datasets: - if not (dataset is None): + if dataset is not None: self.set_dataset(dataset) for link in links: self.set_link(link) @@ -192,11 +191,8 @@ def source(self): @source.setter def source(self, s): - ''' - A recursive setter to set all subgroups/datasets/links - source when this source is set - ''' - super(GroupBuilder, self.__class__).source.fset(self, s) + ''' A recursive setter to set all subgroups/datasets/links source when this source is set ''' + super(GroupBuilder, self.__class__).source.fset(self, s) # call parent setter for g in self.groups.values(): if g.source is None: g.source = s @@ -219,33 +215,31 @@ def datasets(self): @property def links(self): - ''' The datasets contained in this GroupBuilder ''' + ''' The links contained in this GroupBuilder ''' return super().__getitem__(GroupBuilder.__link) - @docval({'name': 'name', 'type': str, 'doc': 'the name of the attribute'}, - {'name': 'value', 'type': None, 'doc': 'the attribute value'}) + @docval(*get_docval(BaseBuilder.set_attribute)) def set_attribute(self, **kwargs): ''' Set an attribute for this group ''' name, value = getargs('name', 'value', kwargs) super().set_attribute(name, value) - self.obj_type[name] = GroupBuilder.__attribute + self.obj_type[name] = GroupBuilder.__attribute # track that this name is associated with an attribute @docval({'name': 'builder', 'type': 'Builder', 'doc': 'the Builder to add to this GroupBuilder'}) def set_builder(self, **kwargs): - ''' - Add an existing builder to this this GroupBuilder - ''' - builder = getargs('builder', kwargs) + ''' Add an existing Builder to this GroupBuilder ''' + builder = kwargs['builder'] if isinstance(builder, LinkBuilder): self.__set_builder(builder, GroupBuilder.__link) 
elif isinstance(builder, GroupBuilder): - self.__set_builder(builder, GroupBuilder.__dataset) + self.__set_builder(builder, GroupBuilder.__group) elif isinstance(builder, DatasetBuilder): self.__set_builder(builder, GroupBuilder.__dataset) else: - raise ValueError("Got unexpected builder type: %s" % type(builder)) + raise ValueError('Got unexpected builder type: %s' % type(builder)) def __set_builder(self, builder, obj_type): + ''' Store the given child builder in the groups/datasets/attributes/links dict under its name as the key ''' name = builder.name if name in self.obj_type: if self.obj_type[name] != obj_type: @@ -262,6 +256,8 @@ def __set_builder(self, builder, obj_type): if builder.parent is None: builder.parent = self + # these are the same docval args as for DatasetBuilder, except this omits parent and source and does not allow a + # RegionBuilder or datetime for data @docval({'name': 'name', 'type': str, 'doc': 'the name of this dataset'}, {'name': 'data', 'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', Iterable), 'doc': 'a dictionary of datasets to create in this dataset', 'default': None}, @@ -272,11 +268,13 @@ def __set_builder(self, builder, obj_type): {'name': 'maxshape', 'type': (int, tuple), 'doc': 'the shape of this dataset. 
Use None for scalars', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'whether or not to chunk this dataset', 'default': False}, + {'name': 'dims', 'type': (list, tuple), 'doc': 'a list of dimensions of this dataset', 'default': None}, + {'name': 'coords', 'type': dict, 'doc': 'a dictionary of coordinates of this dataset', + 'default': None}, returns='the DatasetBuilder object for the dataset', rtype='DatasetBuilder') def add_dataset(self, **kwargs): - ''' Create a dataset and add it to this group ''' + ''' Create a dataset and add it to this group, setting parent and source ''' kwargs['parent'] = self - kwargs['source'] = self.source pargs, pkwargs = fmt_docval_args(DatasetBuilder.__init__, kwargs) builder = DatasetBuilder(*pargs, **pkwargs) self.set_dataset(builder) @@ -288,6 +286,9 @@ def set_dataset(self, **kwargs): builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__dataset) + # these are the same docval args as for GroupBuilder, except this omits parent and source and does not allow + # lists for groups, datasets, and links and has different default values accordingly + # TODO: groups, datasets, and links should really be lists with default list() because the keys are all ignored. 
@docval({'name': 'name', 'type': str, 'doc': 'the name of this subgroup'}, {'name': 'groups', 'type': dict, 'doc': 'a dictionary of subgroups to create in this subgroup', 'default': dict()}, @@ -311,13 +312,14 @@ def set_group(self, **kwargs): builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__group) + # these are the same docval args as for LinkBuilder, except this omits parent and source @docval({'name': 'target', 'type': ('GroupBuilder', 'DatasetBuilder'), 'doc': 'the target Builder'}, {'name': 'name', 'type': str, 'doc': 'the name of this link', 'default': None}, returns='the builder object for the soft link', rtype='LinkBuilder') def add_link(self, **kwargs): ''' Create a soft link and add it to this group ''' name, target = getargs('name', 'target', kwargs) - builder = LinkBuilder(target, name, self) + builder = LinkBuilder(target, name, parent=self) self.set_link(builder) return builder @@ -328,34 +330,31 @@ def set_link(self, **kwargs): builder = getargs('builder', kwargs) self.__set_builder(builder, GroupBuilder.__link) # TODO: write unittests for this method - def deep_update(self, builder): - ''' Recursively update subgroups in this group ''' + @docval({'name': 'builder', 'type': 'GroupBuilder', 'doc': 'the GroupBuilder to merge into this GroupBuilder'}) + def deep_update(self, **kwargs): + ''' Recursively merge subgroups, datasets, and links from the given builder into this group ''' + builder = kwargs['builder'] super().deep_update(builder) # merge subgroups - groups = super(GroupBuilder, builder).__getitem__(GroupBuilder.__group) - self_groups = super().__getitem__(GroupBuilder.__group) - for name, subgroup in groups.items(): - if name in self_groups: - self_groups[name].deep_update(subgroup) + for name, subgroup in builder.groups.items(): + if name in self.groups: + self.groups[name].deep_update(subgroup) else: self.set_group(subgroup) # merge datasets - datasets = super(GroupBuilder, builder).__getitem__(GroupBuilder.__dataset) - self_datasets = 
super().__getitem__(GroupBuilder.__dataset) - for name, dataset in datasets.items(): - # self.add_dataset(name, dataset) - if name in self_datasets: - self_datasets[name].deep_update(dataset) - # super().__getitem__(GroupBuilder.__dataset)[name] = dataset + for name, dataset in builder.datasets.items(): + if name in self.datasets: + self.datasets[name].deep_update(dataset) else: self.set_dataset(dataset) # merge links - for name, link in super(GroupBuilder, builder).__getitem__(GroupBuilder.__link).items(): + for name, link in builder.links.items(): self.set_link(link) def is_empty(self): - '''Returns true if there are no datasets, attributes, links or - subgroups that contain datasets, attributes or links. False otherwise. + ''' + Returns True if there are no datasets, attributes, links or subgroups that contain datasets, attributes or + links. False otherwise. ''' if (len(super().__getitem__(GroupBuilder.__dataset)) or len(super().__getitem__(GroupBuilder.__attribute)) or @@ -367,9 +366,7 @@ def is_empty(self): return True def __getitem__(self, key): - '''Like dict.__getitem__, but looks in groups, - datasets, attributes, and links sub-dictionaries. - ''' + ''' Like dict.__getitem__, but looks in groups, datasets, attributes, and links sub-dictionaries. ''' try: key_ar = _posixpath.normpath(key).split('/') return self.__get_rec(key_ar) @@ -377,9 +374,7 @@ def __getitem__(self, key): raise KeyError(key) def get(self, key, default=None): - '''Like dict.get, but looks in groups, - datasets, attributes, and links sub-dictionaries. - ''' + ''' Like dict.get, but looks in groups, datasets, attributes, and links sub-dictionaries. ''' try: key_ar = _posixpath.normpath(key).split('/') return self.__get_rec(key_ar) @@ -402,8 +397,8 @@ def __contains__(self, item): return self.obj_type.__contains__(item) def items(self): - '''Like dict.items, but iterates over key-value pairs in groups, - datasets, attributes, and links sub-dictionaries. 
+ ''' + Like dict.items, but iterates over key-value pairs in groups, datasets, attributes, and links sub-dictionaries. ''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).items(), super().__getitem__(GroupBuilder.__dataset).items(), @@ -411,18 +406,14 @@ def items(self): super().__getitem__(GroupBuilder.__link).items()) def keys(self): - '''Like dict.keys, but iterates over keys in groups, datasets, - attributes, and links sub-dictionaries. - ''' + ''' Like dict.keys, but iterates over keys in groups, datasets, attributes, and links sub-dictionaries. ''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).keys(), super().__getitem__(GroupBuilder.__dataset).keys(), super().__getitem__(GroupBuilder.__attribute).keys(), super().__getitem__(GroupBuilder.__link).keys()) def values(self): - '''Like dict.values, but iterates over values in groups, datasets, - attributes, and links sub-dictionaries. - ''' + ''' Like dict.values, but iterates over values in groups, datasets, attributes, and links sub-dictionaries. ''' return _itertools.chain(super().__getitem__(GroupBuilder.__group).values(), super().__getitem__(GroupBuilder.__dataset).values(), super().__getitem__(GroupBuilder.__attribute).values(), @@ -430,6 +421,12 @@ def values(self): class DatasetBuilder(BaseBuilder): + ''' + This class is a dictionary that holds a particular dataset of a Container (e.g., scalar, array, string, etc.) as + well as any fields relevant to the data, such as data type, maxshape, whether to chunk the data, its dimensions, + and its coordinates. + ''' + OBJECT_REF_TYPE = 'object' REGION_REF_TYPE = 'region' @@ -444,22 +441,27 @@ class DatasetBuilder(BaseBuilder): {'name': 'maxshape', 'type': (int, tuple), 'doc': 'the shape of this dataset. 
Use None for scalars', 'default': None}, {'name': 'chunks', 'type': bool, 'doc': 'whether or not to chunk this dataset', 'default': False}, - {'name': 'parent', 'type': GroupBuilder, 'doc': 'the parent builder of this Builder', 'default': None}, - {'name': 'source', 'type': str, 'doc': 'the source of the data in this builder', 'default': None}) + {'name': 'dims', 'type': (list, tuple), 'doc': 'a list of dimensions of this dataset', 'default': None}, + {'name': 'coords', 'type': dict, 'doc': 'a dictionary of coordinates of this dataset', + 'default': None}, + *get_docval(BaseBuilder.__init__, 'parent', 'source')) def __init__(self, **kwargs): ''' Create a Builder object for a dataset ''' - name, data, dtype, attributes, maxshape, chunks, parent, source = getargs( - 'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', kwargs) + name, data, dtype, attributes, maxshape, chunks, parent, source, dims, coords = getargs( + 'name', 'data', 'dtype', 'attributes', 'maxshape', 'chunks', 'parent', 'source', 'dims', 'coords', kwargs) super().__init__(name, attributes, parent, source) + self['attributes'] = _copy.copy(attributes) # TODO: is this necessary? 
it is set (but not copied) earlier self['data'] = data - self['attributes'] = _copy.copy(attributes) self.__chunks = chunks self.__maxshape = maxshape - if isinstance(data, BaseBuilder): - if dtype is None: - dtype = self.OBJECT_REF_TYPE + + self['dims'] = dims + self['coords'] = coords + + # if data is a group/dataset/link builder and dtype is not provided, set dtype to represent an object reference + if isinstance(data, BaseBuilder) and dtype is None: + dtype = self.OBJECT_REF_TYPE self.__dtype = dtype - self.__name = name @property def data(self): @@ -469,9 +471,36 @@ def data(self): @data.setter def data(self, val): if self['data'] is not None: - raise AttributeError("'data' already set") + raise AttributeError('Cannot reset data once it is specified') self['data'] = val + @property + def dims(self): + ''' The dimensions of the dataset represented by this builder ''' + return self['dims'] + + @dims.setter + def dims(self, val): + ''' + Set the dimensions of this DatasetBuilder. Raises error if dims are already set AND differ from the new value. + ''' + if self.dims is not None and self.dims != val: + raise AttributeError('Cannot reset dims once it is specified. Old value: %s, new value: %s' + % (self.dims, val)) + self['dims'] = val + + @property + def coords(self): + ''' The coordinates of the dataset represented by this builder ''' + return self['coords'] + + @coords.setter + def coords(self, val): + if self['coords'] is not None and self.coords != val: + raise AttributeError('Cannot reset coords once it is specified. 
Old value: %s, new value: %s' + % (self.coords, val)) + self['coords'] = val + @property def chunks(self): ''' Whether or not this dataset is chunked ''' @@ -490,19 +519,18 @@ def dtype(self): @dtype.setter def dtype(self, val): ''' The data type of this object ''' - if self.__dtype is None: - self.__dtype = val - else: - raise AttributeError("cannot overwrite dtype") + if self.__dtype is not None: + raise AttributeError('Cannot reset dtype once it is specified') + self.__dtype = val @docval({'name': 'dataset', 'type': 'DatasetBuilder', 'doc': 'the DatasetBuilder to merge into this DatasetBuilder'}) def deep_update(self, **kwargs): - '''Merge data and attributes from given DatasetBuilder into this DatasetBuilder''' + ''' Merge data and attributes from given DatasetBuilder into this DatasetBuilder ''' dataset = getargs('dataset', kwargs) + super().deep_update(dataset) if dataset.data: self['data'] = dataset.data # TODO: figure out if we want to add a check for overwrite - self['attributes'].update(dataset.attributes) class LinkBuilder(Builder): @@ -525,8 +553,9 @@ def builder(self): class ReferenceBuilder(dict): + # TODO why is this a dictionary? - @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the Dataset this region applies to'}) + @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), 'doc': 'the Dataset this reference applies to'}) def __init__(self, **kwargs): builder = getargs('builder', kwargs) self['builder'] = builder @@ -543,11 +572,31 @@ class RegionBuilder(ReferenceBuilder): 'doc': 'the region i.e. 
slice or indices into the target Dataset'}, {'name': 'builder', 'type': DatasetBuilder, 'doc': 'the Dataset this region applies to'}) def __init__(self, **kwargs): - region = popargs('region', kwargs) - call_docval_func(super().__init__, kwargs) + region, builder = popargs('region', 'builder', kwargs) + super().__init__(builder) self['region'] = region @property def region(self): ''' The target builder object ''' return self['region'] + + +class CoordBuilder(namedtuple('CoordBuilder', 'name axes coord_dataset_name coord_axes coord_type')): + ''' + An immutable object that represents a coordinate with fields name, axes, coord_dataset_name, coord_axes, coord_type. + + NOTE: 'axes' = 'dims_index' + ''' + + @docval({'name': 'name', 'type': str, 'doc': 'The name of this coordinate'}, + {'name': 'axes', 'type': (int, list, tuple), + 'doc': 'The axes (0-indexed) of the dataset that this coordinate acts on'}, + {'name': 'coord_dataset_name', 'type': str, 'doc': 'The name of the dataset of this coordinate'}, + {'name': 'coord_axes', 'type': (int, list, tuple), + 'doc': 'The axes (0-indexed) of the dataset of this coordinate'}, + {'name': 'coord_type', 'type': str, 'doc': 'The type of this coordinate'}) + def __new__(cls, **kwargs): + # initialize a new CoordBuilder with argument documentation and validation + # to override initialization of a namedtuple, need to override __new__, not __init__ + return super().__new__(cls, **kwargs) diff --git a/src/hdmf/build/map.py b/src/hdmf/build/map.py index c0348873c..52a4e34f8 100644 --- a/src/hdmf/build/map.py +++ b/src/hdmf/build/map.py @@ -435,6 +435,7 @@ def __set_default_name(docval_args, default_name): """ Get __init__ and fields of new class. 
+ :param base: The base class of the new class :param addl_fields: Dict of additional fields that are not in the base class :param name: Fixed name of instances of this class, or None if name is not fixed to a particular value @@ -461,7 +462,7 @@ def __get_cls_dict(self, base, addl_fields, name=None, default_name=None): # add new fields to docval and class fields for f, field_spec in addl_fields.items(): - if not f == 'help': # (legacy) do not all help to any part of class object + if not f == 'help': # (legacy) do not add help to any part of class object # build docval arguments for generated constructor dtype = self.__get_type(field_spec) if dtype is None: @@ -470,7 +471,6 @@ def __get_cls_dict(self, base, addl_fields, name=None, default_name=None): docval_arg = {'name': f, 'type': dtype, 'doc': field_spec.doc} if hasattr(field_spec, 'shape') and field_spec.shape is not None: docval_arg.update(shape=field_spec.shape) - # docval_arg['shape'] = field_spec.shape if not field_spec.required: docval_arg['default'] = getattr(field_spec, 'default_value', None) docval_args.append(docval_arg) diff --git a/src/hdmf/build/objectmapper.py b/src/hdmf/build/objectmapper.py index 80e481fef..16a315755 100644 --- a/src/hdmf/build/objectmapper.py +++ b/src/hdmf/build/objectmapper.py @@ -5,13 +5,13 @@ from copy import copy from datetime import datetime -from ..utils import docval, getargs, ExtenderMeta, get_docval +from ..utils import docval, getargs, ExtenderMeta, get_docval, get_data_shape from ..container import AbstractContainer, Container, Data, DataRegion from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, NAME_WILDCARD, RefSpec from ..data_utils import DataIO, AbstractDataChunkIterator from ..query import ReferenceResolver from ..spec.spec import BaseStorageSpec -from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder +from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, 
ReferenceBuilder, RegionBuilder, CoordBuilder from .map import Proxy, BuildManager from .warnings import OrphanContainerWarning, MissingRequiredWarning @@ -68,7 +68,7 @@ def _unicode(s): elif isinstance(s, bytes): return s.decode('utf-8') else: - raise ValueError("Expected unicode or ascii string, got %s" % type(s)) + raise ConvertError("Expected unicode or ascii string, got %s" % type(s)) def _ascii(s): @@ -80,7 +80,7 @@ def _ascii(s): elif isinstance(s, bytes): return s else: - raise ValueError("Expected unicode or ascii string, got %s" % type(s)) + raise ConvertError("Expected unicode or ascii string, got %s" % type(s)) class ObjectMapper(metaclass=ExtenderMeta): @@ -131,10 +131,10 @@ def __resolve_dtype(cls, given, specified): else: if g.name[:3] != s.name[:3]: # different types if s.itemsize < 8: - msg = "expected %s, received %s - must supply %s or higher precision" % (s.name, g.name, s.name) + msg = "Expected %s, received %s - must supply %s or higher precision" % (s.name, g.name, s.name) else: - msg = "expected %s, received %s - must supply %s" % (s.name, g.name, s.name) - raise ValueError(msg) + msg = "Expected %s, received %s - must supply %s" % (s.name, g.name, s.name) + raise ConvertError(msg) else: return g.type @@ -156,43 +156,47 @@ def convert_dtype(cls, spec, value): The value is returned as the function may convert the input value to comply with the dtype specified in the schema. 
""" - ret, ret_dtype = cls.__check_edgecases(spec, value) - if ret is not None or ret_dtype is not None: - return ret, ret_dtype - spec_dtype = cls.__dtypes[spec.dtype] - if isinstance(value, np.ndarray): - if spec_dtype is _unicode: - ret = value.astype('U') - ret_dtype = "utf8" - elif spec_dtype is _ascii: - ret = value.astype('S') - ret_dtype = "ascii" - else: - dtype_func = cls.__resolve_dtype(value.dtype, spec_dtype) - ret = np.asarray(value).astype(dtype_func) - ret_dtype = ret.dtype.type - elif isinstance(value, (tuple, list)): - if len(value) == 0: - return value, spec_dtype - ret = list() - for elem in value: - tmp, tmp_dtype = cls.convert_dtype(spec, elem) - ret.append(tmp) - ret = type(value)(ret) - ret_dtype = tmp_dtype - elif isinstance(value, AbstractDataChunkIterator): - ret = value - ret_dtype = cls.__resolve_dtype(value.dtype, spec_dtype) - else: - if spec_dtype in (_unicode, _ascii): - ret_dtype = 'ascii' - if spec_dtype == _unicode: - ret_dtype = 'utf8' - ret = spec_dtype(value) + try: + ret, ret_dtype = cls.__check_edgecases(spec, value) + if ret is not None or ret_dtype is not None: + return ret, ret_dtype + spec_dtype = cls.__dtypes[spec.dtype] + if isinstance(value, np.ndarray): + if spec_dtype is _unicode: + ret = value.astype('U') + ret_dtype = "utf8" + elif spec_dtype is _ascii: + ret = value.astype('S') + ret_dtype = "ascii" + else: + dtype_func = cls.__resolve_dtype(value.dtype, spec_dtype) + ret = np.asarray(value).astype(dtype_func) + ret_dtype = ret.dtype.type + elif isinstance(value, (tuple, list)): + if len(value) == 0: + return value, spec_dtype + ret = list() + for elem in value: + tmp, tmp_dtype = cls.convert_dtype(spec, elem) + ret.append(tmp) + ret = type(value)(ret) + ret_dtype = tmp_dtype + elif isinstance(value, AbstractDataChunkIterator): + ret = value + ret_dtype = cls.__resolve_dtype(value.dtype, spec_dtype) else: - dtype_func = cls.__resolve_dtype(type(value), spec_dtype) - ret = dtype_func(value) - ret_dtype = type(ret) 
+ if spec_dtype in (_unicode, _ascii): + ret_dtype = 'ascii' + if spec_dtype == _unicode: + ret_dtype = 'utf8' + ret = spec_dtype(value) + else: + dtype_func = cls.__resolve_dtype(type(value), spec_dtype) + ret = dtype_func(value) + ret_dtype = type(ret) + except ValueError as e: + msg = "Could not convert data '%s' to dtype '%s': %s" % (spec.name, spec.dtype, value) + raise ConvertError(msg) from e return ret, ret_dtype @classmethod @@ -217,8 +221,8 @@ def __check_edgecases(cls, spec, value): return value, value.dtype.type if isinstance(value, (list, tuple)): if len(value) == 0: - msg = "cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype." - raise ValueError(msg) + msg = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype." + raise ConvertError(msg) return value, cls.__check_edgecases(spec, value[0])[1] # infer dtype from first element ret_dtype = type(value) if ret_dtype is str: @@ -228,12 +232,12 @@ def __check_edgecases(cls, spec, value): return value, ret_dtype if isinstance(spec.dtype, RefSpec): if not isinstance(value, ReferenceBuilder): - msg = "got RefSpec for value of type %s" % type(value) - raise ValueError(msg) + msg = "Got RefSpec for value of type %s" % type(value) + raise ConvertError(msg) return value, spec.dtype if spec.dtype is not None and spec.dtype not in cls.__dtypes: - msg = "unrecognized dtype: %s -- cannot convert value" % spec.dtype - raise ValueError(msg) + msg = "Unrecognized dtype for spec '%s': %s" % (spec.name, spec.dtype) + raise ConvertError(msg) return None, None _const_arg = '__constructor_arg' @@ -529,8 +533,7 @@ def get_const_arg(self, **kwargs): def build(self, **kwargs): ''' Convert a AbstractContainer to a Builder representation ''' container, manager, parent, source = getargs('container', 'manager', 'parent', 'source', kwargs) - spec_ext = getargs('spec_ext', kwargs) - builder = getargs('builder', kwargs) + spec_ext, builder = getargs('spec_ext', 
'builder', kwargs) name = manager.get_builder_name(container) if isinstance(self.__spec, GroupSpec): if builder is None: @@ -540,8 +543,8 @@ def build(self, **kwargs): self.__add_links(builder, self.__spec.links, container, manager, source) else: if not isinstance(container, Data): - msg = "'container' must be of type Data with DatasetSpec" - raise ValueError(msg) + raise ValueError("'container' must be of type Data with DatasetSpec") + spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext) if isinstance(spec_dtype, RefSpec): # a dataset of references @@ -549,9 +552,7 @@ def build(self, **kwargs): builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=spec_dtype.reftype) elif isinstance(spec_dtype, list): # a compound dataset - # - # check for any references in the compound dtype, and - # convert them if necessary + # check for any references in the compound dtype, and convert them if necessary refs = [(i, subt) for i, subt in enumerate(spec_dtype) if isinstance(subt.dtype, RefSpec)] bldr_data = copy(container.data) bldr_data = list() @@ -562,9 +563,10 @@ def build(self, **kwargs): bldr_data.append(tuple(tmp)) try: bldr_data, dtype = self.convert_dtype(spec, bldr_data) - except Exception as ex: - msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) - raise Exception(msg) from ex + except ConvertError as ex: + msg = ("Could not build %s for %s '%s' due to: %s" + % (spec.name, type(container).__name__, container.name, ex)) + raise BuildError(msg) from ex builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) else: # a regular dtype @@ -579,13 +581,14 @@ def build(self, **kwargs): builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype='object') else: - # a dataset that has no references, pass the donversion off to + # a dataset that has no references, pass the conversion off to # the convert_dtype method try: bldr_data, dtype = 
self.convert_dtype(spec, container.data) - except Exception as ex: - msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) - raise Exception(msg) from ex + except ConvertError as ex: + msg = ("Could not build %s for %s '%s' due to: %s" + % (spec.name, type(container).__name__, container.name, ex)) + raise BuildError(msg) from ex builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) self.__add_attributes(builder, self.__spec.attributes, container, manager, source) return builder @@ -683,9 +686,10 @@ def __add_attributes(self, builder, attributes, container, build_manager, source if attr_value is not None: try: attr_value, attr_dtype = self.convert_dtype(spec, attr_value) - except Exception as ex: - msg = 'could not convert %s for %s %s' % (spec.name, type(container).__name__, container.name) - raise Exception(msg) from ex + except ConvertError as ex: + msg = ("Could not build %s for %s '%s' due to: %s" + % (spec.name, type(container).__name__, container.name, ex)) + raise BuildError(msg) from ex # do not write empty or null valued objects if attr_value is None: @@ -697,14 +701,14 @@ def __add_attributes(self, builder, attributes, container, build_manager, source builder.set_attribute(spec.name, attr_value) - def __add_links(self, builder, links, container, build_manager, source): + def __add_links(self, group_builder, links, container, build_manager, source): for spec in links: attr_value = self.get_attr_value(spec, container, build_manager) if not attr_value: continue - self.__add_containers(builder, spec, attr_value, build_manager, source, container) + self.__add_containers(group_builder, spec, attr_value, build_manager, source, container) - def __add_datasets(self, builder, datasets, container, build_manager, source): + def __add_datasets(self, group_builder, datasets, container, build_manager, source): for spec in datasets: attr_value = self.get_attr_value(spec, container, build_manager) if 
attr_value is None: @@ -713,32 +717,114 @@ def __add_datasets(self, builder, datasets, container, build_manager, source): if isinstance(attr_value, DataIO) and attr_value.data is None: continue if isinstance(attr_value, Builder): - builder.set_builder(attr_value) + group_builder.set_builder(attr_value) elif spec.data_type_def is None and spec.data_type_inc is None: - if spec.name in builder.datasets: - sub_builder = builder.datasets[spec.name] + # a non-Container/Data dataset, e.g. a float or nd-array + if spec.name in group_builder.datasets: + dataset_builder = group_builder.datasets[spec.name] else: try: + # convert the given data values to the spec dtype data, dtype = self.convert_dtype(spec, attr_value) - except Exception as ex: - msg = 'could not convert \'%s\' for %s \'%s\'' - msg = msg % (spec.name, type(container).__name__, container.name) - raise Exception(msg) from ex - sub_builder = builder.add_dataset(spec.name, data, dtype=dtype) - self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source) + dims = self.__check_dims(spec, data) + except ConvertError as ex: + msg = ("Could not build '%s' for %s '%s' due to: %s" + % (spec.name, type(container).__name__, container.name, ex)) + raise BuildError(msg) from ex + dataset_builder = group_builder.add_dataset(spec.name, data, dtype=dtype) + if dims: + dataset_builder.dims = dims + self.__add_attributes(dataset_builder, spec.attributes, container, build_manager, source) else: - self.__add_containers(builder, spec, attr_value, build_manager, source, container) + # a Container/Data dataset, e.g. 
a VectorData + self.__add_containers(group_builder, spec, attr_value, build_manager, source, container) + + # set dataset coords after all dataset builders have been created + for dataset_spec in datasets: + if dataset_spec.name is None or dataset_spec.coords is None: + # TODO handle VectorData case where name is not known + continue + dataset_builder = group_builder.datasets[dataset_spec.name] + try: + coords = self.__check_coords(dataset_spec, group_builder, dataset_builder) + if coords: + dataset_builder.coords = coords + except ConvertError as ex: + msg = ("Could not build '%s' for %s '%s' due to: %s" + % (dataset_spec.name, type(container).__name__, container.name, ex)) + raise BuildError(msg) from ex + + @classmethod + def __check_dims(cls, dataset_spec, data): + """ + Validate that the dimensions (number of dims, length of each dim) of data are allowed based on the spec. + Returns tuple of dimension names corresponding to the dimensions used. + """ + data_shape = get_data_shape(data) + if not dataset_spec.dims: # dims not specified + return None + + # check required dims + if len(data_shape) < dataset_spec.min_num_dims: + msg = "Data must have at least %d dimensions but has %d." % (dataset_spec.min_num_dims, len(data_shape)) + raise ConvertError(msg) + + if len(data_shape) > len(dataset_spec.dims): + msg = "Data must have at most %d dimensions but has %d." % (len(dataset_spec.dims), len(data_shape)) + raise ConvertError(msg) + + used_dim_names = [] + for s in range(len(data_shape)): + # check dimension length + if dataset_spec.dims[s].length is not None: + if dataset_spec.dims[s].length != data_shape[s]: + msg = ("Data dimension '%s' (axis %d) must have length %d but has length %d." 
+ % (dataset_spec.dims[s].name, s, dataset_spec.dims[s].length, data_shape[s])) + raise ConvertError(msg) + used_dim_names.append(dataset_spec.dims[s].name) + return tuple(used_dim_names) - def __add_groups(self, builder, groups, container, build_manager, source): + @classmethod + def __check_coords(cls, dataset_spec, group_builder, dataset_builder): + """ + Returns dict of CoordBuilders corresponding to the dimensions used. + """ + used_coords = dict() + if dataset_spec.coords: + for coord_spec in dataset_spec.coords: + for dim_index in coord_spec.dims_index: + if dim_index < len(dataset_builder.dims): # check the dimension exists on the dataset + coord_dataset_builder = group_builder.datasets.get(coord_spec.coord.dataset_name, None) + if coord_dataset_builder is not None: # check the coord dataset exists in the group + # TODO store coord_dataset_builder in CoordBuilder + # TODO check the axis exists on the coord dataset + # TODO check the coord_type is appropriate + # TODO check that coord name is not already in used_coords + # copy key-value pairs from CoordSpec to CoordBuilder + coord_builder = CoordBuilder(name=coord_spec.name, + axes=coord_spec.dims_index, + coord_dataset_name=coord_spec.coord.dataset_name, + coord_axes=coord_spec.coord.dims_index, + coord_type=coord_spec.coord.type + ) + used_coords[coord_spec.name] = coord_builder + # else: + # # TODO this should be OK + # msg = ("Coord dataset '%s' of coord '%s' does not exist." 
+ # % (coord_spec.coord.dataset_name, coord_spec.name)) + # raise ConvertError(msg) + return used_coords + + def __add_groups(self, group_builder, groups, container, build_manager, source): for spec in groups: if spec.data_type_def is None and spec.data_type_inc is None: # we don't need to get attr_name since any named # group does not have the concept of value - sub_builder = builder.groups.get(spec.name) - if sub_builder is None: - sub_builder = GroupBuilder(spec.name, source=source) - self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source) - self.__add_datasets(sub_builder, spec.datasets, container, build_manager, source) + subgroup_builder = group_builder.groups.get(spec.name) + if subgroup_builder is None: + subgroup_builder = GroupBuilder(spec.name, source=source) + self.__add_attributes(subgroup_builder, spec.attributes, container, build_manager, source) + self.__add_datasets(subgroup_builder, spec.datasets, container, build_manager, source) # handle subgroups that are not Containers attr_name = self.get_attribute(spec) @@ -750,31 +836,31 @@ def __add_groups(self, builder, groups, container, build_manager, source): it = iter(attr_value.values()) for item in it: if isinstance(item, Container): - self.__add_containers(sub_builder, spec, item, build_manager, source, container) - self.__add_groups(sub_builder, spec.groups, container, build_manager, source) - empty = sub_builder.is_empty() + self.__add_containers(subgroup_builder, spec, item, build_manager, source, container) + self.__add_groups(subgroup_builder, spec.groups, container, build_manager, source) + empty = subgroup_builder.is_empty() if not empty or (empty and isinstance(spec.quantity, int)): - if sub_builder.name not in builder.groups: - builder.set_group(sub_builder) + if subgroup_builder.name not in group_builder.groups: + group_builder.set_group(subgroup_builder) else: if spec.data_type_def is not None: attr_name = self.get_attribute(spec) if attr_name is not None: 
attr_value = getattr(container, attr_name, None) if attr_value is not None: - self.__add_containers(builder, spec, attr_value, build_manager, source, container) + self.__add_containers(group_builder, spec, attr_value, build_manager, source, container) else: attr_name = self.get_attribute(spec) attr_value = self.get_attr_value(spec, container, build_manager) if attr_value is not None: - self.__add_containers(builder, spec, attr_value, build_manager, source, container) + self.__add_containers(group_builder, spec, attr_value, build_manager, source, container) - def __add_containers(self, builder, spec, value, build_manager, source, parent_container): + def __add_containers(self, group_builder, spec, value, build_manager, source, parent_container): if isinstance(value, AbstractContainer): if value.parent is None: msg = "'%s' (%s) for '%s' (%s)"\ % (value.name, getattr(value, self.spec.type_key()), - builder.name, self.spec.data_type_def) + group_builder.name, self.spec.data_type_def) warnings.warn(msg, OrphanContainerWarning) if value.modified: # writing a new container if isinstance(spec, BaseStorageSpec): @@ -785,14 +871,14 @@ def __add_containers(self, builder, spec, value, build_manager, source, parent_c # object this AbstractContainer corresponds to if isinstance(spec, LinkSpec) or value.parent is not parent_container: name = spec.name - builder.set_link(LinkBuilder(rendered_obj, name, builder)) + group_builder.set_link(LinkBuilder(rendered_obj, name, group_builder)) elif isinstance(spec, DatasetSpec): if rendered_obj.dtype is None and spec.dtype is not None: val, dtype = self.convert_dtype(spec, rendered_obj.data) rendered_obj.dtype = dtype - builder.set_dataset(rendered_obj) + group_builder.set_dataset(rendered_obj) else: - builder.set_group(rendered_obj) + group_builder.set_group(rendered_obj) elif value.container_source: # make a link to an existing container if value.container_source != parent_container.container_source or\ value.parent is not 
parent_container: @@ -800,7 +886,7 @@ def __add_containers(self, builder, spec, value, build_manager, source, parent_c rendered_obj = build_manager.build(value, source=source, spec_ext=spec) else: rendered_obj = build_manager.build(value, source=source) - builder.set_link(LinkBuilder(rendered_obj, name=spec.name, parent=builder)) + group_builder.set_link(LinkBuilder(rendered_obj, name=spec.name, parent=group_builder)) else: raise ValueError("Found unmodified AbstractContainer with no source - '%s' with parent '%s'" % (value.name, parent_container.name)) @@ -816,7 +902,7 @@ def __add_containers(self, builder, spec, value, build_manager, source, parent_c raise ValueError(msg % value.__class__.__name__) for container in values: if container: - self.__add_containers(builder, spec, container, build_manager, source, parent_container) + self.__add_containers(group_builder, spec, container, build_manager, source, parent_container) def __get_subspec_values(self, builder, spec, manager): ret = dict() @@ -866,7 +952,9 @@ def __get_subspec_values(self, builder, spec, manager): elif isinstance(spec, DatasetSpec): if not isinstance(builder, DatasetBuilder): raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec") - ret[spec] = self.__check_ref_resolver(builder.data) + data = self.__check_ref_resolver(builder.data) + converted_data, _ = self.convert_dtype(spec, data) + ret[spec] = converted_data return ret @staticmethod @@ -918,17 +1006,22 @@ def __flatten(self, sub_builder, subspec, manager): tmp = tmp[0] return tmp - @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), + @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder), # noqa: C901 'doc': 'the builder to construct the AbstractContainer from'}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager for this build'}, {'name': 'parent', 'type': (Proxy, AbstractContainer), 'doc': 'the parent AbstractContainer/Proxy for the AbstractContainer being built', 
'default': None}) def construct(self, **kwargs): ''' Construct an AbstractContainer from the given Builder ''' + # NOTE: the construct method requires docval on the __init__ method of the AbstractContainer builder, manager, parent = getargs('builder', 'manager', 'parent', kwargs) cls = manager.get_cls(builder) # gather all subspecs - subspecs = self.__get_subspec_values(builder, self.spec, manager) + try: + subspecs = self.__get_subspec_values(builder, self.spec, manager) + except ConvertError as ex: + msg = "Could not construct %s object due to: %s" % (cls.__name__, ex) + raise ConstructError(msg) from ex # get the constructor argument that each specification corresponds to const_args = dict() # For Data container classes, we need to populate the data constructor argument since @@ -936,7 +1029,14 @@ def construct(self, **kwargs): if issubclass(cls, Data): if not isinstance(builder, DatasetBuilder): raise ValueError('Can only construct a Data object from a DatasetBuilder - got %s' % type(builder)) - const_args['data'] = self.__check_ref_resolver(builder.data) + data = self.__check_ref_resolver(builder.data) + try: + converted_data, _ = self.convert_dtype(self.spec, data) + except ConvertError as ex: + msg = "Could not construct %s object due to: %s" % (cls.__name__, ex) + raise ConstructError(msg) from ex + const_args['data'] = converted_data + for subspec, value in subspecs.items(): const_arg = self.get_const_arg(subspec) if const_arg is not None: @@ -962,8 +1062,53 @@ def construct(self, **kwargs): object_id=builder.attributes.get(self.__spec.id_key())) obj.__init__(**kwargs) except Exception as ex: - msg = 'Could not construct %s object due to %s' % (cls.__name__, ex) - raise Exception(msg) from ex + msg = 'Could not construct %s object due to: %s' % (cls.__name__, ex) + raise ConstructError(msg) from ex + + # add dimensions and coordinates to both the dataset builder and the new container + if isinstance(builder, GroupBuilder): + for subspec, value in 
subspecs.items(): + if isinstance(subspec, DatasetSpec): + # get the corresponding DatasetBuilder + if subspec.name is None: + # TODO handle case where subspec name is None, e.g. for DynamicTable columns + continue + dataset_builder = builder.datasets[subspec.name] + # verify that the dims are valid and return only the active dims + dims = None + try: + dims = self.__check_dims(subspec, dataset_builder.data) + except ConvertError as ex: + msg = ("Could not construct dims for dataset '%s' for %s '%s' due to: %s" + % (subspec.name, cls.__name__, obj.name, ex)) + warnings.warn(msg) + + if dims: + # since dataset_builder is cached in the manager, need to set its dims + dataset_builder.dims = dims + # set dims on the new Container object + obj.set_dims(array_name=dataset_builder.name, dims=dims) + + # verify that the coords are valid and return only the active coords + coords = None + try: + coords = self.__check_coords(subspec, builder, dataset_builder) + except ConvertError as ex: + msg = ("Could not construct coords for dataset '%s' for %s '%s' due to: %s" + % (subspec.name, cls.__name__, obj.name, ex)) + warnings.warn(msg) + + if coords: + # since dataset_builder is cached in the manager, need to set its coords + dataset_builder.coords = coords # this is a dictionary of coord name : CoordBuilder + # unpack the CoordBuilder and set coords on the new Container object + for coord in coords.values(): + obj.set_coord(array_name=dataset_builder.name, + name=coord.name, + dims_index=coord.axes, + coord_array_name=coord.coord_dataset_name, + coord_array_dims_index=coord.coord_axes, + coord_type=coord.coord_type) return obj @docval({'name': 'container', 'type': AbstractContainer, @@ -983,3 +1128,15 @@ def get_builder_name(self, **kwargs): else: ret = container.name return ret + + +class BuildError(Exception): + pass + + +class ConvertError(Exception): + pass + + +class ConstructError(Exception): + pass diff --git a/src/hdmf/common/sparse.py b/src/hdmf/common/sparse.py index 
86f0102e7..d5583d44f 100644 --- a/src/hdmf/common/sparse.py +++ b/src/hdmf/common/sparse.py @@ -11,6 +11,11 @@ @register_class('CSRMatrix') class CSRMatrix(Container): + __fields__ = ({'name': 'data', 'settable': False}, + {'name': 'indices', 'settable': False}, + {'name': 'indptr', 'settable': False}, + {'name': 'shape', 'settable': False}) + @docval({'name': 'data', 'type': (sps.csr_matrix, np.ndarray, h5py.Dataset), 'doc': 'the data to use for this CSRMatrix or CSR data array.' 'If passing CSR data array, *indices*, *indptr*, and *shape* must also be provided'}, @@ -35,20 +40,33 @@ def __init__(self, **kwargs): data = sps.csr_matrix((data, indices, indptr), shape=shape) else: raise ValueError("cannot use ndarray of dimensionality > 2") - self.__data = data - self.__shape = data.shape + self.fields['data'] = data + self.fields['indices'] = data.indices + self.fields['indptr'] = data.indptr + self.fields['shape'] = data.shape @staticmethod def __check_ind(ar, arg): if not (ar.ndim == 1 or np.issubdtype(ar.dtype, int)): raise ValueError('%s must be a 1D array of integers' % arg) - def __getattr__(self, val): - return getattr(self.__data, val) - @property - def shape(self): - return self.__shape + def data(self): + # override auto-generated getter + return self.fields['data'].data def to_spmat(self): - return self.__data + return self.fields['data'] + + def __repr__(self): + cls = self.__class__ + template = "%s %s.%s at 0x%d" % (self.name, cls.__module__, cls.__name__, id(self)) + if len(self.fields): + template += "\nFields:\n" + for k in sorted(self.fields): # sorted to enable tests + v = self.fields[k] + if k == 'data': + template += " {}:\n{}\n".format(k, v) + else: + template += " {}: {}\n".format(k, v) + return template diff --git a/src/hdmf/container.py b/src/hdmf/container.py index 13e7fa2a1..f8f4d9c9f 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -3,7 +3,9 @@ from uuid import uuid4 from .utils import docval, get_docval, 
call_docval_func, getargs, ExtenderMeta, get_data_shape from .data_utils import DataIO +from collections import namedtuple from warnings import warn +import xarray as xr import h5py @@ -154,19 +156,19 @@ def modified(self): return self.__modified @docval({'name': 'modified', 'type': bool, - 'doc': 'whether or not this Container has been modified', 'default': True}) + 'doc': 'whether or not this AbstractContainer has been modified', 'default': True}) def set_modified(self, **kwargs): modified = getargs('modified', kwargs) self.__modified = modified - if modified and isinstance(self.parent, Container): + if modified and isinstance(self.parent, AbstractContainer): self.parent.set_modified() @property def children(self): return tuple(self.__children) - @docval({'name': 'child', 'type': 'Container', - 'doc': 'the child Container for this Container', 'default': None}) + @docval({'name': 'child', 'type': 'AbstractContainer', + 'doc': 'the child AbstractContainer for this AbstractContainer', 'default': None}) def add_child(self, **kwargs): warn(DeprecationWarning('add_child is deprecated. Set the parent attribute instead.')) child = getargs('child', kwargs) @@ -212,8 +214,8 @@ def parent(self, parent_container): if self.parent is not None: if isinstance(self.parent, AbstractContainer): - raise ValueError(('Cannot reassign parent to Container: %s. ' - 'Parent is already: %s.' % (repr(self), repr(self.parent)))) + raise ValueError(('Cannot reassign parent to: %s. Parent is already: %s.' 
+ % (repr(self), repr(self.parent)))) else: if parent_container is None: raise ValueError("Got None for parent of '%s' - cannot overwrite Proxy with NoneType" % repr(self)) @@ -228,13 +230,21 @@ def parent(self, parent_container): self.__parent.add_candidate(parent_container) else: self.__parent = parent_container - if isinstance(parent_container, Container): + if isinstance(parent_container, AbstractContainer): parent_container.__children.append(self) parent_container.set_modified() class Container(AbstractContainer): + @docval({'name': 'name', 'type': str, 'doc': 'the name of this container'}) + def __init__(self, **kwargs): + call_docval_func(super().__init__, kwargs) + # dict of active dimension names, where key is name of data array and value is a list of names, one per axis + self.__dims = dict() + # dict of dimension coordinates, where key is name of data array and value is a list of label-dataset pairs + self.__coords = dict() + _pconf_allowed_keys = {'name', 'child', 'required_name', 'doc', 'settable'} @classmethod @@ -244,8 +254,7 @@ def _setter(cls, field): if isinstance(field, dict): for k in field.keys(): if k not in cls._pconf_allowed_keys: - msg = "Unrecognized key '%s' in __field__ config '%s' on %s" %\ - (k, field['name'], cls.__name__) + msg = "Unrecognized key '%s' in __field__ config '%s' on %s" % (k, field['name'], cls.__name__) raise ValueError(msg) if field.get('required_name', None) is not None: name = field['required_name'] @@ -373,6 +382,182 @@ def __smart_str_dict(d, num_indent): out += '\n' + indent + right_br return out + @property + def dims(self): + ''' + Return a dict of dimension names, indexed by data array name, for this Container. Each value is a list of + names, one for each dimension of the array. 
+ ''' + return self.__dims + + @docval({'name': 'data_name', 'type': str, 'doc': ''}, + {'name': 'dim', 'type': str, 'doc': ''}) + def _get_dim_axis(self, **kwargs): + """Return the axis corresponding to the given dimension of the given array.""" + # TODO updateme + data_name, dim = getargs('data_name', 'dim', kwargs) + if data_name not in self.fields: + raise ValueError("No field named '%s' in %s." % (data_name, self.__class__.__name__)) + if data_name not in self.dims: + return None # TODO should this return None or raise an error? + try: + axis = self.dims[data_name].index(dim) + except ValueError: + raise ValueError("Dim name '%s' not found for field '%s' of %s." + % (dim, data_name, self.__class__.__name__)) + return axis + + @docval({'name': 'array_name', 'type': str, 'doc': ''}, + {'name': 'dims', 'type': tuple, 'doc': ''}) + def set_dims(self, **kwargs): + """ + Set a coordinate between an axis/axes of a given array and an axis/axes of another array in this Container. + + This should be used only internally within PyNWB. User-defined dimensions will not be saved. + """ + array_name, dims = getargs('array_name', 'dims', kwargs) + if array_name not in self.fields: + raise ValueError("Field named '%s' not found in %s '%s'." + % (array_name, self.__class__.__name__, self.name)) + if array_name in self.dims: + raise ValueError("Cannot reset dims for field '%s' in %s '%s'. Dims is already %s." + % (array_name, self.__class__.__name__, self.name, self.dims[array_name])) + if not Container.__all_unique(dims): + raise ValueError("Cannot set dims for field '%s' in %s '%s'. Dim names must be unique." + % (array_name, self.__class__.__name__, self.name)) + + data = getattr(self, array_name) + data_shape = get_data_shape(data) + if data_shape is None: + raise ValueError("Cannot determine shape of field '%s' in %s '%s'." 
+ % (array_name, self.__class__.__name__, self.name)) + if len(data_shape) != len(dims): + raise ValueError("Number of dims must equal number of axes for field '%s' in %s '%s' (%d != %d)." + % (array_name, self.__class__.__name__, self.name, len(dims), len(data_shape))) + + self.dims[array_name] = dims + + @staticmethod + def __all_unique(x): + """Return whether all elements in the given iterable are unique.""" + seen = set() + return not any(i in seen or seen.add(i) for i in x) + + @property + def coords(self): + """ + Return a dictionary of coordinates, indexed by array name, for this Container. Each value is a dictionary of + coordinates, indexed by label, where the value is a tuple: (tuple of dimension names, the coordinate array). + """ + return self.__coords + + @docval({'name': 'array_name', 'type': str, 'doc': ''}, + {'name': 'name', 'type': str, 'doc': ''}, + {'name': 'dims_index', 'type': tuple, 'doc': ''}, + {'name': 'coord_array_name', 'type': str, 'doc': ''}, + {'name': 'coord_array_dims_index', 'type': tuple, 'doc': ''}, + {'name': 'coord_type', 'type': str, 'doc': ''}) + def set_coord(self, **kwargs): + """ + Set a coordinate between an axis/axes of a given array and an axis/axes of another array in this Container. + + This should be used only internally within PyNWB. User-defined coordinates will not be saved. + + Usage examples: + Field 'data' has dim 'time' for axis 0, 'electrodes' for axis 1. + Field 'timestamps' has cooordinates for axis 0 of data and length equal to data.shape[0]. + set_coord(array_name='data', name='my_time', dims_index=(0, ), coord_array_name='timestamps', + coord_array_dims_index=(0, ), coord_type='aligned') + will result in: + self.coords['data']['my_time'] == Coordinates.Coord(name='my_time', dims=('time', ), + coord_array_name='timestamps', coord_array_dims_index=(0, ), + coord_type='aligned') + + Field 'data' has dim 'frame' for axis 0, 'x' for axis 1, and 'y' for axis 2. 
+ Field 'dorsal_ventral' is a coordinate for axes 1 and 2 of data. + set_coord(array_name='data', name='dv', dims_index=(1, 2), coord_array_name='dorsal_ventral', + coord_array_dims_index=(0, 1), coord_type='aligned') + will result in: + self.coords['data']['dv'] == Coordinates.Coord(name='dv', dims=('x', 'y'), + coord_array_name='dorsal_ventral', coord_array_dims_index=(0, 1), + coord_type='aligned') + """ + array_name, name, dims_index, = getargs('array_name', 'name', 'dims_index', kwargs) + coord_array_name, coord_array_dims_index, coord_type = getargs('coord_array_name', 'coord_array_dims_index', + 'coord_type', kwargs) + if array_name not in self.fields: + raise ValueError("Cannot set coord '%s'. Field name '%s' not found in %s '%s'." + % (name, array_name, self.__class__.__name__, self.name)) + if array_name not in self.dims: + raise ValueError("Cannot set coord '%s'. No dimensions have been specified for '%s' in %s '%s'." + % (name, array_name, self.__class__.__name__, self.name)) + if coord_array_name not in self.fields: + raise ValueError("Cannot set coord '%s'. Field name '%s' not found in %s '%s'." + % (name, coord_array_name, self.__class__.__name__, self.name)) + if array_name == coord_array_name: + raise ValueError("Cannot set coord '%s' to itself: %s" % (name, array_name)) + if len(dims_index) != len(coord_array_dims_index): + raise ValueError("Cannot set coord '%s'. Number of specified dimension indices must equal number of " + "specified coord array dimension indices (%d != %d)." + % (name, len(dims_index), len(coord_array_dims_index))) + + data = getattr(self, array_name) + data_shape = get_data_shape(data) + coord_data = getattr(self, coord_array_name) + coord_shape = get_data_shape(coord_data) + + if coord_type == 'aligned': + for daxis, caxis in zip(dims_index, coord_array_dims_index): + if data_shape[daxis] != coord_shape[caxis]: + raise ValueError(("Cannot set coord '%s'. 
Dimension index %d of array '%s' must have the same " + "length as dimension index %d of array '%s' in %s '%s' (%d != %d).") + % (name, daxis, array_name, caxis, coord_data, self.__class__.__name__, self.name, + data_shape[daxis], coord_shape[caxis])) + else: + raise ValueError("Cannot set coord '%s'. Unknown coord type: %s" % (name, coord_type)) + + if array_name not in self.coords: + self.coords[array_name] = Coordinates(parent=self) # initialize for this array + + dims = list() + for dims_ind in dims_index: + dims.append(self.dims[array_name][dims_ind]) # get dim names + + self.coords[array_name].add(name=name, dims=tuple(dims), coord_array=coord_data, + coord_array_dims_index=coord_array_dims_index, coord_type=coord_type) + + @docval({'name': 'array_name', 'type': str, 'doc': ''}) + def to_xarray_dataarray(self, **kwargs): + """Return an xarray.DataArray for the given array in this Container, with dims and coords set.""" + array_name = getargs('array_name', kwargs) + if array_name not in self.fields: + raise ValueError("Field name '%s' not found in %s '%s'." % (array_name, self.__class__.__name__, self.name)) + + xr_kwargs = dict() + if array_name in self.dims: + xr_kwargs['dims'] = self.dims[array_name] + + if array_name in self.coords: + # convert the Coordinates dictionary of Coordinate.Coord objs into an xarray coords dictionary of tuples + xr_coords = dict() + for coord in self.coords[array_name].values(): + coord_array = coord.coord_array + coord_array_shape = get_data_shape(coord_array) + if len(coord.dims) != len(coord_array_shape): + raise ValueError("Cannot convert the array '%s' to an xarray.DataArray. All coordinate arrays " + "must map all of their dimensions to a set of dimensions on '%s'." 
+ % (array_name, array_name)) + xr_coords[coord.name] = (coord.dims, coord_array) + xr_kwargs['coords'] = xr_coords + + arr = xr.DataArray(getattr(self, array_name), **xr_kwargs) + return arr + + @docval({'name': 'array_name', 'type': str, 'doc': ''}) + def to_xarray_dataset(self, **kwargs): + """Return an xarray.Dataset of all of the labeled arrays in this Container.""" + raise NotImplementedError() + class Data(AbstractContainer): """ @@ -464,3 +649,60 @@ def region(self): The region that indexes into data e.g. slice or list of indices ''' pass + + +class Coordinates: + """A dictionary-like object that holds coordinate data (Coord namedtuple value) by name (string key).""" + # use composition of a dict instead of inheritance to restrict user's ability to use arbitrary dict methods + # values in this dict cannot be changed once set + + Coord = namedtuple('Coord', ['name', 'dims', 'coord_array', 'coord_array_dims_index', 'coord_type']) + + @docval({'name': 'parent', 'type': Container, 'doc': 'parent container of the coordinate'}) + def __init__(self, **kwargs): + self.__parent = kwargs['parent'] + self.__dict = dict() + + @docval({'name': 'name', 'type': str, 'doc': 'coordinate name'}, + {'name': 'dims', 'type': tuple, 'doc': 'tuple of dimension names to which the coordinate applies'}, + {'name': 'coord_array', 'type': ('data', 'array_data'), 'doc': 'coordinate data array'}, + {'name': 'coord_array_dims_index', 'type': tuple, + 'doc': 'tuple of dimension indices (0-indexed) of coordinate data array'}, + {'name': 'coord_type', 'type': str, 'doc': 'coordinate type'}) + def add(self, **kwargs): + name = kwargs['name'] + if name in self.__dict: + msg = "Coordinate '%s' already exists. Cannot overwrite values in %s." 
% (name, self.__class__.__name__) + raise ValueError(msg) + self.__dict[name] = Coordinates.Coord(**kwargs) + + def __getitem__(self, key): + return self.__dict[key] + + def __repr__(self): + return repr(self.__dict) + + def __len__(self): + return len(self.__dict) + + def __contains__(self, item): + return item in self.__dict + + def __iter__(self): + return iter(self.__dict) + + def keys(self): + return self.__dict.keys() + + def values(self): + return self.__dict.values() + + def items(self): + return self.__dict.items() + + def __eq__(self, other): + return self.__dict == other.__dict and self.__parent == other.__parent + + @property + def parent(self): + return self.__parent diff --git a/src/hdmf/spec/__init__.py b/src/hdmf/spec/__init__.py index 94819770e..53dd9b6a7 100644 --- a/src/hdmf/spec/__init__.py +++ b/src/hdmf/spec/__init__.py @@ -1,6 +1,9 @@ from .spec import NAME_WILDCARD from .spec import Spec from .spec import AttributeSpec +from .spec import CoordSpec +from .spec import InnerCoordSpec +from .spec import DimSpec from .spec import DtypeSpec from .spec import DtypeHelper from .spec import RefSpec diff --git a/src/hdmf/spec/namespace.py b/src/hdmf/spec/namespace.py index c4cd2a7b8..b49cc37ec 100644 --- a/src/hdmf/spec/namespace.py +++ b/src/hdmf/spec/namespace.py @@ -8,7 +8,7 @@ from itertools import chain from abc import ABCMeta, abstractmethod -from ..utils import docval, getargs, popargs, get_docval, call_docval_func +from ..utils import docval, getargs, popargs, get_docval from .catalog import SpecCatalog from .spec import DatasetSpec, GroupSpec @@ -183,8 +183,7 @@ class YAMLSpecReader(SpecReader): @docval({'name': 'indir', 'type': str, 'doc': 'the path spec files are relative to', 'default': '.'}) def __init__(self, **kwargs): - super_kwargs = {'source': kwargs['indir']} - call_docval_func(super().__init__, super_kwargs) + super().__init__(source=kwargs['indir']) def read_namespace(self, namespace_path): namespaces = None diff --git 
a/src/hdmf/spec/spec.py b/src/hdmf/spec/spec.py index d8c505fc9..21a9be04a 100644 --- a/src/hdmf/spec/spec.py +++ b/src/hdmf/spec/spec.py @@ -1,6 +1,6 @@ from abc import ABCMeta from copy import deepcopy -from collections import OrderedDict +from collections import OrderedDict, namedtuple import re from warnings import warn @@ -151,17 +151,13 @@ def __hash__(self): _target_type_key = 'target_type' -_ref_args = [ - {'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, - {'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'}, -] - class RefSpec(ConstructableDict): __allowable_types = ('object', 'region') - @docval(*_ref_args) + @docval({'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'}, + {'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'}) def __init__(self, **kwargs): target_type, reftype = getargs(_target_type_key, 'reftype', kwargs) self[_target_type_key] = target_type @@ -274,6 +270,128 @@ def build_const_args(cls, spec_dict): return ret + +class DimSpec(ConstructableDict): + """Specification for dimensions""" + + @docval({'name': 'name', 'type': str, 'doc': 'The name of this dimension'}, + {'name': 'required', 'type': bool, 'doc': 'Whether this dimension is required'}, + {'name': 'length', 'type': int, 'doc': 'The length of this dimension', 'default': None}, + {'name': 'doc', 'type': str, 'doc': 'Documentation for this dimension', 'default': None}, + {'name': 'parent', 'type': 'DatasetSpec', 'doc': 'The parent dataset spec of this spec', 'default': None}) + def __init__(self, **kwargs): + name, required, length, doc, parent = getargs('name', 'required', 'length', 'doc', 'parent', kwargs) + super().__init__() + self['name'] = name + self['required'] = required + if length is not None and length <= 0: + raise ValueError('DimSpec length must be a positive number or None') + self['length'] = length + 
self['doc'] = doc + self._parent = parent + + @property + def name(self): + """The name of this dimension""" + return self.get('name', None) + + @property + def required(self): + """Whether this dimension is required""" + return self.get('required', None) + + @property + def length(self): + """The length of this dimension""" + return self.get('length', None) + + @property + def doc(self): + """Documentation for this dimension""" + return self.get('doc', None) + + @property + def parent(self): + """The parent specification of this specification""" + return self._parent + + @parent.setter + def parent(self, spec): + """Set the parent of this specification""" + if self._parent is not None: + raise Exception('Cannot re-assign parent') + self._parent = spec + + +class InnerCoordSpec(namedtuple('InnerCoordSpec', 'dataset_name dims_index type')): + """Specification for the 'coord' key of a dimension coordinate""" + + @docval({'name': 'dataset_name', 'type': str, 'doc': 'The name of the dataset of this coordinate'}, + {'name': 'dims_index', 'type': (int, list, tuple), + 'doc': 'The dimension indices (0-indexed) of the dataset of this coordinate'}, + {'name': 'type', 'type': str, 'doc': 'The type of this coordinate'}) + def __new__(cls, **kwargs): + # initialize a new InnerCoordSpec with argument documentation and validation + # to override initialization of a namedtuple, need to override __new__, not __init__ + + # cast int, list to tuple + dims_index = kwargs['dims_index'] + if type(dims_index) == int: + kwargs['dims_index'] = (dims_index, ) + elif isinstance(dims_index, list): + kwargs['dims_index'] = tuple(dims_index) + return super().__new__(cls, **kwargs) + + +class CoordSpec(ConstructableDict): + """Specification for a dimension coordinate""" + + @docval({'name': 'name', 'type': str, 'doc': 'The name of this coordinate'}, + {'name': 'dims_index', 'type': (int, list, tuple), + 'doc': 'The dimension indices (0-indexed) of the dataset that this coordinate acts on'}, + 
{'name': 'coord', 'type': InnerCoordSpec, + 'doc': ('Specification of the coordinate dataset, dimension indices, and type. Keys dataset_name, ' + 'dims_index, and type are required.')}, + {'name': 'parent', 'type': 'DatasetSpec', 'doc': 'The parent dataset spec of this spec', 'default': None}) + def __init__(self, **kwargs): + super().__init__() + name, dims_index, coord, parent = getargs('name', 'dims_index', 'coord', 'parent', kwargs) + self['name'] = name + self['coord'] = coord + self._parent = parent + + # cast int, list to tuple + if type(dims_index) == int: + self['dims_index'] = (dims_index, ) + else: + self['dims_index'] = tuple(dims_index) + + @property + def name(self): + """The name of this coordinate""" + return self.get('name', None) + + @property + def dims_index(self): + """The dimension indices (0-indexed) of the dataset that this coordinate acts on""" + return self.get('dims_index', None) + + @property + def coord(self): + """Specification of the coordinate dataset, dimension indices, and type""" + return self.get('coord', None) + + @property + def parent(self): + """ The parent specification of this specification """ + return self._parent + + @parent.setter + def parent(self, spec): + """ Set the parent of this specification """ + if self._parent is not None: + raise Exception('Cannot re-assign parent') + self._parent = spec + + _attrbl_args = [ {'name': 'doc', 'type': str, 'doc': 'a description about what this specification represents'}, {'name': 'name', 'type': str, 'doc': 'the name of this base storage container, ' @@ -602,6 +720,7 @@ def build_const_args(cls, spec_dict): {'name': 'default_name', 'type': str, 'doc': 'The default name of this dataset', 'default': None}, {'name': 'shape', 'type': (list, tuple), 'doc': 'the shape of this dataset', 'default': None}, {'name': 'dims', 'type': (list, tuple), 'doc': 'the dimensions of this dataset', 'default': None}, + {'name': 'coords', 'type': (list, tuple), 'doc': 'the coordinates of this dataset', 
'default': None}, {'name': 'attributes', 'type': list, 'doc': 'the attributes on this group', 'default': list()}, {'name': 'linkable', 'type': bool, 'doc': 'whether or not this group can be linked', 'default': True}, {'name': 'quantity', 'type': (str, int), 'doc': 'the required number of allowed instance', 'default': 1}, @@ -618,18 +737,84 @@ class DatasetSpec(BaseStorageSpec): To specify a table-like dataset i.e. a compound data type. ''' - @docval(*deepcopy(_dataset_args)) + @docval(*deepcopy(_dataset_args)) # noqa: C901 def __init__(self, **kwargs): - doc, shape, dims, dtype, default_value = popargs('doc', 'shape', 'dims', 'dtype', 'default_value', kwargs) - if shape is not None: - self['shape'] = shape - if dims is not None: - self['dims'] = dims - if 'shape' not in self: - self['shape'] = tuple([None] * len(dims)) - if self.shape is not None and self.dims is not None: - if len(self['dims']) != len(self['shape']): - raise ValueError("'dims' and 'shape' must be the same length") + doc, shape, dims, coords, dtype, default_value = popargs('doc', 'shape', 'dims', 'coords', 'dtype', + 'default_value', kwargs) + + # parse dims, shape, and coords. convert all to tuples for consistency. 
+ if dims and isinstance(dims[0], (DimSpec, dict)): + if isinstance(dims[0], dict): # dims read from yaml as list/tuple of dicts + new_dims = list(map(DimSpec.build_spec, dims)) + else: + new_dims = dims + optional_dims = False + new_shape = [None] * len(new_dims) + for _i, dim in enumerate(new_dims): + if isinstance(dim, DimSpec): + new_shape[_i] = dim.length + if not dim.required: + optional_dims = True + if dim.required and optional_dims: + msg = ('Required dims cannot follow optional dims - found required dim %s at element %d' + % (dim.name, _i)) + raise ValueError(msg) + else: + msg = ('Dims must consist of DimSpec objects if using new-style dims - found %s at element %d' + % (type(dim), _i)) + raise ValueError(msg) + if shape is not None and tuple(shape) != tuple(new_shape): + msg = 'Specified shape does not match computed shape from dims dictionary.' + raise ValueError(msg) + self['shape'] = tuple(new_shape) + self['dims'] = tuple(new_dims) + elif shape is not None: # legacy shape + if shape and dims: + if len(dims) != len(shape): + raise ValueError("'dims' and 'shape' must be the same length.") + + # construct new style DimSpec based on old style shape and dims specification + # NOTE: when given a list of shape and dim options, the options must be compatible with each other + new_dims = list() + if isinstance(shape[0], (list, tuple)): # shape is a list of shape configurations + for j, shape_config in enumerate(shape): + for i, length in enumerate(shape_config): + if i >= len(new_dims): + dim_name = dims[j][i] if dims and dims[j][i] else 'dim' + str(i) + required = j == 0 # first list contains required dimensions + new_dims.append(DimSpec(name=dim_name, required=required, length=length)) + else: # only one shape configuration is specified + for i, length in enumerate(shape): + dim_name = dims[i] if dims and dims[i] else 'dim' + str(i) + new_dims.append(DimSpec(name=dim_name, required=True, length=length)) + 
self['shape'] = tuple(shape) + self['dims'] = tuple(new_dims) + elif dims is not None: # legacy dims (list of strings) and no shape + # construct new style DimSpec based on old style shape and dims specification + # NOTE: legacy code did not allow list of list of strings without shape. when dims was provided without + # shape, shape was set to tuple([None] * len(dims)) + new_dims = list() + for i in range(len(dims)): + dim_name = dims[i] if dims[i] else 'dim' + str(i) + new_dims.append(DimSpec(name=dim_name, required=True, length=None)) + self['shape'] = tuple([None] * len(dims)) + self['dims'] = tuple(new_dims) + + if coords is not None: + for _i, coord in enumerate(coords): + if isinstance(coord, CoordSpec): + pass # TODO + elif isinstance(coord, dict): + pass # TODO + else: + msg = 'Must use CoordSpec to define coordinate - found %s at element %d' % (type(coord), _i) + raise ValueError(msg) + self['coords'] = tuple(coords) + if self.dims is None: + raise ValueError("'dims' must be defined with 'coords'.") + if dtype is not None: if isinstance(dtype, list): # Dtype is a compound data type for _i, col in enumerate(dtype): @@ -709,6 +894,19 @@ def dims(self): ''' The dimensions of this Dataset ''' return self.get('dims', None) + @property + def min_num_dims(self): + ''' The minimum number of dimensions of this Dataset ''' + for _i, d in enumerate(self.dims): + if not d.required: + return _i + return len(self.dims) + + @property + def coords(self): + ''' The coordinates of this Dataset ''' + return self.get('coords', None) + @property def dtype(self): ''' The data type of the Dataset ''' @@ -726,6 +924,7 @@ def default_value(self): @classmethod def __check_dim(cls, dim, data): + # TODO return True @classmethod diff --git a/src/hdmf/spec/write.py b/src/hdmf/spec/write.py index c6a3dc32f..fffb5f157 100644 --- a/src/hdmf/spec/write.py +++ b/src/hdmf/spec/write.py @@ -72,7 +72,8 @@ def my_represent_none(self, data): order = ['neurodata_type_def', 'neurodata_type_inc', 
'data_type_def', 'data_type_inc', 'name', 'default_name', 'dtype', 'target_type', 'dims', 'shape', 'default_value', 'value', 'doc', - 'required', 'quantity', 'attributes', 'datasets', 'groups', 'links'] + 'required', 'quantity', 'attributes', 'datasets', 'groups', 'links', + 'label', 'target_dims', 'dataset', 'type'] if isinstance(obj, dict): keys = list(obj.keys()) for k in order[::-1]: diff --git a/src/hdmf/testing/testcase.py b/src/hdmf/testing/testcase.py index 075f3c32d..59bd88554 100644 --- a/src/hdmf/testing/testcase.py +++ b/src/hdmf/testing/testcase.py @@ -4,6 +4,7 @@ import numpy as np import os from abc import ABCMeta, abstractmethod +import scipy.sparse from ..container import Container, Data from ..query import HDMFDataset @@ -78,6 +79,8 @@ def _assert_field_equal(self, f1, f2, ignore_hdmf_attrs=False): self._assert_data_equal(f1, f2, ignore_hdmf_attrs) elif isinstance(f1, (float, np.floating)): np.testing.assert_equal(f1, f2) + elif isinstance(f1, scipy.sparse.csr_matrix): + self.assertEqual((f1 != f2).nnz, 0) else: self.assertEqual(f1, f2) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index b3e64d0ee..b3986ab84 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -122,6 +122,8 @@ def check_shape(expected, received): ret = True else: if isinstance(expected, (list, tuple)): + if len(expected) == 0: + return received is None if isinstance(expected[0], (list, tuple)): for sub in expected: if check_shape(sub, received): @@ -228,7 +230,8 @@ def validate(self, **kwargs): builder = getargs('builder', kwargs) dt = builder.attributes.get(self.__type_key) if dt is None: - msg = "builder must have data type defined with attribute '%s'" % self.__type_key + msg = ("builder (name: '%s') must have data type defined with attribute '%s'" + % (builder.name, self.__type_key)) raise ValueError(msg) validator = self.get_validator(dt) return validator.validate(builder) diff --git 
a/tests/unit/build_tests/test_io_build_builders.py b/tests/unit/build_tests/test_io_build_builders.py index 5470ba267..65b379a80 100644 --- a/tests/unit/build_tests/test_io_build_builders.py +++ b/tests/unit/build_tests/test_io_build_builders.py @@ -1,4 +1,4 @@ -from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder +from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, CoordBuilder from hdmf.testing import TestCase @@ -43,11 +43,9 @@ def test_add_external_link(self): self.assertIs(self.gb, el.parent) self.assertIs(self.gb2, gp.parent) - # @unittest.expectedFailure def test_set_attribute(self): self.gb.set_attribute('key', 'value') self.assertIn('key', self.gb.obj_type) - # self.assertEqual(dict.__getitem__(self.gb, 'attributes')['key'], 'value') self.assertEqual(self.gb['key'], 'value') def test_parent_constructor(self): @@ -244,7 +242,6 @@ def test_mutually_exclusive_datasets(self): gb2 = GroupBuilder('gb2', datasets={'dataset2': DatasetBuilder('dataset2', [4, 5, 6])}) gb1.deep_update(gb2) self.assertIn('dataset2', gb1) - # self.assertIs(gb1['dataset2'], gb2['dataset2']) self.assertListEqual(gb1['dataset2'].data, gb2['dataset2'].data) def test_mutually_exclusive_attributes(self): @@ -306,3 +303,29 @@ def test_no_overwrite(self): db1.deep_update(db2) self.assertListEqual(db1.data, db2.data) self.assertIn('attr1', db1.attributes) + + +class TestCoordBuilder(TestCase): + + def test_init_get_attr(self): + cb = CoordBuilder(name='letters', axes=(0, ), coord_dataset_name='data2', coord_axes=(0, ), + coord_type='aligned') + self.assertEqual(cb.coord_dataset_name, 'data2') + + def test_get_attr_not_found(self): + cb = CoordBuilder(name='letters', axes=(0, ), coord_dataset_name='data2', coord_axes=(0, ), + coord_type='aligned') + with self.assertRaisesWith(AttributeError, "'CoordBuilder' object has no attribute 'bad_key'"): + cb.bad_key + + def test_set_attr(self): + cb = CoordBuilder(name='letters', axes=(0, ), coord_dataset_name='data2', 
coord_axes=(0, ), + coord_type='aligned') + with self.assertRaisesWith(AttributeError, "can't set attribute"): + cb.name = 'new_name' + + def test_repr(self): + cb = CoordBuilder(name='letters', axes=(0, ), coord_dataset_name='data2', coord_axes=(0, ), + coord_type='aligned') + self.assertEqual(str(cb), ("CoordBuilder(name='letters', axes=(0,), coord_dataset_name='data2', " + "coord_axes=(0,), coord_type='aligned')")) diff --git a/tests/unit/build_tests/test_io_map.py b/tests/unit/build_tests/test_io_map.py index 62a7c98e6..b2bd85f34 100644 --- a/tests/unit/build_tests/test_io_map.py +++ b/tests/unit/build_tests/test_io_map.py @@ -14,6 +14,8 @@ class Bar(Container): + __fields__ = ('data', 'attr1', 'attr2', 'attr3', {'name': 'foo', 'child': True}) + @docval({'name': 'name', 'type': str, 'doc': 'the name of this Bar'}, {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'some data'}, {'name': 'attr1', 'type': str, 'doc': 'an attribute'}, @@ -23,13 +25,11 @@ class Bar(Container): def __init__(self, **kwargs): name, data, attr1, attr2, attr3, foo = getargs('name', 'data', 'attr1', 'attr2', 'attr3', 'foo', kwargs) super().__init__(name=name) - self.__data = data - self.__attr1 = attr1 - self.__attr2 = attr2 - self.__attr3 = attr3 - self.__foo = foo - if self.__foo is not None and self.__foo.parent is None: - self.__foo.parent = self + self.data = data + self.attr1 = attr1 + self.attr2 = attr2 + self.attr3 = attr3 + self.foo = foo def __eq__(self, other): attrs = ('name', 'data', 'attr1', 'attr2', 'attr3', 'foo') @@ -43,26 +43,6 @@ def __str__(self): def data_type(self): return 'Bar' - @property - def data(self): - return self.__data - - @property - def attr1(self): - return self.__attr1 - - @property - def attr2(self): - return self.__attr2 - - @property - def attr3(self): - return self.__attr3 - - @property - def foo(self): - return self.__foo - class Foo(Container): @@ -294,6 +274,9 @@ def test_dynamic_container_constructor_name(self): with 
self.assertRaises(TypeError): inst = cls('My Baz', [1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) + with self.assertRaisesWith(TypeError, "unrecognized argument: 'name'"): + inst = cls([1, 2, 3, 4], 'string attribute', 1000, name='My Baz', attr3=98.6, attr4=1.0) + inst = cls([1, 2, 3, 4], 'string attribute', 1000, attr3=98.6, attr4=1.0) self.assertEqual(inst.name, 'A fixed name') self.assertEqual(inst.data, [1, 2, 3, 4]) @@ -394,19 +377,21 @@ def test_default_mapping(self): class TestObjectMapperNested(TestObjectMapperMixin, TestCase): def setUpBarSpec(self): + attr2_spec = AttributeSpec('attr2', 'an example integer attribute', 'int') + attr1_spec = AttributeSpec('attr1', 'an example string attribute', 'text') + dset_spec = DatasetSpec('an example dataset', 'int', name='data', attributes=[attr2_spec]) self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', - datasets=[DatasetSpec('an example dataset', 'int', name='data', - attributes=[AttributeSpec( - 'attr2', 'an example integer attribute', 'int')])], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text')]) + datasets=[dset_spec], + attributes=[attr1_spec]) def test_build(self): - ''' Test default mapping functionality when object attributes map to an attribute deeper + ''' Test default mapping functionality when object attributes map to an attribute deeper than top-level Builder ''' container_inst = Bar('my_bar', list(range(10)), 'value1', 10) - expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder( - 'data', list(range(10)), attributes={'attr2': 10})}, + dataset_bldr = DatasetBuilder('data', list(range(10)), attributes={'attr2': 10}) + expected = GroupBuilder('my_bar', + datasets={'data': dataset_bldr}, attributes={'attr1': 'value1'}) self._remap_nested_attr() builder = self.mapper.build(container_inst, self.manager) @@ -416,9 +401,12 @@ def test_construct(self): ''' Test default mapping functionality when object attributes 
map to an attribute deeper than top-level Builder ''' expected = Bar('my_bar', list(range(10)), 'value1', 10) - builder = GroupBuilder('my_bar', datasets={'data': DatasetBuilder( - 'data', list(range(10)), attributes={'attr2': 10})}, - attributes={'attr1': 'value1', 'data_type': 'Bar', 'namespace': CORE_NAMESPACE, + dataset_bldr = DatasetBuilder('data', list(range(10)), attributes={'attr2': 10}) + builder = GroupBuilder('my_bar', + datasets={'data': dataset_bldr}, + attributes={'attr1': 'value1', + 'data_type': 'Bar', + 'namespace': CORE_NAMESPACE, 'object_id': expected.object_id}) self._remap_nested_attr() container = self.mapper.construct(builder, self.manager) @@ -445,17 +433,21 @@ def _remap_nested_attr(self): class TestObjectMapperNoNesting(TestObjectMapperMixin, TestCase): def setUpBarSpec(self): + attr2_spec = AttributeSpec('attr2', 'an example integer attribute', 'int') + attr1_spec = AttributeSpec('attr1', 'an example string attribute', 'text') + dset_spec = DatasetSpec('an example dataset', 'int', name='data',) self.bar_spec = GroupSpec('A test group specification with a data type', data_type_def='Bar', - datasets=[DatasetSpec('an example dataset', 'int', name='data')], - attributes=[AttributeSpec('attr1', 'an example string attribute', 'text'), - AttributeSpec('attr2', 'an example integer attribute', 'int')]) + datasets=[dset_spec], + attributes=[attr1_spec, attr2_spec]) def test_build(self): ''' Test default mapping functionality when no attributes are nested ''' container = Bar('my_bar', list(range(10)), 'value1', 10) builder = self.mapper.build(container, self.manager) - expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', list(range(10)))}, + dataset_bldr = DatasetBuilder('data', list(range(10))) + expected = GroupBuilder('my_bar', + datasets={'data': dataset_bldr}, attributes={'attr1': 'value1', 'attr2': 10}) self.assertDictEqual(builder, expected) @@ -463,13 +455,17 @@ def test_build_empty(self): ''' Test default mapping 
functionality when no attributes are nested ''' container = Bar('my_bar', [], 'value1', 10) builder = self.mapper.build(container, self.manager) - expected = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', [])}, + dataset_bldr = DatasetBuilder('data', []) + expected = GroupBuilder('my_bar', + datasets={'data': dataset_bldr}, attributes={'attr1': 'value1', 'attr2': 10}) self.assertDictEqual(builder, expected) def test_construct(self): expected = Bar('my_bar', list(range(10)), 'value1', 10) - builder = GroupBuilder('my_bar', datasets={'data': DatasetBuilder('data', list(range(10)))}, + dataset_bldr = DatasetBuilder('data', list(range(10))) + builder = GroupBuilder('my_bar', + datasets={'data': dataset_bldr}, attributes={'attr1': 'value1', 'attr2': 10, 'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': expected.object_id}) container = self.mapper.construct(builder, self.manager) diff --git a/tests/unit/build_tests/test_simple.py b/tests/unit/build_tests/test_simple.py new file mode 100644 index 000000000..630be2c35 --- /dev/null +++ b/tests/unit/build_tests/test_simple.py @@ -0,0 +1,788 @@ +from hdmf.spec import GroupSpec, DatasetSpec, InnerCoordSpec, CoordSpec, SpecCatalog, SpecNamespace, NamespaceCatalog +from hdmf.spec import DimSpec +from hdmf.build import ObjectMapper, TypeMap, GroupBuilder, DatasetBuilder, BuildManager, BuildError, CoordBuilder +from hdmf.build import ConstructError, ConvertError +from hdmf import Container +from hdmf.container import Coordinates +from hdmf.utils import docval +from hdmf.testing import TestCase +from hdmf.backends.hdf5 import HDF5IO + +from tests.unit.utils import CORE_NAMESPACE +from tests.unit.test_io_hdf5_h5tools import get_temp_filepath +import os +import h5py + + +class Bar(Container): + + __fields__ = ('data1', 'data2', 'data3') + + @docval({'name': 'name', 'type': str, 'doc': 'bar name'}, + {'name': 'data1', 'type': 'array_data', 'doc': 'bar data1'}, + {'name': 'data2', 'type': 'array_data', 
'doc': 'bar data2', 'default': None}, + {'name': 'data3', 'type': ('scalar_data', 'array_data'), 'doc': 'bar data3', 'default': None}) + def __init__(self, **kwargs): + super().__init__(name=kwargs['name']) + self.data1 = kwargs['data1'] + self.data2 = kwargs['data2'] + self.data3 = kwargs['data3'] + + +def _create_typemap(bar_spec): + spec_catalog = SpecCatalog() + spec_catalog.register_spec(bar_spec, 'test.yaml') + namespace = SpecNamespace('a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], catalog=spec_catalog) + namespace_catalog = NamespaceCatalog() + namespace_catalog.add_namespace(CORE_NAMESPACE, namespace) + type_map = TypeMap(namespace_catalog) + type_map.register_container_type(CORE_NAMESPACE, 'Bar', Bar) + type_map.register_map(Bar, ObjectMapper) + return type_map + + +class TestBuildDims(TestCase): + + # TODO legacy tests + + # TODO make hdf5 back end write dims and coords and read dims and coords + # TODO update documentation + # TODO do not write new attributes if dims and coords do not exist + + def test_build_dims_1d(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec, the type map can create + a builder from an instance of the Container, with dimensions. Start with the simple case of a 1-D array. + """ + dim_spec = DimSpec(name='x', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + group_builder = type_map.build(bar_inst) + + self.assertEqual(group_builder.get('data1').dims, ('x', )) + + def test_build_dims_1d_length_none(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec with length none, the type map can + create a builder from an instance of the Container, with dimensions. 
+ """ + dim_spec = DimSpec(name='x', required=True, length=None) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + group_builder = type_map.build(bar_inst) + + self.assertEqual(group_builder.get('data1').dims, ('x', )) + + def test_build_dims_1d_wrong_length(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec with a length, when the type map + tries to create a builder from an instance of the Container with a different length, an error is raised. + """ + dim_spec = DimSpec(name='x', required=True, length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + msg = "Data dimension 'x' (axis 0) must have length 3 but has length 4." + with self.assertRaisesWith(BuildError, "Could not build 'data1' for Bar 'my_bar' due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.build(bar_inst) + + def test_build_dims_1d_opt_wrong_length(self): + """ + Test that given a Spec for an Container class (Bar) that includes an optional DimSpec with a length, when the + type map tries to create a builder from an instance of the Container with a different length, an error is + raised. 
+ """ + dim_spec = DimSpec(name='x', required=False, length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + msg = "Data dimension 'x' (axis 0) must have length 3 but has length 4." + with self.assertRaisesWith(BuildError, "Could not build 'data1' for Bar 'my_bar' due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.build(bar_inst) + + def test_build_dims_2d(self): + """ + Test that given a Spec for an Container class (Bar) that includes two DimSpecs, the type map can create + a builder from an instance of the Container, with dimensions. Here, with a 2-D dataset with dimensions + with a length and doc. + """ + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc') + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [[1, 2], [3, 4], [5, 6], [7, 8]]) + group_builder = type_map.build(bar_inst) + + self.assertEqual(group_builder.get('data1').dims, ('x', 'y')) + + def test_build_dims_2d_wrong_length(self): + """ + Test that given a Spec for an Container class (Bar) that includes two DimSpecs, the type map can create + a builder from an instance of the Container, with dimensions. Here, with a 2-D dataset with dimensions + with a length and doc. 
+ """ + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc', length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [[1, 2], [3, 4], [5, 6], [7, 8]]) + + msg = "Data dimension 'y' (axis 1) must have length 3 but has length 2." + with self.assertRaisesWith(BuildError, "Could not build 'data1' for Bar 'my_bar' due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.build(bar_inst) + + def test_build_dims_1d_with_2d_dims(self): + """ + Test that given a Spec for an Container class (Bar) that includes two DimSpecs, the type map can create + a builder from an instance of the Container, with dimensions. Here, with a 2-D dataset with dimensions + with a length and doc. + """ + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc', length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + group_builder = type_map.build(bar_inst) + + self.assertEqual(group_builder.get('data1').dims, ('x', )) + + def test_build_dims_1d_with_2d_dims_req(self): + """ + Test that given a Spec for an Container class (Bar) that includes two DimSpecs, the type map can create + a builder from an instance of the Container, with dimensions. Here, with a 2-D dataset with dimensions + with a length and doc. 
+ """ + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=True, doc='test_doc', length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + msg = "Data must have at least 2 dimensions but has 1." + with self.assertRaisesWith(BuildError, "Could not build 'data1' for Bar 'my_bar' due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.build(bar_inst) + + +class TestBuildCoords(TestCase): + + def test_build_coords_1d(self): + """ + Test that given a DimSpec and CoordSpec for an Container class, the type map can create a builder from an + instance of the Container, with dimensions and coordinates for a 1-D array. + """ + dim1_spec = DimSpec(name='x', required=True) + dim2_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + # TODO datasetspec add_dim(...) + # TODO datasetspec add_coord(...) 
+ dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(dim1_spec, ), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(dim2_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + group_builder = type_map.build(bar_inst) + + expected = {'letters': CoordBuilder(name='letters', coord_dataset_name='data2', coord_axes=(0, ), axes=(0, ), + coord_type='aligned')} + self.assertEqual(group_builder.get('data1').coords, expected) + + def test_build_coords_2d(self): + """ + Test that given a DimSpec and CoordSpec for an Container class, the type map can create a builder from an + instance of the Container, with dimensions and coordinates for a 2-D array. + """ + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc') + dim2_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, ), type='aligned') + x_coord_spec = CoordSpec(name='xletters', dims_index=(0, ), coord=icoord_spec) + y_coord_spec = CoordSpec(name='yletters', dims_index=(1, ), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(x_spec, y_spec), coords=(x_coord_spec, y_coord_spec)) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(dim2_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [[1, 2], [3, 4], [5, 6], [7, 8]], ['a', 'b', 'c', 'd']) + group_builder = type_map.build(bar_inst) + + expected = {'xletters': CoordBuilder(name='xletters', coord_dataset_name='data2', coord_axes=(0, ), axes=(0, ), + 
coord_type='aligned'), + 'yletters': CoordBuilder(name='yletters', coord_dataset_name='data2', coord_axes=(0, ), axes=(1, ), + coord_type='aligned')} + self.assertEqual(group_builder.get('data1').coords, expected) + + def test_build_coords_missing_dset(self): + """ + Test that given a DimSpec and CoordSpec for an Container class, the type map raises an error when the CoordSpec + references an invalid coord_dataset. + """ + dim1_spec = DimSpec(name='x', required=True) + dim2_spec = DimSpec(name='chars', required=True) + # TODO require coord_dataset to be a datasetspec, validate axes, name + icoord_spec = InnerCoordSpec(dataset_name='data3', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + # TODO validate coord type is an allowed value + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(dim1_spec, ), coords=(coord_spec, )) + # TODO constructor, and add coords validate axes. + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(dim2_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + # TODO on write to yaml, validate that coord references exist. + type_map = _create_typemap(bar_spec) + bar_inst = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + group_builder = type_map.build(bar_inst) + + self.assertIsNone(group_builder.get('data1').coords) + + +class TestConstructDims(TestCase): + + # NOTE: if a DatasetBuilder does not match its DatasetSpec in dtype or dims/shape, the object can still be + # constructed + + def test_construct_dims_1d(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec, the type map can create + a builder from an instance of the Container, with dimensions. Start with the simple case of a 1-D array. 
+ """ + dim_spec = DimSpec(name='x', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder1 = DatasetBuilder(name='data1', data=[1, 2, 3, 4]) + datasets = [dset_builder1, ] + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + self.assertEqual(constructed_bar.dims, {'data1': ('x', )}) + + expected_bar = Bar('my_bar', [1, 2, 3, 4]) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + + def test_construct_dims_1d_length_none(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec, the type map can create + a builder from an instance of the Container, with dimensions. Start with the simple case of a 1-D array. 
+ """ + dim_spec = DimSpec(name='x', required=True, length=None) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder1 = DatasetBuilder(name='data1', data=[1, 2, 3, 4]) + datasets = [dset_builder1, ] + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + self.assertEqual(constructed_bar.dims, {'data1': ('x', )}) + + expected_bar = Bar('my_bar', [1, 2, 3, 4]) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + + def test_construct_dims_1d_wrong_length(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec, the type map can create + a builder from an instance of the Container, with dimensions. Start with the simple case of a 1-D array. 
+ """ + dim_spec = DimSpec(name='x', required=True, length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder1 = DatasetBuilder(name='data1', data=[1, 2, 3, 4]) + datasets = [dset_builder1, ] + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + msg = ("Could not construct dims for dataset 'data1' for Bar 'my_bar' due to: Data dimension 'x' (axis 0) must " + "have length 3 but has length 4.") + with self.assertWarnsWith(UserWarning, msg): + type_map.construct(group_builder, manager) + + def test_construct_dims_2d(self): + """ + Test that given a Spec for an Container class (Bar) that includes a DimSpec, the type map can create + a builder from an instance of the Container, with dimensions for a 2-D array. 
+ """ + x_spec = DimSpec(name='x', required=True) + y_spec = DimSpec(name='y', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder1 = DatasetBuilder(name='data1', data=[[1, 2, 3, 4], [5, 6, 7, 8]]) + datasets = [dset_builder1, ] + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + self.assertEqual(constructed_bar.dims, {'data1': ('x', 'y')}) + + expected_bar = Bar('my_bar', [[1, 2, 3, 4], [5, 6, 7, 8]]) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + + +class TestConstructCoords(TestCase): + + def test_construct_coords_1d_not_in_bldr(self): + x_spec = DimSpec(name='x', required=True) + char_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(x_spec, ), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(char_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + # on read, the dataset builders MAY not have dims or coords. 
these would be added in construct + # TODO warning on read if dims and coords do not match spec + dset_builder2 = DatasetBuilder(name='data2', data=['a', 'b', 'c', 'd']) + dset_builder1 = DatasetBuilder(name='data1', data=[1, 2, 3, 4]) + datasets = {'data1': dset_builder1, 'data2': dset_builder2} + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + + expected_coords = Coordinates(constructed_bar) + expected_coords.add(name='letters', dims=('x', ), coord_array=constructed_bar.data2, + coord_array_dims_index=(0, ), coord_type='aligned') + self.assertEqual(constructed_bar.coords, {'data1': expected_coords}) + + expected_bar = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + + def test_construct_coords_1d_in_bldr(self): + x_spec = DimSpec(name='x', required=True) + char_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(x_spec, ), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(char_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + # on read, the dataset builders MAY not have dims or coords. 
these would be added in construct + # TODO warning on read if dims and coords do not match spec + coord_builder = CoordBuilder(name='letters', axes=(0, ), coord_dataset_name='data2', coord_axes=(0, ), + coord_type='aligned') + dset_builder2 = DatasetBuilder(name='data2', data=['a', 'b', 'c', 'd']) + dset_builder1 = DatasetBuilder(name='data1', data=[1, 2, 3, 4]) + dset_builder1.dims = ('x',) + dset_builder1.coords = {'letters': coord_builder} + datasets = {'data1': dset_builder1, 'data2': dset_builder2} + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + + expected_coords = Coordinates(constructed_bar) + expected_coords.add(name='letters', dims=('x', ), coord_array=constructed_bar.data2, + coord_array_dims_index=(0, ), coord_type='aligned') + self.assertEqual(constructed_bar.coords, {'data1': expected_coords}) + + expected_bar = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + + def test_construct_coords_2d_not_in_bldr(self): + # here, the dataset builders do not have dims or coords. 
they are added in construct + frame_spec = DimSpec(name='frame', required=True) + x1_spec = DimSpec(name='x1', required=True, length=2) + y1_spec = DimSpec(name='y1', required=True, length=4) + x2_spec = DimSpec(name='x2', required=False) + y2_spec = DimSpec(name='y2', required=False) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, 1), type='aligned') + coord_spec = CoordSpec(name='dorsal-ventral', dims_index=(1, 2), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(frame_spec, x1_spec, y1_spec), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'int', name='data2', dims=(x2_spec, y2_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder2 = DatasetBuilder(name='data2', data=[[-1, -2, -3, -4], [-5, -6, -7, -8]]) + dset_builder1 = DatasetBuilder(name='data1', data=[[[1, 2, 3, 4], [5, 6, 7, 8]], + [[1, 2, 3, 4], [5, 6, 7, 8]], + [[1, 2, 3, 4], [5, 6, 7, 8]]]) + datasets = {'data1': dset_builder1, 'data2': dset_builder2} + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + constructed_bar = type_map.construct(group_builder, manager) + + expected_coords = Coordinates(constructed_bar) + expected_coords.add(name='dorsal-ventral', dims=('x1', 'y1'), coord_array=constructed_bar.data2, + coord_array_dims_index=(0, 1), coord_type='aligned') + self.assertEqual(constructed_bar.coords, {'data1': expected_coords}) + + expected_bar = Bar('my_bar', + [[[1, 2, 3, 4], [5, 6, 7, 8]], + [[1, 2, 3, 4], [5, 6, 7, 8]], + [[1, 2, 3, 4], [5, 6, 7, 8]]], + [[-1, -2, -3, -4], [-5, -6, -7, -8]]) + self.assertContainerEqual(constructed_bar, expected_bar, ignore_hdmf_attrs=True) + +# TODO test 
dynamic class generation with dim coord spec + + +class TestHDF5IODims(TestCase): + + def setUp(self): + self.path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) + + def test_write_dims_none(self): + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1') + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 0) + + def test_write_dims(self): + dim_spec = DimSpec(name='x', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 1) + self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + + def test_write_dims_only_legacy(self): + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=('x', )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 1) + 
self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + + def test_write_shape_only_legacy(self): + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', shape=(None, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 1) + self.assertEqual(file['data1'].attrs['dimensions'], '["dim0"]') + + def test_write_dims_shape_legacy(self): + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=('x', ), shape=(None, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 1) + self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + + def test_write_1d_for_2d_dims(self): + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc', length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + 
self.assertEqual(len(file['data1'].attrs.keys()), 1) + self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + + def test_read_dims_none(self): + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1') + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_bar = io.read() + self.assertEqual(read_bar.dims, {}) + + def test_read_dims(self): + dim_spec = DimSpec(name='x', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_bar = io.read() + self.assertEqual(read_bar.dims, {'data1': ('x', )}) + + def test_read_1d_for_2d_dims(self): + x_spec = DimSpec(name='x', required=True, length=4) + y_spec = DimSpec(name='y', required=False, doc='test_doc', length=3) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(x_spec, y_spec)) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4]) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with HDF5IO(self.path, manager=manager, mode='r') as io: + read_bar = io.read() + 
self.assertEqual(read_bar.dims, {'data1': ('x', )}) + + +class TestHDF5IOCoords(TestCase): + + def setUp(self): + self.path = get_temp_filepath() + + def tearDown(self): + if os.path.exists(self.path): + os.remove(self.path) + + def test_write_coords(self): + dim1_spec = DimSpec(name='x', required=True) + dim2_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data2', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(dim1_spec, ), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(dim2_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 2) + self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + self.assertEqual(file['data1'].attrs['coordinates'], + '{"letters": ["letters", [0], "data2", [0], "aligned"]}') + # TODO the latter should be a dict. 
keys are needed + + def test_write_unused_coords(self): + dim1_spec = DimSpec(name='x', required=True) + dim2_spec = DimSpec(name='chars', required=True) + icoord_spec = InnerCoordSpec(dataset_name='data3', dims_index=(0, ), type='aligned') + coord_spec = CoordSpec(name='letters', dims_index=(0, ), coord=icoord_spec) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', + dims=(dim1_spec, ), coords=(coord_spec, )) + dset2_spec = DatasetSpec('an example dataset2', 'text', name='data2', dims=(dim2_spec, )) + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset2_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + bar_inst = Bar('my_bar', [1, 2, 3, 4], ['a', 'b', 'c', 'd']) + + with HDF5IO(self.path, manager=manager, mode='w') as io: + io.write(bar_inst) + + with h5py.File(self.path, mode='r') as file: + self.assertEqual(len(file['data1'].attrs.keys()), 1) + self.assertEqual(file['data1'].attrs['dimensions'], '["x"]') + + +class TestConstructCheckType(TestCase): + + def _test_construct_helper(self, spec_dtype, builder_data): + dim_spec = DimSpec(name='x', required=True) + dset1_spec = DatasetSpec(doc='an example dataset1', dtype='int', name='data1', dims=(dim_spec, )) # not used + dset3_spec = DatasetSpec(doc='an example dataset3', dtype=spec_dtype, name='data3') + bar_spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[dset1_spec, dset3_spec]) + type_map = _create_typemap(bar_spec) + manager = BuildManager(type_map) + + dset_builder1 = DatasetBuilder(name='data1', data=[10]) # not used + dset_builder3 = DatasetBuilder(name='data3', data=builder_data) + datasets = {'data1': dset_builder1, 'data3': dset_builder3} + attributes = {'data_type': 'Bar', 'namespace': CORE_NAMESPACE, 'object_id': "doesn't matter"} + group_builder = GroupBuilder('my_bar', datasets=datasets, attributes=attributes) + + return 
type_map, group_builder, manager + + def test_construct_int(self): + type_map, group_builder, manager = self._test_construct_helper('int', 10) + type_map.construct(group_builder, manager) + + def test_construct_intstring_for_int(self): + type_map, group_builder, manager = self._test_construct_helper('int', '10') + bar = type_map.construct(group_builder, manager) + self.assertEqual(bar.data3, 10) + + def test_construct_floatstring_for_int(self): + type_map, group_builder, manager = self._test_construct_helper('int', '10.5') + + msg = "Could not convert data 'data3' to dtype 'int': 10.5" + with self.assertRaisesWith(ConstructError, "Could not construct Bar object due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.construct(group_builder, manager) + + def test_construct_float_for_int(self): + type_map, group_builder, manager = self._test_construct_helper('int', 10.5) + + msg = "Expected int32, received float64 - must supply int32 or higher precision" + with self.assertRaisesWith(ConstructError, "Could not construct Bar object due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.construct(group_builder, manager) + + def test_construct_int_list_for_int(self): + type_map, group_builder, manager = self._test_construct_helper('int', [10]) + bar = type_map.construct(group_builder, manager) + self.assertEqual(bar.data3, [10]) + + def test_construct_text(self): + type_map, group_builder, manager = self._test_construct_helper('text', '10') + type_map.construct(group_builder, manager) + + def test_construct_int_for_text(self): + type_map, group_builder, manager = self._test_construct_helper('text', 10) + + msg = "Expected unicode or ascii string, got " + with self.assertRaisesWith(ConstructError, "Could not construct Bar object due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.construct(group_builder, manager) + + def test_construct_int_list_for_text(self): + type_map, group_builder, manager = 
self._test_construct_helper('text', [10]) + + msg = "Expected unicode or ascii string, got " + with self.assertRaisesWith(ConstructError, "Could not construct Bar object due to: %s" % msg): + with self.assertRaisesWith(ConvertError, msg): + type_map.construct(group_builder, manager) diff --git a/tests/unit/spec_tests/test_dataset_spec.py b/tests/unit/spec_tests/test_dataset_spec.py index 480bcd572..7574f799e 100644 --- a/tests/unit/spec_tests/test_dataset_spec.py +++ b/tests/unit/spec_tests/test_dataset_spec.py @@ -1,6 +1,6 @@ import json -from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec, DtypeSpec, RefSpec +from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec, DtypeSpec, RefSpec, DimSpec from hdmf.testing import TestCase @@ -49,8 +49,8 @@ def test_constructor_shape(self): name='dataset1', shape=shape, attributes=self.attributes) - self.assertEqual(spec['shape'], shape) - self.assertEqual(spec.shape, shape) + self.assertEqual(spec['shape'], (None, 2)) + self.assertEqual(spec.shape, (None, 2)) def test_constructor_invalidate_dtype(self): with self.assertRaises(ValueError): @@ -230,3 +230,61 @@ def test_datatype_table_extension_diff_format(self): [dtype3], data_type_inc=base, data_type_def='ExtendedTable') + + +class TestDatasetOldStyleDims(TestCase): + + def test_none(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1') + self.assertIsNone(spec.shape) + self.assertIsNone(spec.dims) + self.assertIsNone(spec.coords) + + def test_1d(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=(2, )) + self.assertEqual(spec.shape, (2, )) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=2), )) + + def test_1d_none(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=(None, )) + self.assertEqual(spec.shape, (None, )) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=None), )) + + def test_1d_dims(self): + spec = DatasetSpec('my first dataset', 
'int', name='dataset1', shape=(2, ), dims=('x', )) + self.assertEqual(spec.dims, (DimSpec(name='x', required=True, length=2), )) + + def test_2d(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=(2, None)) + self.assertEqual(spec.shape, (2, None)) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=2), + DimSpec(name='dim1', required=True, length=None))) + + def test_2d_none(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=(None, None)) + self.assertEqual(spec.shape, (None, None)) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=None), + DimSpec(name='dim1', required=True, length=None))) + + def test_2d_dims(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=(2, None), dims=('x', 'y')) + self.assertEqual(spec.dims, (DimSpec(name='x', required=True, length=2), + DimSpec(name='y', required=True, length=None))) + + def test_1d_2d(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=((2, ), (2, None))) + self.assertEqual(spec.shape, ((2, ), (2, None))) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=2), + DimSpec(name='dim1', required=False, length=None))) + + def test_1d_2d_none(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=((None, ), (None, None))) + self.assertEqual(spec.shape, ((None, ), (None, None))) + self.assertEqual(spec.dims, (DimSpec(name='dim0', required=True, length=None), + DimSpec(name='dim1', required=False, length=None))) + + def test_1d_2d_dims(self): + spec = DatasetSpec('my first dataset', 'int', name='dataset1', shape=((2, ), (2, None)), + dims=(('x', ), ('x', 'y'))) + self.assertEqual(spec.dims, (DimSpec(name='x', required=True, length=2), + DimSpec(name='y', required=False, length=None))) diff --git a/tests/unit/spec_tests/test_load_namespace.py b/tests/unit/spec_tests/test_load_namespace.py index 997907a0c..908b04a24 100644 --- 
a/tests/unit/spec_tests/test_load_namespace.py +++ b/tests/unit/spec_tests/test_load_namespace.py @@ -2,7 +2,7 @@ import json import os -from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecNamespace, NamespaceCatalog +from hdmf.spec import AttributeSpec, DatasetSpec, GroupSpec, SpecNamespace, NamespaceCatalog, DimSpec from hdmf.testing import TestCase @@ -112,3 +112,169 @@ def test_inherited_attributes_not_resolved(self): src_dsets = {s.name for s in self.ext_datasets} ext_dsets = {s.name for s in es_spec.datasets} self.assertSetEqual(src_dsets, ext_dsets) + + +class TestLoadDims(TestCase): + + NS_NAME = 'test_ns' + + def setUp(self): + dset2_dims = (DimSpec(name='x', required=True), DimSpec(name='y', required=False)) + dset3_dims = (DimSpec(name='x', required=True), DimSpec(name='y', required=True)) + dset4_dims = ('x', 'y') + + self.datasets = [ + DatasetSpec('my first dataset', + 'int', + name='dataset1', + linkable=True), + DatasetSpec('my second dataset', + 'int', + name='dataset2', + dims=dset2_dims, + linkable=True), + DatasetSpec('my third dataset', + 'int', + name='dataset3', + dims=dset3_dims, + linkable=True), + DatasetSpec('my fourth dataset', + 'int', + name='dataset4', + dims=dset4_dims, + linkable=True), + ] + self.spec = GroupSpec('A test group', + name='root_constructor_datatype', + datasets=self.datasets, + linkable=False, + data_type_def='EphysData') + dset1_dims_ext = (DimSpec(name='x', required=True), ) # specify dims + # require y, change names + dset2_dims_ext = (DimSpec(name='x2', required=True), DimSpec(name='y2', required=True)) + dset4_dims_ext = (DimSpec(name='x2', required=True), DimSpec(name='y2', required=True)) # change names + self.ext_datasets = [ + DatasetSpec('my first dataset extension', + 'int', + name='dataset1', + dims=dset1_dims_ext, + linkable=True), + DatasetSpec('my second dataset extension', + 'int', + name='dataset2', + dims=dset2_dims_ext, + linkable=True), + DatasetSpec('my fourth dataset extension', + 
'int', + name='dataset4', + dims=dset4_dims_ext, + linkable=True) + ] + self.ext_spec = GroupSpec('A test group extension', + name='root_constructor_datatype', + datasets=self.ext_datasets, + linkable=False, + data_type_inc='EphysData', + data_type_def='SpikeData') + dset2_dims_ext2 = (DimSpec(name='x', required=False), DimSpec(name='y', required=False)) # make x optional + self.ext2_datasets = [ + DatasetSpec('my second dataset extension', + 'int', + name='dataset2', + dims=dset2_dims_ext2, + linkable=True) + ] + self.ext2_spec = GroupSpec('A test group extension', + name='root_constructor_datatype', + datasets=self.ext2_datasets, + linkable=False, + data_type_inc='EphysData', + data_type_def='InvalidData') + to_dump = {'groups': [self.spec, self.ext_spec, self.ext2_spec]} + self.specs_path = 'test_load_namespace.specs.yaml' + self.namespace_path = 'test_load_namespace.namespace.yaml' + with open(self.specs_path, 'w') as tmp: + yaml.safe_dump(json.loads(json.dumps(to_dump)), tmp, default_flow_style=False) + ns_dict = { + 'doc': 'a test namespace', + 'name': self.NS_NAME, + 'schema': [ + {'source': self.specs_path} + ] + } + self.namespace = SpecNamespace.build_namespace(**ns_dict) + to_dump = {'namespaces': [self.namespace]} + with open(self.namespace_path, 'w') as tmp: + yaml.safe_dump(json.loads(json.dumps(to_dump)), tmp, default_flow_style=False) + self.ns_catalog = NamespaceCatalog() + + def tearDown(self): + if os.path.exists(self.namespace_path): + os.remove(self.namespace_path) + if os.path.exists(self.specs_path): + os.remove(self.specs_path) + + def test_inherited_dims(self): + """ + Test a subclass inheriting the superclass' datasets with dims. 
+ """ + self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) + dset3_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData').datasets[2] + dset3_ext_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData').datasets[2] + expected = (DimSpec(name='x', required=True), DimSpec(name='y', required=True)) + self.assertEqual(dset3_ext_spec.dims, dset3_spec.dims) + self.assertEqual(dset3_ext_spec.dims, expected) + + def test_override_dims_simple(self): + """ + Test a subclass specifying a dataset's dims to override the superclass definition of the dataset without dims. + """ + self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) + dset1_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData').datasets[0] + dset1_ext_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData').datasets[0] + + self.assertIsNone(dset1_spec.dims) + + expected = (DimSpec(name='x', required=True), ) + self.assertEqual(dset1_ext_spec.dims, expected) + + def test_override_dims_stricter(self): + """ + Test a subclass specifying a dataset's dims to override the superclass definition of the dataset with dims. + + The subclass dataset's dims are more restrictive than the superclass dataset's dims and have different names. + """ + self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) + dset1_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData').datasets[1] + dset1_ext_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData').datasets[1] + + expected = (DimSpec(name='x', required=True), DimSpec(name='y', required=False)) + self.assertEqual(dset1_spec.dims, expected) + + expected_ext = (DimSpec(name='x2', required=True), DimSpec(name='y2', required=True)) + self.assertEqual(dset1_ext_spec.dims, expected_ext) + # TODO should succeed + + def test_override_dims_looser(self): + """ + Test a subclass cannot use override the superclass definition of a dataset with less restrictive dims. 
+ """ + self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) + # TODO load_namespaces should fail? + + def test_override_dims_new_over_legacy(self): + """ + Test a subclass specifying a dataset's dims to override the superclass legacy def. of the dataset with dims. + + The subclass dataset's dims also has different names. + """ + self.ns_catalog.load_namespaces(self.namespace_path, resolve=True) + dset4_spec = self.ns_catalog.get_spec(self.NS_NAME, 'EphysData').datasets[3] + dset4_ext_spec = self.ns_catalog.get_spec(self.NS_NAME, 'SpikeData').datasets[3] + + expected = (DimSpec(name='x', required=True), DimSpec(name='y', required=True)) + self.assertEqual(dset4_spec.dims, expected) + + expected_ext = (DimSpec(name='x2', required=True), DimSpec(name='y2', required=True)) + self.assertEqual(dset4_ext_spec.dims, expected_ext) + # TODO should succeed diff --git a/tests/unit/test_container.py b/tests/unit/test_container.py index 8fb3b9cc3..4e35a76a5 100644 --- a/tests/unit/test_container.py +++ b/tests/unit/test_container.py @@ -1,6 +1,8 @@ import numpy as np +import xarray as xr +import unittest -from hdmf.container import AbstractContainer, Container, Data +from hdmf.container import AbstractContainer, Container, Data, Coordinates from hdmf.testing import TestCase @@ -46,7 +48,7 @@ def test_set_parent_overwrite(self): another_obj = Container('obj3') with self.assertRaisesWith(ValueError, - 'Cannot reassign parent to Container: %s. Parent is already: %s.' + 'Cannot reassign parent to: %s. Parent is already: %s.' 
% (repr(child_obj), repr(child_obj.parent))): child_obj.parent = another_obj self.assertIs(child_obj.parent, parent_obj) @@ -112,13 +114,174 @@ def test_reassign_container_source(self): def test_repr(self): parent_obj = Container('obj1') - self.assertRegex(str(parent_obj), r"obj1 hdmf.container.Container at 0x\d+") + self.assertRegex(str(parent_obj), r"obj1 hdmf.container.Container at 0x%d" % id(parent_obj)) def test_type_hierarchy(self): self.assertEqual(Container.type_hierarchy(), (Container, AbstractContainer, object)) self.assertEqual(Subcontainer.type_hierarchy(), (Subcontainer, Container, AbstractContainer, object)) +class Bar(Container): + + __fields__ = ('data1', 'data2', 'data3') + + def __init__(self, name, data1, data2, data3=None): + super().__init__(name=name) + self.data1 = data1 + self.data2 = data2 + self.data3 = data3 + + +class TestContainerDims(TestCase): + + def test_get_no_dims(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + self.assertDictEqual(obj1.dims, {}) + + def test_set_dims_1d(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + obj1.set_dims(array_name='data1', dims=('numbers', )) + self.assertDictEqual(obj1.dims, {'data1': ('numbers', )}) + + def test_set_dims_3d(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + obj1.set_dims(array_name='data2', dims=('x', 'y', 'z')) + self.assertDictEqual(obj1.dims, {'data2': ('x', 'y', 'z')}) + + def test_set_dims_dataio(self): + # TODO + raise unittest.SkipTest('TODO') + + def test_set_dims_dci(self): + # TODO + raise unittest.SkipTest('TODO') + + def test_set_dims_h5dataset(self): + # TODO + raise unittest.SkipTest('TODO') + + def test_set_dims_empty(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + msg = "Number of dims must equal number of axes for field 'data1' in Bar 'obj1' (0 != 1)." 
+ with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data1', dims=tuple()) + + def test_set_dims_too_many(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + msg = "Number of dims must equal number of axes for field 'data1' in Bar 'obj1' (2 != 1)." + with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data1', dims=('numbers', 'dup')) + + def test_set_dims_unknown_name(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + msg = "Field named 'data4' not found in Bar 'obj1'." + with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data4', dims=('numbers', )) + + def test_set_dims_array_none(self): + """Test that set_dims raises an error if given an array name that is defined on the class but not set.""" + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + msg = "Field named 'data3' not found in Bar 'obj1'." + with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data3', dims=('numbers', )) + + def test_set_dim_axis_non_array(self): + obj1 = Bar('obj1', data1='hello', data2=np.arange(20).reshape((2, 5, 2))) + msg = "Cannot determine shape of field 'data1' in Bar 'obj1'." + with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data1', dims=('numbers', )) + + def test_set_dims_dup_name(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + obj1.set_dims(array_name='data1', dims=('numbers', )) + msg = "Cannot reset dims for field 'data1' in Bar 'obj1'. Dims is already ('numbers',)." + with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data1', dims=('numbers', )) + + def test_set_dims_dup_dim_name(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=np.arange(20).reshape((2, 5, 2))) + msg = "Cannot set dims for field 'data1' in Bar 'obj1'. Dim names must be unique." 
+ with self.assertRaisesWith(ValueError, msg): + obj1.set_dims(array_name='data1', dims=('numbers', 'numbers')) + + +class TestContainerCoords(TestCase): + + def test_get_coord_none(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + self.assertDictEqual(obj1.coords, {}) + + def test_set_coord(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + obj1.set_dims(array_name='data1', dims=('x', )) + obj1.set_coord(array_name='data1', name='letters', dims_index=(0, ), coord_array_name='data2', + coord_array_dims_index=(0, ), coord_type='aligned') + + self.assertEqual(len(obj1.coords), 1) + received_coords = obj1.coords['data1'] + self.assertIsInstance(received_coords, Coordinates) + self.assertIs(received_coords.parent, obj1) + self.assertEqual(received_coords['letters'], Coordinates.Coord(name='letters', dims=('x', ), + coord_array=obj1.data2, + coord_array_dims_index=(0, ), + coord_type='aligned')) + + def test_set_coord_dataio(self): + # TODO + raise unittest.SkipTest('TODO') + + def test_set_coord_dci(self): + # TODO + raise unittest.SkipTest('TODO') + + def test_set_coord_h5dataset(self): + # TODO + raise unittest.SkipTest('TODO') + + # TODO catch all the ValueErrors from set_coord + + def test_to_xarray_dataarray(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + obj1.set_dims(array_name='data1', dims=('x', )) + obj1.set_coord(array_name='data1', name='letters', dims_index=(0, ), coord_array_name='data2', + coord_array_dims_index=(0, ), coord_type='aligned') + + arr = obj1.to_xarray_dataarray(array_name='data1') + expected = xr.DataArray([1, 2, 3], dims=('x', ), coords={'letters': (('x', ), ['a', 'b', 'c'])}) + xr.testing.assert_equal(arr, expected) + + def test_to_xarray_dataarray_unknown_name(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + obj1.set_dims(array_name='data1', dims=('x', )) + obj1.set_coord(array_name='data1', name='letters', dims_index=(0, ), coord_array_name='data2', + 
coord_array_dims_index=(0, ), coord_type='aligned') + with self.assertRaisesWith(ValueError, "Field name 'data3' not found in Bar 'obj1'."): + obj1.to_xarray_dataarray(array_name='data3') + + def test_to_xarray_dataarray_coord_not_all_axes(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=[['a', 'b'], ['c', 'd'], ['e', 'f']]) + obj1.set_dims(array_name='data1', dims=('x', )) + obj1.set_coord(array_name='data1', name='letters', dims_index=(0, ), coord_array_name='data2', + coord_array_dims_index=(0, ), coord_type='aligned') + msg = ("Cannot convert the array 'data1' to an xarray.DataArray. All coordinate arrays must map all of their " + "dimensions to a set of dimensions on 'data1'.") + with self.assertRaisesWith(ValueError, msg): + obj1.to_xarray_dataarray(array_name='data1') + + def test_to_xarray_dataarray_no_coord(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + obj1.set_dims(array_name='data1', dims=('x', )) + arr = obj1.to_xarray_dataarray(array_name='data1') + expected = xr.DataArray([1, 2, 3], dims=('x', )) + xr.testing.assert_equal(arr, expected) + + def test_to_xarray_dataarray_no_dim(self): + obj1 = Bar('obj1', data1=[1, 2, 3], data2=['a', 'b', 'c']) + arr = obj1.to_xarray_dataarray(array_name='data1') + expected = xr.DataArray([1, 2, 3]) + xr.testing.assert_equal(arr, expected) + + class TestData(TestCase): def test_bool_true(self): @@ -146,3 +309,81 @@ def test_shape_list(self): """ data_obj = Data('my_data', [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) self.assertTupleEqual(data_obj.shape, (2, 5)) + + +class TestCoordinates(TestCase): + + def test_constructor(self): + """Test that the Coordinates constructor sets values correctly""" + obj = Container('obj1') + coords = Coordinates(obj) + self.assertIs(coords.parent, obj) + self.assertEqual(list(coords.values()), []) + + def test_add_getitem(self): + """Test that adding a coord to Coordinates and accessing it works""" + obj = Container('obj1') + coords = Coordinates(obj) + 
coords.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + + expected = Coordinates.Coord(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], + coord_array_dims_index=(0, ), coord_type='aligned') + self.assertEqual(coords['my_coord'], expected) + + def test_add_dup(self): + """Test that adding a coord whose name is already in Coordinates raises an error""" + obj = Container('obj1') + coords = Coordinates(obj) + coords.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + + msg = "Coordinate 'my_coord' already exists. Cannot overwrite values in Coordinates." + with self.assertRaisesWith(ValueError, msg): + coords.add(name='my_coord', dims=('y', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + + def test_eq(self): + """Test equality of Coordinates""" + obj = Container('obj1') + coords = Coordinates(obj) + coords.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + + coords2 = Coordinates(obj) + coords2.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + self.assertEqual(coords, coords2) + + def test_not_eq(self): + """Test correct failure of equality of Coordinates""" + obj = Container('obj1') + coords = Coordinates(obj) + coords.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + + coords2 = Coordinates(obj) + coords2.add(name='my_coord', dims=('y', ), coord_array=[0, 1, 2, 3, 4], coord_array_dims_index=(0, ), + coord_type='aligned') + self.assertNotEqual(coords, coords2) + + def test_dict(self): + """Test a variety of dictionary methods on Coordinates""" + obj = Container('obj1') + coords = Coordinates(obj) + coords.add(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], 
coord_array_dims_index=(0, ), + coord_type='aligned') + + expected_coord = Coordinates.Coord(name='my_coord', dims=('x', ), coord_array=[0, 1, 2, 3, 4], + coord_array_dims_index=(0, ), coord_type='aligned') + + for k, v in coords.items(): + self.assertEqual(k, 'my_coord') + self.assertEqual(v, expected_coord) + + self.assertEqual(len(coords), 1) + self.assertEqual(str(coords), "{'my_coord': Coord(name='my_coord', dims=('x',), coord_array=[0, 1, 2, 3, 4], " + "coord_array_dims_index=(0,), coord_type='aligned')}") + self.assertEqual(list(coords.keys()), ['my_coord']) + self.assertEqual(list(coords.values()), [expected_coord]) + self.assertEqual(list(iter(coords)), ['my_coord']) diff --git a/tests/unit/test_io_hdf5.py b/tests/unit/test_io_hdf5.py index 3e226fa48..98c8299d4 100644 --- a/tests/unit/test_io_hdf5.py +++ b/tests/unit/test_io_hdf5.py @@ -226,7 +226,7 @@ def test_overwrite_written(self): io = HDF5IO(self.path, manager=self.manager, mode='a') io.write_builder(self.builder) builder = io.read_builder() - with self.assertRaisesWith(ValueError, "cannot change written to not written"): + with self.assertRaisesWith(AttributeError, "Cannot change written to not written"): builder.written = False io.close() diff --git a/tests/unit/utils_test/test_docval.py b/tests/unit/utils_test/test_docval.py index 74fdda079..86884b4d6 100644 --- a/tests/unit/utils_test/test_docval.py +++ b/tests/unit/utils_test/test_docval.py @@ -107,9 +107,9 @@ def setUp(self): self.test_obj_sub = MyTestSubclass() def test_bad_type(self): - exp_msg = (r"error parsing 'arg1' argument' : argtype must be a type, " - r"a str, a list, a tuple, or None - got ") - with self.assertRaisesRegex(Exception, exp_msg): + exp_msg = ("error parsing 'arg1' argument' : argtype must be a type, " + "a str, a list, a tuple, or None - got ") + with self.assertRaisesWith(Exception, exp_msg): @docval({'name': 'arg1', 'type': {'a': 1}, 'doc': 'this is a bad type'}) def method(self, **kwargs): pass diff --git 
a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index b0ca132fe..4d394af38 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -39,7 +39,7 @@ def test_valid(self): def test_invalid_missing_req_type(self): builder = GroupBuilder('my_bar') - err_msg = r"builder must have data type defined with attribute '[A-Za-z_]+'" + err_msg = r"builder \(name: 'my_bar'\) must have data type defined with attribute '[A-Za-z_]+'" with self.assertRaisesRegex(ValueError, err_msg): self.vmap.validate(builder)