From d069cadd85fb68a57885315876b46168cb1dac64 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Mon, 23 Mar 2020 23:08:12 -0700 Subject: [PATCH] Fix bug on gettype on empty array, add validator tests --- src/hdmf/validate/errors.py | 38 ++- src/hdmf/validate/validator.py | 129 +++++---- tests/unit/validator_tests/test_validate.py | 276 +++++++++++++++++++- 3 files changed, 392 insertions(+), 51 deletions(-) diff --git a/src/hdmf/validate/errors.py b/src/hdmf/validate/errors.py index 6b3b50b3d..a95049ac2 100644 --- a/src/hdmf/validate/errors.py +++ b/src/hdmf/validate/errors.py @@ -7,11 +7,13 @@ "Error", "DtypeError", "MissingError", + "ExpectedScalarError", "ExpectedArrayError", "ShapeError", "MissingDataType", "IllegalLinkError", - "IncorrectDataType" + "IncorrectDataType", + "EmptyDataNoTypeWarning" ] @@ -96,6 +98,19 @@ def data_type(self): return self.__data_type +class ExpectedScalarError(Error): + + @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, + {'name': 'received', 'type': (tuple, list), 'doc': 'the received data'}, + {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) + def __init__(self, **kwargs): + name = getargs('name', kwargs) + received = getargs('received', kwargs) + reason = "incorrect shape - expected a scalar, got array with shape '%s'" % str(received) + loc = getargs('location', kwargs) + super().__init__(name, reason, location=loc) + + class ExpectedArrayError(Error): @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, @@ -157,3 +172,24 @@ def __init__(self, **kwargs): reason = "incorrect data_type - expected '%s', got '%s'" % (expected, received) loc = getargs('location', kwargs) super().__init__(name, reason, location=loc) + + +class ValidatorWarning(UserWarning): + + pass + + +class EmptyDataNoTypeWarning(Error, ValidatorWarning): + """ + A warning for indicating that a value is empty and has no data type (e.g., an empty list). + """ + + @docval({'name': 'name', 'type': str, 'doc': 'the name of the component that is erroneous'}, + {'name': 'data_type', 'type': type, 'doc': 'the type of the data'}, + {'name': 'location', 'type': str, 'doc': 'the location of the error', 'default': None}) + def __init__(self, **kwargs): + name = getargs('name', kwargs) + data_type = getargs('data_type', kwargs) + reason = "could not determine data type for empty data %s" % data_type + loc = getargs('location', kwargs) + super().__init__(name, reason, location=loc) diff --git a/src/hdmf/validate/validator.py b/src/hdmf/validate/validator.py index efcbb2692..deaa01c73 100644 --- a/src/hdmf/validate/validator.py +++ b/src/hdmf/validate/validator.py @@ -3,6 +3,7 @@ from copy import copy import re from itertools import chain +from warnings import warn from ..utils import docval, getargs, call_docval_func, pystr, get_data_shape @@ -14,7 +15,7 @@ from ..build.builders import BaseBuilder from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType -from .errors import ExpectedArrayError +from .errors import ExpectedArrayError, ExpectedScalarError, EmptyDataNoTypeWarning, ValidatorWarning __synonyms = DtypeHelper.primary_dtype_synonyms @@ -104,42 +105,41 @@ def get_type(data): return 'region' elif isinstance(data, ReferenceBuilder): return 'object' - elif isinstance(data, np.ndarray): - return get_type(data[0]) - if not hasattr(data, '__len__'): + elif not hasattr(data, '__len__'): return type(data).__name__ + # for conditions below, len(data) works + elif hasattr(data, 'dtype'): + if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None: + return get_type(data[0]) + return data.dtype + elif len(data) == 0: + # raise ValueError('cannot determine type for empty data without dtype attribute') + return None else: - if hasattr(data, 'dtype'): - if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None: - return get_type(data[0]) - return data.dtype - if len(data) == 0: - raise ValueError('cannot determine type for empty data') return get_type(data[0]) def check_shape(expected, received): + if expected is None: # scalar + return not isinstance(received, (list, tuple, np.ndarray)) ret = False - if expected is None: - ret = True - else: - if isinstance(expected, (list, tuple)): - if isinstance(expected[0], (list, tuple)): - for sub in expected: - if check_shape(sub, received): - ret = True - break - else: - if len(expected) > 0 and received is None: - ret = False - elif len(expected) == len(received): + if isinstance(expected, (list, tuple)): + if isinstance(expected[0], (list, tuple)): + for sub in expected: + if check_shape(sub, received): ret = True - for e, r in zip(expected, received): - if not check_shape(e, r): - ret = False - break - elif isinstance(expected, int): - ret = expected == received + break + else: + if len(expected) > 0 and received is None: + ret = False + elif len(expected) == len(received): + ret = True + for e, r in zip(expected, received): + if not check_shape(e, r): + ret = False + break + elif isinstance(expected, int): + ret = expected == received return ret @@ -282,6 +282,30 @@ def get_builder_loc(cls, builder): tmp = tmp.parent return "/".join(reversed(stack)) + @classmethod + def check_data_type(cls, value, spec, spec_loc, builder_location=None): + dtype = get_type(value) + if dtype is None: + # dtype = 'unknown data type of empty data %s' % type(value) + # return DtypeError(spec_loc, spec.dtype, dtype, location=builder_location) + return EmptyDataNoTypeWarning(spec_loc, type(value), location=builder_location) + elif not check_type(spec.dtype, dtype): + return DtypeError(spec_loc, spec.dtype, dtype, location=builder_location) + return None + + @classmethod + def check_data_shape(cls, value, spec, spec_loc, builder_location=None): + shape = get_data_shape(value) + if shape == (): + shape = None + if shape is None and spec.shape is not None: + return ExpectedArrayError(spec_loc, spec.shape, str(value), location=builder_location) + elif shape is not None and spec.shape is None: + return ExpectedScalarError(spec_loc, shape, location=builder_location) + elif not check_shape(spec.shape, shape): + return ShapeError(spec_loc, spec.shape, shape, location=builder_location) + return None + class AttributeValidator(Validator): '''A class for validating values against AttributeSpecs''' @@ -313,12 +337,13 @@ def validate(self, **kwargs): if spec.dtype.target_type not in hierarchy: ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type)) else: - dtype = get_type(value) - if not check_type(spec.dtype, dtype): - ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype)) - shape = get_data_shape(value) - if not check_shape(spec.shape, shape): - ret.append(ShapeError(self.get_spec_loc(spec), spec.shape, shape)) + dtype_err = self.check_data_type(value, spec, self.get_spec_loc(spec)) + if dtype_err: + ret.append(dtype_err) + + shape_err = self.check_data_shape(value, spec, self.get_spec_loc(spec)) + if shape_err: + ret.append(shape_err) return ret @@ -349,7 +374,11 @@ def validate(self, **kwargs): errors = validator.validate(attr_val) for err in errors: err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name - ret.extend(errors) + if isinstance(err, ValidatorWarning): + warn(err, type(err)) + else: + ret.append(err) # return only errors, not warnings + return ret @@ -367,19 +396,21 @@ def validate(self, **kwargs): builder = getargs('builder', kwargs) ret = super().validate(builder) data = builder.data - if self.spec.dtype is not None: - dtype = get_type(data) - if not check_type(self.spec.dtype, dtype): - ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype, - location=self.get_builder_loc(builder))) - shape = get_data_shape(data) - if not check_shape(self.spec.shape, shape): - if shape is None: - ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data), - location=self.get_builder_loc(builder))) - else: - ret.append(ShapeError(self.get_spec_loc(self.spec), self.spec.shape, shape, - location=self.get_builder_loc(builder))) + spec = self.spec + + if spec.dtype is not None: + dtype_err = self.check_data_type(data, spec, self.get_spec_loc(spec), + builder_location=self.get_builder_loc(builder)) + if dtype_err: + if isinstance(dtype_err, ValidatorWarning): + warn(dtype_err, type(dtype_err)) + else: + ret.append(dtype_err) # return only errors, not warnings + + shape_err = self.check_data_shape(data, spec, self.get_spec_loc(spec), + builder_location=self.get_builder_loc(builder)) + if shape_err: + ret.append(shape_err) return ret diff --git a/tests/unit/validator_tests/test_validate.py b/tests/unit/validator_tests/test_validate.py index f4ca3ccb8..f6ddff385 100644 --- a/tests/unit/validator_tests/test_validate.py +++ b/tests/unit/validator_tests/test_validate.py @@ -6,7 +6,7 @@ from hdmf.spec import GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace from hdmf.build import GroupBuilder, DatasetBuilder from hdmf.validate import ValidatorMap -from hdmf.validate.errors import * # noqa: F403 +from hdmf.validate.errors import EmptyDataNoTypeWarning, DtypeError, MissingError, MissingDataType, ExpectedArrayError from hdmf.testing import TestCase CORE_NAMESPACE = 'test_core' @@ -292,3 +292,277 @@ def test_bool_for_numeric(self): expected_errors = {"Bar/attr1 (my_bar.attr1): incorrect type - expected 'numeric', got 'bool'", "Bar/data (my_bar/data): incorrect type - expected 'numeric', got 'bool'"} self.assertEqual(result_strings, expected_errors) + + +class ValueTestMixin(metaclass=ABCMeta): + + @abstractmethod + def set_up_spec(self, spec_kwargs): + pass + + @abstractmethod + def set_up_group_builder(self, value): + pass + + @property + @abstractmethod + def err_prefix(cls): + pass + + def test_str_for_text(self): + """Test that validator allows a string for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = 'a' + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_empty_str_for_text(self): + """Test that validator allows an empty string for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = '' + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_str_list_for_text(self): + """Test that validator does not allow a list of strings for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = ['a'] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected a scalar, got array with shape '(1,)'"} + self.assertEqual(result_strings, expected_errors) + + def test_str_array_for_text(self): + """Test that validator does not allow an array of strings for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = np.array(['a']) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected a scalar, got array with shape '(1,)'"} + self.assertEqual(result_strings, expected_errors) + + def test_empty_float_array_for_text(self): + """Test that validator does not allow an array of strings for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = np.array([]) # default dtype float64 + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected a scalar, got array with shape '(0,)'", + self.err_prefix + "incorrect type - expected 'text', got 'float64'"} + self.assertEqual(result_strings, expected_errors) + + def test_2d_str_list_for_text(self): + """Test that validator does not allow a 2D list of strings for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = [['a']] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected a scalar, got array with shape '(1, 1)'"} + self.assertEqual(result_strings, expected_errors) + + def test_2d_str_array_for_text(self): + """Test that validator does not allow a 2D array of strings for a text attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text'}) + value = np.array([['a']]) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected a scalar, got array with shape '(1, 1)'"} + self.assertEqual(result_strings, expected_errors) + + def test_str_list_for_text_array(self): + """Test that validator allows a list of strings for a text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = ['a'] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_str_array_for_text_array(self): + """Test that validator allows an array of strings for a text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = np.array(['a']) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_empty_list_for_text_array(self): + """Test that validator raises a warning for an empty list for a text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = [] + bar_builder = self.set_up_group_builder(value) + msg = self.err_prefix + "could not determine data type for empty data " + with self.assertWarnsWith(EmptyDataNoTypeWarning, msg): + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_empty_str_array_for_text_array(self): + """Test that validator allows an empty array of type string for a text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = np.array([], dtype=str) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_empty_float_array_for_text_array(self): + """Test that validator does not allow an empty array of type float for a text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = np.array([]) # default dtpye float64 + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect type - expected 'text', got 'float64'"} + self.assertEqual(result_strings, expected_errors) + + def test_2d_str_list_for_1d_text_array(self): + """Test that validator does not allow a 2D lists of strings for a 1D text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = [['a']] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[None]', got '(1, 1)'"} + self.assertEqual(result_strings, expected_errors) + + def test_2d_str_array_for_1d_text_array(self): + """Test that validator does not allow a 2D array of strings for a 1D text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = np.array([['a']]) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[None]', got '(1, 1)'"} + self.assertEqual(result_strings, expected_errors) + + def test_empty_2d_list_for_1d_text_array(self): + """Test that validator raises a warning and error for an empty 2D list for a 1D text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = [[]] + bar_builder = self.set_up_group_builder(value) + msg = self.err_prefix + "could not determine data type for empty data " + with self.assertWarnsWith(EmptyDataNoTypeWarning, msg): + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[None]', got '(1, 0)'"} + self.assertEqual(result_strings, expected_errors) + + def test_empty_2d_str_array_for_1d_text_array(self): + """Test that validator does not allow an empty 2D array of type string for a 1D text array attr/dset.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [None]}) + value = np.array([[]], dtype=str) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[None]', got '(1, 0)'"} + self.assertEqual(result_strings, expected_errors) + + def test_str_array_correct_length(self): + """Test that validator allows a 1D text array attr/dset with the correct length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = np.array(['a', ''], dtype=str) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_str_list_correct_length(self): + """Test that validator allows a 1D text list attr/dset with the correct length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = ['a', ''] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + self.assertEqual(len(results), 0) + + def test_str_array_incorrect_length(self): + """Test that validator does not allow a 1D text array attr/dset with the incorrect length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = np.array([''], dtype=str) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[2]', got '(1,)'"} + self.assertEqual(result_strings, expected_errors) + + def test_str_list_incorrect_length(self): + """Test that validator does not allow a 1D text list attr/dset with the incorrect length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = [''] + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[2]', got '(1,)'"} + self.assertEqual(result_strings, expected_errors) + + def test_empty_str_array_incorrect_length(self): + """Test that validator does not allow an empty text array for a 1D text array attr/dset with a length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = np.array([], dtype=str) + bar_builder = self.set_up_group_builder(value) + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[2]', got '(0,)'"} + self.assertEqual(result_strings, expected_errors) + + def test_empty_list_incorrect_length(self): + """Test that validator raises a warning and error an empty list for a 1D text array attr/dset with a length.""" + self.set_up_spec({'doc': 'doc', 'dtype': 'text', 'shape': [2]}) + value = [] + bar_builder = self.set_up_group_builder(value) + msg = self.err_prefix + "could not determine data type for empty data " + with self.assertWarnsWith(EmptyDataNoTypeWarning, msg): + results = self.vmap.validate(bar_builder) + result_strings = set([str(s) for s in results]) + expected_errors = {self.err_prefix + "incorrect shape - expected '[2]', got '(0,)'"} + self.assertEqual(result_strings, expected_errors) + + +class TestAttrValidation(TestCase, ValueTestMixin): + + def set_up_spec(self, spec_kwargs): + spec_catalog = SpecCatalog() + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + attributes=[AttributeSpec(name='val1', **spec_kwargs)]) + spec_catalog.register_spec(spec, 'test.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + def set_up_group_builder(self, value): + """Return a GroupBuilder with an attribute with name val1 and value set to the given value""" + return GroupBuilder('my_bar', attributes={'data_type': 'Bar', 'val1': value}) + + @property + def err_prefix(cls): + """Prefix for an error message such as: "Bar/val1 (my_bar.val1): incorrect shape - expected '[2]', got '(1, 0)'" + """ + return 'Bar/val1 (my_bar.val1): ' + + +class TestDsetValidation(TestCase, ValueTestMixin): + + def set_up_spec(self, spec_kwargs): + spec_catalog = SpecCatalog() + spec = GroupSpec('A test group specification with a data type', + data_type_def='Bar', + datasets=[DatasetSpec(name='val1', **spec_kwargs)]) + spec_catalog.register_spec(spec, 'test.yaml') + self.namespace = SpecNamespace( + 'a test namespace', CORE_NAMESPACE, [{'source': 'test.yaml'}], version='0.1.0', catalog=spec_catalog) + self.vmap = ValidatorMap(self.namespace) + + def set_up_group_builder(self, value): + """Return a GroupBuilder with a DatasetBuilder with name val1 and data set to the given value""" + return GroupBuilder('my_bar', attributes={'data_type': 'Bar'}, + datasets=[DatasetBuilder('val1', data=value)]) + + @property + def err_prefix(cls): + """Prefix for an error message such as: "Bar/val1 (my_bar/val1): incorrect shape - expected '[2]', got '(1, 0)'" + """ + return 'Bar/val1 (my_bar/val1): '