Skip to content

Commit

Permalink
Improve test coverage for hdfDB
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Mar 26, 2024
1 parent 9d264e6 commit e5f352c
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 23 deletions.
51 changes: 28 additions & 23 deletions h5rdmtoolbox/database/hdfdb/objdb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import h5py
import numpy as np
from typing import Union, Dict, List, Callable, Generator
from typing import Union, Dict, List, Callable, Generator, Optional

from . import query, utils
from .nonsearchable import NonInsertableDatabaseInterface
Expand Down Expand Up @@ -35,10 +35,10 @@ def __call__(self, name, h5obj):
self.found_objects.append(h5obj)
except AttributeError as e:
return
if not self.ignore_attribute_error:
raise AttributeError(f'HDF object {h5obj} has no attribute "{self._attribute}". You may add '
'an objfilter, because dataset and groups dont share all attributes. '
'One example is "dtype", which is only available with datasets') from e
# if not self.ignore_attribute_error:
# raise AttributeError(f'HDF object {h5obj} has no attribute "{self._attribute}". You may add '
# 'an objfilter, because dataset and groups dont share all attributes. '
# 'One example is "dtype", which is only available with datasets') from e


class RecValueFind:
Expand Down Expand Up @@ -77,17 +77,21 @@ def __call__(self, name, obj):
if '.' in self._attribute:
# dict comparison:
attr_name, dict_path = self._attribute.split('.', 1)
if attr_name in obj.attrs:
_attr_dict = dict(obj.attrs[attr_name])
for _item in dict_path.split('.'):
try:
_attr_value = _attr_dict[_item]
except KeyError:
_attr_value = None
break
if _attr_value:
if self._func(_attr_value, self._value):
self.found_objects.append(obj)
attr_value = obj.attrs.get(attr_name, None)
if attr_value is not None:
if isinstance(attr_value, str) and attr_value.startswith('{') and attr_value.endswith('}'):
import json
_attr_dict = json.loads(attr_value)

for _item in dict_path.split('.'):
try:
_attr_value = _attr_dict[_item]
except KeyError:
_attr_value = None
break
if _attr_value:
if self._func(_attr_value, self._value):
self.found_objects.append(obj)
if self._func(obj.attrs.get(self._attribute, None), self._value):
self.found_objects.append(obj)

Expand Down Expand Up @@ -329,7 +333,8 @@ def find(h5obj: Union[h5py.Group, h5py.Dataset],
return common_results


def distinct(h5obj: Union[h5py.Group, h5py.Dataset], key: str,
def distinct(h5obj: Union[h5py.Group, h5py.Dataset],
key: str,
objfilter: Union[h5py.Group, h5py.Dataset, None]) -> List[str]:
"""Return a distinct list of all found targets. A target generally is
understood to be an attribute name. However, by adding a $ in front, class
Expand Down Expand Up @@ -357,18 +362,18 @@ def distinct(h5obj: Union[h5py.Group, h5py.Dataset], key: str,
return list(set(rpc.found_objects))

rac = RecAttrCollect(key, objfilter)
for k, v in h5obj.attrs.raw.items():
for k, v in h5obj.attrs.items():
if k == key:
rac.found_objects.append(v)
if isinstance(h5obj, h5py.Group):
h5obj.visititems(rac)
if objfilter:
if isinstance(h5obj, objfilter):
if key in h5obj.attrs.raw:
rac.found_objects.append(h5obj.attrs.raw[key])
if key in h5obj.attrs:
rac.found_objects.append(h5obj.attrs[key])
else:
if key in h5obj.attrs.raw:
rac.found_objects.append(h5obj.attrs.raw[key])
if key in h5obj.attrs:
rac.found_objects.append(h5obj.attrs[key])

return list(set(rac.found_objects))

Expand Down Expand Up @@ -433,7 +438,7 @@ def find(self,
yield r

def distinct(self, key: str,
objfilter: Union[h5py.Group, h5py.Dataset, None]):
objfilter: Optional[Union[h5py.Group, h5py.Dataset]] = None):
"""Return a distinct list of all found targets. A target generally is
understood to be an attribute name. However, by adding a $ in front, class
properties can be found, too, e.g. $shape will return all distinct shapes of the
Expand Down
84 changes: 84 additions & 0 deletions tests/database/test_hdfDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,58 @@ def test_insert(self):
with self.assertRaises(NotImplementedError):
gdb.insert_group(None)

def test_value_find(self):
    """Query datasets by their stored value using comparison operators.

    Exercises the $eq/$gte/$lte/$gt/$lt value queries of ObjDB.find_one
    against a scalar dataset (0.5) and an array dataset ([1, 2, 3]).
    """
    with h5tbx.File(mode='w') as h5:
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)
        gdb = hdfdb.ObjDB(h5['/'])
        # inclusive comparisons all match the scalar 0.5 dataset
        res = gdb.find_one({'$eq': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        res = gdb.find_one({'$gte': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        res = gdb.find_one({'$lte': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        # strict comparisons exclude the boundary value, so nothing is found.
        # assertIsNone gives a clearer failure message than assertTrue(x is None).
        res = gdb.find_one({'$gt': 0.5}, recursive=True)
        self.assertIsNone(res)
        res = gdb.find_one({'$lt': 0.5}, recursive=True)
        self.assertIsNone(res)
        # array-valued $eq compares against the full dataset content
        res = gdb.find_one({'$eq': np.array([1, 2, 3])}, recursive=True)
        self.assertEqual(res.name, ds_random.name)

def test_find_shape(self):
    """Find datasets via the special $shape and $ndim property keys."""
    with h5tbx.File(mode='w') as h5:
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)

        gdb = hdfdb.ObjDB(h5['/'])

        # exact-shape lookup returns the 1D array dataset
        hit = gdb.find_one({'$shape': (3,)}, recursive=True)
        self.assertEqual(hit.name, ds_random.name)

        # $ndim == 1 matches only the array dataset
        names = [r.name for r in gdb.find({'$ndim': 1}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        # strict lower bound: scalar dataset (ndim 0) is excluded
        names = [r.name for r in gdb.find({'$ndim': {'$gt': 0}}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        names = [r.name for r in gdb.find({'$ndim': {'$gte': 1}}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        # inclusive bound of 0 matches both datasets
        names = [r.name for r in gdb.find({'$ndim': {'$gte': 0}}, recursive=True)]
        self.assertListEqual(sorted(names), sorted([ds_random.name, ds_half.name]))

def test_distint_props(self):
    """Collect distinct values of object properties via $-prefixed keys.

    NOTE(review): the method name misspells "distinct"; kept unchanged so
    existing references to this test keep working.
    """
    with h5tbx.File(mode='w') as h5:
        # both datasets are created for their side effect on the file
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)

        db = hdfdb.ObjDB(h5['/'])
        # scalar dataset contributes (), array dataset contributes (3,)
        self.assertListEqual(sorted(db.distinct('$shape')), [(), (3,)])

        db = hdfdb.ObjDB(h5['/'])
        self.assertListEqual(sorted(db.distinct('$ndim')), [0, 1])

def test_find_one(self):
with h5py.File(h5tbx.utils.generate_temporary_filename(suffix='.hdf'),
'w') as h5:
Expand Down Expand Up @@ -101,6 +153,38 @@ def test_find_one(self):
single_res = gdb_root.find_one({'a': {'$gte': 0}}, recursive=True)
self.assertTrue(single_res.attrs['a'] >= 0)

def test_find_dict_attr(self):
    """Dotted query keys ('b.c') address entries inside dict-valued attributes."""
    with h5tbx.File(mode='w') as h5:
        group = h5.create_group('grp')
        dataset = h5.create_dataset('dataset', shape=(2, 3))
        dataset.attrs['a'] = 1
        group.attrs['a'] = 1
        group.attrs['b'] = {'c': 2}

        db = hdfdb.ObjDB(h5['/'])

        # dotted path digs into the dict stored under attribute 'b'
        hit = db.find_one({'b.c': 2}, recursive=True)
        self.assertEqual(hit.name, group.name)

        # plain attribute query matches both the group and the dataset
        found = sorted(r.name for r in db.find({'a': 1}, recursive=True))
        self.assertListEqual(found, sorted([group.name, dataset.name]))

        # objfilter='dataset' restricts the result set to datasets only
        found = sorted(r.name for r in db.find({'a': 1}, objfilter='dataset', recursive=True))
        self.assertListEqual(found, sorted([dataset.name]))

def test_distinct(self):
    """distinct() gathers the unique values of an attribute across the file."""
    with h5tbx.File(mode='w') as h5:
        h5.attrs['tag'] = 'root'
        h5.create_dataset('dataset', data=np.array([1, 2, 3]),
                          attrs={'tag': 'dataset', 'units': 'm'})
        h5.create_dataset('dataset2', data=np.array([1, 2, 3]),
                          attrs={'tag': 'dataset', 'units': 'm/s'})
        grp = h5.create_group('grp')
        grp.attrs['tag'] = 'group'

        db = hdfdb.ObjDB(h5['/'])
        # 'tag' appears on root, both datasets (same value) and the group
        self.assertListEqual(sorted(db.distinct('tag')),
                             sorted(['root', 'dataset', 'group']))
        # 'units' is only set on the two datasets, with distinct values
        self.assertListEqual(sorted(db.distinct('units')),
                             sorted(['m', 'm/s']))

def test_regex(self):
from h5rdmtoolbox.database.hdfdb.query import _regex
self.assertFalse(_regex(None, 'b'))
Expand Down

0 comments on commit e5f352c

Please sign in to comment.