Skip to content

Commit

Permalink
Improve test coverage for hdfDB
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Mar 26, 2024
1 parent 9d264e6 commit e5f352c
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 23 deletions.
51 changes: 28 additions & 23 deletions h5rdmtoolbox/database/hdfdb/objdb.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import h5py
import numpy as np
from typing import Union, Dict, List, Callable, Generator
from typing import Union, Dict, List, Callable, Generator, Optional

from . import query, utils
from .nonsearchable import NonInsertableDatabaseInterface
Expand Down Expand Up @@ -35,10 +35,10 @@ def __call__(self, name, h5obj):
self.found_objects.append(h5obj)
except AttributeError as e:
return
if not self.ignore_attribute_error:
raise AttributeError(f'HDF object {h5obj} has no attribute "{self._attribute}". You may add '
'an objfilter, because dataset and groups dont share all attributes. '
'One example is "dtype", which is only available with datasets') from e
# if not self.ignore_attribute_error:
# raise AttributeError(f'HDF object {h5obj} has no attribute "{self._attribute}". You may add '
# 'an objfilter, because dataset and groups dont share all attributes. '
# 'One example is "dtype", which is only available with datasets') from e


class RecValueFind:
Expand Down Expand Up @@ -77,17 +77,21 @@ def __call__(self, name, obj):
if '.' in self._attribute:
# dict comparison:
attr_name, dict_path = self._attribute.split('.', 1)
if attr_name in obj.attrs:
_attr_dict = dict(obj.attrs[attr_name])
for _item in dict_path.split('.'):
try:
_attr_value = _attr_dict[_item]
except KeyError:
_attr_value = None
break
if _attr_value:
if self._func(_attr_value, self._value):
self.found_objects.append(obj)
attr_value = obj.attrs.get(attr_name, None)
if attr_value is not None:
if isinstance(attr_value, str) and attr_value.startswith('{') and attr_value.endswith('}'):
import json
_attr_dict = json.loads(attr_value)

for _item in dict_path.split('.'):
try:
_attr_value = _attr_dict[_item]
except KeyError:
_attr_value = None
break
if _attr_value:
if self._func(_attr_value, self._value):
self.found_objects.append(obj)
if self._func(obj.attrs.get(self._attribute, None), self._value):
self.found_objects.append(obj)

Expand Down Expand Up @@ -329,7 +333,8 @@ def find(h5obj: Union[h5py.Group, h5py.Dataset],
return common_results


def distinct(h5obj: Union[h5py.Group, h5py.Dataset], key: str,
def distinct(h5obj: Union[h5py.Group, h5py.Dataset],
key: str,
objfilter: Union[h5py.Group, h5py.Dataset, None]) -> List[str]:
"""Return a distinct list of all found targets. A target generally is
understood to be an attribute name. However, by adding a $ in front, class
Expand Down Expand Up @@ -357,18 +362,18 @@ def distinct(h5obj: Union[h5py.Group, h5py.Dataset], key: str,
return list(set(rpc.found_objects))

rac = RecAttrCollect(key, objfilter)
for k, v in h5obj.attrs.raw.items():
for k, v in h5obj.attrs.items():
if k == key:
rac.found_objects.append(v)
if isinstance(h5obj, h5py.Group):
h5obj.visititems(rac)
if objfilter:
if isinstance(h5obj, objfilter):
if key in h5obj.attrs.raw:
rac.found_objects.append(h5obj.attrs.raw[key])
if key in h5obj.attrs:
rac.found_objects.append(h5obj.attrs[key])
else:
if key in h5obj.attrs.raw:
rac.found_objects.append(h5obj.attrs.raw[key])
if key in h5obj.attrs:
rac.found_objects.append(h5obj.attrs[key])

return list(set(rac.found_objects))

Expand Down Expand Up @@ -433,7 +438,7 @@ def find(self,
yield r

def distinct(self, key: str,
objfilter: Union[h5py.Group, h5py.Dataset, None]):
objfilter: Optional[Union[h5py.Group, h5py.Dataset]] = None):
"""Return a distinct list of all found targets. A target generally is
understood to be an attribute name. However, by adding a $ in front, class
properties can be found, too, e.g. $shape will return all distinct shapes of the
Expand Down
84 changes: 84 additions & 0 deletions tests/database/test_hdfDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,58 @@ def test_insert(self):
with self.assertRaises(NotImplementedError):
gdb.insert_group(None)

def test_value_find(self):
    """Query datasets by their stored value using comparison operators.

    Exercises the $eq/$gte/$lte/$gt/$lt value queries of ObjDB.find_one
    against a scalar dataset (0.5) and an array dataset ([1, 2, 3]).
    """
    with h5tbx.File(mode='w') as h5:
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)
        gdb = hdfdb.ObjDB(h5['/'])
        # inclusive comparisons all match the scalar 0.5 dataset
        res = gdb.find_one({'$eq': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        res = gdb.find_one({'$gte': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        res = gdb.find_one({'$lte': 0.5}, recursive=True)
        self.assertEqual(res.name, ds_half.name)
        # strict comparisons exclude the boundary value, so nothing is found.
        # assertIsNone gives a clearer failure message than assertTrue(x is None).
        res = gdb.find_one({'$gt': 0.5}, recursive=True)
        self.assertIsNone(res)
        res = gdb.find_one({'$lt': 0.5}, recursive=True)
        self.assertIsNone(res)
        # array-valued $eq compares against the full dataset content
        res = gdb.find_one({'$eq': np.array([1, 2, 3])}, recursive=True)
        self.assertEqual(res.name, ds_random.name)

def test_find_shape(self):
    """Find datasets via the special $shape and $ndim property keys."""
    with h5tbx.File(mode='w') as h5:
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)

        gdb = hdfdb.ObjDB(h5['/'])

        # exact-shape lookup returns the 1D array dataset
        hit = gdb.find_one({'$shape': (3,)}, recursive=True)
        self.assertEqual(hit.name, ds_random.name)

        # $ndim == 1 matches only the array dataset
        names = [r.name for r in gdb.find({'$ndim': 1}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        # strict lower bound: scalar dataset (ndim 0) is excluded
        names = [r.name for r in gdb.find({'$ndim': {'$gt': 0}}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        names = [r.name for r in gdb.find({'$ndim': {'$gte': 1}}, recursive=True)]
        self.assertListEqual(names, [ds_random.name])

        # inclusive bound of 0 matches both datasets
        names = [r.name for r in gdb.find({'$ndim': {'$gte': 0}}, recursive=True)]
        self.assertListEqual(sorted(names), sorted([ds_random.name, ds_half.name]))

def test_distint_props(self):
    """Collect distinct values of object properties via $-prefixed keys.

    NOTE(review): the method name misspells "distinct"; kept unchanged so
    existing references to this test keep working.
    """
    with h5tbx.File(mode='w') as h5:
        # both datasets are created for their side effect on the file
        ds_random = h5.create_dataset('random', data=np.array([1, 2, 3]))
        ds_half = h5.create_dataset('half', data=0.5)

        db = hdfdb.ObjDB(h5['/'])
        # scalar dataset contributes (), array dataset contributes (3,)
        self.assertListEqual(sorted(db.distinct('$shape')), [(), (3,)])

        db = hdfdb.ObjDB(h5['/'])
        self.assertListEqual(sorted(db.distinct('$ndim')), [0, 1])

def test_find_one(self):
with h5py.File(h5tbx.utils.generate_temporary_filename(suffix='.hdf'),
'w') as h5:
Expand Down Expand Up @@ -101,6 +153,38 @@ def test_find_one(self):
single_res = gdb_root.find_one({'a': {'$gte': 0}}, recursive=True)
self.assertTrue(single_res.attrs['a'] >= 0)

def test_find_dict_attr(self):
    """Dotted query keys ('b.c') address entries inside dict-valued attributes."""
    with h5tbx.File(mode='w') as h5:
        group = h5.create_group('grp')
        dataset = h5.create_dataset('dataset', shape=(2, 3))
        dataset.attrs['a'] = 1
        group.attrs['a'] = 1
        group.attrs['b'] = {'c': 2}

        db = hdfdb.ObjDB(h5['/'])

        # dotted path digs into the dict stored under attribute 'b'
        hit = db.find_one({'b.c': 2}, recursive=True)
        self.assertEqual(hit.name, group.name)

        # plain attribute query matches both the group and the dataset
        found = sorted(r.name for r in db.find({'a': 1}, recursive=True))
        self.assertListEqual(found, sorted([group.name, dataset.name]))

        # objfilter='dataset' restricts the result set to datasets only
        found = sorted(r.name for r in db.find({'a': 1}, objfilter='dataset', recursive=True))
        self.assertListEqual(found, sorted([dataset.name]))

def test_distinct(self):
    """distinct() gathers the unique values of an attribute across the file."""
    with h5tbx.File(mode='w') as h5:
        h5.attrs['tag'] = 'root'
        h5.create_dataset('dataset', data=np.array([1, 2, 3]),
                          attrs={'tag': 'dataset', 'units': 'm'})
        h5.create_dataset('dataset2', data=np.array([1, 2, 3]),
                          attrs={'tag': 'dataset', 'units': 'm/s'})
        grp = h5.create_group('grp')
        grp.attrs['tag'] = 'group'

        db = hdfdb.ObjDB(h5['/'])
        # 'tag' appears on root, both datasets (same value) and the group
        self.assertListEqual(sorted(db.distinct('tag')),
                             sorted(['root', 'dataset', 'group']))
        # 'units' is only set on the two datasets, with distinct values
        self.assertListEqual(sorted(db.distinct('units')),
                             sorted(['m', 'm/s']))

def test_regex(self):
from h5rdmtoolbox.database.hdfdb.query import _regex
self.assertFalse(_regex(None, 'b'))
Expand Down

0 comments on commit e5f352c

Please sign in to comment.