None-values for attrs can be ignored via settings
matthiasprobst committed Apr 12, 2024
1 parent 0aa5aaa commit 4698b33
Showing 8 changed files with 148 additions and 79 deletions.
3 changes: 2 additions & 1 deletion h5rdmtoolbox/_cfg.py
@@ -37,6 +37,7 @@ def is_valid_logger_level(level: Union[str, int]):
# if a standard attribute is defined and cannot be retrieved because the value is invalid, ignore it:
'ignore_get_std_attr_err': False,
'allow_deleting_standard_attributes': False,
'ignore_none': False
}

_VALIDATORS = {
@@ -55,8 +56,8 @@ def is_valid_logger_level(level: Union[str, int]):
'expose_user_prop_to_attrs': lambda x: isinstance(x, bool),
'add_provenance': lambda x: isinstance(x, bool),
'ignore_set_std_attr_err': lambda x: isinstance(x, bool),

'ignore_get_std_attr_err': lambda x: isinstance(x, bool),
'ignore_none': lambda x: isinstance(x, bool)
}
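
The new 'ignore_none' key follows the existing pattern: a default of False plus a boolean validator. A minimal usage sketch, assuming set_config() checks new values against _VALIDATORS as the naming suggests (the call itself is taken from the docstring added in h5attr.py below):

import h5rdmtoolbox as h5tbx

h5tbx.set_config(ignore_none=True)   # accepted: isinstance(True, bool) holds
h5tbx.set_config(ignore_none=False)  # back to the shipped default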


24 changes: 15 additions & 9 deletions h5rdmtoolbox/database/hdfdb/objdb.py
@@ -157,6 +157,12 @@ def _h5find(h5obj: Union[h5py.Group, h5py.Dataset], qk, qv, recursive, objfilter
-------
"""

if qk == '$basename':
qk = '$name'
assert isinstance(qv, str), 'Expected {$basename: "search value"} but value is not a string'
qv = {'$basename': qv}

found_objs = []
if qk in query.value_operator:
# user wants to compare qv to the value of the object
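
A top-level {'$basename': ...} filter is now rewritten into the equivalent nested $name query before the search runs, so the shorthand and the explicit form return the same object. A sketch mirroring the new test added below:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    ds = h5.create_dataset('T1', data=4)
    assert h5.find_one({'$name': {'$basename': 'T1'}}) == ds  # explicit form
    assert h5.find_one({'$basename': 'T1'}) == ds             # new shorthand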
@@ -428,15 +434,15 @@ def find_one(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDF
return ObjDB(obj).find_one(*args, **kwargs)

@staticmethod
def find(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDFObject:
def find(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> Generator[lazy.LHDFObject, None, None]:
"""Please refer to the docstring of the find_one method of the ObjDB class"""
return ObjDB(obj).find(*args, **kwargs)

def _instance_find_one(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> lazy.LHDFObject:
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> lazy.LHDFObject:
"""Find one object in the obj
Parameters
@@ -464,10 +470,10 @@ def _instance_find_one(self,
)

def _instance_find(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[lazy.LHDFObject, None, None]:
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[lazy.LHDFObject, None, None]:
if isinstance(self.src_obj, h5py.Dataset) and recursive:
recursive = False
results = find(self.src_obj,
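
The corrected annotations make explicit that find() yields matches lazily while find_one() returns a single lazy object. Callers should iterate or materialize the generator; a sketch assuming the dataset names used in the tests:

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox.database import ObjDB

with h5tbx.File() as h5:
    h5.create_dataset('T1', data=4)
    hits = ObjDB.find(h5, {'$basename': 'T1'})  # Generator[lazy.LHDFObject, None, None]
    first = next(hits, None)                    # consume lazily, or list(hits) to materialize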
75 changes: 44 additions & 31 deletions h5rdmtoolbox/wrapper/core.py
@@ -17,9 +17,10 @@
from h5py._hl.base import phil, with_phil
from h5py._objects import ObjectID
from pathlib import Path
from typing import List, Dict, Union, Tuple, Protocol, Optional
from typing import List, Dict, Union, Tuple, Protocol, Optional, Generator

from h5rdmtoolbox.database import ObjDB
from h5rdmtoolbox.database.lazy import LHDFObject
# noinspection PyUnresolvedReferences
from . import xr2hdf
from .ds_decoder import dataset_value_decoder
@@ -581,10 +582,16 @@ def create_string_dataset(self,
del self[name] # delete existing dataset
# else let h5py return the error

# compression = kwargs.pop('compression', get_config('hdf_compression'))
# compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))
if isinstance(data, str):
compression = None
compression_opts = None
else:
compression = kwargs.pop('compression', get_config('hdf_compression'))
compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))

make_scale = kwargs.pop('make_scale', False)
ds = super().create_dataset(name, dtype=dtype, data=data, **kwargs)
ds = super().create_dataset(name, dtype=dtype, data=data, **kwargs,
compression=compression, compression_opts=compression_opts)
if make_scale:
if isinstance(data, str):
ds.make_scale(make_scale)
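
Compression settings from the config are now applied only when the data is not a plain string; a scalar string dataset is written uncompressed, presumably because a scalar dataset cannot be chunked and therefore cannot be compressed. A sketch of the resulting behavior, with illustrative dataset names:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    h5.create_string_dataset('note', data='a single string')  # compression=None
    h5.create_string_dataset('lines', data=['a', 'b', 'c'])   # uses get_config('hdf_compression')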
@@ -871,6 +878,34 @@ def find_one(self, flt: Union[Dict, str],
"""See ObjDB.find_one()"""
return ObjDB(self).find_one(flt, objfilter, recursive, ignore_attribute_error)

def find(self, flt: Union[Dict, str],
objfilter: Union[str, h5py.Dataset, h5py.Group, None] = None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[LHDFObject, None, None]:
"""
Examples for filter parameters:
filter = {'long_name': 'any objects long name'} --> searches in attributes only
filter = {'$name': '/name'} --> searches in groups and datasets for the (path)name
filter = {'$basename': 'name'} --> searches in groups and datasets for the basename (without path)
Parameters
----------
flt: Dict
Filter request
objfilter: str | h5py.Dataset | h5py.Group | None
Filter. Default is None. Otherwise, only dataset or group types are returned.
recursive: bool, optional
Recursive search. Default is True
ignore_attribute_error: bool, optional=False
If True, the KeyError normally raised when accessing hdf5 object attributes is ignored.
Otherwise, the KeyError is raised.
Returns
-------
h5obj: h5py.Dataset or h5py.Group
"""
return ObjDB(self).find(flt, objfilter, recursive=recursive, ignore_attribute_error=ignore_attribute_error)
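
find() is thus promoted to the Group wrapper (replacing the narrower Dataset implementation removed further down), so it can be called directly on files and groups. A usage sketch based on the docstring's filter examples, with illustrative names:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    grp = h5.create_group('grp')
    grp.create_dataset('T1', data=4)
    for obj in h5.find({'$basename': 'T1'}):  # recursive by default
        print(obj.name)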

def create_dataset_from_csv(self, csv_filename: Union[str, pathlib.Path], *args, **kwargs):
"""Create datasets from a single csv file. Docstring: See File.create_datasets_from_csv()"""
return self.create_datasets_from_csv(csv_filenames=[csv_filename, ], *args, **kwargs)
@@ -1182,6 +1217,11 @@ def create_from_yaml(self, yaml_filename: Path):
from . import h5yaml
h5yaml.H5Yaml(yaml_filename).write(self)

def create_from_dict(self, dictionary: Dict):
"""Create groups and datasets based on a dictionary"""
from . import h5yaml
h5yaml.H5Dict(dictionary).write(self)
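
create_from_dict() feeds the dictionary through the same machinery as the YAML interface (see H5Dict in h5yaml.py below), so the dictionary is expected to follow the YAML layout: nested mappings for groups, and dataset entries carrying a 'data' key with remaining keys forwarded to create_dataset(). A minimal sketch under that assumption, with an illustrative key name:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    h5.create_from_dict({'T1': {'data': 4}})  # creates dataset /T1 with value 4
    assert h5['T1'][()] == 4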

def create_from_jsonld(self, data: str, context: Optional[Dict] = None):
"""Create groups/datasets from a jsonld string."""
from . import jsonld
@@ -1844,33 +1884,6 @@ def set_primary_scale(self, axis, iscale: int):
self.dims[axis].attach_scale(backup_scales[i][1])
logger.debug('new primary scale: %s', self.dims[axis][0])

def find(self, flt: Union[Dict, str],
objfilter: Union[str, h5py.Dataset, h5py.Group, None] = None,
ignore_attribute_error: bool = False) -> List:
"""
Examples for filter parameters:
filter = {'long_name': 'any objects long name'} --> searches in attributes only
filter = {'$name': '/name'} --> searches in groups and datasets for the (path)name
filter = {'$basename': 'name'} --> searches in groups and datasets for the basename (without path)
Parameters
----------
flt: Dict
Filter request
objfilter: str | h5py.Dataset | h5py.Group | None
Filter. Default is None. Otherwise, only dataset or group types are returned.
recursive: bool, optional
Recursive search. Default is True
ignore_attribute_error: bool, optional=False
If True, the KeyError normally raised when accessing hdf5 object attributess is ignored.
Otherwise, the KeyError is raised.
Returns
-------
h5obj: h5py.Dataset or h5py.Group
"""
return ObjDB(self).find(flt, objfilter, ignore_attribute_error)


class File(h5py.File, Group, SpecialAttributeWriter, Core):
"""Main wrapper around h5py.File.
18 changes: 12 additions & 6 deletions h5rdmtoolbox/wrapper/h5attr.py
@@ -1,3 +1,4 @@
"""Attribute module"""
import ast
import h5py
import json
@@ -6,8 +7,9 @@
import pint
import rdflib
import warnings
from h5py._hl.attrs import AttributeManager
from h5py._hl.base import with_phil
from h5py._objects import ObjectID
from h5py._objects import ObjectID, phil
from typing import Dict, Union, Tuple

from .h5utils import get_rootparent
@@ -57,7 +59,7 @@ def to_pint(self) -> "pint.util.Quantity":
return get_ureg()(self)


class WrapperAttributeManager(h5py.AttributeManager):
class WrapperAttributeManager(AttributeManager):
"""
Subclass of h5py's Attribute Manager.
Allows storing dictionaries as json strings and to store a dataset or a group as an
@@ -148,6 +150,9 @@ def create(self,
"""
Create a new attribute.
.. note:: Via the config setting "ignore_none" (`h5tbx.set_config(ignore_none=True)`), attribute values that are None are not written.
Parameters
----------
name: str
Expand All @@ -163,6 +168,9 @@ def create(self,
rdf_object: Union[str, rdflib.URIRef], optional
IRI of the object
"""
if data is None and get_config('ignore_none'):
logger.debug(f'Attribute "{name}" is None and "ignore_none" in config is True. Attribute is not created.')
return
r = super().create(name,
utils.parse_object_for_attribute_setting(data),
shape, dtype)
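
With ignore_none enabled, create() returns before the value is parsed, so assigning None becomes a silent no-op (logged at debug level) rather than an error. A sketch, with an illustrative attribute name:

import h5rdmtoolbox as h5tbx

h5tbx.set_config(ignore_none=True)
with h5tbx.File() as h5:
    h5.attrs['comment'] = None        # skipped: no attribute is created
    assert 'comment' not in h5.attrs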
@@ -334,9 +342,7 @@ def sdump(self, show_private=True) -> None:
print(f'{k:{keylen}}: {v}')

@property
def raw(self) -> "h5py.AttributeManager":
def raw(self) -> AttributeManager:
"""Return the original h5py attribute object manager"""
from h5py._hl import attrs
from h5py._objects import phil
with phil:
return attrs.AttributeManager(self._parent)
return AttributeManager(self._parent)
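
The property now reuses the module-level AttributeManager import instead of re-importing inside the method; the behavior is unchanged. As the class docstring above suggests, raw presumably exists to bypass the wrapper's extra handling (e.g. json-encoding of dict values); an illustrative sketch:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    plain = h5.attrs.raw      # stock h5py AttributeManager for the root group
    plain['title'] = 'plain'  # 'title' is an illustrative name; no wrapper logic involved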
74 changes: 49 additions & 25 deletions h5rdmtoolbox/wrapper/h5yaml.py
@@ -1,28 +1,16 @@
import pathlib
from typing import Dict

import h5py
import pathlib
import yaml
from typing import Dict
from typing import Protocol


class H5Yaml:
"""Interface class to yaml files which allow to create HDF5
objects from a yaml file definition"""

def __init__(self, filename):
self.filename = pathlib.Path(filename)
if not self.filename.exists():
raise FileNotFoundError(f'File not found: {self.filename}')
if not self.filename.is_file():
raise FileExistsError(f'Not a file: {self.filename}')
self._data = None
class _H5DictDataInterface(Protocol):

@property
def data(self) -> Dict:
if self._data is None:
with open(self.filename, 'r') as f:
self._data = yaml.safe_load(f)
return self._data
"""Return data"""
...

def write(self, h5: h5py.Group):
data = self.data
@@ -38,16 +26,21 @@ def write(self, h5: h5py.Group):
v['name'] = k
# units = v.pop('units', None)
# standard_name = v.pop('standard_name', None)
print(v)
# TODO remove the following hotfix
name = v.pop('name')
data = v.pop('data')
if isinstance(data, str):
ds = h5.create_string_dataset(name, data=data)
else:
ds = h5.create_dataset(name=name, data=data)
for ak, av in v.items():
ds.attrs[ak] = av
try:
h5.create_dataset(name, data=data, **v)
except (TypeError,) as e:
raise RuntimeError('Could not create dataset. Please check the yaml file. The orig. '
f'error is "{e}"')
# if isinstance(data, str):
# ds = h5.create_string_dataset(name, data=data,
# **v)
# else:
# ds = h5.create_dataset(name=name, data=data)
# for ak, av in v.items():
# ds.attrs[ak] = av
# if units:
# ds.attrs['units'] = units
# if standard_name:
Expand Down Expand Up @@ -91,3 +84,34 @@ def is_group(item) -> bool:
break
return not H5Yaml.is_dataset(item)
return False


class H5Dict(_H5DictDataInterface):

def __init__(self, data):
self._data = data

@property
def data(self) -> Dict:
return self._data


class H5Yaml(_H5DictDataInterface):
"""Interface class to yaml files which allow to create HDF5
objects from a yaml file definition"""

def __init__(self, filename):
self.filename = pathlib.Path(filename)
if not self.filename.exists():
raise FileNotFoundError(f'File not found: {self.filename}')
if not self.filename.is_file():
raise FileExistsError(f'Not a file: {self.filename}')
self._data = None

@property
def data(self) -> Dict:
"""Return data"""
if self._data is None:
with open(self.filename, 'r') as f:
self._data = yaml.safe_load(f)
return self._data
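
The two classes now differ only in where self.data comes from; the shared write() logic lives on the protocol base class (subclassing typing.Protocol like this turns it into an ordinary base providing the implementation). A sketch of the two entry points; 'layout.yaml' is a hypothetical file:

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox.wrapper.h5yaml import H5Dict, H5Yaml

with h5tbx.File() as h5:
    H5Dict({'T1': {'data': 4}}).write(h5)  # from an in-memory dictionary
    # H5Yaml('layout.yaml').write(h5)      # same, from a yaml definition file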
9 changes: 8 additions & 1 deletion tests/database/test_hdfDB.py
@@ -7,6 +7,7 @@
import h5rdmtoolbox as h5tbx
from h5rdmtoolbox import database
from h5rdmtoolbox.database import hdfdb
from h5rdmtoolbox.database.hdfdb.query import _basename


class TestHDFDB(unittest.TestCase):
@@ -297,7 +298,6 @@ def test_gt(self):
self.assertFalse(_gt(1, 2))

def test_basename(self):
from h5rdmtoolbox.database.hdfdb.query import _basename
self.assertFalse(_basename(None, 'b'))
self.assertFalse(_basename('a', None))
self.assertTrue(_basename('/a', 'a'))
Expand All @@ -308,6 +308,13 @@ def test_basename(self):
self.assertFalse(_basename('/a/b/c', 'a'))
self.assertFalse(_basename('/a/b/c', '/a/b/c'))

with h5tbx.File() as h5:
ds = h5.create_dataset('T1', data=4)
res = h5.find_one({'$name': {'$basename': 'T1'}})
self.assertEqual(res, ds)
res = h5.find_one({'$basename': 'T1'})
self.assertEqual(res, ds)

def test_get_ndim(self):
from h5rdmtoolbox.database.hdfdb.query import get_ndim
self.assertEqual(0, get_ndim(5))