None-values for attrs can be ignored via settings
matthiasprobst committed Apr 12, 2024
1 parent 0aa5aaa commit 4698b33
Showing 8 changed files with 148 additions and 79 deletions.
3 changes: 2 additions & 1 deletion h5rdmtoolbox/_cfg.py
@@ -37,6 +37,7 @@ def is_valid_logger_level(level: Union[str, int]):
# if a standard attribute is defined and cannot be retrieved because the value is invalid, ignore it:
'ignore_get_std_attr_err': False,
'allow_deleting_standard_attributes': False,
'ignore_none': False
}

_VALIDATORS = {
@@ -55,8 +56,8 @@ def is_valid_logger_level(level: Union[str, int]):
'expose_user_prop_to_attrs': lambda x: isinstance(x, bool),
'add_provenance': lambda x: isinstance(x, bool),
'ignore_set_std_attr_err': lambda x: isinstance(x, bool),

'ignore_get_std_attr_err': lambda x: isinstance(x, bool),
'ignore_none': lambda x: isinstance(x, bool)
}
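
The new 'ignore_none' key follows the existing pattern: a default of False plus a boolean validator. A minimal usage sketch, assuming set_config() checks new values against _VALIDATORS as the naming suggests (the call itself is taken from the docstring added in h5attr.py below):

import h5rdmtoolbox as h5tbx

h5tbx.set_config(ignore_none=True)   # accepted: isinstance(True, bool) holds
h5tbx.set_config(ignore_none=False)  # back to the shipped default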


24 changes: 15 additions & 9 deletions h5rdmtoolbox/database/hdfdb/objdb.py
@@ -157,6 +157,12 @@ def _h5find(h5obj: Union[h5py.Group, h5py.Dataset], qk, qv, recursive, objfilter
-------
"""

if qk == '$basename':
qk = '$name'
assert isinstance(qv, str), 'Expected {$basename: "search value"} but value is not a string'
qv = {'$basename': qv}

found_objs = []
if qk in query.value_operator:
# user wants to compare qv to the value of the object
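
A top-level {'$basename': ...} filter is now rewritten into the equivalent nested $name query before the search runs, so the shorthand and the explicit form return the same object. A sketch mirroring the new test added below:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    ds = h5.create_dataset('T1', data=4)
    assert h5.find_one({'$name': {'$basename': 'T1'}}) == ds  # explicit form
    assert h5.find_one({'$basename': 'T1'}) == ds             # new shorthand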
@@ -428,15 +434,15 @@ def find_one(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDF
return ObjDB(obj).find_one(*args, **kwargs)

@staticmethod
def find(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> lazy.LHDFObject:
def find(obj: Union[h5py.Dataset, h5py.Group], *args, **kwargs) -> Generator[lazy.LHDFObject, None, None]:
"""Please refer to the docstring of the find_one method of the ObjDB class"""
return ObjDB(obj).find(*args, **kwargs)

def _instance_find_one(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> lazy.LHDFObject:
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> lazy.LHDFObject:
"""Find one object in the obj
Parameters
@@ -464,10 +470,10 @@ def _instance_find_one(self,
)

def _instance_find(self,
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[lazy.LHDFObject, None, None]:
flt: Union[Dict, str],
objfilter=None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[lazy.LHDFObject, None, None]:
if isinstance(self.src_obj, h5py.Dataset) and recursive:
recursive = False
results = find(self.src_obj,
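
The corrected annotations make explicit that find() yields matches lazily while find_one() returns a single lazy object. Callers should iterate or materialize the generator; a sketch assuming the dataset names used in the tests:

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox.database import ObjDB

with h5tbx.File() as h5:
    h5.create_dataset('T1', data=4)
    hits = ObjDB.find(h5, {'$basename': 'T1'})  # Generator[lazy.LHDFObject, None, None]
    first = next(hits, None)                    # consume lazily, or list(hits) to materialize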
75 changes: 44 additions & 31 deletions h5rdmtoolbox/wrapper/core.py
@@ -17,9 +17,10 @@
from h5py._hl.base import phil, with_phil
from h5py._objects import ObjectID
from pathlib import Path
from typing import List, Dict, Union, Tuple, Protocol, Optional
from typing import List, Dict, Union, Tuple, Protocol, Optional, Generator

from h5rdmtoolbox.database import ObjDB
from h5rdmtoolbox.database.lazy import LHDFObject
# noinspection PyUnresolvedReferences
from . import xr2hdf
from .ds_decoder import dataset_value_decoder
@@ -581,10 +582,16 @@ def create_string_dataset(self,
del self[name] # delete existing dataset
# else let h5py return the error

# compression = kwargs.pop('compression', get_config('hdf_compression'))
# compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))
if isinstance(data, str):
compression = None
compression_opts = None
else:
compression = kwargs.pop('compression', get_config('hdf_compression'))
compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))

make_scale = kwargs.pop('make_scale', False)
ds = super().create_dataset(name, dtype=dtype, data=data, **kwargs)
ds = super().create_dataset(name, dtype=dtype, data=data, **kwargs,
compression=compression, compression_opts=compression_opts)
if make_scale:
if isinstance(data, str):
ds.make_scale(make_scale)
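
Compression settings from the config are now applied only when the data is not a plain string; a scalar string dataset is written uncompressed, presumably because a scalar dataset cannot be chunked and therefore cannot be compressed. A sketch of the resulting behavior, with illustrative dataset names:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    h5.create_string_dataset('note', data='a single string')  # compression=None
    h5.create_string_dataset('lines', data=['a', 'b', 'c'])   # uses get_config('hdf_compression')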
@@ -871,6 +878,34 @@ def find_one(self, flt: Union[Dict, str],
"""See ObjDB.find_one()"""
return ObjDB(self).find_one(flt, objfilter, recursive, ignore_attribute_error)

def find(self, flt: Union[Dict, str],
objfilter: Union[str, h5py.Dataset, h5py.Group, None] = None,
recursive: bool = True,
ignore_attribute_error: bool = False) -> Generator[LHDFObject, None, None]:
"""
Examples for filter parameters:
filter = {'long_name': 'any objects long name'} --> searches in attributes only
filter = {'$name': '/name'} --> searches in groups and datasets for the (path)name
filter = {'$basename': 'name'} --> searches in groups and datasets for the basename (without path)
Parameters
----------
flt: Dict
Filter request
objfilter: str | h5py.Dataset | h5py.Group | None
Filter. Default is None. Otherwise, only dataset or group types are returned.
recursive: bool, optional
Recursive search. Default is True
ignore_attribute_error: bool, optional=False
If True, the KeyError normally raised when accessing hdf5 object attributes is ignored.
Otherwise, the KeyError is raised.
Returns
-------
h5obj: h5py.Dataset or h5py.Group
"""
return ObjDB(self).find(flt, objfilter, recursive=recursive, ignore_attribute_error=ignore_attribute_error)
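
find() is thus promoted to the Group wrapper (replacing the narrower Dataset implementation removed further down), so it can be called directly on files and groups. A usage sketch based on the docstring's filter examples, with illustrative names:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    grp = h5.create_group('grp')
    grp.create_dataset('T1', data=4)
    for obj in h5.find({'$basename': 'T1'}):  # recursive by default
        print(obj.name)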

def create_dataset_from_csv(self, csv_filename: Union[str, pathlib.Path], *args, **kwargs):
"""Create datasets from a single csv file. Docstring: See File.create_datasets_from_csv()"""
return self.create_datasets_from_csv(csv_filenames=[csv_filename, ], *args, **kwargs)
@@ -1182,6 +1217,11 @@ def create_from_yaml(self, yaml_filename: Path):
from . import h5yaml
h5yaml.H5Yaml(yaml_filename).write(self)

def create_from_dict(self, dictionary: Dict):
"""Create groups and datasets based on a dictionary"""
from . import h5yaml
h5yaml.H5Dict(dictionary).write(self)
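
create_from_dict() feeds the dictionary through the same machinery as the YAML interface (see H5Dict in h5yaml.py below), so the dictionary is expected to follow the YAML layout: nested mappings for groups, and dataset entries carrying a 'data' key with remaining keys forwarded to create_dataset(). A minimal sketch under that assumption, with an illustrative key name:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    h5.create_from_dict({'T1': {'data': 4}})  # creates dataset /T1 with value 4
    assert h5['T1'][()] == 4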

def create_from_jsonld(self, data: str, context: Optional[Dict] = None):
"""Create groups/datasets from a jsonld string."""
from . import jsonld
@@ -1844,33 +1884,6 @@ def set_primary_scale(self, axis, iscale: int):
self.dims[axis].attach_scale(backup_scales[i][1])
logger.debug('new primary scale: %s', self.dims[axis][0])

def find(self, flt: Union[Dict, str],
objfilter: Union[str, h5py.Dataset, h5py.Group, None] = None,
ignore_attribute_error: bool = False) -> List:
"""
Examples for filter parameters:
filter = {'long_name': 'any objects long name'} --> searches in attributes only
filter = {'$name': '/name'} --> searches in groups and datasets for the (path)name
filter = {'$basename': 'name'} --> searches in groups and datasets for the basename (without path)
Parameters
----------
flt: Dict
Filter request
objfilter: str | h5py.Dataset | h5py.Group | None
Filter. Default is None. Otherwise, only dataset or group types are returned.
recursive: bool, optional
Recursive search. Default is True
ignore_attribute_error: bool, optional=False
If True, the KeyError normally raised when accessing hdf5 object attributess is ignored.
Otherwise, the KeyError is raised.
Returns
-------
h5obj: h5py.Dataset or h5py.Group
"""
return ObjDB(self).find(flt, objfilter, ignore_attribute_error)


class File(h5py.File, Group, SpecialAttributeWriter, Core):
"""Main wrapper around h5py.File.
18 changes: 12 additions & 6 deletions h5rdmtoolbox/wrapper/h5attr.py
@@ -1,3 +1,4 @@
"""Attribute module"""
import ast
import h5py
import json
@@ -6,8 +7,9 @@
import pint
import rdflib
import warnings
from h5py._hl.attrs import AttributeManager
from h5py._hl.base import with_phil
from h5py._objects import ObjectID
from h5py._objects import ObjectID, phil
from typing import Dict, Union, Tuple

from .h5utils import get_rootparent
@@ -57,7 +59,7 @@ def to_pint(self) -> "pint.util.Quantity":
return get_ureg()(self)


class WrapperAttributeManager(h5py.AttributeManager):
class WrapperAttributeManager(AttributeManager):
"""
Subclass of h5py's Attribute Manager.
Allows storing dictionaries as json strings and to store a dataset or a group as an
@@ -148,6 +150,9 @@ def create(self,
"""
Create a new attribute.
.. note:: Via the config setting "ignore_none" (`h5tbx.set_config(ignore_none=True)`), attribute values that are None are not written.
Parameters
----------
name: str
Expand All @@ -163,6 +168,9 @@ def create(self,
rdf_object: Union[str, rdflib.URIRef], optional
IRI of the object
"""
if data is None and get_config('ignore_none'):
logger.debug(f'Attribute "{name}" is None and "ignore_none" in config is True. Attribute is not created.')
return
r = super().create(name,
utils.parse_object_for_attribute_setting(data),
shape, dtype)
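
With ignore_none enabled, create() returns before the value is parsed, so assigning None becomes a silent no-op (logged at debug level) rather than an error. A sketch, with an illustrative attribute name:

import h5rdmtoolbox as h5tbx

h5tbx.set_config(ignore_none=True)
with h5tbx.File() as h5:
    h5.attrs['comment'] = None        # skipped: no attribute is created
    assert 'comment' not in h5.attrs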
@@ -334,9 +342,7 @@ def sdump(self, show_private=True) -> None:
print(f'{k:{keylen}}: {v}')

@property
def raw(self) -> "h5py.AttributeManager":
def raw(self) -> AttributeManager:
"""Return the original h5py attribute object manager"""
from h5py._hl import attrs
from h5py._objects import phil
with phil:
return attrs.AttributeManager(self._parent)
return AttributeManager(self._parent)
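
The property now reuses the module-level AttributeManager import instead of re-importing inside the method; the behavior is unchanged. As the class docstring above suggests, raw presumably exists to bypass the wrapper's extra handling (e.g. json-encoding of dict values); an illustrative sketch:

import h5rdmtoolbox as h5tbx

with h5tbx.File() as h5:
    plain = h5.attrs.raw      # stock h5py AttributeManager for the root group
    plain['title'] = 'plain'  # 'title' is an illustrative name; no wrapper logic involved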
74 changes: 49 additions & 25 deletions h5rdmtoolbox/wrapper/h5yaml.py
@@ -1,28 +1,16 @@
import pathlib
from typing import Dict

import h5py
import pathlib
import yaml
from typing import Dict
from typing import Protocol


class H5Yaml:
"""Interface class to yaml files which allow to create HDF5
objects from a yaml file definition"""

def __init__(self, filename):
self.filename = pathlib.Path(filename)
if not self.filename.exists():
raise FileNotFoundError(f'File not found: {self.filename}')
if not self.filename.is_file():
raise FileExistsError(f'Not a file: {self.filename}')
self._data = None
class _H5DictDataInterface(Protocol):

@property
def data(self) -> Dict:
if self._data is None:
with open(self.filename, 'r') as f:
self._data = yaml.safe_load(f)
return self._data
"""Return data"""
...

def write(self, h5: h5py.Group):
data = self.data
@@ -38,16 +26,21 @@ def write(self, h5: h5py.Group):
v['name'] = k
# units = v.pop('units', None)
# standard_name = v.pop('standard_name', None)
print(v)
# TODO remove the following hotfix
name = v.pop('name')
data = v.pop('data')
if isinstance(data, str):
ds = h5.create_string_dataset(name, data=data)
else:
ds = h5.create_dataset(name=name, data=data)
for ak, av in v.items():
ds.attrs[ak] = av
try:
h5.create_dataset(name, data=data, **v)
except (TypeError,) as e:
raise RuntimeError('Could not create dataset. Please check the yaml file. The orig. '
f'error is "{e}"')
# if isinstance(data, str):
# ds = h5.create_string_dataset(name, data=data,
# **v)
# else:
# ds = h5.create_dataset(name=name, data=data)
# for ak, av in v.items():
# ds.attrs[ak] = av
# if units:
# ds.attrs['units'] = units
# if standard_name:
Expand Down Expand Up @@ -91,3 +84,34 @@ def is_group(item) -> bool:
break
return not H5Yaml.is_dataset(item)
return False


class H5Dict(_H5DictDataInterface):

def __init__(self, data):
self._data = data

@property
def data(self) -> Dict:
return self._data


class H5Yaml(_H5DictDataInterface):
"""Interface class to yaml files which allow to create HDF5
objects from a yaml file definition"""

def __init__(self, filename):
self.filename = pathlib.Path(filename)
if not self.filename.exists():
raise FileNotFoundError(f'File not found: {self.filename}')
if not self.filename.is_file():
raise FileExistsError(f'Not a file: {self.filename}')
self._data = None

@property
def data(self) -> Dict:
"""Return data"""
if self._data is None:
with open(self.filename, 'r') as f:
self._data = yaml.safe_load(f)
return self._data
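
The two classes now differ only in where self.data comes from; the shared write() logic lives on the protocol base class (subclassing typing.Protocol like this turns it into an ordinary base providing the implementation). A sketch of the two entry points; 'layout.yaml' is a hypothetical file:

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox.wrapper.h5yaml import H5Dict, H5Yaml

with h5tbx.File() as h5:
    H5Dict({'T1': {'data': 4}}).write(h5)  # from an in-memory dictionary
    # H5Yaml('layout.yaml').write(h5)      # same, from a yaml definition file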
9 changes: 8 additions & 1 deletion tests/database/test_hdfDB.py
@@ -7,6 +7,7 @@
import h5rdmtoolbox as h5tbx
from h5rdmtoolbox import database
from h5rdmtoolbox.database import hdfdb
from h5rdmtoolbox.database.hdfdb.query import _basename


class TestHDFDB(unittest.TestCase):
@@ -297,7 +298,6 @@ def test_gt(self):
self.assertFalse(_gt(1, 2))

def test_basename(self):
from h5rdmtoolbox.database.hdfdb.query import _basename
self.assertFalse(_basename(None, 'b'))
self.assertFalse(_basename('a', None))
self.assertTrue(_basename('/a', 'a'))
Expand All @@ -308,6 +308,13 @@ def test_basename(self):
self.assertFalse(_basename('/a/b/c', 'a'))
self.assertFalse(_basename('/a/b/c', '/a/b/c'))

with h5tbx.File() as h5:
ds = h5.create_dataset('T1', data=4)
res = h5.find_one({'$name': {'$basename': 'T1'}})
self.assertEqual(res, ds)
res = h5.find_one({'$basename': 'T1'})
self.assertEqual(res, ds)

def test_get_ndim(self):
from h5rdmtoolbox.database.hdfdb.query import get_ndim
self.assertEqual(0, get_ndim(5))