Add compression args (`compression`, `compression_opts`) for string datasets

minor fixes along the way
matthiasprobst · Apr 8, 2024 · 477b809
1 parent 245bdeb
commit 477b809
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 15 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -13,8 +13,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ ubuntu-latest, ]
-        python-version: [ '3.8', ]
+        os: [ ubuntu-latest, macos-latest, windows-latest ]
+        python-version: [ '3.8', '3.12']
         mongodb-version: ['5.0', ]
 
     steps:

diff --git a/h5rdmtoolbox/_repr.py b/h5rdmtoolbox/_repr.py
@@ -13,7 +13,7 @@
 
 from ontolutils import M4I, Thing
 from . import get_config, identifiers, protected_attributes
-from .convention.rdf import RDF_SUBJECT_ATTR_NAME, RDF_PREDICATE_ATTR_NAME
+from .convention.rdf import RDF_SUBJECT_ATTR_NAME, RDF_PREDICATE_ATTR_NAME, RDF_OBJECT_ATTR_NAME
 
 H5PY_SPECIAL_ATTRIBUTES = ('DIMENSION_LIST', 'REFERENCE_LIST', 'NAME', 'CLASS', protected_attributes.COORDINATES)
 try:
@@ -288,6 +288,10 @@ def __attrs__(self, name, h5obj) -> str:
             else:
                 use_attr_name = name
 
+            obj_iri = h5obj.rdf[name].get(RDF_OBJECT_ATTR_NAME, None)
+            if obj_iri:
+                attr_value = f'{attr_value} ({obj_iri})'
+
         if isinstance(attr_value, h5py.Group):
             attr_value = f'grp:{attr_value.name}'
         elif isinstance(attr_value, h5py.Dataset):

diff --git a/h5rdmtoolbox/database/hdfdb/query.py b/h5rdmtoolbox/database/hdfdb/query.py
@@ -97,8 +97,6 @@ def _exists(value, tf: bool) -> bool:
             '$userdefined': _userdefined}
 value_operator = {'$eq': _arreq, '$gt': _gt, '$gte': _gte, '$lt': _lt, '$lte': _lte}
 
-AV_SPECIAL_FILTERS = ('$basename', '$name')
-
 
 def _pass(obj, comparison_value):
     if get_ndim(comparison_value) == obj.ndim:

diff --git a/h5rdmtoolbox/layout/core.py b/h5rdmtoolbox/layout/core.py
@@ -202,7 +202,10 @@ def _parse_n_def(n: int) -> Tuple[Union[int, None], Callable]:
 
             assert len(n) == 1, 'n must be a dictionary with exactly one key'
             for k, v in n.items():
-                number_of_result_comparison = query.operator.get(k)
+                try:
+                    number_of_result_comparison = query.operator[k]
+                except KeyError:
+                    raise KeyError(f'Unexpected operator. Valid ones are: {list(query.operator.keys())}')
                 assert isinstance(v, int), 'n must be an integer'
                 n = v
         return n, number_of_result_comparison
@@ -503,21 +506,27 @@ def is_valid(self) -> bool:
         """Return True if the layout is valid, which is the case if no specs failed"""
         return len(self.get_failed()) == 0
 
-    def get_summary(self, exclude_keys: Optional[List] = None) -> Dict:
-        """return a summary as dictionary"""
+    def get_summary(self, exclude_keys: Optional[List] = None,
+                    failed_only: bool = False) -> List[Dict]:
+        """return a list of dictionaries containing information about a specification call"""
         data = []
         for spec in self.specifications:
-            data.extend(spec.get_summary(exclude_keys=exclude_keys))
+            s = spec.get_summary(exclude_keys=exclude_keys)
+            if failed_only:
+                data.extend([d for d in s if d['flag'] & 2 == 2])
+            else:
+                data.extend(s)
         return data
 
-    def print_summary(self, exclude_keys: Optional[List[str]] = None):
+    def print_summary(self, exclude_keys: Optional[List[str]] = None,
+                      failed_only: bool = False):
         """Prints a summary of the specification. Requires the tabulate package."""
         try:
             from tabulate import tabulate
         except ImportError:
             raise ImportError('Please install tabulate to use this method')
         print('\nSummary of layout validation')
-        print(tabulate(self.get_summary(exclude_keys), headers='keys', tablefmt='psql'))
+        print(tabulate(self.get_summary(exclude_keys, failed_only), headers='keys', tablefmt='psql'))
         if self.is_valid():
             print('--> Layout is valid')
         else:

diff --git a/h5rdmtoolbox/wrapper/core.py b/h5rdmtoolbox/wrapper/core.py
@@ -580,7 +580,12 @@ def create_string_dataset(self,
             if overwrite is True:
                 del self[name]  # delete existing dataset
             # else let h5py return the error
-        ds = super().create_dataset(name, dtype=dtype, data=data)
+
+        compression = kwargs.pop('compression', get_config('hdf_compression'))
+        compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))
+        ds = super().create_dataset(name, dtype=dtype, data=data,
+                                    compression=compression,
+                                    compression_opts=compression_opts, **kwargs)
 
         for ak, av in attrs.items():
             ds.attrs[ak] = av

diff --git a/tests/wrapper/test_core.py b/tests/wrapper/test_core.py
@@ -702,9 +702,13 @@ def test_time(self):
                  (datetime.now() + timedelta(hours=1))]
         tdata_np = np.asarray(tdata, dtype=np.datetime64)
         with h5tbx.File() as h5:
-            h5.create_string_dataset('time', data=[t.isoformat() for t in tdata],
-                                     attrs={'ISTIMEDS': 1,
-                                            'TIMEFORMAT': 'ISO'})
+            with h5tbx.set_config(hdf_compression='gzip', hdf_compression_opts=5):
+                h5.create_string_dataset('time', data=[t.isoformat() for t in tdata],
+                                         attrs={'ISTIMEDS': 1,
+                                                'TIMEFORMAT': 'ISO'})
+                self.assertEqual(h5['time'].compression, 'gzip')
+                self.assertEqual(h5['time'].compression_opts, 5)
+
             tds = h5['time'][()]
 
             h5.create_time_dataset('time2', data=tdata)