Skip to content

Commit

Permalink
add compression args for string dataset
Browse files (browse the repository at this point in the history)
minor fixes along the way
  • Loading branch information
matthiasprobst committed Apr 8, 2024
1 parent 245bdeb commit 477b809
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 15 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ ubuntu-latest, ]
python-version: [ '3.8', ]
os: [ ubuntu-latest, macos-latest, windows-latest ]
python-version: [ '3.8', '3.12']
mongodb-version: ['5.0', ]

steps:
Expand Down
6 changes: 5 additions & 1 deletion h5rdmtoolbox/_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from ontolutils import M4I, Thing
from . import get_config, identifiers, protected_attributes
from .convention.rdf import RDF_SUBJECT_ATTR_NAME, RDF_PREDICATE_ATTR_NAME
from .convention.rdf import RDF_SUBJECT_ATTR_NAME, RDF_PREDICATE_ATTR_NAME, RDF_OBJECT_ATTR_NAME

H5PY_SPECIAL_ATTRIBUTES = ('DIMENSION_LIST', 'REFERENCE_LIST', 'NAME', 'CLASS', protected_attributes.COORDINATES)
try:
Expand Down Expand Up @@ -288,6 +288,10 @@ def __attrs__(self, name, h5obj) -> str:
else:
use_attr_name = name

obj_iri = h5obj.rdf[name].get(RDF_OBJECT_ATTR_NAME, None)
if obj_iri:
attr_value = f'{attr_value} ({obj_iri})'

if isinstance(attr_value, h5py.Group):
attr_value = f'grp:{attr_value.name}'
elif isinstance(attr_value, h5py.Dataset):
Expand Down
2 changes: 0 additions & 2 deletions h5rdmtoolbox/database/hdfdb/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ def _exists(value, tf: bool) -> bool:
'$userdefined': _userdefined}
value_operator = {'$eq': _arreq, '$gt': _gt, '$gte': _gte, '$lt': _lt, '$lte': _lte}

AV_SPECIAL_FILTERS = ('$basename', '$name')


def _pass(obj, comparison_value):
if get_ndim(comparison_value) == obj.ndim:
Expand Down
21 changes: 15 additions & 6 deletions h5rdmtoolbox/layout/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ def _parse_n_def(n: int) -> Tuple[Union[int, None], Callable]:

assert len(n) == 1, 'n must be a dictionary with exactly one key'
for k, v in n.items():
number_of_result_comparison = query.operator.get(k)
try:
number_of_result_comparison = query.operator[k]
except KeyError:
raise KeyError(f'Unexpected operator. Valid ones are: {list(query.operator.keys())}')
assert isinstance(v, int), 'n must be an integer'
n = v
return n, number_of_result_comparison
Expand Down Expand Up @@ -503,21 +506,27 @@ def is_valid(self) -> bool:
"""Return True if the layout is valid, which is the case if no specs failed"""
return len(self.get_failed()) == 0

def get_summary(self, exclude_keys: Optional[List] = None) -> Dict:
"""return a summary as dictionary"""
def get_summary(self, exclude_keys: Optional[List] = None,
failed_only: bool = False) -> List[Dict]:
"""return a list of dictionaries containing information about a specification call"""
data = []
for spec in self.specifications:
data.extend(spec.get_summary(exclude_keys=exclude_keys))
s = spec.get_summary(exclude_keys=exclude_keys)
if failed_only:
data.extend([d for d in s if d['flag'] & 2 == 2])
else:
data.extend(s)
return data

def print_summary(self, exclude_keys: Optional[List[str]] = None):
def print_summary(self, exclude_keys: Optional[List[str]] = None,
failed_only: bool = False):
"""Prints a summary of the specification. Requires the tabulate package."""
try:
from tabulate import tabulate
except ImportError:
raise ImportError('Please install tabulate to use this method')
print('\nSummary of layout validation')
print(tabulate(self.get_summary(exclude_keys), headers='keys', tablefmt='psql'))
print(tabulate(self.get_summary(exclude_keys, failed_only), headers='keys', tablefmt='psql'))
if self.is_valid():
print('--> Layout is valid')
else:
Expand Down
7 changes: 6 additions & 1 deletion h5rdmtoolbox/wrapper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,12 @@ def create_string_dataset(self,
if overwrite is True:
del self[name] # delete existing dataset
# else let h5py return the error
ds = super().create_dataset(name, dtype=dtype, data=data)

compression = kwargs.pop('compression', get_config('hdf_compression'))
compression_opts = kwargs.pop('compression_opts', get_config('hdf_compression_opts'))
ds = super().create_dataset(name, dtype=dtype, data=data,
compression=compression,
compression_opts=compression_opts, **kwargs)

for ak, av in attrs.items():
ds.attrs[ak] = av
Expand Down
10 changes: 7 additions & 3 deletions tests/wrapper/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,13 @@ def test_time(self):
(datetime.now() + timedelta(hours=1))]
tdata_np = np.asarray(tdata, dtype=np.datetime64)
with h5tbx.File() as h5:
h5.create_string_dataset('time', data=[t.isoformat() for t in tdata],
attrs={'ISTIMEDS': 1,
'TIMEFORMAT': 'ISO'})
with h5tbx.set_config(hdf_compression='gzip', hdf_compression_opts=5):
h5.create_string_dataset('time', data=[t.isoformat() for t in tdata],
attrs={'ISTIMEDS': 1,
'TIMEFORMAT': 'ISO'})
self.assertEqual(h5['time'].compression, 'gzip')
self.assertEqual(h5['time'].compression_opts, 5)

tds = h5['time'][()]

h5.create_time_dataset('time2', data=tdata)
Expand Down

0 comments on commit 477b809

Please sign in to comment.