Skip to content

Commit

Permalink
bugfixes working with binary attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Dec 15, 2023
1 parent d2f628a commit 2d997f9
Show file tree
Hide file tree
Showing 12 changed files with 155 additions and 341 deletions.
20 changes: 11 additions & 9 deletions docs/conventions/layouts.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,14 @@
"metadata": {},
"source": [
"### 2. Root Group\n",
"#### A Root attributes"
"#### A Root attributes\n",
"Let's define, that there must be the attribute `__version__` with a specific value.<br>\n",
"Also, there must be `title` and `user` but their values are not specified:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 26,
"id": "87c4cff7-a27c-4334-bc3d-bfafd92c7d89",
"metadata": {
"tags": []
Expand All @@ -162,7 +164,7 @@
"AttributeValidation(Equal('title')=Any(opt=True), Equal('user')=Any(opt=True) in GroupValidation(ExistIn('/'))>)"
]
},
"execution_count": 5,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -253,7 +255,7 @@
"text": [
"[\"/\"].dataset(name=\"Equal(Validator Equal has not yet been called or no message has been set!)\", compression=Equal(Validator Equal has not yet been called or no message has been set!))\n",
"[\"/...\"].attr([\"Equal('comment')=Regex('^[^ 0-9].*')\"])\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('__version__')=Equal('0.9.0a0')\"])\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('__version__')=Equal('1.0.0')\"])\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('title')=Any(opt=True)\", \"Equal('user')=Any(opt=True)\"])\n",
"GroupValidation(ExistIn('devices'))>\n"
]
Expand Down Expand Up @@ -601,12 +603,12 @@
"\n",
" <ul style=\"list-style-type: none;\" class=\"h5grp-sections\">\n",
" <li>\n",
" <input id=\"group-ds--5436088100\" type=\"checkbox\" checked>\n",
" <label style=\"font-weight: bold\" for=\"group-ds--5436088100\">\n",
" <input id=\"group-ds--4852915800\" type=\"checkbox\" checked>\n",
" <label style=\"font-weight: bold\" for=\"group-ds--4852915800\">\n",
" /<span>(0)</span></label>\n",
" \n",
"\n",
" <ul class=\"h5tb-attr-list\"><li style=\"list-style-type: none; font-style: italic\">__h5rdmtoolbox_version__ : 0.9.0a0</li>\n",
" <ul class=\"h5tb-attr-list\"><li style=\"list-style-type: none; font-style: italic\">__h5rdmtoolbox_version__ : 1.0.0</li>\n",
" </ul>\n",
"</li>\n",
"</ul>\n",
Expand Down Expand Up @@ -726,7 +728,7 @@
"[ValidationResult(\u001b[91mds validation \"Equal(Validator Equal has not yet been called or no message has been set!)\" failed for /\u001b[0m),\n",
" ValidationResult(\u001b[91m\"/\" is missing required attributes: [\"Equal('comment')=Regex('^[^ 0-9].*')\"]\u001b[0m),\n",
" ValidationResult(\u001b[91mds validation \"re:^[x-z]_coordinate\" failed for /\u001b[0m),\n",
" ValidationResult(\u001b[91m\"/\" is missing required attributes: [\"Equal('__version__')=Equal('0.9.0a0')\"]\u001b[0m),\n",
" ValidationResult(\u001b[91m\"/\" is missing required attributes: [\"Equal('__version__')=Equal('1.0.0')\"]\u001b[0m),\n",
" ValidationResult(\u001b[91m\"/\" is missing required attributes: [\"Equal('title')=Any(opt=True)\", \"Equal('user')=Any(opt=True)\"]\u001b[0m),\n",
" ValidationResult(\u001b[91m\"devices\" does not exist in \"/\"\u001b[0m)]"
]
Expand Down Expand Up @@ -766,7 +768,7 @@
"[\"/\"].dataset(name=\"Equal(Validator Equal has not yet been called or no message has been set!)\", compression=Equal(Validator Equal has not yet been called or no message has been set!))\n",
"[\"/...\"].attr([\"Equal('comment')=Regex('^[^ 0-9].*')\"])\n",
"[\"/\"].dataset(name=\"re:^[x-z]_coordinate\", ndim=Equal(Validator Equal has not yet been called or no message has been set!))\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('__version__')=Equal('0.9.0a0')\"])\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('__version__')=Equal('1.0.0')\"])\n",
"[\"/ExistIn(\"/\" exists in \"/\")\"].attr([\"Equal('title')=Any(opt=True)\", \"Equal('user')=Any(opt=True)\"])\n",
"GroupValidation(ExistIn('devices'))>\n"
]
Expand Down
47 changes: 42 additions & 5 deletions h5rdmtoolbox/_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
import typing
import warnings
from IPython.display import HTML, display
from abc import abstractmethod
from numpy import ndarray
Expand Down Expand Up @@ -267,13 +268,28 @@ def __call__(self,

def __stringdataset__(self, name, h5obj) -> str:
if h5obj.ndim == 0:
_id1 = f'ds-1-{h5obj.name}-{perf_counter_ns().__str__()}'
_id2 = f'ds-2-{h5obj.name}-{perf_counter_ns().__str__()}'
_pcns = perf_counter_ns().__str__()
_id1 = f'ds-1-{h5obj.name}-{_pcns}1'
_id2 = f'ds-2-{h5obj.name}-{_pcns}2'
return f"""\n
<ul id="{_id1}" class="h5tb-var-list">
<input id="{_id2}" class="h5tb-varname-in" type="checkbox" {self.checkbox_state}>
<label class='h5tb-varname'
for="{_id2}">{name}</label>: {h5obj.values[()]}
for="{_id2}">{name}</label>: [{h5obj.dtype}] data={h5obj.values[()]}
"""
elif h5obj.ndim == 1:
_pcns = perf_counter_ns().__str__()
_id1 = f'ds-1-{h5obj.name}-{_pcns}1'
_id2 = f'ds-2-{h5obj.name}-{_pcns}2'
try:
str_values = ', '.join(h5obj[()])
except UnicodeDecodeError:
str_values = '<i>UnicodeDecodeError</i>'
return f"""\n
<ul id="{_id1}" class="h5tb-var-list">
<input id="{_id2}" class="h5tb-varname-in" type="checkbox" {self.checkbox_state}>
<label class='h5tb-varname'
for="{_id2}">{name}</label>: [{h5obj.dtype}] data="{str_values}"
"""
return self.__NDdataset__(name, h5obj)

Expand Down Expand Up @@ -342,7 +358,8 @@ def __NDdataset__(self, name, h5obj: h5py.Dataset):

def __dataset__(self, name, h5obj) -> str:
"""generate html representation of a dataset"""
if h5obj.dtype.char == 'S':
is_string_dataset = h5obj.dtype.char == 'S'
if is_string_dataset:
_html_pre = self.__stringdataset__(name, h5obj)
else:
if h5obj.ndim == 0:
Expand All @@ -354,6 +371,21 @@ def __dataset__(self, name, h5obj) -> str:
# open attribute section:
_html_ds_attrs = """\n <ul class="h5tb-attr-list">"""
# write attributes:
# if is_string_dataset:
# if h5obj.ndim in (0, 1):
# if h5obj.ndim == 0:
# try:
# str_data = h5obj[()]
# except UnicodeDecodeError:
# str_data = 'UnicodeDecodeError'
# elif h5obj.ndim == 1:
# try:
# str_data = ', '.join(h5obj[()])
# except UnicodeDecodeError:
# str_data = 'UnicodeDecodeError'
# _html_ds_attrs += '<li style="list-style-type: none; ' \
# f'font-style: bold">data : {str_data}</li>'

for k in h5obj.attrs.keys():
if k not in self.ignore_attrs and not k.isupper():
_html_ds_attrs += self.__attrs__(k, h5obj)
Expand Down Expand Up @@ -408,6 +440,11 @@ def __group__(self, name, h5obj: h5py.Group):

def __attrs__(self, name, h5obj):
attr_value = h5obj.attrs.raw[name]
if isinstance(attr_value, np.bytes_):
try:
attr_value = attr_value.decode('utf-8')
except UnicodeDecodeError:
warnings.warn(f'Cannot decode attribute value for {name}', RuntimeWarning)
iri = h5obj.iri.get(name)

iri_name = iri.name
Expand Down Expand Up @@ -489,7 +526,7 @@ def __attrs__(self, name, h5obj):
_value_str = attr_value

if iri_data is not None:
_value_str += get_iri_icon_href(iri_data)# make_href(iri_data, ' [IRI]')
_value_str += get_iri_icon_href(iri_data) # make_href(iri_data, ' [IRI]')
return f'<li style="list-style-type: none; font-style: italic">{name} : {_value_str}</li>'


Expand Down
13 changes: 12 additions & 1 deletion h5rdmtoolbox/conventions/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,15 +405,26 @@ def validate(self, file_or_filename: Union[str, pathlib.Path, "File"]) -> List[D

convention = self

def _is_str_dataset(node):
if node.dtype.kind == 'S':
return True
return False

def _validate_convention(name, node):
"""Checks if the node (dataset or group) is compliant with the convention"""
for k, v in convention.properties.items():
if isinstance(node, k):
for ak, av in v.items():
if av.default_value is consts.DefaultValue.EMPTY:
if av.target_method == 'create_string_dataset' and not _is_str_dataset(node):
continue # not the responsibility of this validator
if av.target_method == 'create_dataset' and _is_str_dataset(node):
continue # not the responsibility of this validator

if ak not in node.attrs:
logger.debug(
f'The attribute "{ak}" is missing in the dataset but is required by the convention')
f'The attribute "{ak}" is missing in the dataset "{name}" but '
'is required by the convention')
failed.append(dict(name=node.name, attr_name=ak, reason='missing_attribute'))
else:
value_to_check = node.attrs[ak]
Expand Down
3 changes: 3 additions & 0 deletions h5rdmtoolbox/database/hdfdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from .filedb import FileDB, FilesDB
from .groupdb import GroupDB

from ...utils import create_tbx_logger

logger = create_tbx_logger('database.hdfdb')

__all__ = ['GroupDB', 'FileDB', 'FilesDB']
17 changes: 14 additions & 3 deletions h5rdmtoolbox/database/hdfdb/query.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import numpy as np
import warnings

import logging
import numpy as np
import re




def _eq(a, b):
"""Check if a == b"""
if a is None or b is None:
Expand Down Expand Up @@ -51,6 +55,14 @@ def _regex(value, pattern) -> bool:

if value is None:
return False

if isinstance(value, np.bytes_):
try:
value = value.decode()
except UnicodeDecodeError:
warnings.warn(f'could not decode {value}', UserWarning)
return False

match = re.search(pattern, value)
if match is None:
return False
Expand All @@ -60,8 +72,7 @@ def _regex(value, pattern) -> bool:
def _basename(value, basename) -> bool:
if value is None:
return False

return _regex(value, pattern=basename + '^.*/{basename}$')
return _regex(value, pattern=f'^.*/{basename}$')


def _exists(value, tf: bool) -> bool:
Expand Down
39 changes: 31 additions & 8 deletions h5rdmtoolbox/database/lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,16 @@ class LHDFObject:
open the file manually, but still wants to work with the dataset.
"""

def __init__(self, obj: h5py.Group):
self.filename = pathlib.Path(obj.file.filename)
if isinstance(obj.attrs, h5py.AttributeManager):
self._attrs = dict(obj.attrs)
else:
self._attrs = dict(obj.attrs.raw)

for k, v in _get_dataset_properties(obj, ('file', 'name',)).items():
setattr(self, k, v)

def __repr__(self):
return f'<{self.__class__.__name__} "{self.name}" in "{self.filename}">'

Expand Down Expand Up @@ -85,17 +95,30 @@ class LGroup(LHDFObject):
"""Lazy Group"""

def __init__(self, obj: h5py.Group):
self.filename = pathlib.Path(obj.file.filename)
if isinstance(obj.attrs, h5py.AttributeManager):
self._attrs = dict(obj.attrs)
else:
self._attrs = dict(obj.attrs.raw)
super().__init__(obj)

for k, v in _get_dataset_properties(obj, ('file', 'name',)).items():
setattr(self, k, v)
self._children = {}
for k, v in obj.items():
if isinstance(v, h5py.Group):
self._children[k] = LGroup(v)
if ' ' not in k and not hasattr(self, k):
setattr(self, k, self._children[k])
elif isinstance(v, h5py.Dataset):
self._children[k] = LDataset(v)
if ' ' not in k and not hasattr(self, k):
setattr(self, k, self._children[k])

def keys(self):
"""Return the keys of the group which are the names of datasets and groups"""
return self._children.keys()

def __getitem__(self, item):
if item in self._children:
return self._children[item]
return super(LGroup, self).__getitem__(item)


class LDataset(LGroup):
class LDataset(LHDFObject):
"""Lazy Dataset"""

def __init__(self, obj: h5py.Dataset):
Expand Down
3 changes: 2 additions & 1 deletion h5rdmtoolbox/database/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def make_dict_mongo_compatible(dictionary: Dict):
except Exception as e:
warnings.warn(
f'Could not determine/convert type of {ak}. Try to continue with type {type(av)} of {av}. '
f'Original error: {e}')
f'Original error: {e}',
UserWarning)
return dictionary


Expand Down
Loading

0 comments on commit 2d997f9

Please sign in to comment.