Skip to content

Commit

Permalink
introducing iri
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Dec 5, 2023
1 parent 9721f88 commit 8f8aee3
Show file tree
Hide file tree
Showing 13 changed files with 358 additions and 243 deletions.
268 changes: 105 additions & 163 deletions docs/wrapper/DumpFile.ipynb

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions h5rdmtoolbox/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
"""h5rdtoolbox repository"""
import atexit
import pathlib
import shutil
from typing import Union, Callable

# noinspection PyUnresolvedReferences
import pint_xarray
import shutil
import xarray as xr
from typing import Union, Callable

from h5rdmtoolbox._cfg import set_config, get_config, get_ureg

pint_xarray.unit_registry = get_ureg()

from . import conventions
from .conventions.core import Convention
from . import plotting
# from . import plotting
from . import wrapper
from ._user import UserDir
from ._version import __version__
from . import database
from . import utils
from .wrapper.core import lower, Lower, File, Group, Dataset
from . import errors

from .wrapper.accessory import register_special_dataset

name = 'h5rdmtoolbox'
Expand Down
86 changes: 50 additions & 36 deletions h5rdmtoolbox/_repr.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import h5py
import numpy as np
import os
import pkg_resources
import re
import typing
from IPython.display import HTML, display
from abc import abstractmethod
from numpy import ndarray
from time import perf_counter_ns

import h5py
import numpy as np
import pkg_resources
from IPython.display import HTML, display
from numpy import ndarray

from . import get_config
from . import protected_attributes
from .orcid import is_valid_orcid_pattern, get_html_repr
Expand Down Expand Up @@ -83,6 +84,12 @@ def okprint(string):
print(oktext(string))


def make_href(url, text) -> str:
    """Return an HTML anchor element that links ``text`` to ``url``.

    Parameters
    ----------
    url : str
        Link target. Must start with ``http`` (covers ``http://`` and
        ``https://``), ``ftp://`` or ``www.``. These are the URL shapes that
        ``process_string_for_link`` extracts and hands to this function.
        A ``www.`` target gets ``http://`` prepended, because an href
        without a scheme is resolved as a relative path by browsers.
    text : str
        Inner content of the anchor. It is inserted verbatim, since callers
        pass ready-made HTML here (e.g. an ``<img>`` badge).

    Returns
    -------
    str
        The ``<a href="...">...</a>`` markup.

    Raises
    ------
    ValueError
        If ``url`` starts with none of the accepted prefixes.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not url.startswith(('http', 'ftp://', 'www.')):
        raise ValueError(f'Invalid URL: "{url}". Must start with "http", "ftp://" or "www."')
    href = f'http://{url}' if url.startswith('www.') else url
    # Escape the target so that quotes or angle brackets in a URL read from a
    # file cannot terminate the href attribute and inject markup.
    return f'<a href="{escape(href, quote=True)}">{text}</a>'


def process_string_for_link(string: str) -> typing.Tuple[str, bool]:
"""process string to make links actually clickable in html
Expand All @@ -106,7 +113,7 @@ def process_string_for_link(string: str) -> typing.Tuple[str, bool]:
if string.startswith('https://zenodo.org/record/'):
zenodo_url = string
img_url = f'https://zenodo.org/badge/DOI/10.5281/zenodo.{string.split("/")[-1]}.svg'
return f'<a href="{zenodo_url}"><img src="{img_url}" alt="DOI"></a>', True
return make_href(url=zenodo_url, text=f'<img src="{img_url}" alt="DOI">'), True
for p in (r"(https?://\S+)", r"(ftp://\S+)", r"(www\.\S+)"):
urls = re.findall(p, string)
if urls:
Expand All @@ -115,7 +122,7 @@ def process_string_for_link(string: str) -> typing.Tuple[str, bool]:
orcid_url_repr = get_html_repr(url)
string = string.replace(url, orcid_url_repr)
else:
string = string.replace(url, f'<a href="{url}">{url}</a>')
string = string.replace(url, make_href(url, url))
return string, True

return string, False
Expand Down Expand Up @@ -168,15 +175,15 @@ class HDF5StructureStrRepr(_HDF5StructureRepr):
def __call__(self, group, indent=0, preamble=None):
if preamble:
print(preamble)
for attr_name, attr_value in group.attrs.raw.items():
for attr_name in group.attrs.raw.keys():
if not attr_name.isupper():
print(self.base_intent * indent + self.__attrs__(attr_name, attr_value))
print(self.base_intent * indent + self.__attrs__(attr_name, group))
for key, item in group.items():
if isinstance(item, h5py.Dataset):
print(self.base_intent * indent + self.__dataset__(key, item))
for attr_name, attr_value in item.attrs.raw.items():
for attr_name in item.attrs.raw.keys():
if not attr_name.isupper() and attr_name not in self.ignore_attrs:
print(self.base_intent * (indent + 2) + self.__attrs__(attr_name, attr_value))
print(self.base_intent * (indent + 2) + self.__attrs__(attr_name, item))
elif isinstance(item, h5py.Group):
print(self.base_intent * indent + self.__group__(key, item))
self(item, indent + 1)
Expand Down Expand Up @@ -337,9 +344,9 @@ def __dataset__(self, name, h5obj) -> str:
# open attribute section:
_html_ds_attrs = """\n <ul class="h5tb-attr-list">"""
# write attributes:
for k, v in h5obj.attrs.items():
for k in h5obj.attrs.keys():
if k not in self.ignore_attrs and not k.isupper():
_html_ds_attrs += self.__attrs__(k, v)
_html_ds_attrs += self.__attrs__(k, h5obj)
# close attribute section
_html_ds_attrs += """\n </ul>"""

Expand Down Expand Up @@ -370,8 +377,9 @@ def __group__(self, name, h5obj: h5py.Group):
_html += """\n
<ul class="h5tb-attr-list">"""
# write attributes:
for k, v in h5obj.attrs.items():
_html += self.__attrs__(k, v)
for k in h5obj.attrs.keys():
if not k.isupper():
_html += self.__attrs__(k, h5obj)
# close attribute section
_html += """
</ul>"""
Expand All @@ -389,16 +397,21 @@ def __group__(self, name, h5obj: h5py.Group):
return _html

def __attrs__(self, name, h5obj):

if name in ('DIMENSION_LIST', 'REFERENCE_LIST'):
_value = h5obj.__str__().replace('<', '&#60;')
_value = _value.replace('>', '&#62;')
return f'<li style="list-style-type: none; font-style: italic">{name} : {_value}</li>'

if isinstance(h5obj, ndarray):
if all(isinstance(item, str) for item in h5obj):
attr_value = h5obj.attrs.raw[name]
# if name.isupper():
# # if name in ('DIMENSION_LIST', 'REFERENCE_LIST'):
# _value = attr_value.__str__().replace('<', '&#60;')
# _value = _value.replace('>', '&#62;')
# return f'<li style="list-style-type: none; font-style: italic">{name} : {_value}</li>'

iri_value = h5obj.iri.get(name, None)
if iri_value:
name = make_href(iri_value, name)

if isinstance(attr_value, ndarray):
if all(isinstance(item, str) for item in attr_value):
_string_value_list = []
for item in h5obj:
for item in attr_value:
_value, is_url = process_string_for_link(item)
if is_url:
_string_value_list.append(_value)
Expand All @@ -407,13 +420,13 @@ def __attrs__(self, name, h5obj):
return '<li style="list-style-type: none; ' \
f'font-style: italic">{name} : {", ".join(_string_value_list)}</li>'
else:
_value = h5obj.__repr__()
_value = attr_value.__repr__()
if len(_value) > self.max_attr_length:
_value = f'{_value[0:self.max_attr_length]}...'
return f'<li style="list-style-type: none; font-style: italic">{name} : {_value}</li>'

if isinstance(h5obj, str):
_value_str = f'{h5obj}'
if isinstance(attr_value, str):
_value_str = f'{attr_value}'
if len(_value_str) > 1:
if _value_str[0] == '<' and _value_str[-1] == '>':
_value_str = _value_str[1:-1]
Expand All @@ -422,35 +435,36 @@ def __attrs__(self, name, h5obj):
if is_url:
if 'orcid.org' in _value:
from . import orcid
orcid_html = orcid.get_html_repr(h5obj.strip('/').rsplit('/', 1)[-1])
orcid_html = orcid.get_html_repr(attr_value.strip('/').rsplit('/', 1)[-1])
return f'<li style="list-style-type: none; font-style: italic">{name} : {orcid_html}</li>'
else:
if self.max_attr_length:
if len(_value_str) > self.max_attr_length:
_value_str = f'{_value_str[0:self.max_attr_length - 3]}...'
else:
_value_str = h5obj
_value_str = attr_value
else:
_value_str = h5obj
_value_str = attr_value
#
# if len(_value_str) > self.max_attr_length:
# _value_str = f'{_value_str[0:self.max_attr_length-1]}...'
# print(f'<li style="list-style-type: none; font-style: italic">{name} : {_value_str}</li>')
return f'<li style="list-style-type: none; font-style: italic">{name} : {_value_str}</li>'

if not isinstance(h5obj, ndarray):
if getattr(h5obj, '_repr_html_', None):
_value_str = h5obj._repr_html_()
if not isinstance(attr_value, ndarray):
if getattr(attr_value, '_repr_html_', None):
_value_str = attr_value._repr_html_()
else:
_value_str = str(h5obj)
_value_str = str(attr_value)
if _value_str[0] == '<' and _value_str[-1] == '>':
_value_str = _value_str[1:-1]
if self.max_attr_length:
if len(_value_str) > self.max_attr_length:
_value_str = f'{_value_str[0:self.max_attr_length - 3]}...'
else:
_value_str = h5obj
_value_str = attr_value
else:
_value_str = h5obj
_value_str = attr_value

return f'<li style="list-style-type: none; font-style: italic">{name} : {_value_str}</li>'

Expand Down
3 changes: 2 additions & 1 deletion h5rdmtoolbox/consts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""constants used by the h5rdmtoolbox package"""

ANCILLARY_DATASET = 'ANCILLARY_DATASETS'
ANCILLARY_DATASET = 'ANCILLARY_DATASETS'
IRI_ATTR_NAME = 'IRI'
5 changes: 4 additions & 1 deletion h5rdmtoolbox/conventions/standard_names/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,10 @@ def to_html(self, html_filename, open_in_browser: bool = False) -> pathlib.Path:
raise FileNotFoundError(f'Could not find the template file at {template_filename.absolute()}')

# Convert Markdown to HTML using pandoc
import pypandoc
try:
import pypandoc
except ImportError:
raise ImportError('Package "pypandoc" is required for this function.')
output = pypandoc.convert_file(str(markdown_filename.absolute()), 'html', format='md',
extra_args=['--template', template_filename])

Expand Down
38 changes: 38 additions & 0 deletions h5rdmtoolbox/iri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
class IRIManager:
    """Manager class to handle IRIs of opened HDF5 files.

    Keeps one IRI dictionary per registry name.
    NOTE(review): the names are presumably HDF5 file identifiers -- confirm
    against the callers.
    """

    def __init__(self):
        # Maps a registry name to the IRI dictionary stored for it.
        self.registries = {}

    def __contains__(self, item):
        """Return True if a registry is stored under ``item``."""
        return item in self.registries

    def get(self, name, existing_iri=None):
        """Return the registry stored under ``name``, creating it if missing.

        Parameters
        ----------
        name
            Key of the registry.
        existing_iri : dict, optional
            Object to store when ``name`` is not registered yet. A new empty
            dict is used if omitted. Ignored when ``name`` already exists.

        Returns
        -------
        The object stored under ``name``.
        """
        initial = {} if existing_iri is None else existing_iri
        return self.registries.setdefault(name, initial)


class IRI:
    """Helper class pairing an attribute value with its IRI
    (Internationalized Resource Identifier).

    The instance only stores the two pieces of information.
    NOTE(review): the original docstring states that assigning an instance to
    an HDF5 attribute writes ``value`` to the attribute and stores ``iri`` in
    a separate attribute (it names a constant ``ATTRIRI``; consts.py defines
    ``IRI_ATTR_NAME``). That logic is not part of this class -- confirm in the
    attribute wrapper.

    Parameters
    ----------
    value
        The attribute value.
    iri
        The IRI associated with the value.

    Example
    -------
    >>> from h5rdmtoolbox.iri import IRI
    >>> IRI('Mike', 'http://www.w3.org/ns/prov#Person')
    IRI('Mike', 'http://www.w3.org/ns/prov#Person')
    """

    def __init__(self, value, iri):
        self.iri = iri
        self.value = value

    def __repr__(self):
        # !r keeps string arguments quoted, so the output is unambiguous and
        # mirrors the constructor call.
        return f'{self.__class__.__name__}({self.value!r}, {self.iri!r})'


# Module-level IRIManager instance, created once when this module is imported.
irimanager = IRIManager()
10 changes: 7 additions & 3 deletions h5rdmtoolbox/tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"""
import os
import pathlib
import xarray as xr
from typing import List

import xarray as xr

from h5rdmtoolbox.conventions.standard_names.table import StandardNameTable
from .utils import generate_temporary_directory
from .wrapper.core import File
Expand Down Expand Up @@ -186,7 +187,7 @@ def _get_pressure(v):

_folders = ('d1', 'd2', 'd3', 'd1/d11', 'd1/d11/d111', 'd2/d21')
folders = [os.path.join(repo_dir, _f) for _f in _folders]
operators = ('Mike', 'Ellen', 'John', 'Susi')
contact_persons = ('Mike', 'Ellen', 'John', 'Susi')
db_file_type = ('fan_case', 'piv_case')

file_ids = range(n_files)
Expand All @@ -200,7 +201,9 @@ def _get_pressure(v):

filename = pathlib.Path(folders[ifolder]) / f'repofile_{fid:05d}.hdf'
with File(filename, 'w') as h5:
h5.attrs['operator'] = operators[np.random.randint(4)]
h5.attrs['contact_person'] = contact_persons[np.random.randint(4)]
h5.iri['contact_person'] = 'http://www.w3.org/ns/prov#Person'

if fid % 2:
__ftype__ = db_file_type[0]
else:
Expand Down Expand Up @@ -239,6 +242,7 @@ def _get_pressure(v):
shape=(zplanes, 64, 86, 2))
g.create_dataset('v', attrs={'units': 'm/s', 'long_name': 'mean v-component'},
shape=(zplanes, 64, 86, 2))
g.iri['units'] = 'http://qudt.org/schema/qudt/Unit'

@staticmethod
def generate_test_files(n_files: int = 5) -> List[pathlib.Path]:
Expand Down
Loading

0 comments on commit 8f8aee3

Please sign in to comment.