Skip to content

Commit

Permalink
RDF consts are moved to rdf module
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Mar 22, 2024
1 parent b7b06ac commit 5abbcf9
Show file tree
Hide file tree
Showing 13 changed files with 144 additions and 143 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ coverage.svg
docs/colab/my_file.hdf
*.h5
*.nxs
*.json
*.jsonld
docs/userguide/wrapper/test.hdf
docs/userguide/wrapper/test.json
2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"license": "https://spdx.org/licenses/MIT",
"codeRepository": "git+https://github.com/matthiasprobst/h5RDMtoolbox.git",
"name": "h5RDMtoolbox",
"version": "1.2.3a1",
"version": "1.2.3a2",
"description": "Supporting a FAIR Research Data lifecycle using Python and HDF5.",
"applicationCategory": "Engineering",
"programmingLanguage": [
Expand Down
9 changes: 3 additions & 6 deletions h5rdmtoolbox/_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from . import get_config
from . import identifiers
from . import protected_attributes
from .wrapper.rdf import RDF_SUBJECT_ATTR_NAME, RDF_PREDICATE_ATTR_NAME

H5PY_SPECIAL_ATTRIBUTES = ('DIMENSION_LIST', 'REFERENCE_LIST', 'NAME', 'CLASS', protected_attributes.COORDINATES)
try:
Expand Down Expand Up @@ -201,8 +202,6 @@ def __attrs__(self, name, h5obj):
"""dataset representation"""


from . import consts


class HDF5StructureStrRepr(_HDF5StructureRepr):

Expand All @@ -214,7 +213,7 @@ def __call__(self, group, indent=0, preamble=None):
if predicate:
print(spaces + f'@predicate: {predicate}')
for attr_name in group.attrs.raw.keys():
if attr_name == consts.RDF_SUBJECT_ATTR_NAME:
if attr_name == RDF_SUBJECT_ATTR_NAME:
print(spaces + f'@type: {group.attrs[attr_name]}')
else:
if not attr_name.isupper():
Expand Down Expand Up @@ -267,7 +266,7 @@ def __group__(self, name, item) -> str:
def __attrs__(self, name, h5obj) -> str:
attr_value = h5obj.attrs.raw[name]

pred = h5obj.rdf[name]['predicate']
pred = h5obj.rdf[name][RDF_PREDICATE_ATTR_NAME]
if pred:
use_attr_name = f'{name} ({pred})'
else:
Expand Down Expand Up @@ -463,8 +462,6 @@ def __group__(self, name, h5obj: h5py.Group):
checkbox_state = self.checkbox_state

self_predicate = h5obj.rdf.predicate.get('SELF', None)
if self_predicate:
print(self_predicate)
self_subject = h5obj.rdf.subject

if self_predicate is not None:
Expand Down
5 changes: 0 additions & 5 deletions h5rdmtoolbox/consts.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
"""constants used by the h5rdmtoolbox package"""

ANCILLARY_DATASET = 'ANCILLARY_DATASETS'
RDF_OBJECT_ATTR_NAME = 'RDF_OBJECT'
RDF_PREDICATE_ATTR_NAME = 'RDF_PREDICATE'
# RDF_SUBJECT_ATTR_NAME = 'IRI_SUBJECT' # '@type'
RDF_SUBJECT_ATTR_NAME = '@type'
# IRI_TYPE_ATTR_NAME = '@type'
8 changes: 4 additions & 4 deletions h5rdmtoolbox/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
from typing import Dict
from typing import Union, Callable, List, Tuple

from . import _user, get_config, get_ureg, consts
from . import _user, get_config, get_ureg
from ._version import __version__
from .wrapper import rdf

logger = logging.getLogger('h5rdmtoolbox')
DEFAULT_LOGGING_LEVEL = logging.INFO
Expand Down Expand Up @@ -181,10 +182,10 @@ def create_h5tbx_version_grp(root: h5py.Group) -> h5py.Group:
version_group = root.create_group('h5rdmtoolbox')
# g.rdf.object = 'https://schema.org/SoftwareSourceCode'
version_group.attrs['__h5rdmtoolbox_version__'] = __version__
version_group.attrs[consts.RDF_PREDICATE_ATTR_NAME] = json.dumps(
version_group.attrs[rdf.RDF_PREDICATE_ATTR_NAME] = json.dumps(
{'__h5rdmtoolbox_version__': 'https://schema.org/softwareVersion'}
)
version_group.attrs[consts.RDF_SUBJECT_ATTR_NAME] = 'https://schema.org/SoftwareSourceCode'
version_group.attrs[rdf.RDF_SUBJECT_ATTR_NAME] = 'https://schema.org/SoftwareSourceCode'
return version_group


Expand Down Expand Up @@ -307,7 +308,6 @@ def parse_object_for_attribute_setting(value) -> Union[str, int, float, bool, Li
try:
return str(value) # try parsing to string
except TypeError:
print(type(value))
raise TypeError(f"Cannot parse type {type(value)} to string")


Expand Down
9 changes: 7 additions & 2 deletions h5rdmtoolbox/wrapper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""
import datetime
import h5py
import json
import logging
import numpy as np
import os
Expand All @@ -16,7 +17,7 @@
from h5py._hl.base import phil, with_phil
from h5py._objects import ObjectID
from pathlib import Path
from typing import List, Dict, Union, Tuple, Callable
from typing import List, Dict, Union, Tuple, Callable, Optional

from h5rdmtoolbox.database import ObjDB
from . import rdf
Expand Down Expand Up @@ -744,7 +745,6 @@ def create_dataset(self,
if anc_ds.shape != _data.shape:
raise ValueError(f'Associated dataset {anc_name} has shape {anc_ds.shape} '
f'which does not match dataset shape {_data.shape}')
import json
attrs[consts.ANCILLARY_DATASET] = json.dumps({k: v.name for k, v in ancillary_datasets.items()})

_maxshape = kwargs.get('maxshape', shape)
Expand Down Expand Up @@ -1207,6 +1207,11 @@ def create_from_yaml(self, yaml_filename: Path):
from . import h5yaml
h5yaml.H5Yaml(yaml_filename).write(self)

def create_from_jsonld(self, data: str, context: Optional[Dict] = None):
    """Create groups/datasets from a jsonld string."""
    from . import jsonld
    # Parse the JSON-LD payload first, then delegate HDF5 creation
    # to the jsonld helper module.
    parsed_data = json.loads(data)
    jsonld.to_hdf(self, data=parsed_data, context=context)

def _get_obj_names(self, obj_type, recursive):
"""Return all names of specified object type
in this group and if recursive==True also
Expand Down
133 changes: 60 additions & 73 deletions h5rdmtoolbox/wrapper/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from typing import Dict, Optional, Union, List
from typing import Iterable, Tuple, Any

import h5rdmtoolbox as h5tbx
from h5rdmtoolbox import consts
from h5rdmtoolbox.convention import hdf_ontology
from ontolutils.classes.utils import split_URIRef
from .core import Dataset, File
from .rdf import RDF_PREDICATE_ATTR_NAME


def _merge_entries(entries: Dict, clean: bool = True) -> Dict:
Expand Down Expand Up @@ -122,7 +122,10 @@ def to_hdf(grp,

if k == '@id':
rdf_predicate = None
value_predicate = k
if v.startswith('http'):
value_predicate = k
else:
continue
else:
# spit predicate:
ns_predicate, value_predicate = split_URIRef(k)
Expand All @@ -140,90 +143,77 @@ def to_hdf(grp,
rdf_predicate = value_predicate

if isinstance(v, dict):
print(f'create group {k} in {grp.name}')
if k not in grp:
to_hdf(grp.create_group(value_predicate), data=v, predicate=rdf_predicate, context=data_context)

elif isinstance(v, list):
if is_list_of_dict(v):
for i, entry in enumerate(v):
sub_grp_name = f'{k}{i + 1}'
if sub_grp_name in grp:
sub_grp = grp[sub_grp_name]
# figure out how to name the sub group
# best would be to take the label, if it exists
for label_identifier in ('rdfs:label', 'label', 'http://www.w3.org/2000/01/rdf-schema#'):
_label = entry.get(label_identifier, None)
break

if _label is None:
if len(v) > 1:
label = f'{k}{i + 1}'
else:
label = k
else:
ns, label = split_URIRef(_label)

if label in grp:
sub_grp = grp[label]
else:
sub_grp = grp.create_group(sub_grp_name)
sub_grp.rdf.predicate = data_context.get(k, None)
ns_predicate, rdf_predicate = split_URIRef(k)
if ns_predicate is None:
rdf_predicate = data_context.get(k, None)
elif ns_predicate.startswith('http'):
rdf_predicate = k
else:
_ns = data_context.get(ns_predicate, None)
if _ns is not None:
rdf_predicate = f'{_ns}{value_predicate}'
else:
rdf_predicate = value_predicate

sub_grp = grp.create_group(label)
sub_grp.rdf.predicate = rdf_predicate

to_hdf(sub_grp, data=entry, context=data_context)
else:
grp.attrs[k, data_context.get(k, None)] = v
else:
# maybe value_object is a IRI?!
ns_object, value_object = split_URIRef(v)
rdf_object = None
if isinstance(v, str):
if v.startswith('http'):
value_object = v
else:
ns_object, value_object = split_URIRef(v)

if ns_object is None:
rdf_object = data_context.get(k, None)
elif value_object.startswith('http'):
rdf_object = k
if ns_object is None:
rdf_object = data_context.get(k, None)
elif value_object.startswith('http'):
rdf_object = k
else:
_ns = data_context.get(ns_object, None)
if _ns is not None:
rdf_object = f'{_ns}{value_object}'
else:
rdf_object = None
else:
_ns = data_context.get(ns_object, None)
if _ns is not None:
rdf_object = f'{_ns}{value_object}'
else:
rdf_object = value_object
if k == '@type':
value_object = v

if k == '@type' and rdf_object is not None:
grp.attrs.create(name=k, data=rdf_object)
elif k == '@id':
grp.attrs.create(name=k, data=v)
else:
grp.attrs.create(name=value_predicate, data=value_object, rdf_predicate=rdf_predicate)


# def to_hdf(jsonld_filename, grp: h5py.Group) -> None:
# """Takes a .jsonld file and writes it into a HDF5 group"""
# if not isinstance(grp, h5py.Group):
# raise TypeError(f'Expecting h5py.Group, got {type(grp)}')
#
# if not isinstance(jsonld_filename, (str, pathlib.Path)):
# raise TypeError(f'Expecting str or pathlib.Path, got {type(jsonld_filename)}')
#
# def _to_hdf(_h5: h5py.Group, jdict: Dict):
# """Takes a .jsonld file and writes it into a HDF5 group"""
# for k, v in jdict.items():
# if isinstance(v, dict):
# if k == 'has parameter':
# label = v.get('label', '@id')
# _h5.attrs[k] = v['@id']
# if v.get('has numerical value', None):
# ds = _h5.create_dataset(label, data=literal_eval(v['has numerical value']), track_order=True)
# for kk, vv in v.items():
# if kk != 'has numerical value':
# ds.attrs[kk] = vv
# else:
# grp = _h5.create_group(label, track_order=True)
# _to_hdf(grp, v)
# else:
# grp = _h5.create_group(k, track_order=True)
# _to_hdf(grp, v)
# elif isinstance(v, list):
# list_grp = _h5.create_group(k, track_order=True)
# for i, item in enumerate(v):
# # _h5[k] =
# obj_name = item.get('@id', str(i))
# if item.get('has numerical value', None):
# obj = list_grp.create_dataset(obj_name, data=literal_eval(item['has numerical value']),
# track_order=True)
# for kk, vv in item.items():
# if kk != 'has numerical value':
# obj.attrs[kk] = vv
# else:
# obj = list_grp.create_group(obj_name, track_order=True)
# _to_hdf(obj, item)
# else:
# _h5.attrs[k] = v
#
# with open(jsonld_filename, 'r') as f:
# return _to_hdf(grp, json.load(f))


def serialize(grp,
iri_only=False,
local=None,
Expand All @@ -241,9 +231,6 @@ def serialize(grp,
recursive=recursive,
compact=compact,
context=context)

hasParameter = URIRef('http://w3id.org/nfdi4ing/metadata4ing#hasParameter')

# global _context
_context = {}
context = context or {}
Expand All @@ -270,7 +257,7 @@ def add_node(name, obj):
# NumericalVariable or TextVariable

if node_type is None:
rdf_predicate_dict = obj.attrs.get(consts.RDF_PREDICATE_ATTR_NAME, None)
rdf_predicate_dict = obj.attrs.get(RDF_PREDICATE_ATTR_NAME, None)
if rdf_predicate_dict and len(rdf_predicate_dict) > 0:
if isinstance(obj, h5py.Dataset):
if obj.dtype.kind == 'S':
Expand Down Expand Up @@ -509,11 +496,11 @@ def _build_group_onto_class(grp):
data[grp.parent.name].append(ontogrp)

def _build_onto_classes(name, node):
if isinstance(node, h5tbx.Dataset):
if isinstance(node, Dataset):
return _build_dataset_onto_class(node)
return _build_group_onto_class(node)

with h5tbx.File(filename, mode='r') as h5:
with File(filename, mode='r') as h5:
root = hdf_ontology.Group(name='/', attribute=_build_attributes(h5.attrs))
data['/'] = []

Expand Down
Loading

0 comments on commit 5abbcf9

Please sign in to comment.