Skip to content

Commit

Permalink
differentiate between semantic properties assigned to root group and …
Browse files Browse the repository at this point in the history
…file
  • Loading branch information
matthiasprobst committed Nov 4, 2024
1 parent e65647f commit b4d10c6
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 24 deletions.
10 changes: 6 additions & 4 deletions h5rdmtoolbox/wrapper/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1866,7 +1866,9 @@ def __getitem__(self,
# decode string array
if dim_ds_attrs.get('time_format', False):
if dim_ds_data.ndim == 0:
dim_ds_data = np.array(datetime.strptime(dim_ds_data.astype(str), dim_ds_attrs['time_format'])).astype(datetime)
dim_ds_data = np.array(
datetime.strptime(dim_ds_data.astype(str), dim_ds_attrs['time_format'])).astype(
datetime)
else:
dim_ds_data = convert_strings_to_datetimes(dim_ds_data.astype(str))
# dim_ds_data = np.array(
Expand Down Expand Up @@ -2223,8 +2225,8 @@ def __init__(self,
logger.debug(f'Initializing h5py.File with name={name}, mode={mode} and kwargs={kwargs}')
try:
super().__init__(name=name,
mode=mode,
**kwargs)
mode=mode,
**kwargs)
except OSError as e:
logger.error(f"Unable to open file {name}. Error message: {e}")
from ..utils import DownloadFileManager
Expand Down Expand Up @@ -2279,7 +2281,7 @@ def standard_attributes(self) -> Dict:
@property
def rdf(self):
"""Return RDF Manager"""
return rdf.RDFManager(self.attrs)
return rdf.FileRDFManager(self.attrs)

@property
def iri(self):
Expand Down
51 changes: 41 additions & 10 deletions h5rdmtoolbox/wrapper/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,8 +544,8 @@ def get_rdflib_graph(source: Union[str, pathlib.Path, h5py.File],
grp = source

_context = {}
if structural:
_context['hdf5'] = str(HDF5._NS)
# if structural:
_context['hdf5'] = str(HDF5._NS)
_context.update(context or {}) # = context or {}

assert isinstance(_context, dict)
Expand Down Expand Up @@ -584,19 +584,46 @@ def _add_node(graph: rdflib.Graph, triple) -> rdflib.Graph:
def _add_hdf_node(name, obj, ctx) -> Dict:
# node = rdflib.URIRef(f'_:{obj.name}')
if isinstance(obj, h5py.File):
root_group = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode()
iri_dict[name] = root_group
file_node = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode()
iri_dict['.'] = file_node
_add_node(g, (file_node, RDF.type, HDF5.File))
if structural:
file_node = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode()
_add_node(g, (file_node, RDF.type, HDF5.File))
root_group = rdflib.BNode(
value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode()
iri_dict[name] = root_group
_add_node(g, (file_node, HDF5.rootGroup, root_group))
# _add_node(g, (root_group, RDF.type, HDF5.Group))
# _add_node(g, (root_group, HDF5.name, rdflib.Literal(name)))
obj_node = root_group
iri_dict["/"] = root_group

# now go through all predicates
for ak, av in obj.attrs.items():
attr_predicate = obj.rdf.predicate.get(ak, None) # TODO: here nur die file predicates holen!
if attr_predicate is not None:
_namespace, _predicate_name = split_URIRef(attr_predicate)
if resolve_keys:
_rdf_name = _predicate_name
else:
_rdf_name = ak
predicate_uri, ctx = process_rdf_key(
rdf_name=_rdf_name,
rdf_value=attr_predicate,
resolve_keys=resolve_keys,
context=ctx)

assert isinstance(ctx, dict)

obj_node = iri_dict.get(obj.name, None)
if obj_node is None:
obj_node = _get_id(obj)
iri_dict[obj.name] = obj_node
_add_node(g, (file_node, predicate_uri, rdflib.URIRef(av)))


if not structural:
obj_node = file_node
else:
obj_node = iri_dict.get(obj.name, None)
if obj_node is None:
obj_node = _get_id(obj)
iri_dict[obj.name] = obj_node

if structural and name != '/':
parent_name = obj.parent.name
Expand All @@ -611,6 +638,10 @@ def _add_hdf_node(name, obj, ctx) -> Dict:
h5_rdf_type = obj.attrs.get(RDF_TYPE_ATTR_NAME, None)
if h5_rdf_type:
_add_node(g, (obj_node, RDF.type, rdflib.URIRef(h5_rdf_type)))

if obj.name == "/":
obj = obj[obj.name]

group_type = obj.rdf.type
if isinstance(group_type, list):
for gs in group_type:
Expand Down
67 changes: 64 additions & 3 deletions h5rdmtoolbox/wrapper/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
from ..protocols import H5TbxAttributeManager

RDF_OBJECT_ATTR_NAME = 'RDF_OBJECT'
RDF_FILE_OBJECT_ATTR_NAME = 'RDF_FILE_OBJECT'
RDF_PREDICATE_ATTR_NAME = 'RDF_PREDICATE'
RDF_FILE_PREDICATE_ATTR_NAME = 'RDF_FILE_PREDICATE'
RDF_SUBJECT_ATTR_NAME = 'RDF_ID' # equivalent to @ID in JSON-LD, thus can only be one value!!!
RDF_TYPE_ATTR_NAME = 'RDF_TYPE' # equivalent to @type in JSON-LD, thus can be multiple values.

Expand Down Expand Up @@ -45,7 +47,10 @@ def validate_url(url: str) -> str:
f'Tested with pydantic: {e}')


def set_predicate(attr: h5py.AttributeManager, attr_name: str, value: str) -> None:
def set_predicate(attr: h5py.AttributeManager,
attr_name: str,
value: str,
rdf_predicate_attr_name=RDF_PREDICATE_ATTR_NAME) -> None:
"""Set the class of an attribute
Parameters
Expand All @@ -67,11 +72,11 @@ def set_predicate(attr: h5py.AttributeManager, attr_name: str, value: str) -> No
raise RDFError(f'Invalid IRI: "{value}" for attr name "{attr_name}". '
f'Expecting a valid URL. This was validated with pydantic. Pydantic error: {e}')

iri_name_data = attr.get(RDF_PREDICATE_ATTR_NAME, None)
iri_name_data = attr.get(rdf_predicate_attr_name, None)
if iri_name_data is None:
iri_name_data = {}
iri_name_data.update({attr_name: value})
attr[RDF_PREDICATE_ATTR_NAME] = iri_name_data
attr[rdf_predicate_attr_name] = iri_name_data


def set_object(attr: h5py.AttributeManager, attr_name: str, data: str) -> None:
Expand Down Expand Up @@ -549,3 +554,59 @@ def delete(self, name):
del self.predicate[name]
if name in self.object:
del self.object[name]


class FileIRIDict(Dict):
    """Dictionary with the file-level RDF entries of one attribute.

    In addition to the dictionary content, the instance remembers the
    attribute manager and the attribute name it was created for, so that an
    assignment to :attr:`predicate` can be forwarded to ``set_predicate``.
    """

    def __init__(self, _dict: Dict, attr: h5py.AttributeManager = None, attr_name: str = None):
        super().__init__(_dict)
        self._attr_name = attr_name
        self._attr = attr

    @property
    def predicate(self):
        """Return the value stored under ``RDF_FILE_PREDICATE_ATTR_NAME``.

        The value is returned as is, including ``None``. A ``KeyError`` is
        raised if the key is not part of this dictionary.
        """
        return self[RDF_FILE_PREDICATE_ATTR_NAME]

    @predicate.setter
    def predicate(self, value):
        # Written through ``set_predicate`` to the attribute manager; the
        # content of this dictionary itself is not touched here.
        set_predicate(
            self._attr,
            self._attr_name,
            value,
            rdf_predicate_attr_name=RDF_FILE_PREDICATE_ATTR_NAME,
        )


class File_RDF_Predicate(_RDFPO):
    """Predicate manager for RDF predicates that describe the file.

    File-level predicates are kept under ``RDF_FILE_PREDICATE_ATTR_NAME`` so
    that they do not mix with predicates stored under the regular predicate
    attribute name.
    """

    IRI_ATTR_NAME = RDF_FILE_PREDICATE_ATTR_NAME

    def __setiri__(self, key, value):
        """Store the predicate IRI ``value`` for the attribute ``key``.

        The storage attribute name is passed explicitly. Without it
        ``set_predicate`` falls back to its default attribute name and the
        value would be written to a different attribute than the one named
        by ``IRI_ATTR_NAME``.
        """
        set_predicate(self._attr, key, value,
                      rdf_predicate_attr_name=self.IRI_ATTR_NAME)


class FileRDFManager:
    """Manager for RDF information that describes the file.

    Parameters
    ----------
    attr : H5TbxAttributeManager
        Attribute manager from which the file-level RDF data is read and to
        which it is written.
    """

    def __init__(self, attr: H5TbxAttributeManager = None):
        self._attr = attr

    def __getitem__(self, item) -> FileIRIDict:
        """Return the file-level RDF entries of the attribute ``item``.

        Raises
        ------
        KeyError
            If ``item`` is not an existing attribute.
        """
        if item not in self._attr:
            # This class has no ``parent`` attribute, so the message must not
            # reference it; doing so raised AttributeError instead of the
            # intended KeyError.
            raise KeyError(f'Attribute "{item}" not found.')
        file_predicates = self._attr.get(RDF_FILE_PREDICATE_ATTR_NAME, {})
        file_objects = self._attr.get(RDF_FILE_OBJECT_ATTR_NAME, {})
        return FileIRIDict(
            {
                RDF_FILE_PREDICATE_ATTR_NAME: file_predicates.get(item, None),
                RDF_FILE_OBJECT_ATTR_NAME: file_objects.get(item, None),
            },
            self._attr, item)

    @property
    def predicate(self) -> File_RDF_Predicate:
        """Return the RDF predicate manager for file-level predicates."""
        return File_RDF_Predicate(self._attr)

    @predicate.setter
    def predicate(self, value):
        # A predicate always belongs to one attribute, and the manager itself
        # has no attribute name. The previous body read the non-existent
        # ``self._attr_name`` and therefore always failed with an unspecific
        # AttributeError; the exception type is kept, the message is explicit.
        raise AttributeError(
            'Cannot assign a predicate to the file RDF manager without an attribute name. '
            'Use rdf["<attribute name>"].predicate = <IRI> instead.'
        )
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ mongomock == 4.1.2
scikit-image>=0.21.0
scikit-learn
scipy>=1.10.1
ssnolib>=1.3.0.1a4
ssnolib>=1.3.0.1a11
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ python-gitlab
pypandoc>=1.11
pydantic>=2.8.2
# other:
ontolutils >= 0.5.0
ontolutils >= 0.7.1
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ install_requires =
pint_xarray >= 0.2.1
regex>=2020.7.9
packaging>=24.1
ontolutils>=0.5.0
ontolutils>=0.7.1
python-forge==18.6.0
requests>=2.32.3
pydantic>=2.8.2
Expand All @@ -59,7 +59,7 @@ test =
pytest>=8.3.3
pytest-cov>=5.0.0
pylint
ssnolib>=1.3.0.1a4
ssnolib>=1.3.0.1a11
mongomock==4.1.2
xmltodict<=0.13.0
scipy>=1.10.1 # provides netcdf4
Expand Down
23 changes: 20 additions & 3 deletions tests/wrapper/test_jsonld.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import json
import pathlib
import unittest

import numpy as np
import ontolutils
import pathlib
import rdflib
import unittest
import ssnolib
from ontolutils import M4I
from ontolutils import namespaces, urirefs, Thing

Expand Down Expand Up @@ -534,4 +536,19 @@ def test_hdf2jsonld(self):
jsonld_filename = jsonld.hdf2jsonld('test.hdf', skipND=1)
self.assertTrue(jsonld_filename.exists())
self.assertTrue(jsonld_filename.suffix == '.jsonld')
jsonld_filename.unlink()
jsonld_filename.unlink()

def test_hdf2jsonld_with_standard_name_table(self):
    """File-level and root-group-level predicates both end up in the JSON-LD dump."""
    snt_predicate = ssnolib.namespace.SSNO.usesStandardNameTable
    with h5tbx.File() as h5:
        h5.attrs["snt_file"] = "https://sandbox.zenodo.org/uploads/125545"
        h5.rdf["snt_file"].predicate = snt_predicate
        h5["/"].attrs["snt_rootgroup"] = "https://sandbox.zenodo.org/uploads/12554567"
        h5["/"].rdf["snt_rootgroup"].predicate = snt_predicate

    # Serialise once and reuse the string; the original dumped the file twice
    # (once only for a debug print).
    # NOTE(review): the dump is placed after the ``with`` block because it
    # re-opens the file by name; the scraped source lost its indentation, so
    # confirm this matches the intended layout.
    jsonld_str = h5tbx.dump_jsonld(
        h5.hdf_filename,
        indent=2,
        semantic=True,
        structural=True,
        resolve_keys=True,
        context={"ssno": "https://matthiasprobst.github.io/ssno#"},
    )

    # The output must be valid JSON.
    json.loads(jsonld_str)

    # The original last line *assigned* into the parsed dictionary instead of
    # asserting anything, so the test could never fail.
    # NOTE(review): the exact JSON-LD layout is not visible from here, so the
    # checks below work on the serialised text; tighten them to the concrete
    # node structure once it is confirmed.
    self.assertIn("usesStandardNameTable", jsonld_str)
    # The closing quote distinguishes the file URL from the longer root group URL.
    self.assertIn('"https://sandbox.zenodo.org/uploads/125545"', jsonld_str)

0 comments on commit b4d10c6

Please sign in to comment.