From b4d10c6f3d158e63d34744528725d3013c651050 Mon Sep 17 00:00:00 2001 From: MatthiasProbst Date: Mon, 4 Nov 2024 20:39:53 +0100 Subject: [PATCH] differentiate between semantic properties assigned to root group and file --- h5rdmtoolbox/wrapper/core.py | 10 +++-- h5rdmtoolbox/wrapper/jsonld.py | 51 +++++++++++++++++++++----- h5rdmtoolbox/wrapper/rdf.py | 67 ++++++++++++++++++++++++++++++++-- requirements-dev.txt | 2 +- requirements.txt | 2 +- setup.cfg | 4 +- tests/wrapper/test_jsonld.py | 23 ++++++++++-- 7 files changed, 135 insertions(+), 24 deletions(-) diff --git a/h5rdmtoolbox/wrapper/core.py b/h5rdmtoolbox/wrapper/core.py index 40ef63a..5188641 100644 --- a/h5rdmtoolbox/wrapper/core.py +++ b/h5rdmtoolbox/wrapper/core.py @@ -1866,7 +1866,9 @@ def __getitem__(self, # decode string array if dim_ds_attrs.get('time_format', False): if dim_ds_data.ndim == 0: - dim_ds_data = np.array(datetime.strptime(dim_ds_data.astype(str), dim_ds_attrs['time_format'])).astype(datetime) + dim_ds_data = np.array( + datetime.strptime(dim_ds_data.astype(str), dim_ds_attrs['time_format'])).astype( + datetime) else: dim_ds_data = convert_strings_to_datetimes(dim_ds_data.astype(str)) # dim_ds_data = np.array( @@ -2223,8 +2225,8 @@ def __init__(self, logger.debug(f'Initializing h5py.File with name={name}, mode={mode} and kwargs={kwargs}') try: super().__init__(name=name, - mode=mode, - **kwargs) + mode=mode, + **kwargs) except OSError as e: logger.error(f"Unable to open file {name}. Error message: {e}") from ..utils import DownloadFileManager @@ -2279,7 +2281,7 @@ def standard_attributes(self) -> Dict: @property def rdf(self): """Return RDF Manager""" - return rdf.RDFManager(self.attrs) + return rdf.FileRDFManager(self.attrs) @property def iri(self): diff --git a/h5rdmtoolbox/wrapper/jsonld.py b/h5rdmtoolbox/wrapper/jsonld.py index e37ef45..b86afc8 100644 --- a/h5rdmtoolbox/wrapper/jsonld.py +++ b/h5rdmtoolbox/wrapper/jsonld.py @@ -544,8 +544,8 @@ def get_rdflib_graph(source: Union[str, pathlib.Path, h5py.File], grp = source _context = {} - if structural: - _context['hdf5'] = str(HDF5._NS) + # if structural: + _context['hdf5'] = str(HDF5._NS) _context.update(context or {}) # = context or {} assert isinstance(_context, dict) @@ -584,19 +584,46 @@ def _add_node(graph: rdflib.Graph, triple) -> rdflib.Graph: def _add_hdf_node(name, obj, ctx) -> Dict: # node = rdflib.URIRef(f'_:{obj.name}') if isinstance(obj, h5py.File): - root_group = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode() - iri_dict[name] = root_group + file_node = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode() + iri_dict['.'] = file_node + _add_node(g, (file_node, RDF.type, HDF5.File)) if structural: - file_node = rdflib.BNode(value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode() - _add_node(g, (file_node, RDF.type, HDF5.File)) + root_group = rdflib.BNode( + value=f'N{next(_bnode_counter)}') if use_simple_bnode_value else rdflib.BNode() + iri_dict[name] = root_group _add_node(g, (file_node, HDF5.rootGroup, root_group)) # _add_node(g, (root_group, RDF.type, HDF5.Group)) # _add_node(g, (root_group, HDF5.name, rdflib.Literal(name))) + obj_node = root_group + iri_dict["/"] = root_group + + # now go through all predicates + for ak, av in obj.attrs.items(): + attr_predicate = obj.rdf.predicate.get(ak, None) # TODO: here nur die file predicates holen! + if attr_predicate is not None: + _namespace, _predicate_name = split_URIRef(attr_predicate) + if resolve_keys: + _rdf_name = _predicate_name + else: + _rdf_name = ak + predicate_uri, ctx = process_rdf_key( + rdf_name=_rdf_name, + rdf_value=attr_predicate, + resolve_keys=resolve_keys, + context=ctx) + + assert isinstance(ctx, dict) - obj_node = iri_dict.get(obj.name, None) - if obj_node is None: - obj_node = _get_id(obj) - iri_dict[obj.name] = obj_node + _add_node(g, (file_node, predicate_uri, rdflib.URIRef(av))) + + + if not structural: + obj_node = file_node + else: + obj_node = iri_dict.get(obj.name, None) + if obj_node is None: + obj_node = _get_id(obj) + iri_dict[obj.name] = obj_node if structural and name != '/': parent_name = obj.parent.name @@ -611,6 +638,10 @@ def _add_hdf_node(name, obj, ctx) -> Dict: h5_rdf_type = obj.attrs.get(RDF_TYPE_ATTR_NAME, None) if h5_rdf_type: _add_node(g, (obj_node, RDF.type, rdflib.URIRef(h5_rdf_type))) + + if obj.name == "/": + obj = obj[obj.name] + group_type = obj.rdf.type if isinstance(group_type, list): for gs in group_type: diff --git a/h5rdmtoolbox/wrapper/rdf.py b/h5rdmtoolbox/wrapper/rdf.py index 359e4f2..7cfda3b 100644 --- a/h5rdmtoolbox/wrapper/rdf.py +++ b/h5rdmtoolbox/wrapper/rdf.py @@ -12,7 +12,9 @@ from ..protocols import H5TbxAttributeManager RDF_OBJECT_ATTR_NAME = 'RDF_OBJECT' +RDF_FILE_OBJECT_ATTR_NAME = 'RDF_FILE_OBJECT' RDF_PREDICATE_ATTR_NAME = 'RDF_PREDICATE' +RDF_FILE_PREDICATE_ATTR_NAME = 'RDF_FILE_PREDICATE' RDF_SUBJECT_ATTR_NAME = 'RDF_ID' # equivalent to @ID in JSON-LD, thus can only be one value!!! RDF_TYPE_ATTR_NAME = 'RDF_TYPE' # equivalent to @type in JSON-LD, thus can be multiple values. @@ -45,7 +47,10 @@ def validate_url(url: str) -> str: f'Tested with pydantic: {e}') -def set_predicate(attr: h5py.AttributeManager, attr_name: str, value: str) -> None: +def set_predicate(attr: h5py.AttributeManager, + attr_name: str, + value: str, + rdf_predicate_attr_name=RDF_PREDICATE_ATTR_NAME) -> None: """Set the class of an attribute Parameters @@ -67,11 +72,11 @@ def set_predicate(attr: h5py.AttributeManager, attr_name: str, value: str) -> No raise RDFError(f'Invalid IRI: "{value}" for attr name "{attr_name}". ' f'Expecting a valid URL. This was validated with pydantic. Pydantic error: {e}') - iri_name_data = attr.get(RDF_PREDICATE_ATTR_NAME, None) + iri_name_data = attr.get(rdf_predicate_attr_name, None) if iri_name_data is None: iri_name_data = {} iri_name_data.update({attr_name: value}) - attr[RDF_PREDICATE_ATTR_NAME] = iri_name_data + attr[rdf_predicate_attr_name] = iri_name_data def set_object(attr: h5py.AttributeManager, attr_name: str, data: str) -> None: @@ -549,3 +554,59 @@ def delete(self, name): del self.predicate[name] if name in self.object: del self.object[name] + + +class FileIRIDict(Dict): + + def __init__(self, _dict: Dict, attr: h5py.AttributeManager = None, attr_name: str = None): + super().__init__(_dict) + self._attr = attr + self._attr_name = attr_name + + @property + def predicate(self): + p = self[RDF_FILE_PREDICATE_ATTR_NAME] + if p is not None: + return p + return p + + @predicate.setter + def predicate(self, value): + set_predicate(self._attr, self._attr_name, value, + rdf_predicate_attr_name=RDF_FILE_PREDICATE_ATTR_NAME) + + +class File_RDF_Predicate(_RDFPO): + """IRI class attribute manager""" + + IRI_ATTR_NAME = RDF_FILE_PREDICATE_ATTR_NAME + + def __setiri__(self, key, value): + set_predicate(self._attr, key, value) + + +class FileRDFManager: + + def __init__(self, attr: H5TbxAttributeManager = None): + self._attr = attr + + def __getitem__(self, item) -> FileIRIDict: + """Overwrite parent implementation, because other attr name is used""" + if item not in self._attr: + raise KeyError(f'Attribute "{item}" not found in {self.parent.name}.') + return FileIRIDict( + { + RDF_FILE_PREDICATE_ATTR_NAME: self._attr.get(RDF_FILE_PREDICATE_ATTR_NAME, {}).get(item, None), + RDF_FILE_OBJECT_ATTR_NAME: self._attr.get(RDF_FILE_OBJECT_ATTR_NAME, {}).get(item, None)}, + self._attr, item) + + @property + def predicate(self) -> File_RDF_Predicate: + """Return the RDF predicate manager""" + rdf_pred = File_RDF_Predicate(self._attr) + rdf_pred.IRI_ATTR_NAME = RDF_FILE_PREDICATE_ATTR_NAME + return rdf_pred + + @predicate.setter + def predicate(self, value): + set_predicate(self._attr, self._attr_name, value) diff --git a/requirements-dev.txt b/requirements-dev.txt index e4eb74a..bcba02b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -6,4 +6,4 @@ mongomock == 4.1.2 scikit-image>=0.21.0 scikit-learn scipy>=1.10.1 -ssnolib>=1.3.0.1a4 \ No newline at end of file +ssnolib>=1.3.0.1a11 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index ef2690d..950cca2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,4 +23,4 @@ python-gitlab pypandoc>=1.11 pydantic>=2.8.2 # other: -ontolutils >= 0.5.0 +ontolutils >= 0.7.1 diff --git a/setup.cfg b/setup.cfg index 8c0361f..f0156c3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,7 +35,7 @@ install_requires = pint_xarray >= 0.2.1 regex>=2020.7.9 packaging>=24.1 - ontolutils>=0.5.0 + ontolutils>=0.7.1 python-forge==18.6.0 requests>=2.32.3 pydantic>=2.8.2 @@ -59,7 +59,7 @@ test = pytest>=8.3.3 pytest-cov>=5.0.0 pylint - ssnolib>=1.3.0.1a4 + ssnolib>=1.3.0.1a11 mongomock==4.1.2 xmltodict<=0.13.0 scipy>=1.10.1 # provides netcdf4 diff --git a/tests/wrapper/test_jsonld.py b/tests/wrapper/test_jsonld.py index b470ee4..174f48d 100644 --- a/tests/wrapper/test_jsonld.py +++ b/tests/wrapper/test_jsonld.py @@ -1,9 +1,11 @@ import json +import pathlib +import unittest + import numpy as np import ontolutils -import pathlib import rdflib -import unittest +import ssnolib from ontolutils import M4I from ontolutils import namespaces, urirefs, Thing @@ -534,4 +536,19 @@ def test_hdf2jsonld(self): jsonld_filename = jsonld.hdf2jsonld('test.hdf', skipND=1) self.assertTrue(jsonld_filename.exists()) self.assertTrue(jsonld_filename.suffix == '.jsonld') - jsonld_filename.unlink() \ No newline at end of file + jsonld_filename.unlink() + + def test_hdf2jsonld_with_standard_name_table(self): + with h5tbx.File() as h5: + h5.attrs["snt_file"] = "https://sandbox.zenodo.org/uploads/125545" + h5.rdf["snt_file"].predicate = ssnolib.namespace.SSNO.usesStandardNameTable + h5["/"].attrs["snt_rootgroup"] = "https://sandbox.zenodo.org/uploads/12554567" + h5["/"].rdf["snt_rootgroup"].predicate = ssnolib.namespace.SSNO.usesStandardNameTable + print(h5tbx.dump_jsonld(h5.hdf_filename, indent=2, semantic=True, structural=True, + resolve_keys=True, + context={"ssno": "https://matthiasprobst.github.io/ssno#"})) + jdict = json.loads( + h5tbx.dump_jsonld(h5.hdf_filename, indent=2, semantic=True, structural=True, + resolve_keys=True, + context={"ssno": "https://matthiasprobst.github.io/ssno#"})) + jdict["ssno:usesStandardNameTable"] = "https://sandbox.zenodo.org/uploads/125545"