Skip to content

Commit

Permalink
Include skos:definition in JSON-LD dump
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasprobst committed Apr 20, 2024
1 parent 59ba7cf commit 34b9791
Show file tree
Hide file tree
Showing 9 changed files with 433 additions and 227 deletions.
313 changes: 208 additions & 105 deletions docs/colab/quickstart.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/userguide/wrapper/DumpFile.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
"version": "3.8.19"
}
},
"nbformat": 4,
Expand Down
257 changes: 156 additions & 101 deletions docs/userguide/wrapper/FAIRAttributes.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions h5rdmtoolbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,13 @@ def dump_jsonld(hdf_filename: Union[str, pathlib.Path],
if structural and not semantic:
return jsonld.dump_file(hdf_filename, skipND=skipND)
with File(hdf_filename) as h5:
return jsonld.dumps(h5, indent=2, structural=structural, resolve_keys=resolve_keys, **kwargs)
return jsonld.dumps(h5, structural=structural, resolve_keys=resolve_keys, **kwargs)


def register_dataset_decoder(decoder: Callable, decoder_name: str = None, overwrite: bool = False):
"""A decoder function takes a xarray.DataArray and a dataset as input and returns a xarray.DataArray
It is called after the dataset is loaded into memory and before being returned to the user. Be careful:
Multiple decoders can be registered and they are called in the order of registration. Hence, your decoder
Multiple decoders can be registered, and they are called in the order of registration. Hence, your decoder
may behave unexpectedly!
"""
from .wrapper import ds_decoder
Expand Down
2 changes: 2 additions & 0 deletions h5rdmtoolbox/convention/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def __getitem__(self, item) -> Optional[str]:
return self.get(item)

def __setitem__(self, key, value):
    """Store ``value`` as the definition of the attribute named ``key``.

    Definitions are kept together in one mapping that is saved under
    ``DEFINITION_ATTR_NAME`` in the underlying attribute container.

    Raises
    ------
    KeyError
        If no attribute called ``key`` exists in the attribute container.
    """
    # A definition may only be attached to an attribute that already exists.
    if key not in self._attr:
        raise KeyError(f"Attribute {key} does not exist")

    # Fetch the current definitions (or start an empty mapping), add the
    # new entry and write the whole mapping back.
    definitions = self._attr.get(DEFINITION_ATTR_NAME, {})
    definitions[key] = value
    self._attr[DEFINITION_ATTR_NAME] = definitions
Expand Down
35 changes: 26 additions & 9 deletions h5rdmtoolbox/wrapper/core.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,25 @@
"""Core wrapper module containing basic wrapper implementation of File, Dataset and Group
"""
import datetime
import h5py
import json
import logging
import numpy as np
import os
import pathlib
# noinspection PyUnresolvedReferences
import pint
import shutil
import warnings
import xarray as xr
from collections.abc import Iterable
from datetime import datetime, timezone
from h5py._hl.base import phil, with_phil
from h5py._objects import ObjectID
from pathlib import Path
from typing import List, Dict, Union, Tuple, Protocol, Optional, Generator

import h5py
import numpy as np
# noinspection PyUnresolvedReferences
import pint
import xarray as xr
from h5py._hl.base import phil, with_phil
from h5py._objects import ObjectID

from h5rdmtoolbox.database import ObjDB
from h5rdmtoolbox.database.lazy import LHDFObject
# noinspection PyUnresolvedReferences
Expand All @@ -29,8 +30,8 @@
from .. import _repr, get_config, convention, utils, consts, protected_attributes
from .. import get_ureg
from .._repr import H5Repr, H5PY_SPECIAL_ATTRIBUTES
from ..convention import rdf
from ..convention import definition
from ..convention import rdf
from ..convention.consts import DefaultValue

logger = logging.getLogger('h5rdmtoolbox')
Expand Down Expand Up @@ -1674,7 +1675,8 @@ def __setitem__(self, key, value):
super().__setitem__(key, value)

@dataset_value_decoder
def __getitem__(self, args, new_dtype=None, nparray=False, links_as_strings:bool=False) -> Union[xr.DataArray, np.ndarray]:
def __getitem__(self, args, new_dtype=None, nparray=False, links_as_strings: bool = False) -> Union[
xr.DataArray, np.ndarray]:
"""Return sliced HDF dataset. If global setting `return_xarray`
is set to True, a `xr.DataArray` is returned, otherwise the default
behaviour of the h5py package is used and a np.ndarray is returned.
Expand Down Expand Up @@ -2212,6 +2214,21 @@ def open(filename: Union[str, pathlib.Path], mode: str = "r+") -> 'File':
"""
return File(filename, mode)

def dump_jsonld(self,
                skipND: int = 1,
                structural: bool = True,
                semantic: bool = True,
                resolve_keys: bool = False,
                **kwargs):
    """Dump the content of this file as JSON-LD.

    Convenience wrapper: all arguments are forwarded unchanged, together
    with this file's ``hdf_filename``, to the package-level ``dump_jsonld``
    function, and its result is returned.

    Parameters
    ----------
    skipND : int
        Forwarded as ``skipND``.
    structural : bool
        Forwarded as ``structural``.
    semantic : bool
        Forwarded as ``semantic``.
    resolve_keys : bool
        Forwarded as ``resolve_keys``.
    **kwargs
        Any further keyword arguments, passed through as they are.
    """
    # Imported inside the method, as in the original implementation.
    from .. import dump_jsonld as _package_dump_jsonld

    forwarded = dict(
        skipND=skipND,
        structural=structural,
        semantic=semantic,
        resolve_keys=resolve_keys,
    )
    return _package_dump_jsonld(self.hdf_filename, **forwarded, **kwargs)


Dataset._h5grp = Group
Dataset._h5ds = Dataset
Expand Down
20 changes: 13 additions & 7 deletions h5rdmtoolbox/wrapper/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ontolutils
import rdflib
from ontolutils.classes.utils import split_URIRef
from rdflib import Graph, URIRef, Literal, BNode, XSD, RDF
from rdflib import Graph, URIRef, Literal, BNode, XSD, RDF, SKOS
from rdflib.plugins.shared.jsonld.context import Context

from h5rdmtoolbox.convention import hdf_ontology
Expand Down Expand Up @@ -579,10 +579,6 @@ def _add_hdf_node(name, obj, ctx):
# _add_node(g, (root_group, RDF.type, HDF5.Group))
# _add_node(g, (root_group, HDF5.name, rdflib.Literal(name)))

# return
# else:
# root_group = rdflib.BNode()

obj_node = iri_dict.get(obj.name, None)
if obj_node is None:
obj_node = _get_id(obj, local=local)
Expand Down Expand Up @@ -643,7 +639,17 @@ def _add_hdf_node(name, obj, ctx):

if structural: # add hdf type and name nodes
_add_node(g, (attr_node, RDF.type, HDF5.Attribute))
_add_node(g, (attr_node, HDF5.name, rdflib.Literal(ak)))
attr_def: str = obj.attrsdef.get(ak, None)
if attr_def:
_add_node(g, (attr_node, HDF5.name, rdflib.Literal(ak)))
_add_node(g, (attr_node, SKOS.definition, rdflib.Literal(attr_def)))
if 'skos' not in ctx:
ctx['skos'] = 'http://www.w3.org/2004/02/skos/core#'
# def_node = rdflib.BNode()
# _add_node(g, (def_node, SCHEMA.comment, rdflib.Literal(attr_def)))
# _add_node(g, (attr_node, HDF5.name, def_node))
else:
_add_node(g, (attr_node, HDF5.name, rdflib.Literal(ak)))

list_node = None
attr_literal = None
Expand Down Expand Up @@ -818,7 +824,7 @@ def dumps(grp,
structural: bool = True,
resolve_keys: bool = False,
**kwargs) -> str:
"""Dump a group or a dataset to to string."""
"""Dump a group or a dataset to string."""
return json.dumps(dumpd(
grp=grp,
iri_only=iri_only,
Expand Down
24 changes: 22 additions & 2 deletions tests/wrapper/test_jsonld.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import pathlib
import unittest

import numpy as np
import ontolutils
import pathlib
import rdflib
import unittest
from ontolutils import M4I
from ontolutils import namespaces, urirefs, Thing

Expand Down Expand Up @@ -472,3 +473,22 @@ def test_codemeta_to_hdf(self):
h5tbx.dumps('test.hdf')

h5.hdf_filename.unlink(missing_ok=True)

def test_jsonld_with_attrs_definition(self):
    """An attribute definition must show up as skos:definition in the JSON-LD dump.

    Writes a root attribute with a definition, dumps the file to JSON-LD,
    parses the result with rdflib and queries for the attribute's name,
    value and definition.
    """
    with h5tbx.File() as h5:
        h5.attrs['name'] = h5tbx.Attribute('Matthias', definition='My first name')
        jstr = h5.dump_jsonld()

    sparql_str = """SELECT ?n ?v ?d
{
?id <http://purl.allotrope.org/ontologies/hdf5/1.8#attribute> ?a .
?a <http://purl.allotrope.org/ontologies/hdf5/1.8#name> ?n .
?a <http://purl.allotrope.org/ontologies/hdf5/1.8#value> ?v .
?a <http://www.w3.org/2004/02/skos/core#definition> ?d .
}"""
    g = rdflib.Graph().parse(data=jstr, format='json-ld')
    rows = list(g.query(sparql_str))

    # Guard against a vacuous pass: if the query matches nothing, the loop
    # below would never execute and the test would succeed without checking
    # anything.
    self.assertTrue(rows, 'SPARQL query returned no rows; skos:definition is missing in the JSON-LD dump')

    for row in rows:
        self.assertEqual(str(row[0]), 'name')
        self.assertEqual(str(row[1]), 'Matthias')
        self.assertEqual(str(row[2]), 'My first name')
3 changes: 3 additions & 0 deletions tests/wrapper/test_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,6 @@ def test_definition(self):
self.assertEqual(h5.attrsdef['name'], 'This is the name of the person to contact')

h5.dumps()

with self.assertRaises(KeyError):
h5.attrsdef['test'] = 'This should not work!'

0 comments on commit 34b9791

Please sign in to comment.