Merge pull request #3 from hampusnasstrom/add-serializer

Add serializer
hampusnasstrom · Apr 29, 2024 · 03e7e7b · 03e7e7b
2 parents 1cdec44 + 908501a
commit 03e7e7b
Show file tree

Hide file tree

Showing 12 changed files with 398 additions and 14 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -30,6 +30,10 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install build
+    - name: Test with pytest
+      # The tests import ontopint and deepdiff, so install the package with
+      # its "dev" extras (pytest, deepdiff) instead of pytest alone.
+      run: |
+        pip install .[dev]
+        pytest
     - name: Build package
       run: python -m build
     - name: Publish package

diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
@@ -0,0 +1,31 @@
+# This workflow will run pytest only
+
+name: Run pytest
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install .[dev]
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/README.md b/README.md
@@ -1,2 +1,75 @@
 # ontopint
-A python package for reading units from a JSON-LD files and generating pint quantities.
+A Python package for reading and writing units from JSON-LD files and generating pint quantities.
+
+## How it works
+
+```python
+import ontopint
+
+# jsonld input with 'value' and 'unit' mapped to qudt terms
+data = {
+  "@context": {
+    "qudt": "http://qudt.org/schema/qudt/",
+    "qunit": "http://qudt.org/vocab/unit/",
+    "qkind": "http://qudt.org/vocab/quantkind/",
+    "unit": {
+      "@id": "qudt:hasUnit",
+      "@type": "@id"
+    },
+    "quantity": {
+      "@id": "qudt:hasQuantityKind",
+      "@type": "@id"
+    },
+    "value": "qudt:value"
+  },
+  "value": 4.0,
+  "unit": "qunit:CentiM"
+}
+
+# convert the value + unit pairs to pint.Quantity
+data = ontopint.parse_units(data)
+print(data)
+"""
+{
+  '@context': {...},
+  'value': <Quantity(4.0, 'centimeter')>
+}
+"""
+
+# do something with pint
+data["value"] += 3 * ontopint.ureg.meter
+data["value"] = data["value"].to(ontopint.ureg.meter)
+print(data)
+"""
+{
+  '@context': {...},
+  'value': <Quantity(3.04, 'meter')>
+}
+"""
+
+# export the result as jsonld
+data = ontopint.export_units(data)
+print(data)
+"""
+{
+  "@context": {
+    "qudt": "http://qudt.org/schema/qudt/",
+    "qunit": "http://qudt.org/vocab/unit/",
+    "qkind": "http://qudt.org/vocab/quantkind/",
+    "unit": {
+      "@id": "qudt:hasUnit",
+      "@type": "@id"
+    },
+    "quantity": {
+      "@id": "qudt:hasQuantityKind",
+      "@type": "@id"
+    },
+    "value": "qudt:value"
+  },
+  "value": 3.04,
+  "unit": "qunit:M"
+}
+"""
+```
+
+Note: more complex examples can be found at [tests/data](https://github.com/hampusnasstrom/ontopint/tree/main/tests/data)
diff --git a/pyproject.toml b/pyproject.toml
@@ -29,13 +29,15 @@ classifiers = [
 ]
 dependencies = [
     "rdflib",
+    "sparqlwrapper",
     "pint",
     "pyld",
     "ucumvert",
 ]
 [project.optional-dependencies]
 dev = [
     "pytest",
+    "deepdiff",
 ]
 
 [project.license]

diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py
@@ -1,10 +1,12 @@
 import json
 
+import SPARQLWrapper
 import rdflib
 from pyld import jsonld
 
 # from pint import UnitRegistry
 from ucumvert import PintUcumRegistry
+import pint
 
 # ureg = UnitRegistry()
 ureg = PintUcumRegistry()
@@ -18,9 +20,38 @@
     'value': 'qudt:value',
 }
 
-HAS_UNIT = 'http://qudt.org/schema/qudt/hasUnit'
-VALUE = 'http://qudt.org/schema/qudt/value'
+def get_ucum_code_from_unit_iri(unit_iri):
+    graph = rdflib.Graph()
+    graph.parse(unit_iri)
+    result = graph.query(
+        f'SELECT * WHERE {{<{unit_iri}> <http://qudt.org/schema/qudt/ucumCode> ?ucumCode}}'
+    )
+    ucum_code = str(result.bindings[0]['ucumCode'])
+    return ucum_code 
 
+def get_qunit_iri_from_unit_code(code, is_ucum_code = False):
+    # testing: https://www.qudt.org/fuseki/#/dataset/qudt/query
+    sparql = SPARQLWrapper.SPARQLWrapper("https://www.qudt.org/fuseki/qudt/sparql")
+
+    sparql.setMethod(SPARQLWrapper.POST)
+    code = "'" + code + "'"
+    query = """
+        SELECT ?subject
+        WHERE {
+            ?subject <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://qudt.org/schema/qudt/Unit> .
+            ?subject <{{{predicate}}}> {{{code}}} .
+        }
+        LIMIT 1
+    """.replace(
+        "{{{predicate}}}", "http://qudt.org/schema/qudt/ucumCode" if is_ucum_code else "http://qudt.org/schema/qudt/symbol"
+    ).replace(
+        "{{{code}}}", code + "^^<http://qudt.org/schema/qudt/UCUMcs>" if is_ucum_code else code
+    )
+    sparql.setQuery(query)
+    sparql.setReturnFormat(SPARQLWrapper.JSON)
+    result = sparql.query().convert()
+    result = result['results']['bindings'][0]['subject']['value']
+    return result
 
 class UnitDecoder(json.JSONDecoder):
     def __init__(self, *args, **kwargs):
@@ -51,18 +82,17 @@ def object_hook(self, obj):
 
 def _replace_units(obj, context, original_key_lookup_dict):
     if isinstance(obj, dict):
-        expanded_obj = jsonld.expand({**obj, '@context': context}, context)
-        if HAS_UNIT in expanded_obj[0] and VALUE in expanded_obj[0]:
-            unit_iri = expanded_obj[0][HAS_UNIT][0]['@id']
+        expanded_obj = jsonld.expand({**obj, "@context": context}, context)
+        compacted_obj = jsonld.compact(expanded_obj, processing_context)
+        if 'unit' in compacted_obj and 'value' in compacted_obj:
+            # note: "urn:ontopint:iri" is just any iri not existing in the input data
+            unit_iri = jsonld.expand(
+                    {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": compacted_obj["unit"]}, {}
+                )[0]["urn:ontopint:iri"][0]["@id"]
             obj.pop(original_key_lookup_dict['unit'])
-            graph = rdflib.Graph()
-            graph.parse(unit_iri)
-            result = graph.query(
-                f'SELECT * WHERE {{<{unit_iri}> <http://qudt.org/schema/qudt/symbol> ?ucumCode}}'
-            )
-            unit = result.bindings[0]['ucumCode']
+            ucum_code = get_ucum_code_from_unit_iri(unit_iri)
             obj[original_key_lookup_dict['value']] = ureg.Quantity(
-                obj[original_key_lookup_dict['value']], ureg.from_ucum(unit)
+                obj[original_key_lookup_dict['value']], ureg.from_ucum(ucum_code)
             )
         for key, value in obj.items():
             obj[key] = _replace_units(value, context, original_key_lookup_dict)
@@ -73,6 +103,34 @@ def _replace_units(obj, context, original_key_lookup_dict):
         ]
     else:
         return obj
+
+def _serialize_units(obj, context, original_key_lookup_dict):
+    if isinstance(obj, dict):
+        for key in list(obj.keys()): # make a list copy in order to delete keys while iterating
+            value = obj[key]
+            if (isinstance(value, pint.Quantity)):
+                # see https://pint.readthedocs.io/en/stable/user/formatting.html
+                # value = value.to_base_units() # this will not work until we have ucum support
+                quantity_value = float(format(value, 'f~').split(' ')[0])
+                unit_code = format(value.u, '~') 
+                # ToDo: use ucum code
+                unit_iri = get_qunit_iri_from_unit_code(unit_code)
+                # note: "urn:ontopint:iri" is just any iri not existing in the input data
+                unit_compact_iri = jsonld.compact(
+                    {"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": unit_iri}, 
+                    {**context, "urn:ontopint:iri": {"@type": "@id"}}
+                )["urn:ontopint:iri"]
+                obj[original_key_lookup_dict['value']] = quantity_value
+                obj[original_key_lookup_dict['unit']] = unit_compact_iri
+
+            else: obj[key] = _serialize_units(value, context, original_key_lookup_dict)
+        return obj
+    elif isinstance(obj, list):
+        return [
+            _serialize_units(value, context, original_key_lookup_dict) for value in obj
+        ]
+    else:
+        return obj
 
 
 def parse_units(json_ld: dict) -> dict:
@@ -86,5 +144,21 @@ def parse_units(json_ld: dict) -> dict:
     # reverse the dict
     original_key_lookup_dict = {v: k for k, v in compacted.items()}
     parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict)
-    parsed_json['@context'] = original_context
+    parsed_json = {'@context': original_context, **parsed_json}
+    json_ld['@context'] = original_context # restore context
+    return parsed_json
+
+def export_units(json_ld: dict, context = processing_context) -> dict:
+    original_context = json_ld.pop('@context', context)
+    key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'}
+    # inverse expand-reverse cycle
+    expanded = jsonld.expand(key_dict, processing_context)
+    compacted = jsonld.compact(expanded, original_context)
+    # remove the context
+    del compacted['@context']
+    # reverse the dict
+    original_key_lookup_dict = {v: k for k, v in compacted.items()}
+    parsed_json = _serialize_units(json_ld, original_context, original_key_lookup_dict)
+    parsed_json = {'@context': original_context, **parsed_json}
+    json_ld['@context'] = original_context # restore context
     return parsed_json
diff --git a/tests/010_api_test.py b/tests/010_api_test.py
@@ -0,0 +1,24 @@
+
+import pint
+from ontopint import get_qunit_iri_from_unit_code, get_ucum_code_from_unit_iri
+import ontopint
+
+def test_pint_print_formats():
+    # see https://pint.readthedocs.io/en/stable/user/formatting.html
+    q : pint.Quantity = pint.Quantity(1.0, ontopint.ureg.from_ucum("kg")).to_base_units()
+    assert( float(format(q, 'f~').split(' ')[0]) == 1.0)
+    assert( format(q.u, '~') == "kg")
+    q : pint.Quantity = pint.Quantity(304, ontopint.ureg.from_ucum("cm"))
+    assert( float(format(q, 'f~').split(' ')[0]) == 304)
+    assert( format(q, 'f~').split(' ')[1] == "cm")
+    q : pint.Quantity = pint.Quantity(10, ontopint.ureg.from_ucum("eV"))
+    assert( float(format(q, 'f~').split(' ')[0]) == 10)
+    assert( format(q.u, '~') == "eV") 
+
+def test_qudt_sparql_api():
+    assert (get_qunit_iri_from_unit_code("kg") == "http://qudt.org/vocab/unit/KiloGM")
+    assert (get_qunit_iri_from_unit_code("kg", True) == "http://qudt.org/vocab/unit/KiloGM")
+    assert (get_ucum_code_from_unit_iri("http://qudt.org/vocab/unit/KiloGM") == "kg")
+
+    assert (get_qunit_iri_from_unit_code("m") == "http://qudt.org/vocab/unit/M")
+    assert (get_qunit_iri_from_unit_code("m", True) == "http://qudt.org/vocab/unit/M")
diff --git a/tests/020_deserialization_test.py b/tests/020_deserialization_test.py
@@ -0,0 +1,33 @@
+import ontopint
+import pint
+
+from common import _load_test_data, _recursive_items
+
+def test_default_keys():
+    """test input data with default keys 'value' and 'unit'
+    """
+    input_jsonld = _load_test_data("test_data_default_keys.jsonld")
+    parsed_jsonld = ontopint.parse_units(input_jsonld)
+    del parsed_jsonld["@context"]
+    parse_values_count = 0
+    for key, value in _recursive_items(parsed_jsonld):
+        if key == "value": 
+            assert(isinstance(value, pint.Quantity))
+            parse_values_count += 1 
+        if key == "unit": assert False, "unit key should not be present"
+    assert parse_values_count == 2, "result should contain 2 parsed values"
+
+def test_custom_keys():
+    """test input data with custom keys 'my_value' and 'my_unit'
+    """
+    input_jsonld = _load_test_data("test_data_custom_keys.jsonld")
+    parsed_jsonld = ontopint.parse_units(input_jsonld)
+    del parsed_jsonld["@context"]
+    parse_values_count = 0
+    for key, value in _recursive_items(parsed_jsonld):
+        if key == "my_value": 
+            assert(isinstance(value, pint.Quantity))
+            parse_values_count += 1 
+        if key == "my_unit": assert False, "my_unit key should not be present"
+    assert parse_values_count == 2, "result should contain 2 parsed values"
+
diff --git a/tests/030_serialization_test.py b/tests/030_serialization_test.py
@@ -0,0 +1,47 @@
+import ontopint
+import deepdiff
+
+def test_default_keys():
+    """test input data with default keys 'value' and 'unit'
+    """
+
+    test = {
+        "value": ontopint.ureg.Quantity(
+            1.123, ontopint.ureg.from_ucum("eV")
+        )
+    }
+    expected = {
+        "value": 1.123,
+        "unit": "qunit:EV"
+    }
+    result = ontopint.export_units(test)
+    del result["@context"]
+    assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff
+
+def test_custom_keys():
+    """test input data with custom keys 'my_value' and 'my_unit'
+    """
+    test = {
+        "@context": {
+            "qudt": "http://qudt.org/schema/qudt/",
+            "qunit": "http://qudt.org/vocab/unit/",
+            "qkind": "http://qudt.org/vocab/quantkind/",
+            "my_unit": {
+                "@id": "qudt:hasUnit",
+                "@type": "@id"
+            },
+            "my_value": "qudt:value",
+        },
+        "my_value": ontopint.ureg.Quantity(
+            1.123, ontopint.ureg.from_ucum("eV")
+        )
+    }
+    expected = {
+        "my_value": 1.123,
+        "my_unit": "qunit:EV"
+    }
+    result = ontopint.export_units(test)
+    del result["@context"]
+    assert (len(deepdiff.DeepDiff(expected, result).keys()) == 0) # no diff
+
+