Skip to content

Commit

Permalink
Merge pull request #3 from hampusnasstrom/add-serializer
Browse files Browse the repository at this point in the history
Add serializer
  • Loading branch information
simontaurus authored Apr 29, 2024
2 parents 1cdec44 + 908501a commit 03e7e7b
Show file tree
Hide file tree
Showing 12 changed files with 398 additions and 14 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install build
- name: Test with pytest
run: |
pip install pytest
pytest
- name: Build package
run: python -m build
- name: Publish package
Expand Down
31 changes: 31 additions & 0 deletions .github/workflows/python-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# This workflow will run pytest only

name: Run pytest

on:
push:
pull_request:
workflow_dispatch:


permissions:
contents: read

jobs:
deploy:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
- name: Test with pytest
run: |
pytest
75 changes: 74 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,75 @@
# ontopint
A python package for reading units from a JSON-LD files and generating pint quantities.
A Python package for reading & writing units from JSON-LD files and generating pint quantities.

## How it works

```python
import ontopint

# jsonld input with 'value' and 'unit' mapped to qudt terms
data = {
"@context": {
"qudt": "http://qudt.org/schema/qudt/",
"qunit": "http://qudt.org/vocab/unit/",
"qkind": "http://qudt.org/vocab/quantkind/",
"unit": {
"@id": "qudt:hasUnit",
"@type": "@id"
},
"quantity": {
"@id": "qudt:hasQuantityKind",
"@type": "@id"
},
"value": "qudt:value"
},
"value": 4.0,
"unit": "qunit:CentiM"
}

# convert the value + unit pairs to pint.Quantity
data = ontopint.parse_units(data)
print(data)
"""
{
'@context': {...},
'value': <Quantity(4.0, 'centimeter')>
}
"""

# do something with pint
data["value"] += 3 * ontopint.ureg.meter
data["value"] = data["value"].to(ontopint.ureg.meter)
print(data)
"""
{
'@context': {...},
'value': <Quantity(3.04, 'meter')>
}
"""

# export the result as jsonld
data = ontopint.export_units(data)
print(data)
"""
{
"@context": {
"qudt": "http://qudt.org/schema/qudt/",
"qunit": "http://qudt.org/vocab/unit/",
"qkind": "http://qudt.org/vocab/quantkind/",
"unit": {
"@id": "qudt:hasUnit",
"@type": "@id"
},
"quantity": {
"@id": "qudt:hasQuantityKind",
"@type": "@id"
},
"value": "qudt:value"
},
"value": 3.04,
"unit": "qunit:M"
}
"""
```

Note: more complex examples can be found at [tests/data](https://github.com/hampusnasstrom/ontopint/tree/main/tests/data)
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,15 @@ classifiers = [
]
dependencies = [
"rdflib",
"sparqlwrapper",
"pint",
"pyld",
"ucumvert",
]
[project.optional-dependencies]
dev = [
"pytest",
"deepdiff",
]

[project.license]
Expand Down
100 changes: 87 additions & 13 deletions src/ontopint/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import json

import SPARQLWrapper
import rdflib
from pyld import jsonld

# from pint import UnitRegistry
from ucumvert import PintUcumRegistry
import pint

# ureg = UnitRegistry()
ureg = PintUcumRegistry()
Expand All @@ -18,9 +20,38 @@
'value': 'qudt:value',
}

HAS_UNIT = 'http://qudt.org/schema/qudt/hasUnit'
VALUE = 'http://qudt.org/schema/qudt/value'
def get_ucum_code_from_unit_iri(unit_iri):
    """Return the UCUM code declared for a QUDT unit IRI.

    The IRI is loaded into a temporary rdflib graph via ``Graph.parse`` and
    that graph is queried for the ``qudt:ucumCode`` of the unit.

    Args:
        unit_iri: Full IRI of the unit, e.g.
            ``http://qudt.org/vocab/unit/KiloGM``.

    Returns:
        The first ``qudt:ucumCode`` binding of the query result as ``str``.

    Raises:
        IndexError: If the loaded graph contains no ``qudt:ucumCode`` for
            ``unit_iri``.
    """
    graph = rdflib.Graph()
    graph.parse(unit_iri)
    result = graph.query(
        f'SELECT * WHERE {{<{unit_iri}> <http://qudt.org/schema/qudt/ucumCode> ?ucumCode}}'
    )
    bindings = result.bindings
    if not bindings:
        # Same exception type as the bare ``bindings[0]`` lookup would raise,
        # but with a message that names the unit that could not be resolved.
        raise IndexError(f'no qudt:ucumCode found for unit <{unit_iri}>')
    return str(bindings[0]['ucumCode'])

def get_qunit_iri_from_unit_code(code, is_ucum_code=False):
    """Look up the QUDT unit IRI for a unit symbol or UCUM code.

    Sends a SPARQL query to the public QUDT endpoint and returns the first
    ``qudt:Unit`` whose ``qudt:symbol`` (default) or ``qudt:ucumCode``
    matches ``code``.

    Args:
        code: Unit symbol (e.g. ``"kg"``) or, with ``is_ucum_code=True``,
            the UCUM code of the unit.
        is_ucum_code: Match against ``qudt:ucumCode`` (typed as
            ``qudt:UCUMcs``) instead of ``qudt:symbol``.

    Returns:
        The IRI of the first matching unit as ``str``.

    Raises:
        IndexError: If the endpoint returns no matching unit.
    """
    # testing: https://www.qudt.org/fuseki/#/dataset/qudt/query
    sparql = SPARQLWrapper.SPARQLWrapper("https://www.qudt.org/fuseki/qudt/sparql")
    sparql.setMethod(SPARQLWrapper.POST)

    # Escape characters that would otherwise terminate or corrupt the
    # single-quoted SPARQL string literal (e.g. a "'" inside a unit symbol).
    escaped_code = (
        code.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    literal = "'" + escaped_code + "'"
    if is_ucum_code:
        predicate = "http://qudt.org/schema/qudt/ucumCode"
        literal += "^^<http://qudt.org/schema/qudt/UCUMcs>"
    else:
        predicate = "http://qudt.org/schema/qudt/symbol"

    query = f"""
    SELECT ?subject
    WHERE {{
        ?subject <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://qudt.org/schema/qudt/Unit> .
        ?subject <{predicate}> {literal} .
    }}
    LIMIT 1
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(SPARQLWrapper.JSON)
    result = sparql.query().convert()

    bindings = result['results']['bindings']
    if not bindings:
        # Same exception type as the bare ``bindings[0]`` lookup would raise,
        # but with a message that names the code that could not be resolved.
        kind = "UCUM code" if is_ucum_code else "symbol"
        raise IndexError(f"no QUDT unit found for {kind} {code!r}")
    return bindings[0]['subject']['value']

class UnitDecoder(json.JSONDecoder):
def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -51,18 +82,17 @@ def object_hook(self, obj):

def _replace_units(obj, context, original_key_lookup_dict):
if isinstance(obj, dict):
expanded_obj = jsonld.expand({**obj, '@context': context}, context)
if HAS_UNIT in expanded_obj[0] and VALUE in expanded_obj[0]:
unit_iri = expanded_obj[0][HAS_UNIT][0]['@id']
expanded_obj = jsonld.expand({**obj, "@context": context}, context)
compacted_obj = jsonld.compact(expanded_obj, processing_context)
if 'unit' in compacted_obj and 'value' in compacted_obj:
# note: "urn:ontopint:iri" is just any iri not existing in the input data
unit_iri = jsonld.expand(
{"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": compacted_obj["unit"]}, {}
)[0]["urn:ontopint:iri"][0]["@id"]
obj.pop(original_key_lookup_dict['unit'])
graph = rdflib.Graph()
graph.parse(unit_iri)
result = graph.query(
f'SELECT * WHERE {{<{unit_iri}> <http://qudt.org/schema/qudt/symbol> ?ucumCode}}'
)
unit = result.bindings[0]['ucumCode']
ucum_code = get_ucum_code_from_unit_iri(unit_iri)
obj[original_key_lookup_dict['value']] = ureg.Quantity(
obj[original_key_lookup_dict['value']], ureg.from_ucum(unit)
obj[original_key_lookup_dict['value']], ureg.from_ucum(ucum_code)
)
for key, value in obj.items():
obj[key] = _replace_units(value, context, original_key_lookup_dict)
Expand All @@ -73,6 +103,34 @@ def _replace_units(obj, context, original_key_lookup_dict):
]
else:
return obj

def _serialize_units(obj, context, original_key_lookup_dict):
if isinstance(obj, dict):
for key in list(obj.keys()): # make a list copy in order to delete keys while iterating
value = obj[key]
if (isinstance(value, pint.Quantity)):
# see https://pint.readthedocs.io/en/stable/user/formatting.html
# value = value.to_base_units() # this will not work until we have ucum support
quantity_value = float(format(value, 'f~').split(' ')[0])
unit_code = format(value.u, '~')
# ToDo: use ucum code
unit_iri = get_qunit_iri_from_unit_code(unit_code)
# note: "urn:ontopint:iri" is just any iri not existing in the input data
unit_compact_iri = jsonld.compact(
{"@context": {**context, "urn:ontopint:iri": {"@type": "@id"}}, "urn:ontopint:iri": unit_iri},
{**context, "urn:ontopint:iri": {"@type": "@id"}}
)["urn:ontopint:iri"]
obj[original_key_lookup_dict['value']] = quantity_value
obj[original_key_lookup_dict['unit']] = unit_compact_iri

else: obj[key] = _serialize_units(value, context, original_key_lookup_dict)
return obj
elif isinstance(obj, list):
return [
_serialize_units(value, context, original_key_lookup_dict) for value in obj
]
else:
return obj


def parse_units(json_ld: dict) -> dict:
Expand All @@ -86,5 +144,21 @@ def parse_units(json_ld: dict) -> dict:
# reverse the dict
original_key_lookup_dict = {v: k for k, v in compacted.items()}
parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict)
parsed_json['@context'] = original_context
parsed_json = {'@context': original_context, **parsed_json}
json_ld['@context'] = original_context # restore context
return parsed_json

def export_units(json_ld: dict, context = processing_context) -> dict:
    """Convert the ``pint.Quantity`` values of a document back to JSON-LD.

    The quantities inside ``json_ld`` are replaced in place by plain
    value/unit entries, using the key names that the document's context maps
    to the QUDT value and unit terms.

    Args:
        json_ld: Document to export. Its ``'@context'`` entry, if present, is
            removed during processing and put back before returning.
        context: Context used when ``json_ld`` has no ``'@context'`` entry.

    Returns:
        A new dict with ``'@context'`` first, followed by the serialized
        entries of ``json_ld``.
    """
    had_context = '@context' in json_ld
    original_context = json_ld.pop('@context', context)
    try:
        key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'}
        # inverse expand-reverse cycle
        expanded = jsonld.expand(key_dict, processing_context)
        compacted = jsonld.compact(expanded, original_context)
        # remove the context
        del compacted['@context']
        # reverse the dict
        original_key_lookup_dict = {v: k for k, v in compacted.items()}
        parsed_json = _serialize_units(json_ld, original_context, original_key_lookup_dict)
        return {'@context': original_context, **parsed_json}
    finally:
        # Restore the caller's context even if serialization failed, but do
        # not add a context to input that did not have one.
        if had_context:
            json_ld['@context'] = original_context
24 changes: 24 additions & 0 deletions tests/010_api_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

import pint
from ontopint import get_qunit_iri_from_unit_code, get_ucum_code_from_unit_iri
import ontopint

def test_pint_print_formats():
    """Check the pint format specs 'f~' (quantity) and '~' (unit)."""
    # see https://pint.readthedocs.io/en/stable/user/formatting.html
    kilogram: pint.Quantity = pint.Quantity(
        1.0, ontopint.ureg.from_ucum("kg")
    ).to_base_units()
    kilogram_parts = format(kilogram, 'f~').split(' ')
    assert float(kilogram_parts[0]) == 1.0
    assert format(kilogram.u, '~') == "kg"

    centimeters: pint.Quantity = pint.Quantity(304, ontopint.ureg.from_ucum("cm"))
    centimeter_parts = format(centimeters, 'f~').split(' ')
    assert float(centimeter_parts[0]) == 304
    assert centimeter_parts[1] == "cm"

    electron_volts: pint.Quantity = pint.Quantity(10, ontopint.ureg.from_ucum("eV"))
    electron_volt_parts = format(electron_volts, 'f~').split(' ')
    assert float(electron_volt_parts[0]) == 10
    assert format(electron_volts.u, '~') == "eV"

def test_qudt_sparql_api():
    """Round-trip unit codes and IRIs through the QUDT lookups."""
    kilogram_iri = "http://qudt.org/vocab/unit/KiloGM"
    meter_iri = "http://qudt.org/vocab/unit/M"

    # kilogram: by symbol, by UCUM code, and back from the IRI
    assert get_qunit_iri_from_unit_code("kg") == kilogram_iri
    assert get_qunit_iri_from_unit_code("kg", True) == kilogram_iri
    assert get_ucum_code_from_unit_iri(kilogram_iri) == "kg"

    # meter: by symbol and by UCUM code
    assert get_qunit_iri_from_unit_code("m") == meter_iri
    assert get_qunit_iri_from_unit_code("m", True) == meter_iri
33 changes: 33 additions & 0 deletions tests/020_deserialization_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import ontopint
import pint

from common import _load_test_data, _recursive_items

def test_default_keys():
    """Parse a document that uses the default keys 'value' and 'unit'."""
    document = _load_test_data("test_data_default_keys.jsonld")
    parsed = ontopint.parse_units(document)
    del parsed["@context"]

    quantity_count = 0
    for key, value in _recursive_items(parsed):
        if key == "value":
            assert isinstance(value, pint.Quantity)
            quantity_count += 1
        # parsing must have consumed every unit entry
        assert key != "unit", "unit key should not be present"

    assert quantity_count == 2, "result should contain 2 parsed values"

def test_custom_keys():
    """Parse a document that uses the custom keys 'my_value' and 'my_unit'."""
    document = _load_test_data("test_data_custom_keys.jsonld")
    parsed = ontopint.parse_units(document)
    del parsed["@context"]

    quantity_count = 0
    for key, value in _recursive_items(parsed):
        if key == "my_value":
            assert isinstance(value, pint.Quantity)
            quantity_count += 1
        # parsing must have consumed every unit entry
        assert key != "my_unit", "my_unit key should not be present"

    assert quantity_count == 2, "result should contain 2 parsed values"

47 changes: 47 additions & 0 deletions tests/030_serialization_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import ontopint
import deepdiff

def test_default_keys():
    """Export a quantity stored under the default key 'value'."""
    electron_volt = ontopint.ureg.from_ucum("eV")
    document = {"value": ontopint.ureg.Quantity(1.123, electron_volt)}
    expected = {"value": 1.123, "unit": "qunit:EV"}

    exported = ontopint.export_units(document)
    del exported["@context"]

    difference = deepdiff.DeepDiff(expected, exported)
    assert len(difference.keys()) == 0  # no diff

def test_custom_keys():
    """Export a quantity stored under the custom key 'my_value'."""
    custom_context = {
        "qudt": "http://qudt.org/schema/qudt/",
        "qunit": "http://qudt.org/vocab/unit/",
        "qkind": "http://qudt.org/vocab/quantkind/",
        "my_unit": {
            "@id": "qudt:hasUnit",
            "@type": "@id",
        },
        "my_value": "qudt:value",
    }
    electron_volt = ontopint.ureg.from_ucum("eV")
    document = {
        "@context": custom_context,
        "my_value": ontopint.ureg.Quantity(1.123, electron_volt),
    }
    expected = {"my_value": 1.123, "my_unit": "qunit:EV"}

    exported = ontopint.export_units(document)
    del exported["@context"]

    difference = deepdiff.DeepDiff(expected, exported)
    assert len(difference.keys()) == 0  # no diff


Loading

0 comments on commit 03e7e7b

Please sign in to comment.