
Commit

fix test
Hans-Chrstian committed Sep 26, 2024
1 parent 9055109 commit 7e82340
Showing 5 changed files with 163 additions and 212 deletions.
ckanext/fairdatapoint/processors.py (2 changes: 1 addition & 1 deletion)
@@ -35,7 +35,7 @@ def catalogs(self) -> Iterable[Dict]:
         for catalog_ref in self._catalogs():
             catalog_dict = {}
             for profile_class in self._profiles:
-                profile = profile_class(self.g, self.compatibility_mode)
+                profile = profile_class(graph=self.g, compatibility_mode=self.compatibility_mode)
                 profile.parse_dataset(catalog_dict, catalog_ref)

             yield catalog_dict
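The only functional change here is that the profile is now constructed with keyword arguments. A plausible reason (an assumption, not stated in the commit) is that a positional call breaks when the profile base class gains an extra positional parameter between ckanext-dcat releases, silently binding compatibility_mode to the wrong slot. A minimal sketch of that hazard, using a hypothetical stand-in class rather than the real ckanext-dcat signature:

    # Sketch only: ExampleProfile is a hypothetical stand-in; the real
    # ckanext-dcat profile constructor may differ between releases.
    class ExampleProfile:
        def __init__(self, graph, dataset_type="dataset", compatibility_mode=False):
            self.g = graph
            self.dataset_type = dataset_type
            self.compatibility_mode = compatibility_mode

    graph = object()  # placeholder for an rdflib.Graph

    # Positional call: the second argument lands on dataset_type, so the
    # compatibility flag is silently lost.
    positional = ExampleProfile(graph, True)
    assert positional.dataset_type is True
    assert positional.compatibility_mode is False

    # Keyword call, as in this commit: each value binds to the intended parameter.
    keyword = ExampleProfile(graph=graph, compatibility_mode=True)
    assert keyword.compatibility_mode is True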
ckanext/fairdatapoint/profiles.py (77 changes: 12 additions & 65 deletions)
@@ -8,7 +8,7 @@
 import json
 import logging

-from ckanext.dcat.profiles import EuropeanDCATAP2Profile
+from ckanext.dcat.profiles import EuropeanDCATAP3Profile
 from ckan.plugins import toolkit
 from ckan import model
 import dateutil.parser as dateparser
@@ -21,46 +21,6 @@

 VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")


-def _convert_extras_to_declared_schema_fields(dataset_dict: Dict) -> Dict:
-    """
-    Compares the extras dictionary with the declared schema.
-    Updates the declared schema fields with the values that match from the extras.
-    Remove the extras that are present on the declared schema.
-    :param dataset_dict:
-    :return: dataset_dict - Updated dataset_dict
-    """
-    # Use the correct dataset type, Defaults to 'dataset'
-    dataset_type = dataset_dict.get('type', 'dataset')
-    # Gets the full Schema definition of the correct dataset type
-    context = {'model': model, 'session': model.Session}
-    data_dict = {'type': dataset_type}
-    full_schema_dict = toolkit.get_action('scheming_dataset_schema_show')(context, data_dict)
-
-    dataset_fields = {x.get('field_name'): x.get('preset') for x in full_schema_dict.get('dataset_fields', [])}
-
-    # Populate the declared schema fields, if they are present in the extras
-    for extra_dict in dataset_dict.get('extras', []):
-        field_key = extra_dict.get('key')
-        field_value = extra_dict.get('value')
-        if field_key in dataset_fields:
-            preset = dataset_fields[field_key]
-            if preset == 'multiple_text' and field_value:
-                try:
-                    dataset_dict[field_key] = json.loads(field_value)
-                except JSONDecodeError:
-                    dataset_dict[field_key] = field_value
-            elif preset == 'date' and field_value:
-                dataset_dict[field_key] = convert_datetime_string(field_value)
-            else:
-                dataset_dict[field_key] = field_value
-
-    # Remove the extras that have been populated into the declared schema fields
-    dataset_dict['extras'] = [d for d in dataset_dict['extras'] if d.get('key') not in dataset_fields]
-
-    return dataset_dict
-
-
 def validate_tags(values_list: List[Dict]) -> List:
     """
     Validates tags strings to contain allowed characters, replaces others with spaces
@@ -85,31 +45,18 @@ def validate_tags(values_list: List[Dict]) -> List:
     return tags


-def convert_datetime_string(date_value: str) -> datetime:
-    """
-    Converts datestrings (e.g. '2023-10-06T10:12:55.614000+00:00') to datetime class instance
-    """
-    try:
-        date_value = dateparser.parse(date_value, yearfirst=True)
-        if date_value.tzinfo is not None:
-            date_value = date_value.astimezone(timezone.utc)
-    except ParserError:
-        log.error(f'A date field string value {date_value} can not be parsed to a date')
-    return date_value
-
-
-class FAIRDataPointDCATAPProfile(EuropeanDCATAP2Profile):
+class FAIRDataPointDCATAPProfile(EuropeanDCATAP3Profile):
     """
     An RDF profile for FAIR data points
     """

     def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
         super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref)
-        dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)
+        #dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)

         dataset_dict = self._parse_creator(dataset_dict, dataset_ref)

-        dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)
+        ## dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)

         dataset_dict['tags'] = validate_tags(dataset_dict['tags'])
@@ -123,10 +70,10 @@ def _contact_point_details(self, subject, predicate) -> List:

         for agent in self.g.objects(subject, predicate):
             contact = {
-                'contact_uri': (str(agent) if isinstance(agent, URIRef)
+                'uri': (str(agent) if isinstance(agent, URIRef)
                         else self._get_vcard_property_value(agent, VCARD.hasUID)),
-                'contact_name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
-                'contact_email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}
+                'name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
+                'email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}

             contact_list.append(contact)
@@ -156,16 +103,16 @@ def _parse_creator(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
             creator_name = graph.value(creator_ref, FOAF.name)

             if creator_identifier:
-                creator['creator_identifier'] = str(creator_identifier)
+                creator['identifier'] = str(creator_identifier)
             if creator_name:
-                creator['creator_name'] = str(creator_name)
+                creator['name'] = str(creator_name)
         else:
             # If the creator is a URI, use it as the identifier
             if isinstance(creator_ref, URIRef):
-                creator['creator_identifier'] = str(creator_ref)
-                creator['creator_name'] = str(creator_ref)
+                creator['identifier'] = str(creator_ref)
+                creator['name'] = str(creator_ref)
             else:
-                creator['creator_name'] = str(creator_ref)
+                creator['name'] = str(creator_ref)

         creators.append(creator)
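Taken together, the profiles.py changes switch the base profile to DCAT-AP 3, stop converting extras into schema fields, and drop the contact_/creator_ key prefixes, so the parsed dictionary now uses plain nested keys. A sketch of the before/after shape, with illustrative values taken from the tests updated in this commit:

    # Shape produced before this commit (prefixed keys inside nested dicts):
    before = {
        "contact_point": [{
            "contact_uri": "https://orcid.org/0000-0002-4348-707X",
            "contact_name": "N.K. De Vries",
            "contact_email": "",
        }],
        "creator": [{
            "creator_identifier": "https://orcid.org/0000-0002-0180-3636",
            "creator_name": "https://orcid.org/0000-0002-0180-3636",
        }],
    }

    # Shape expected after this commit (plain keys, matching the updated tests):
    after = {
        "contact": [{"name": "N.K. De Vries"}],
        "creator": [{
            "identifier": "https://orcid.org/0000-0002-0180-3636",
            "name": "https://orcid.org/0000-0002-0180-3636",
        }],
    }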
Turtle test fixture (file path not shown):

@@ -21,5 +21,4 @@
     dcat:startDate "2020-01-01"^^xsd:date ] ;
 dcterms:title "COVID-NL cohort MUMC+"@en ;
 dcat:contactPoint [ a v:Kind ;
-    v:fn "N.K. De Vries" ;
-    v:hasUID <https://orcid.org/0000-0002-4348-707X> ] .
+    v:fn "N.K. De Vries" ; ] .
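With v:hasUID gone from the fixture, the contact point only carries a display name, which is why the updated test below expects a bare {"name": "N.K. De Vries"} contact. For reference, a minimal rdflib sketch of reading such a vCard contact point (the dataset URI is a made-up placeholder):

    from rdflib import Graph, Namespace

    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")

    # Trimmed-down stand-in for the fixture: a contact point with only v:fn.
    ttl = """
    @prefix dcat: <http://www.w3.org/ns/dcat#> .
    @prefix v: <http://www.w3.org/2006/vcard/ns#> .
    <https://example.org/dataset> dcat:contactPoint [ a v:Kind ; v:fn "N.K. De Vries" ] .
    """

    g = Graph()
    g.parse(data=ttl, format="turtle")

    for _dataset, _, contact in g.triples((None, DCAT.contactPoint, None)):
        print(g.value(contact, VCARD.fn))      # N.K. De Vries
        print(g.value(contact, VCARD.hasUID))  # None: hasUID was removed from the fixture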
ckanext/fairdatapoint/tests/test_processors.py (52 changes: 30 additions & 22 deletions)
@@ -7,6 +7,8 @@
 from dateutil.tz import tzutc
 from pathlib import Path
 from unittest.mock import patch
+
+from docopt import extras
 from rdflib import Graph
 from ckanext.fairdatapoint.harvesters.domain.fair_data_point_record_to_package_converter import (
     FairDataPointRecordToPackageConverter)
@@ -45,21 +47,23 @@ def test_fdp_record_converter_dataset_dict(self):
                  "http://purl.org/zonmw/generic/10006;"
                  "dataset=https://covid19initiatives.health-ri.nl/p/Project/27866022694497978",
             record=data)
-        expected_dataset = dict(extras=[
-            {"key": "uri", "value": "https://covid19initiatives.health-ri.nl/p/Project/27866022694497978"}
-        ], resources=[], title="COVID-NL cohort MUMC+", notes="Clinical data of MUMC COVID-NL cohort", tags=[],
-            license_id="", identifier="27866022694497978",
-            has_version=["https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"],
-            contact_point=[
-                {
-                    "contact_name": "N.K. De Vries",
-                    "contact_uri": "https://orcid.org/0000-0002-4348-707X",
-                    "contact_email": "",
-                }
-            ], creator=[{"creator_identifier": "https://orcid.org/0000-0002-0180-3636",
-                         "creator_name": "https://orcid.org/0000-0002-0180-3636"}],
-            publisher_uri="https://opal.health-ri.nl/pub/", temporal_start=datetime(2020, 1, 1, 0, 0),
-            temporal_end=datetime(2025, 12, 31, 0, 0))
+        expected_dataset = dict(extras=[], uri="https://covid19initiatives.health-ri.nl/p/Project/27866022694497978",
+                                resources=[], title="COVID-NL cohort MUMC+",
+                                notes="Clinical data of MUMC COVID-NL cohort", tags=[],
+                                license_id="", identifier="27866022694497978",
+                                has_version=[
+                                    "https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"],
+                                contact=[
+                                    {
+                                        "name": "N.K. De Vries"
+                                    }
+                                ], creator=[{"identifier": "https://orcid.org/0000-0002-0180-3636",
+                                             "name": "https://orcid.org/0000-0002-0180-3636"}],
+                                publisher=[
+                                    {
+                                        "uri": "https://opal.health-ri.nl/pub/"
+                                    }
+                                ], temporal_start='2020-01-01', temporal_end='2025-12-31')
         assert actual_dataset == expected_dataset

     def test_fdp_record_converter_catalog_dict(self):
@@ -68,23 +72,27 @@ def test_fdp_record_converter_catalog_dict(self):
         actual = fdp_record_to_package.record_to_package(
             guid="catalog=https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d",
             record=data)

         expected = {
             "uri": "https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d",
             "access_rights": "https://fair.healthinformationportal.eu/catalog/"
                              "1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d#accessRights",
             "conforms_to": ["https://fair.healthinformationportal.eu/profile/"
                             "a0949e72-4466-4d53-8900-9436d1049a4b"],
-            "extras": [{"key": "uri",
-                        "value": "https://fair.healthinformationportal.eu/catalog/"
-                                 "1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d"},
-                       ],
+            "extras": [],
             "has_version": ["1.0"],
-            "issued": datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc()),
+            "issued": '2023-10-06T10:12:55.614000+00:00',
             "language": ["http://id.loc.gov/vocabulary/iso639-1/en"],
             "license_id": "",
-            "modified": datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc()),
-            "publisher_name": "Automatic",
+            "modified": '2023-10-06T10:12:55.614000+00:00',
+            "publisher": [
+                {
+                    "name": "Automatic"
+                }
+            ],
             "resources": [],
             "tags": [],
             "title": "Slovenia National Node"
         }

         assert actual == expected
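The issued and modified expectations change from timezone-aware datetime objects to ISO 8601 strings; both encode the same instant, which isoformat() makes explicit:

    from datetime import datetime
    from dateutil.tz import tzutc

    old_value = datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc())
    new_value = "2023-10-06T10:12:55.614000+00:00"

    # The old datetime expectation serialises to exactly the new string expectation.
    assert old_value.isoformat() == new_value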