
Commit

fix test
Hans-Chrstian committed Sep 26, 2024
1 parent 9055109 commit 7e82340
Showing 5 changed files with 163 additions and 212 deletions.
ckanext/fairdatapoint/processors.py (2 changes: 1 addition & 1 deletion)
@@ -35,7 +35,7 @@ def catalogs(self) -> Iterable[Dict]:
         for catalog_ref in self._catalogs():
             catalog_dict = {}
             for profile_class in self._profiles:
-                profile = profile_class(self.g, self.compatibility_mode)
+                profile = profile_class(graph=self.g, compatibility_mode=self.compatibility_mode)
                 profile.parse_dataset(catalog_dict, catalog_ref)

             yield catalog_dict
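The only functional change here is that the profile is now constructed with keyword arguments. A plausible reason (an assumption, not stated in the commit) is that a positional call breaks when the profile base class gains an extra positional parameter between ckanext-dcat releases, silently binding compatibility_mode to the wrong slot. A minimal sketch of that hazard, using a hypothetical stand-in class rather than the real ckanext-dcat signature:

    # Sketch only: ExampleProfile is a hypothetical stand-in; the real
    # ckanext-dcat profile constructor may differ between releases.
    class ExampleProfile:
        def __init__(self, graph, dataset_type="dataset", compatibility_mode=False):
            self.g = graph
            self.dataset_type = dataset_type
            self.compatibility_mode = compatibility_mode

    graph = object()  # placeholder for an rdflib.Graph

    # Positional call: the second argument lands on dataset_type, so the
    # compatibility flag is silently lost.
    positional = ExampleProfile(graph, True)
    assert positional.dataset_type is True
    assert positional.compatibility_mode is False

    # Keyword call, as in this commit: each value binds to the intended parameter.
    keyword = ExampleProfile(graph=graph, compatibility_mode=True)
    assert keyword.compatibility_mode is True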
ckanext/fairdatapoint/profiles.py (77 changes: 12 additions & 65 deletions)
@@ -8,7 +8,7 @@
 import json
 import logging

-from ckanext.dcat.profiles import EuropeanDCATAP2Profile
+from ckanext.dcat.profiles import EuropeanDCATAP3Profile
 from ckan.plugins import toolkit
 from ckan import model
 import dateutil.parser as dateparser
@@ -21,46 +21,6 @@

 VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")


-def _convert_extras_to_declared_schema_fields(dataset_dict: Dict) -> Dict:
-    """
-    Compares the extras dictionary with the declared schema.
-    Updates the declared schema fields with the values that match from the extras.
-    Remove the extras that are present on the declared schema.
-    :param dataset_dict:
-    :return: dataset_dict - Updated dataset_dict
-    """
-    # Use the correct dataset type, Defaults to 'dataset'
-    dataset_type = dataset_dict.get('type', 'dataset')
-    # Gets the full Schema definition of the correct dataset type
-    context = {'model': model, 'session': model.Session}
-    data_dict = {'type': dataset_type}
-    full_schema_dict = toolkit.get_action('scheming_dataset_schema_show')(context, data_dict)
-
-    dataset_fields = {x.get('field_name'): x.get('preset') for x in full_schema_dict.get('dataset_fields', [])}
-
-    # Populate the declared schema fields, if they are present in the extras
-    for extra_dict in dataset_dict.get('extras', []):
-        field_key = extra_dict.get('key')
-        field_value = extra_dict.get('value')
-        if field_key in dataset_fields:
-            preset = dataset_fields[field_key]
-            if preset == 'multiple_text' and field_value:
-                try:
-                    dataset_dict[field_key] = json.loads(field_value)
-                except JSONDecodeError:
-                    dataset_dict[field_key] = field_value
-            elif preset == 'date' and field_value:
-                dataset_dict[field_key] = convert_datetime_string(field_value)
-            else:
-                dataset_dict[field_key] = field_value
-
-    # Remove the extras that have been populated into the declared schema fields
-    dataset_dict['extras'] = [d for d in dataset_dict['extras'] if d.get('key') not in dataset_fields]
-
-    return dataset_dict
-
-
 def validate_tags(values_list: List[Dict]) -> List:
     """
     Validates tags strings to contain allowed characters, replaces others with spaces
@@ -85,31 +45,18 @@ def validate_tags(values_list: List[Dict]) -> List:
     return tags


-def convert_datetime_string(date_value: str) -> datetime:
-    """
-    Converts datestrings (e.g. '2023-10-06T10:12:55.614000+00:00') to datetime class instance
-    """
-    try:
-        date_value = dateparser.parse(date_value, yearfirst=True)
-        if date_value.tzinfo is not None:
-            date_value = date_value.astimezone(timezone.utc)
-    except ParserError:
-        log.error(f'A date field string value {date_value} can not be parsed to a date')
-    return date_value
-
-
-class FAIRDataPointDCATAPProfile(EuropeanDCATAP2Profile):
+class FAIRDataPointDCATAPProfile(EuropeanDCATAP3Profile):
     """
     An RDF profile for FAIR data points
     """

     def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
         super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref)
-        dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)
+        #dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref)

         dataset_dict = self._parse_creator(dataset_dict, dataset_ref)

-        dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)
+        ## dataset_dict = _convert_extras_to_declared_schema_fields(dataset_dict)

         dataset_dict['tags'] = validate_tags(dataset_dict['tags'])
@@ -123,10 +70,10 @@ def _contact_point_details(self, subject, predicate) -> List:

         for agent in self.g.objects(subject, predicate):
             contact = {
-                'contact_uri': (str(agent) if isinstance(agent, URIRef)
+                'uri': (str(agent) if isinstance(agent, URIRef)
                         else self._get_vcard_property_value(agent, VCARD.hasUID)),
-                'contact_name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
-                'contact_email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}
+                'name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn),
+                'email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))}

             contact_list.append(contact)
@@ -156,16 +103,16 @@ def _parse_creator(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict:
             creator_name = graph.value(creator_ref, FOAF.name)

             if creator_identifier:
-                creator['creator_identifier'] = str(creator_identifier)
+                creator['identifier'] = str(creator_identifier)
             if creator_name:
-                creator['creator_name'] = str(creator_name)
+                creator['name'] = str(creator_name)
         else:
             # If the creator is a URI, use it as the identifier
             if isinstance(creator_ref, URIRef):
-                creator['creator_identifier'] = str(creator_ref)
-                creator['creator_name'] = str(creator_ref)
+                creator['identifier'] = str(creator_ref)
+                creator['name'] = str(creator_ref)
             else:
-                creator['creator_name'] = str(creator_ref)
+                creator['name'] = str(creator_ref)

         creators.append(creator)
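Taken together, the profiles.py changes switch the base profile to DCAT-AP 3, stop converting extras into schema fields, and drop the contact_/creator_ key prefixes, so the parsed dictionary now uses plain nested keys. A sketch of the before/after shape, with illustrative values taken from the tests updated in this commit:

    # Shape produced before this commit (prefixed keys inside nested dicts):
    before = {
        "contact_point": [{
            "contact_uri": "https://orcid.org/0000-0002-4348-707X",
            "contact_name": "N.K. De Vries",
            "contact_email": "",
        }],
        "creator": [{
            "creator_identifier": "https://orcid.org/0000-0002-0180-3636",
            "creator_name": "https://orcid.org/0000-0002-0180-3636",
        }],
    }

    # Shape expected after this commit (plain keys, matching the updated tests):
    after = {
        "contact": [{"name": "N.K. De Vries"}],
        "creator": [{
            "identifier": "https://orcid.org/0000-0002-0180-3636",
            "name": "https://orcid.org/0000-0002-0180-3636",
        }],
    }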
Turtle test fixture (file path not shown):

@@ -21,5 +21,4 @@
     dcat:startDate "2020-01-01"^^xsd:date ] ;
 dcterms:title "COVID-NL cohort MUMC+"@en ;
 dcat:contactPoint [ a v:Kind ;
-    v:fn "N.K. De Vries" ;
-    v:hasUID <https://orcid.org/0000-0002-4348-707X> ] .
+    v:fn "N.K. De Vries" ; ] .
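With v:hasUID gone from the fixture, the contact point only carries a display name, which is why the updated test below expects a bare {"name": "N.K. De Vries"} contact. For reference, a minimal rdflib sketch of reading such a vCard contact point (the dataset URI is a made-up placeholder):

    from rdflib import Graph, Namespace

    DCAT = Namespace("http://www.w3.org/ns/dcat#")
    VCARD = Namespace("http://www.w3.org/2006/vcard/ns#")

    # Trimmed-down stand-in for the fixture: a contact point with only v:fn.
    ttl = """
    @prefix dcat: <http://www.w3.org/ns/dcat#> .
    @prefix v: <http://www.w3.org/2006/vcard/ns#> .
    <https://example.org/dataset> dcat:contactPoint [ a v:Kind ; v:fn "N.K. De Vries" ] .
    """

    g = Graph()
    g.parse(data=ttl, format="turtle")

    for _dataset, _, contact in g.triples((None, DCAT.contactPoint, None)):
        print(g.value(contact, VCARD.fn))      # N.K. De Vries
        print(g.value(contact, VCARD.hasUID))  # None: hasUID was removed from the fixture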
ckanext/fairdatapoint/tests/test_processors.py (52 changes: 30 additions & 22 deletions)
@@ -7,6 +7,8 @@
 from dateutil.tz import tzutc
 from pathlib import Path
 from unittest.mock import patch
+
+from docopt import extras
 from rdflib import Graph
 from ckanext.fairdatapoint.harvesters.domain.fair_data_point_record_to_package_converter import (
     FairDataPointRecordToPackageConverter)
@@ -45,21 +47,23 @@ def test_fdp_record_converter_dataset_dict(self):
                  "http://purl.org/zonmw/generic/10006;"
                  "dataset=https://covid19initiatives.health-ri.nl/p/Project/27866022694497978",
             record=data)
-        expected_dataset = dict(extras=[
-            {"key": "uri", "value": "https://covid19initiatives.health-ri.nl/p/Project/27866022694497978"}
-        ], resources=[], title="COVID-NL cohort MUMC+", notes="Clinical data of MUMC COVID-NL cohort", tags=[],
-            license_id="", identifier="27866022694497978",
-            has_version=["https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"],
-            contact_point=[
-                {
-                    "contact_name": "N.K. De Vries",
-                    "contact_uri": "https://orcid.org/0000-0002-4348-707X",
-                    "contact_email": "",
-                }
-            ], creator=[{"creator_identifier": "https://orcid.org/0000-0002-0180-3636",
-                         "creator_name": "https://orcid.org/0000-0002-0180-3636"}],
-            publisher_uri="https://opal.health-ri.nl/pub/", temporal_start=datetime(2020, 1, 1, 0, 0),
-            temporal_end=datetime(2025, 12, 31, 0, 0))
+        expected_dataset = dict(extras=[], uri="https://covid19initiatives.health-ri.nl/p/Project/27866022694497978",
+                                resources=[], title="COVID-NL cohort MUMC+",
+                                notes="Clinical data of MUMC COVID-NL cohort", tags=[],
+                                license_id="", identifier="27866022694497978",
+                                has_version=[
+                                    "https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"],
+                                contact=[
+                                    {
+                                        "name": "N.K. De Vries"
+                                    }
+                                ], creator=[{"identifier": "https://orcid.org/0000-0002-0180-3636",
+                                             "name": "https://orcid.org/0000-0002-0180-3636"}],
+                                publisher=[
+                                    {
+                                        "uri": "https://opal.health-ri.nl/pub/"
+                                    }
+                                ], temporal_start='2020-01-01', temporal_end='2025-12-31')
         assert actual_dataset == expected_dataset

     def test_fdp_record_converter_catalog_dict(self):
@@ -68,23 +72,27 @@ def test_fdp_record_converter_catalog_dict(self):
         actual = fdp_record_to_package.record_to_package(
             guid="catalog=https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d",
             record=data)

         expected = {
             "uri": "https://fair.healthinformationportal.eu/catalog/1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d",
             "access_rights": "https://fair.healthinformationportal.eu/catalog/"
                              "1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d#accessRights",
             "conforms_to": ["https://fair.healthinformationportal.eu/profile/"
                             "a0949e72-4466-4d53-8900-9436d1049a4b"],
-            "extras": [{"key": "uri",
-                        "value": "https://fair.healthinformationportal.eu/catalog/"
-                                 "1c75c2c9-d2cc-44cb-aaa8-cf8c11515c8d"},
-                       ],
+            "extras": [],
             "has_version": ["1.0"],
-            "issued": datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc()),
+            "issued": '2023-10-06T10:12:55.614000+00:00',
             "language": ["http://id.loc.gov/vocabulary/iso639-1/en"],
             "license_id": "",
-            "modified": datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc()),
-            "publisher_name": "Automatic",
+            "modified": '2023-10-06T10:12:55.614000+00:00',
+            "publisher": [
+                {
+                    "name": "Automatic"
+                }
+            ],
             "resources": [],
             "tags": [],
             "title": "Slovenia National Node"
         }

         assert actual == expected
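The issued and modified expectations change from timezone-aware datetime objects to ISO 8601 strings; both encode the same instant, which isoformat() makes explicit:

    from datetime import datetime
    from dateutil.tz import tzutc

    old_value = datetime(2023, 10, 6, 10, 12, 55, 614000, tzinfo=tzutc())
    new_value = "2023-10-06T10:12:55.614000+00:00"

    # The old datetime expectation serialises to exactly the new string expectation.
    assert old_value.isoformat() == new_value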