diff --git a/ckanext/fairdatapoint/profiles.py b/ckanext/fairdatapoint/profiles.py index e468f95..c8815f3 100644 --- a/ckanext/fairdatapoint/profiles.py +++ b/ckanext/fairdatapoint/profiles.py @@ -105,6 +105,7 @@ class FAIRDataPointDCATAPProfile(EuropeanDCATAP2Profile): def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict: super(FAIRDataPointDCATAPProfile, self).parse_dataset(dataset_dict, dataset_ref) + dataset_dict = self._parse_contact_point(dataset_dict, dataset_ref) dataset_dict = self._parse_creator(dataset_dict, dataset_ref) @@ -114,21 +115,37 @@ def parse_dataset(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict: return dataset_dict - def _contact_details(self, subject, predicate): + def _contact_point_details(self, subject, predicate) -> List: """ Overrides RDFProfile._contact_details so uri is taken from hasUID for VCard """ - contact = {} + contact_list = [] for agent in self.g.objects(subject, predicate): - contact['uri'] = (str(agent) if isinstance(agent, URIRef) - else self._get_vcard_property_value(agent, VCARD.hasUID)) + contact = { + 'contact_uri': (str(agent) if isinstance(agent, URIRef) + else self._get_vcard_property_value(agent, VCARD.hasUID)), + 'contact_name': self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn), + 'contact_email': self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail))} - contact['name'] = self._get_vcard_property_value(agent, VCARD.hasFN, VCARD.fn) - contact['email'] = self._without_mailto(self._get_vcard_property_value(agent, VCARD.hasEmail)) - contact['phone'] = self._get_vcard_property_value(agent, VCARD.hasTelephone) + contact_list.append(contact) - return contact + return contact_list + + def _parse_contact_point(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict: + """ + ckan-dcat extension implies there can be just one contact point and in case a list is provided by source only + last value is taken. Besides it never solves uri from a VCard object. This function parses DCAT.contactPoint + information to a list of `pontact_point` dictionaries and replaces ckan-dcat values + """ + contact_point = self._contact_point_details(subject=dataset_ref, predicate=DCAT.contactPoint) + dcat_profile_contact_fields = ['contact_name', 'contact_email', 'contact_uri'] + if contact_point: + dataset_dict['extras'].append({'key': 'contact_point', 'value': contact_point}) + # Remove the extras contact_ fields if they were parsed by dcat extension + dataset_dict['extras'] = \ + [item for item in dataset_dict['extras'] if item.get('key') not in dcat_profile_contact_fields] + return dataset_dict def _parse_creator(self, dataset_dict: Dict, dataset_ref: URIRef) -> Dict: graph = self.g diff --git a/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_cards.ttl b/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_cards.ttl new file mode 100644 index 0000000..78e6019 --- /dev/null +++ b/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_cards.ttl @@ -0,0 +1,27 @@ +# SPDX-FileCopyrightText: 2024 Stichting Health-RI +# +# SPDX-License-Identifier: AGPL-3.0-only + +@prefix dcterms: . +@prefix dcat: . +@prefix foaf: . +@prefix xsd: . +@prefix ldp: . +@prefix v: . + + + a dcat:Resource, dcat:Dataset; + "Example"; + dcterms:title "Example"; + "2023-09-05T12:00:36.276171042Z"^^xsd:dateTime; + "2024-05-02T13:01:35.716385359Z"^^xsd:dateTime; + dcterms:license ; + dcterms:description "This is an example description."; + dcat:contactPoint [ a v:VCard ; + v:fn "Marc Bonten" ; + v:hasUID ; + v:hasEmail ] , + [ a v:VCard ; + v:fn "Frits Rosendaal" ; + v:hasUID ; + v:hasEmail ] . diff --git a/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_urls.ttl b/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_urls.ttl new file mode 100644 index 0000000..d5aba87 --- /dev/null +++ b/ckanext/fairdatapoint/tests/test_data/contact_point_multiple_urls.ttl @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2024 Stichting Health-RI +# +# SPDX-License-Identifier: AGPL-3.0-only + +@prefix dcterms: . +@prefix dcat: . +@prefix foaf: . +@prefix xsd: . +@prefix ldp: . + + + a dcat:Resource, dcat:Dataset; + "Example"; + dcterms:title "Example"; + "2023-09-05T12:00:36.276171042Z"^^xsd:dateTime; + "2024-05-02T13:01:35.716385359Z"^^xsd:dateTime; + dcterms:license ; + dcterms:description "This is an example description."; + dcat:contactPoint , . diff --git a/ckanext/fairdatapoint/tests/test_data/test_schema.json b/ckanext/fairdatapoint/tests/test_data/test_schema.json index f304291..ee80d9b 100644 --- a/ckanext/fairdatapoint/tests/test_data/test_schema.json +++ b/ckanext/fairdatapoint/tests/test_data/test_schema.json @@ -1,743 +1,755 @@ { - "scheming_version": 1, - "dataset_type": "dataset", - "about": "Dataset Schema Compatible with DCAT 2.1.1 for GDI - User Portal Catalogue", - "about_url": "https://github.com/SEMICeu/DCAT-AP/tree/master/releases/2.1.1", - "dataset_fields": [ - { - "field_name": "title", - "label": "Title", - "preset": "title", - "help_inline": true, - "help_text": { - "en": "[dct:title] This property contains a name given to the Dataset." - } - }, - { - "field_name": "name", - "label": "URL", - "preset": "dataset_slug", - "form_placeholder": "" - }, - { - "field_name": "notes", - "form_snippet": "markdown.html", - "label": "Description", - "required": true, - "help_inline": true, - "help_text": { - "en": "[dct:description] This property contains a free-text account of the Dataset." - } - }, - { - "field_name": "tag_string", - "label": "Tags", - "preset": "tag_string_autocomplete", - "help_inline": true, - "help_text": { - "en": "[dcat:keyword] This property contains a keyword or tag describing the Dataset." - } - }, - { - "field_name": "license_id", - "label": "License", - "form_snippet": "license.html", - "help_inline": true, - "help_text": { - "en": "[dct:license] This property refers to the license under which the Dataset is made available." - } - }, - { - "field_name": "owner_org", - "preset": "dataset_organization", - "label": "Organization", - "sorted_choices": true - }, - { - "field_name": "url", - "label": "Source", - "form_placeholder": "http://example.com/dataset.json", - "display_snippet": "link.html", - "help_inline": true, - "help_text": { - "en": "[dcat:landingPage] This property refers to a web page that provides access to the Dataset, its Distributions and/or additional information. It is intended to point to a landing page at the original data provider, not to a page on a site of a third party, such as an aggregator." - } - }, - { - "field_name": "version", - "label": "Version", - "validators": "ignore_missing unicode_safe package_version_validator", - "form_placeholder": "1.0", - "help_inline": true, - "help_text": { - "en": "[owl:versionInfo] This property contains a version number or other version designation of the Dataset." - } - }, - { - "field_name": "author", - "label": "Author", - "form_placeholder": "Joe Bloggs", - "help_inline": true, - "help_text": { - "en": "[CKAN] Fallback value for [vcard:fn]" - } - }, - { - "field_name": "author_email", - "label": "Author Email", - "form_placeholder": "joe@example.com", - "display_snippet": "email.html", - "display_email_name_field": "author", - "help_inline": true, - "help_text": { - "en": "[CKAN] Fallback value for [vcard:hasEmail]" - } - }, - { - "field_name": "maintainer", - "label": "Maintainer", - "form_placeholder": "Joe Bloggs", - "help_inline": true, - "help_text": { - "en": "[CKAN] Fallback value for [vcard:fn]" - } - }, - { - "field_name": "maintainer_email", - "label": "Maintainer Email", - "form_placeholder": "joe@example.com", - "display_snippet": "email.html", - "display_email_name_field": "maintainer", - "help_inline": true, - "help_text": { - "en": "[CKAN] Fallback value for [vcard:hasEmail]" - } - }, - { - "field_name": "creator", - "help_inline": true, - "help_text": { - "en": "[dct:creator] This property refers to the entity responsible for producing the dataset." - }, - "label": { - "en": "Creator" - }, - "repeating_subfields": [ - { - "field_name": "creator_identifier", - "help_inline": true, - "help_text": { - "en": "[dct:identifier] Unique identification of the entity." - }, - "label": { - "en": "Contact URI" - } - }, - { - "field_name": "creator_name", - "label": { - "en": "Creator Name" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:name] This property contains name of producer." - } + "scheming_version": 1, + "dataset_type": "dataset", + "about": "Dataset Schema Compatible with DCAT 2.1.1 for GDI - User Portal Catalogue", + "about_url": "https://github.com/SEMICeu/DCAT-AP/tree/master/releases/2.1.1", + "dataset_fields": [ + { + "field_name": "title", + "label": "Title", + "preset": "title", + "help_inline": true, + "help_text": { + "en": "[dct:title] This property contains a name given to the Dataset." + } + }, + { + "field_name": "name", + "label": "URL", + "preset": "dataset_slug", + "form_placeholder": "" + }, + { + "field_name": "notes", + "form_snippet": "markdown.html", + "label": "Description", + "required": true, + "help_inline": true, + "help_text": { + "en": "[dct:description] This property contains a free-text account of the Dataset." + } + }, + { + "field_name": "tag_string", + "label": "Tags", + "preset": "tag_string_autocomplete", + "help_inline": true, + "help_text": { + "en": "[dcat:keyword] This property contains a keyword or tag describing the Dataset." + } + }, + { + "field_name": "license_id", + "label": "License", + "form_snippet": "license.html", + "help_inline": true, + "help_text": { + "en": "[dct:license] This property refers to the license under which the Dataset is made available." + } + }, + { + "field_name": "owner_org", + "preset": "dataset_organization", + "label": "Organization", + "sorted_choices": true + }, + { + "field_name": "url", + "label": "Source", + "form_placeholder": "http://example.com/dataset.json", + "display_snippet": "link.html", + "help_inline": true, + "help_text": { + "en": "[dcat:landingPage] This property refers to a web page that provides access to the Dataset, its Distributions and/or additional information. It is intended to point to a landing page at the original data provider, not to a page on a site of a third party, such as an aggregator." + } + }, + { + "field_name": "version", + "label": "Version", + "validators": "ignore_missing unicode_safe package_version_validator", + "form_placeholder": "1.0", + "help_inline": true, + "help_text": { + "en": "[owl:versionInfo] This property contains a version number or other version designation of the Dataset." + } + }, + { + "field_name": "author", + "label": "Author", + "form_placeholder": "Joe Bloggs", + "help_inline": true, + "help_text": { + "en": "[CKAN] Fallback value for [vcard:fn]" + } + }, + { + "field_name": "author_email", + "label": "Author Email", + "form_placeholder": "joe@example.com", + "display_snippet": "email.html", + "display_email_name_field": "author", + "help_inline": true, + "help_text": { + "en": "[CKAN] Fallback value for [vcard:hasEmail]" + } + }, + { + "field_name": "maintainer", + "label": "Maintainer", + "form_placeholder": "Joe Bloggs", + "help_inline": true, + "help_text": { + "en": "[CKAN] Fallback value for [vcard:fn]" + } + }, + { + "field_name": "maintainer_email", + "label": "Maintainer Email", + "form_placeholder": "joe@example.com", + "display_snippet": "email.html", + "display_email_name_field": "maintainer", + "help_inline": true, + "help_text": { + "en": "[CKAN] Fallback value for [vcard:hasEmail]" + } + }, + { + "field_name": "creator", + "help_inline": true, + "help_text": { + "en": "[dct:creator] This property refers to the entity responsible for producing the dataset." + }, + "label": { + "en": "Creator" + }, + "repeating_subfields": [ + { + "field_name": "creator_identifier", + "help_inline": true, + "help_text": { + "en": "[dct:identifier] Unique identification of the entity." + }, + "label": { + "en": "Contact URI" + } + }, + { + "field_name": "creator_name", + "label": { + "en": "Creator Name" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:name] This property contains name of producer." + } + } + ] + }, + { + "field_name": "contact_point", + "help_inline": true, + "help_text": { + "en": "[dcat:contactPoint] This property contains contact information that can be used for sending comments about the Dataset." + }, + "label": { + "en": "Contact Point" + }, + "repeating_subfields": [ + { + "field_name": "contact_uri", + "help_inline": true, + "help_text": { + "en": "[dcat:contactPoint] This property contains contact information that can be used for sending comments about the Dataset." + }, + "label": { + "en": "Contact URI" + } + }, + { + "field_name": "contact_name", + "label": { + "en": "Contact Name" + }, + "help_inline": true, + "help_text": { + "en": "[vcard:fn] This property contains contact information that can be used for sending comments about the Dataset." + } + }, + { + "field_name": "contact_email", + "help_inline": true, + "help_text": { + "en": "[vcard:hasEmail] This property contains contact information that can be used for sending comments about the Dataset." + }, + "label": { + "en": "Contact Email" + }, + "display_snippet": "email.html" + } + ] + }, + { + "field_name": "publisher_uri", + "label": { + "en": "Publisher" + }, + "help_inline": true, + "help_text": { + "en": "[dct:publisher] This property refers to an entity (organisation) responsible for making the Dataset available." + } + }, + { + "field_name": "publisher_name", + "label": { + "en": "Publisher Name" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:name] This property contains a name of the agent." + } + }, + { + "field_name": "publisher_email", + "label": { + "en": "Publisher Email" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:mbox] This property contains an email of the agent." + } + }, + { + "field_name": "publisher_url", + "label": { + "en": "Publisher URL" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:homepage] This property refers to a web page that acts as the main page for the Catalogue." + } + }, + { + "field_name": "publisher_type", + "label": { + "en": "Publisher Type" + }, + "help_inline": true, + "help_text": { + "en": "[dct:type] This property refers to the type of the Dataset. " + } + }, + { + "field_name": "spatial_uri", + "label": { + "en": "Spatial URI" + }, + "help_inline": true, + "help_text": { + "en": "[dct:spatial] This property refers to a geographic region that is covered by the Dataset. " + } + }, + { + "field_name": "temporal_start", + "help_inline": true, + "help_text": { + "en": "[dct:temporal] This property refers to a temporal period that the Dataset covers." + }, + "label": { + "en": "Temportal Start Date" + }, + "preset": "date" + }, + { + "field_name": "temporal_end", + "help_inline": true, + "help_text": { + "en": "[dct:temporal] This property refers to a temporal period that the Dataset covers." + }, + "label": { + "en": "Temporal end date" + }, + "preset": "date" + }, + { + "field_name": "theme", + "label": { + "en": "Theme" + }, + "preset": "multiple_text", + "help_inline": true, + "help_text": { + "en": "[dcat:theme] This property refers to a category of the Dataset. A Dataset may be associated with multiple themes." + } + }, + { + "field_name": "version_notes", + "label": { + "en": "Version Notes" + }, + "help_inline": true, + "help_text": { + "en": "[adms:versionNotes] This property contains a description of the differences between this version and a previous version of the Dataset." + } + }, + { + "field_name": "landing_page", + "label": { + "en": "Landing Page" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:landingPage] This property refers to a web page that provides access to the Dataset, its Distributions and/or additional information. It is intended to point to a landing page at the original data provider, not to a page on a site of a third party, such as an aggregator." + } + }, + { + "field_name": "spatial_resolution_in_meters", + "label": { + "en": "Spatial Resolution in Meters" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:spatialResolutionInMeters] This property refers to the minimum spatial separation resolvable in a dataset, measured in meters." + } + }, + { + "field_name": "temporal_resolution", + "label": { + "en": "Temporal Resolution" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:temporalResolution] This property refers to the minimum time period resolvable in the dataset." + } + }, + { + "field_name": "qualified_relation", + "label": { + "en": "Qualified Relation" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:qualifiedRelation] This property provides a link to a description of a relationship with another resource." + } + }, + { + "field_name": "access_rights", + "label": { + "en": "Access rights" + }, + "help_inline": true, + "help_text": { + "en": "[dct:accessRights] This property refers to information that indicates whether the Dataset is open data, has access restrictions or is not public." + } + }, + { + "field_name": "frequency", + "label": { + "en": "Frequency" + }, + "help_inline": true, + "help_text": { + "en": "[dct:accrualPeriodicity] This property refers to the frequency at which the Dataset is updated." + } + }, + { + "field_name": "conforms_to", + "label": { + "en": "Conforms to" + }, + "preset": "multiple_text", + "help_inline": true, + "help_text": { + "en": "[dct:conformsTo] This property refers to an implementing rule or other specification." + } + }, + { + "field_name": "is_referenced_by", + "label": { + "en": "Is referenced by" + }, + "help_inline": true, + "help_text": { + "en": "[dct:isReferencedBy] This property is about a related resource, such as a publication, that references, cites, or otherwise points to the dataset." + } + }, + { + "field_name": "is_version_of", + "label": { + "en": "Is version of" + }, + "help_inline": true, + "help_text": { + "en": "[dct:isVersionOf] This property refers to a related Dataset of which the described Dataset is a version, edition, or adaptation." + } + }, + { + "field_name": "identifier", + "label": { + "en": "Identifier" + }, + "help_inline": true, + "help_text": { + "en": "[dct:identifier] This property contains the main identifier for the Dataset, e.g. the URI or other unique identifier in the context of the Catalogue." + } + }, + { + "field_name": "issued", + "label": { + "en": "Issued" + }, + "help_inline": true, + "help_text": { + "en": "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." + }, + "preset": "date" + }, + { + "field_name": "language", + "label": { + "en": "Language" + }, + "preset": "multiple_text", + "help_inline": true, + "help_text": { + "en": "[dct:language] This property refers to a language of the Dataset. This property can be repeated if there are multiple languages in the Dataset." + } + }, + { + "field_name": "modified", + "label": { + "en": "Modification Date" + }, + "preset": "date", + "help_inline": true, + "help_text": { + "en": "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." + } + }, + { + "field_name": "provenance", + "label": { + "en": "Provenance" + }, + "help_inline": true, + "help_text": { + "en": "[dct:provenance] This property contains a statement about the lineage of a Dataset." + } + }, + { + "field_name": "relation", + "label": { + "en": "Relation" + }, + "help_inline": true, + "help_text": { + "en": "[dct:relation] This property refers to a related resource." + } + }, + { + "field_name": "sample", + "label": { + "en": "Sample" + }, + "help_inline": true, + "help_text": { + "en": "[adms:sample] This property refers to a sample distribution of the dataset." + } + }, + { + "field_name": "source", + "label": { + "en": "Source" + }, + "help_inline": true, + "help_text": { + "en": "[dct:source] This property refers to a related Dataset from which the described Dataset is derived." + } + }, + { + "field_name": "dcat_type", + "label": { + "en": "Type" + }, + "help_inline": true, + "help_text": { + "en": "[dct:type] This property refers to the type of the Dataset." + } + }, + { + "field_name": "has_version", + "label": { + "en": "Has Version" + }, + "preset": "multiple_text", + "help_inline": true, + "help_text": { + "en": "[dct:hasVersion] This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset." + } + }, + { + "field_name": "documentation", + "label": { + "en": "Documentation" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:page] This property refers to a page or document about this Dataset." + } + }, + { + "field_name": "qualified_attribution", + "label": { + "en": "Qualified Attribution" + }, + "help_inline": true, + "help_text": { + "en": "[prov:qualifiedAttribution ] This property refers to a link to an Agent having some form of responsibility for the resource." + } + }, + { + "field_name": "was_generated_by", + "label": { + "en": "Was Generated By" + }, + "help_inline": true, + "help_text": { + "en": "[prov:wasGeneratedBy] This property refers to an activity that generated, or provides the business context for, the creation of the dataset." + } + }, + { + "field_name": "alternate_identifier", + "label": { + "en": "Alternate identifier" + }, + "help_inline": true, + "help_text": { + "en": "[adms:identifier] This property refers to a secondary identifier of the Dataset, such as MAST/ADS , DataCite , DOI , EZID or W3ID ." + } + } + ], + "resource_fields": [ + { + "field_name": "url", + "label": "URL", + "preset": "resource_url_upload" + }, + { + "field_name": "name", + "label": "Name", + "help_inline": true, + "help_text": { + "en": "[dct:title] This property contains a name given to the Distribution." + }, + "form_placeholder": "eg. January 2011 Gold Prices" + }, + { + "field_name": "description", + "label": "Description", + "form_snippet": "markdown.html", + "form_placeholder": "Some useful notes about the data", + "help_inline": true, + "help_text": { + "en": "[dct:description] This property contains a free-text account of the Distribution." + } + }, + { + "field_name": "format", + "label": "Format", + "preset": "resource_format_autocomplete", + "help_inline": true, + "help_text": { + "en": "[dct:format] This property refers to the file format of the Distribution." + } + }, + { + "field_name": "access_url", + "label": { + "en": "Access URL" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:accessURL] This property contains a URL that gives access to a Distribution of the Dataset. The resource at the access URL may contain information about how to get the Dataset." + } + }, + { + "field_name": "availability", + "label": { + "en": "Availability" + }, + "help_inline": true, + "help_text": { + "en": "[dcatap:availability] This property indicates how long it is planned to keep the Distribution of the Dataset available. " + } + }, + { + "field_name": "status", + "label": { + "en": "Status" + }, + "help_inline": true, + "help_text": { + "en": "[adms:status] The status of the distribution in the context of maturity lifecycle." + } + }, + { + "field_name": "access_services", + "label": { + "en": "Access Services" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:accessService] This property refers to a data service that gives access to the distribution of the dataset." + } + }, + { + "field_name": "size", + "label": "Size", + "form_snippet": null + }, + { + "field_name": "compress_format", + "label": { + "en": "Compress Format" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:compressFormat] This property refers to the format of the file in which the data is contained in a compressed form, e.g. to reduce the size of the downloadable file." + } + }, + { + "field_name": "download_url", + "label": "Download URL", + "display_snippet": "link.html", + "help_inline": true, + "help_text": { + "en": "[dcat:downloadURL] This property contains a URL that is a direct link to a downloadable file in a given format." + } + }, + { + "field_name": "mimetype", + "label": "Mime Type", + "help_inline": true, + "help_text": { + "en": "[dcat:mediaType] This property refers to the media type of the Distribution as defined in the official register of media types managed by IANA." + } + }, + { + "field_name": "package_format", + "label": { + "en": "Package Format" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:packageFormat] This property refers to the format of the file in which one or more data files are grouped together, e.g. to enable a set of related files to be downloaded together." + } + }, + { + "field_name": "spatial_resolution_in_meters", + "label": { + "en": "Spatial Resolution in Meters" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:spatialResolutionInMeters] This property refers to the minimum spatial separation resolvable in a dataset, measured in meters." + } + }, + { + "field_name": "temporal_resolution", + "label": { + "en": "Temporal Resolution" + }, + "help_inline": true, + "help_text": { + "en": "[dcat:temporalResolution] This property refers to the minimum time period resolvable in the dataset." + } + }, + { + "field_name": "conforms_to", + "label": { + "en": "Conforms to" + }, + "help_inline": true, + "help_text": { + "en": "[dct:conformsTo] This property refers to an implementing rule or other specification." + } + }, + { + "field_name": "issued", + "label": { + "en": "Issued" + }, + "help_inline": true, + "help_text": { + "en": "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." + }, + "preset": "date" + }, + { + "field_name": "language", + "label": { + "en": "Language" + }, + "help_inline": true, + "help_text": { + "en": "[dct:language] This property refers to a language of the Dataset. This property can be repeated if there are multiple languages in the Dataset." + } + }, + { + "field_name": "modified", + "label": { + "en": "Modification Date" + }, + "preset": "date", + "help_inline": true, + "help_text": { + "en": "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." + } + }, + { + "field_name": "rights", + "label": { + "en": "Rights" + }, + "help_inline": true, + "help_text": { + "en": "[dct:rights] This property refers to a statement that specifies rights associated with the Distribution." + } + }, + { + "field_name": "documentation", + "label": { + "en": "Documentation" + }, + "help_inline": true, + "help_text": { + "en": "[foaf:page] This property refers to a page or document about this Distribution." + } + }, + { + "field_name": "has_policy", + "label": { + "en": "Has Policy" + }, + "help_inline": true, + "help_text": { + "en": "[odrl:hasPolicy] This property refers to the policy expressing the rights associated with the distribution if using the ODRL vocabulary." + } + }, + { + "field_name": "hash", + "label": "Hash", + "help_inline": true, + "help_text": { + "en": "[spdx:checksumValue] This property provides a lower case hexadecimal encoded digest value produced using a specific algorithm." + } + }, + { + "field_name": "hash_algorithm", + "label": "Hash Algorithm", + "help_inline": true, + "help_text": { + "en": "[spdx:algorithm] This property identifies the algorithm used to produce the subject Checksum. " + } } - ] - }, - { - "field_name": "contact_uri", - "help_inline": true, - "help_text": { - "en": "[dcat:contactPoint] This property contains contact information that can be used for sending comments about the Dataset." - }, - "label": { - "en": "Contact URI" - } - }, - { - "field_name": "contact_name", - "label": { - "en": "Contact Name" - }, - "help_inline": true, - "help_text": { - "en": "[vcard:fn] This property contains contact information that can be used for sending comments about the Dataset." - } - }, - { - "field_name": "contact_email", - "help_inline": true, - "help_text": { - "en": "[vcard:hasEmail] This property contains contact information that can be used for sending comments about the Dataset." - }, - "label": { - "en": "Contact Email" - }, - "display_snippet": "email.html" - }, - { - "field_name": "publisher_uri", - "label": { - "en": "Publisher" - }, - "help_inline": true, - "help_text": { - "en": "[dct:publisher] This property refers to an entity (organisation) responsible for making the Dataset available." - } - }, - { - "field_name": "publisher_name", - "label": { - "en": "Publisher Name" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:name] This property contains a name of the agent." - } - }, - { - "field_name": "publisher_email", - "label": { - "en": "Publisher Email" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:mbox] This property contains an email of the agent." - } - }, - { - "field_name": "publisher_url", - "label": { - "en": "Publisher URL" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:homepage] This property refers to a web page that acts as the main page for the Catalogue." - } - }, - { - "field_name": "publisher_type", - "label": { - "en": "Publisher Type" - }, - "help_inline": true, - "help_text": { - "en": "[dct:type] This property refers to the type of the Dataset. " - } - }, - { - "field_name": "spatial_uri", - "label": { - "en": "Spatial URI" - }, - "help_inline": true, - "help_text": { - "en": "[dct:spatial] This property refers to a geographic region that is covered by the Dataset. " - } - }, - { - "field_name": "temporal_start", - "help_inline": true, - "help_text": { - "en": "[dct:temporal] This property refers to a temporal period that the Dataset covers." - }, - "label": { - "en": "Temportal Start Date" - }, - "preset": "date" - }, - { - "field_name": "temporal_end", - "help_inline": true, - "help_text": { - "en": "[dct:temporal] This property refers to a temporal period that the Dataset covers." - }, - "label": { - "en": "Temporal end date" - }, - "preset": "date" - }, - { - "field_name": "theme", - "label": { - "en": "Theme" - }, - "preset": "multiple_text", - "help_inline": true, - "help_text": { - "en": "[dcat:theme] This property refers to a category of the Dataset. A Dataset may be associated with multiple themes." - } - }, - { - "field_name": "version_notes", - "label": { - "en": "Version Notes" - }, - "help_inline": true, - "help_text": { - "en": "[adms:versionNotes] This property contains a description of the differences between this version and a previous version of the Dataset." - } - }, - { - "field_name": "landing_page", - "label": { - "en": "Landing Page" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:landingPage] This property refers to a web page that provides access to the Dataset, its Distributions and/or additional information. It is intended to point to a landing page at the original data provider, not to a page on a site of a third party, such as an aggregator." - } - }, - { - "field_name": "spatial_resolution_in_meters", - "label": { - "en": "Spatial Resolution in Meters" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:spatialResolutionInMeters] This property refers to the minimum spatial separation resolvable in a dataset, measured in meters." - } - }, - { - "field_name": "temporal_resolution", - "label": { - "en": "Temporal Resolution" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:temporalResolution] This property refers to the minimum time period resolvable in the dataset." - } - }, - { - "field_name": "qualified_relation", - "label": { - "en": "Qualified Relation" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:qualifiedRelation] This property provides a link to a description of a relationship with another resource." - } - }, - { - "field_name": "access_rights", - "label": { - "en": "Access rights" - }, - "help_inline": true, - "help_text": { - "en": "[dct:accessRights] This property refers to information that indicates whether the Dataset is open data, has access restrictions or is not public." - } - }, - { - "field_name": "frequency", - "label": { - "en": "Frequency" - }, - "help_inline": true, - "help_text": { - "en": "[dct:accrualPeriodicity] This property refers to the frequency at which the Dataset is updated." - } - }, - { - "field_name": "conforms_to", - "label": { - "en": "Conforms to" - }, - "preset": "multiple_text", - "help_inline": true, - "help_text": { - "en": "[dct:conformsTo] This property refers to an implementing rule or other specification." - } - }, - { - "field_name": "is_referenced_by", - "label": { - "en": "Is referenced by" - }, - "help_inline": true, - "help_text": { - "en": "[dct:isReferencedBy] This property is about a related resource, such as a publication, that references, cites, or otherwise points to the dataset." - } - }, - { - "field_name": "is_version_of", - "label": { - "en": "Is version of" - }, - "help_inline": true, - "help_text": { - "en": "[dct:isVersionOf] This property refers to a related Dataset of which the described Dataset is a version, edition, or adaptation." - } - }, - { - "field_name": "identifier", - "label": { - "en": "Identifier" - }, - "help_inline": true, - "help_text": { - "en": "[dct:identifier] This property contains the main identifier for the Dataset, e.g. the URI or other unique identifier in the context of the Catalogue." - } - }, - { - "field_name": "issued", - "label": { - "en": "Issued" - }, - "help_inline": true, - "help_text": { - "en": "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." - }, - "preset": "date" - }, - { - "field_name": "language", - "label": { - "en": "Language" - }, - "preset": "multiple_text", - "help_inline": true, - "help_text": { - "en": "[dct:language] This property refers to a language of the Dataset. This property can be repeated if there are multiple languages in the Dataset." - } - }, - { - "field_name": "modified", - "label": { - "en": "Modification Date" - }, - "preset": "date", - "help_inline": true, - "help_text": { - "en": "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." - } - }, - { - "field_name": "provenance", - "label": { - "en": "Provenance" - }, - "help_inline": true, - "help_text": { - "en": "[dct:provenance] This property contains a statement about the lineage of a Dataset." - } - }, - { - "field_name": "relation", - "label": { - "en": "Relation" - }, - "help_inline": true, - "help_text": { - "en": "[dct:relation] This property refers to a related resource." - } - }, - { - "field_name": "sample", - "label": { - "en": "Sample" - }, - "help_inline": true, - "help_text": { - "en": "[adms:sample] This property refers to a sample distribution of the dataset." - } - }, - { - "field_name": "source", - "label": { - "en": "Source" - }, - "help_inline": true, - "help_text": { - "en": "[dct:source] This property refers to a related Dataset from which the described Dataset is derived." - } - }, - { - "field_name": "dcat_type", - "label": { - "en": "Type" - }, - "help_inline": true, - "help_text": { - "en": "[dct:type] This property refers to the type of the Dataset." - } - }, - { - "field_name": "has_version", - "label": { - "en": "Has Version" - }, - "preset": "multiple_text", - "help_inline": true, - "help_text": { - "en": "[dct:hasVersion] This property refers to a related Dataset that is a version, edition, or adaptation of the described Dataset." - } - }, - { - "field_name": "documentation", - "label": { - "en": "Documentation" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:page] This property refers to a page or document about this Dataset." - } - }, - { - "field_name": "qualified_attribution", - "label": { - "en": "Qualified Attribution" - }, - "help_inline": true, - "help_text": { - "en": "[prov:qualifiedAttribution ] This property refers to a link to an Agent having some form of responsibility for the resource." - } - }, - { - "field_name": "was_generated_by", - "label": { - "en": "Was Generated By" - }, - "help_inline": true, - "help_text": { - "en": "[prov:wasGeneratedBy] This property refers to an activity that generated, or provides the business context for, the creation of the dataset." - } - }, - { - "field_name": "alternate_identifier", - "label": { - "en": "Alternate identifier" - }, - "help_inline": true, - "help_text": { - "en": "[adms:identifier] This property refers to a secondary identifier of the Dataset, such as MAST/ADS , DataCite , DOI , EZID or W3ID ." - } - } - ], - "resource_fields": [ - { - "field_name": "url", - "label": "URL", - "preset": "resource_url_upload" - }, - { - "field_name": "name", - "label": "Name", - "help_inline": true, - "help_text": { - "en": "[dct:title] This property contains a name given to the Distribution." - }, - "form_placeholder": "eg. January 2011 Gold Prices" - }, - { - "field_name": "description", - "label": "Description", - "form_snippet": "markdown.html", - "form_placeholder": "Some useful notes about the data", - "help_inline": true, - "help_text": { - "en": "[dct:description] This property contains a free-text account of the Distribution." - } - }, - { - "field_name": "format", - "label": "Format", - "preset": "resource_format_autocomplete", - "help_inline": true, - "help_text": { - "en": "[dct:format] This property refers to the file format of the Distribution." - } - }, - { - "field_name": "access_url", - "label": { - "en": "Access URL" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:accessURL] This property contains a URL that gives access to a Distribution of the Dataset. The resource at the access URL may contain information about how to get the Dataset." - } - }, - { - "field_name": "availability", - "label": { - "en": "Availability" - }, - "help_inline": true, - "help_text": { - "en": "[dcatap:availability] This property indicates how long it is planned to keep the Distribution of the Dataset available. " - } - }, - { - "field_name": "status", - "label": { - "en": "Status" - }, - "help_inline": true, - "help_text": { - "en": "[adms:status] The status of the distribution in the context of maturity lifecycle." - } - }, - { - "field_name": "access_services", - "label": { - "en": "Access Services" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:accessService] This property refers to a data service that gives access to the distribution of the dataset." - } - }, - { - "field_name": "size", - "label": "Size", - "form_snippet": null - }, - { - "field_name": "compress_format", - "label": { - "en": "Compress Format" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:compressFormat] This property refers to the format of the file in which the data is contained in a compressed form, e.g. to reduce the size of the downloadable file." - } - }, - { - "field_name": "download_url", - "label": "Download URL", - "display_snippet": "link.html", - "help_inline": true, - "help_text": { - "en": "[dcat:downloadURL] This property contains a URL that is a direct link to a downloadable file in a given format." - } - }, - { - "field_name": "mimetype", - "label": "Mime Type", - "help_inline": true, - "help_text": { - "en": "[dcat:mediaType] This property refers to the media type of the Distribution as defined in the official register of media types managed by IANA." - } - }, - { - "field_name": "package_format", - "label": { - "en": "Package Format" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:packageFormat] This property refers to the format of the file in which one or more data files are grouped together, e.g. to enable a set of related files to be downloaded together." - } - }, - { - "field_name": "spatial_resolution_in_meters", - "label": { - "en": "Spatial Resolution in Meters" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:spatialResolutionInMeters] This property refers to the minimum spatial separation resolvable in a dataset, measured in meters." - } - }, - { - "field_name": "temporal_resolution", - "label": { - "en": "Temporal Resolution" - }, - "help_inline": true, - "help_text": { - "en": "[dcat:temporalResolution] This property refers to the minimum time period resolvable in the dataset." - } - }, - { - "field_name": "conforms_to", - "label": { - "en": "Conforms to" - }, - "help_inline": true, - "help_text": { - "en": "[dct:conformsTo] This property refers to an implementing rule or other specification." - } - }, - { - "field_name": "issued", - "label": { - "en": "Issued" - }, - "help_inline": true, - "help_text": { - "en": "[dct:issued] This property contains the date of formal issuance (e.g., publication) of the Dataset." - }, - "preset": "date" - }, - { - "field_name": "language", - "label": { - "en": "Language" - }, - "help_inline": true, - "help_text": { - "en": "[dct:language] This property refers to a language of the Dataset. This property can be repeated if there are multiple languages in the Dataset." - } - }, - { - "field_name": "modified", - "label": { - "en": "Modification Date" - }, - "preset": "date", - "help_inline": true, - "help_text": { - "en": "[dct:modified] This property contains the most recent date on which the Dataset was changed or modified." - } - }, - { - "field_name": "rights", - "label": { - "en": "Rights" - }, - "help_inline": true, - "help_text": { - "en": "[dct:rights] This property refers to a statement that specifies rights associated with the Distribution." - } - }, - { - "field_name": "documentation", - "label": { - "en": "Documentation" - }, - "help_inline": true, - "help_text": { - "en": "[foaf:page] This property refers to a page or document about this Distribution." - } - }, - { - "field_name": "has_policy", - "label": { - "en": "Has Policy" - }, - "help_inline": true, - "help_text": { - "en": "[odrl:hasPolicy] This property refers to the policy expressing the rights associated with the distribution if using the ODRL vocabulary." - } - }, - { - "field_name": "hash", - "label": "Hash", - "help_inline": true, - "help_text": { - "en": "[spdx:checksumValue] This property provides a lower case hexadecimal encoded digest value produced using a specific algorithm." - } - }, - { - "field_name": "hash_algorithm", - "label": "Hash Algorithm", - "help_inline": true, - "help_text": { - "en": "[spdx:algorithm] This property identifies the algorithm used to produce the subject Checksum. " - } - } - ] -} + ] +} \ No newline at end of file diff --git a/ckanext/fairdatapoint/tests/test_processors.py b/ckanext/fairdatapoint/tests/test_processors.py index a3da5bd..043093e 100644 --- a/ckanext/fairdatapoint/tests/test_processors.py +++ b/ckanext/fairdatapoint/tests/test_processors.py @@ -45,29 +45,21 @@ def test_fdp_record_converter_dataset_dict(self): "http://purl.org/zonmw/generic/10006;" "dataset=https://covid19initiatives.health-ri.nl/p/Project/27866022694497978", record=data) - - expected_dataset = dict( - extras=[ - {"key": "uri", "value": "https://covid19initiatives.health-ri.nl/p/Project/27866022694497978"} - ], - resources=[], - title="COVID-NL cohort MUMC+", - notes="Clinical data of MUMC COVID-NL cohort", - tags=[], - license_id="", - identifier="27866022694497978", + expected_dataset = dict(extras=[ + {"key": "uri", "value": "https://covid19initiatives.health-ri.nl/p/Project/27866022694497978"} + ], resources=[], title="COVID-NL cohort MUMC+", notes="Clinical data of MUMC COVID-NL cohort", tags=[], + license_id="", identifier="27866022694497978", has_version=["https://repo.metadatacenter.org/template-instances/2836bf1c-76e9-44e7-a65e-80e9ca63025a"], - contact_name="N.K. De Vries", - contact_uri="https://orcid.org/0000-0002-4348-707X", - creator=[{ - "creator_identifier": "https://orcid.org/0000-0002-0180-3636", - "creator_name": "https://orcid.org/0000-0002-0180-3636" - }], - publisher_uri="https://opal.health-ri.nl/pub/", - temporal_start=datetime(2020, 1, 1, 0, 0), - temporal_end=datetime(2025, 12, 31, 0, 0) - ) - + contact_point=[ + { + "contact_name": "N.K. De Vries", + "contact_uri": "https://orcid.org/0000-0002-4348-707X", + "contact_email": "", + } + ], creator=[{"creator_identifier": "https://orcid.org/0000-0002-0180-3636", + "creator_name": "https://orcid.org/0000-0002-0180-3636"}], + publisher_uri="https://opal.health-ri.nl/pub/", temporal_start=datetime(2020, 1, 1, 0, 0), + temporal_end=datetime(2025, 12, 31, 0, 0)) assert actual_dataset == expected_dataset def test_fdp_record_converter_catalog_dict(self): diff --git a/ckanext/fairdatapoint/tests/test_profiles.py b/ckanext/fairdatapoint/tests/test_profiles.py index 782e207..c68df31 100644 --- a/ckanext/fairdatapoint/tests/test_profiles.py +++ b/ckanext/fairdatapoint/tests/test_profiles.py @@ -9,8 +9,8 @@ from rdflib import Graph from ckanext.fairdatapoint.profiles import validate_tags, convert_datetime_string from ckanext.fairdatapoint.harvesters.domain.fair_data_point_record_to_package_converter import ( - FairDataPointRecordToPackageConverter) - + FairDataPointRecordToPackageConverter +) TEST_DATA_DIRECTORY = Path(Path(__file__).parent.resolve(), "test_data") @@ -112,7 +112,13 @@ def test_profile_contact_point_uri(): ], 'title': 'Example', 'notes': 'This is an example description.', - 'contact_uri': 'https://orcid.org/0000-0002-9095-9201', + 'contact_point': [ + { + 'contact_uri': 'https://orcid.org/0000-0002-9095-9201', + 'contact_email': '', + 'contact_name': '' + } + ], 'license_id': '', 'resources': [], 'tags': [] @@ -136,15 +142,90 @@ def test_profile_contact_point_vcard(): ], 'title': 'Example', 'notes': 'This is an example description.', - 'contact_uri': 'https://orcid.org/0000-0002-9095-9201', - 'contact_name': 'Marc Bonten', - 'contact_email': 'marc.bonten@example.com', + 'contact_point': [ + { + 'contact_uri': 'https://orcid.org/0000-0002-9095-9201', + 'contact_name': 'Marc Bonten', + 'contact_email': 'marc.bonten@example.com' + } + ], + 'license_id': '', + 'resources': [], + 'tags': [] + } + assert actual == expected + + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +@pytest.mark.usefixtures("with_plugins") +def test_profile_contact_point_multiple_uris(): + fdp_record_to_package = FairDataPointRecordToPackageConverter(profile="fairdatapoint_dcat_ap") + data = Graph().parse(Path(TEST_DATA_DIRECTORY, "contact_point_multiple_urls.ttl")).serialize() + actual = fdp_record_to_package.record_to_package( + guid="https://health-ri.sandbox.semlab-leiden.nl/catalog/e3faf7ad-050c-475f-8ce4-da7e2faa5cd0;" + "dataset=https://health-ri.sandbox.semlab-leiden.nl/dataset/d7129d28-b72a-437f-8db0-4f0258dd3c25", + record=data) + expected = { + 'extras': [ + {'key': 'uri', + 'value': 'https://health-ri.sandbox.semlab-leiden.nl/dataset/d7129d28-b72a-437f-8db0-4f0258dd3c25'} + ], + 'title': 'Example', + 'notes': 'This is an example description.', + 'contact_point': [ + { + 'contact_email': '', + 'contact_name': '', + 'contact_uri': 'https://orcid.org/0000-0002-9095-9201' + }, + { + 'contact_email': '', + 'contact_name': '', + 'contact_uri': 'https://orcid.org/0000-0003-2558-7496' + } + ], + 'license_id': '', + 'resources': [], + 'tags': [] + } + assert actual == expected + + +@pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") +@pytest.mark.usefixtures("with_plugins") +def test_profile_contact_point_multiple_cards(): + fdp_record_to_package = FairDataPointRecordToPackageConverter(profile="fairdatapoint_dcat_ap") + data = Graph().parse(Path(TEST_DATA_DIRECTORY, "contact_point_multiple_cards.ttl")).serialize() + actual = fdp_record_to_package.record_to_package( + guid="https://health-ri.sandbox.semlab-leiden.nl/catalog/e3faf7ad-050c-475f-8ce4-da7e2faa5cd0;" + "dataset=https://health-ri.sandbox.semlab-leiden.nl/dataset/d7129d28-b72a-437f-8db0-4f0258dd3c25", + record=data) + expected = { + 'extras': [ + {'key': 'uri', + 'value': 'https://health-ri.sandbox.semlab-leiden.nl/dataset/d7129d28-b72a-437f-8db0-4f0258dd3c25'} + ], + 'title': 'Example', + 'notes': 'This is an example description.', + 'contact_point': [ + { + 'contact_email': 'marc.bonten@example.com', + 'contact_name': 'Marc Bonten', + 'contact_uri': 'https://orcid.org/0000-0002-9095-9201' + }, + { + 'contact_email': 'frits.rosendaal@example.com', + 'contact_name': 'Frits Rosendaal', + 'contact_uri': 'https://orcid.org/0000-0003-2558-7496' + } + ], 'license_id': '', 'resources': [], 'tags': [] } assert actual == expected + @pytest.mark.ckan_config("ckan.plugins", "scheming_datasets") @pytest.mark.usefixtures("with_plugins") def test_profile_creator():