Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Add a Data Package serializer #10632

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions osf/metadata/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from osf import exceptions
from osf.metadata import gather
from .datacite import DataciteJsonMetadataSerializer, DataciteXmlMetadataSerializer
from .datapackage import DatapackageSerializer
from .google_dataset_json_ld import GoogleDatasetJsonLdSerializer
from .turtle import TurtleMetadataSerializer

Expand All @@ -15,6 +16,7 @@
'turtle': TurtleMetadataSerializer,
'datacite-json': DataciteJsonMetadataSerializer,
'datacite-xml': DataciteXmlMetadataSerializer,
'datapackage.json': DatapackageSerializer,
'google-dataset-json-ld': GoogleDatasetJsonLdSerializer,
}

Expand Down
99 changes: 99 additions & 0 deletions osf/metadata/serializers/datapackage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import json
from osf.metadata.serializers import _base
from osf.metadata.serializers.datacite import DataciteJsonMetadataSerializer

PROFILE_URL = "https://datapackage.org/profiles/2.0/datapackage.json"


class DatapackageSerializer(_base.MetadataSerializer):
mediatype = "application/json" # type: ignore

def filename_for_itemid(self, itemid: str):
return f"{itemid}.datapackage.json"

def serialize(self) -> str:
return json.dumps(
self.metadata_as_dict(),
indent=2,
sort_keys=True,
)

def metadata_as_dict(self) -> dict:
dataset = DataciteJsonMetadataSerializer(self.basket).metadata_as_dict()
package = {"$schema": PROFILE_URL, "resources": []}

# Id
for identifier in dataset.get("identifiers", []):
type = identifier.get("identifierType")
value = identifier.get("identifier")
if value and type == "DOI":
package["id"] = value
break

# Title
for title in dataset.get("titles", []):
type = title.get("titleType")
value = title.get("title")
if value and not type:
package["title"] = value
break

# Description
for title in dataset.get("descriptions", []):
type = title.get("descriptionType")
value = title.get("description")
if value and type == "Abstract":
package["description"] = value
break

# Homepage
for identifier in dataset.get("identifiers", []):
type = identifier.get("identifierType")
value = identifier.get("identifier")
if value and type == "URL":
package["homepage"] = value
break

# Version
version = dataset.get("version")
if version:
package["version"] = version

# Keywords
for subject in dataset.get("subjects", []):
value = subject.get("subject")
if value:
package.setdefault("keywords", []).append(value)

# Licenses
for right in dataset.get("rightsList", []):
license = {}
license["name"] = right.get("rightsIdentifier")
license["path"] = right.get("rightsUri")
license["title"] = right.get("rights")
license = {k: v for k, v in license.items() if v is not None}
if license:
package.setdefault("licenses", []).append(license)

# Contributors
creators = dataset.get("creators", [])
contributors = dataset.get("contributors", [])
for type, items in [("creator", creators), (None, contributors)]:
for item in items:
type = item.get("contributorType", type)
contributor = {}
contributor["title"] = item.get("name")
contributor["givenName"] = item.get("givenName")
contributor["familyName"] = item.get("familyName")
if type:
contributor["roles"] = [type]
for affiliation in item.get("affiliations", []):
name = affiliation.get("name")
if name:
contributor["organization"] = name
break
contributor = {k: v for k, v in contributor.items() if v is not None}
if contributor:
package.setdefault("contributors", []).append(contributor)

return package