Skip to content
This repository has been archived by the owner on Oct 11, 2022. It is now read-only.

Commit

Permalink
Handle DCP/2 schema changes (#15)
Browse files: browse the repository at this point in the history
  • Loading branch information
hannes-ucsc committed Jul 14, 2020
2 parents 637b7e9 + 13de1e7 commit e8d97db
Show file tree
Hide file tree
Showing 4 changed files with 505 additions and 16 deletions.
74 changes: 58 additions & 16 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
MutableMapping,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
Expand All @@ -25,14 +26,13 @@
)

from humancellatlas.data.metadata.age_range import AgeRange

# A few helpful type aliases
#
from humancellatlas.data.metadata.lookup import (
lookup,
LookupDefault,
lookup,
)

# A few helpful type aliases
#
UUID4 = UUID
AnyJSON2 = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]
AnyJSON1 = Union[str, int, float, bool, None, Mapping[str, AnyJSON2], List[AnyJSON2]]
Expand Down Expand Up @@ -677,17 +677,18 @@ class Link:
source_type: str
destination_id: UUID4
destination_type: str
link_type: str = 'process_link'

@classmethod
def from_json(cls, json: JSON) -> Iterable['Link']:
def from_json(cls, json: JSON, schema_version: Tuple[int]) -> Iterable['Link']:
if 'source_id' in json:
# v5
# DCP/1 v5 (obsolete)
yield cls(source_id=UUID4(json['source_id']),
source_type=json['source_type'],
destination_id=UUID4(json['destination_id']),
destination_type=json['destination_type'])
else:
# vx
elif schema_version[0] == 1:
# DCP/1 vx (current)
process_id = UUID4(json['process'])
for source_id in json['inputs']:
yield cls(source_id=UUID4(source_id),
Expand All @@ -704,6 +705,42 @@ def from_json(cls, json: JSON) -> Iterable['Link']:
source_type='process',
destination_id=UUID4(protocol['protocol_id']),
destination_type=lookup(protocol, 'type', 'protocol_type'))
elif schema_version[0] == 2:
# DCP/2 (current)
link_type = json['link_type']
if link_type == 'process_link':
process_id = UUID4(json['process_id'])
process_type = json['process_type']
for input_ in json['inputs']:
yield cls(link_type=link_type,
source_id=UUID4(input_['input_id']),
source_type=input_['input_type'],
destination_id=process_id,
destination_type=process_type)
for output in json['outputs']:
yield cls(link_type=link_type,
source_id=process_id,
source_type=process_type,
destination_id=UUID4(output['output_id']),
destination_type=output['output_type'])
for protocol in json['protocols']:
yield cls(link_type=link_type,
source_id=process_id,
source_type=process_type,
destination_id=UUID4(protocol['protocol_id']),
destination_type=protocol['protocol_type'])
elif link_type == 'supplementary_file_link':
entity = json['entity']
for supp_file in json['files']:
yield cls(link_type=link_type,
source_id=UUID4(entity['entity_id']),
source_type=entity['entity_type'],
destination_id=UUID4(supp_file['file_id']),
destination_type=supp_file['file_type'])
else:
assert False, f'Unknown link_type {link_type}'
else:
assert False, f'Unknown schema_version {schema_version}'


@dataclass(init=False)
Expand Down Expand Up @@ -772,16 +809,21 @@ def from_json_vx(core_cls: Type[E], **kwargs) -> MutableMapping[UUID4, E]:

self.entities = {**self.projects, **self.biomaterials, **self.processes, **self.protocols, **self.files}

links = metadata_files['links.json']['links']
self.links = list(chain.from_iterable(map(Link.from_json, links)))
links_json = metadata_files['links.json']
schema_version = tuple(map(int, links_json['schema_version'].split('.')))
self.links = list(chain.from_iterable(
Link.from_json(link, schema_version)
for link in links_json['links']
))

for link in self.links:
source_entity = self.entities[link.source_id]
destination_entity = self.entities[link.destination_id]
assert isinstance(source_entity, LinkedEntity)
assert isinstance(destination_entity, LinkedEntity)
source_entity.connect_to(destination_entity, forward=True)
destination_entity.connect_to(source_entity, forward=False)
if link.link_type == 'process_link':
source_entity = self.entities[link.source_id]
destination_entity = self.entities[link.destination_id]
assert isinstance(source_entity, LinkedEntity)
assert isinstance(destination_entity, LinkedEntity)
source_entity.connect_to(destination_entity, forward=True)
destination_entity.connect_to(source_entity, forward=False)

def root_entities(self) -> Mapping[UUID4, LinkedEntity]:
roots = {}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
[
{
"content-type": "application/json; dcp-type=\"metadata/biomaterial\"",
"crc32c": "0300a92e",
"indexed": true,
"name": "cell_suspension_0.json",
"s3_etag": "4f3859e7778d1818bcf4120b76a1ffa6",
"sha1": "505038762f830ae810e7a63eea87ca72fc90196b",
"sha256": "4909898b1cbaea063ba589b146f7457c56928a6864546b4a465cde3d1b1d67f3",
"size": 850,
"uuid": "01ba6be9-ed4b-4c6b-ae05-2e06aadc2019",
"version": "2019-05-14T120006.941000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/biomaterial\"",
"crc32c": "7158fd43",
"indexed": true,
"name": "specimen_from_organism_0.json",
"s3_etag": "676e573bec1eb8fe9d7b9888ece979e7",
"sha1": "04c71e27a86ea91ee260df9feab594517cf9c5cd",
"sha256": "37eb5a18c9be2be0c452aecbf2d7cf50ec9af68e0379f6647c7aedf5372b9833",
"size": 812,
"uuid": "74eb3cb5-918a-49fc-9e15-3ac49fd54caf",
"version": "2019-05-14T114647.512000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/biomaterial\"",
"crc32c": "833cd7f3",
"indexed": true,
"name": "donor_organism_0.json",
"s3_etag": "15380716b8a9c7a78f33a6051a1a477d",
"sha1": "657c0e467fb61ee80819f31f86369c2227220307",
"sha256": "33e5ecaf9d039b5a41718eb2e38a0d66d15a701eaca7fdbebc9f2b5f46561ee1",
"size": 1749,
"uuid": "63818269-c4d9-429b-85a3-db39c0dd7fa0",
"version": "2019-05-14T112950.173000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/file\"",
"crc32c": "02c57821",
"indexed": true,
"name": "sequence_file_0.json",
"s3_etag": "608e74f1bf465c2d5e4951fab48b349b",
"sha1": "b885e81fd0ce11e170176535536a45299e1eed3a",
"sha256": "e497c5c87514a04bf398d95c60884bfdf10bc084c75d413ff2d308cb22326b94",
"size": 459,
"uuid": "61fd5348-92c5-446b-a57a-746330cebf76",
"version": "2019-05-14T112117.762000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/file\"",
"crc32c": "61502b5b",
"indexed": true,
"name": "supplementary_file_0.json",
"s3_etag": "b9789082b025294c44dce46ddd22b7aa",
"sha1": "daa76ae1d2af21871d8180006b066f56cdf7594f",
"sha256": "069391131baa61584a57a5fd9ec9633b6224722c783faf269db28d76d6133c79",
"size": 481,
"uuid": "e738a267-87fc-4070-abc7-b3be6442c6d0",
"version": "2019-05-14T110115.816000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/file\"",
"crc32c": "030e888d",
"indexed": true,
"name": "supplementary_file_1.json",
"s3_etag": "0dc591fd5db04fbb770de26ae5419037",
"sha1": "48b75f01afaee3ca9f04510725c4c3b0c536f8b0",
"sha256": "2a3940369dacfea767cc02b29b637d94f46fdffde6bc1b08c238f8b2850471de",
"size": 477,
"uuid": "01a1d04b-05d0-4904-b627-68b0dc02bc17",
"version": "2019-05-14T110109.564000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/project\"",
"crc32c": "ef3ef012",
"indexed": true,
"name": "project_0.json",
"s3_etag": "308189c7eab62a35a894125efbe76558",
"sha1": "9da3dba1ec159439071d6390a25ab9110be95f86",
"sha256": "2e2f330270644222106b2a8f648733cbd7609c283e29f1ee07363be32c223a9b",
"size": 8464,
"uuid": "8c3c290d-dfff-4553-8868-54ce45f4ba7f",
"version": "2019-05-14T112051.382000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/process\"",
"crc32c": "57699965",
"indexed": true,
"name": "process_0.json",
"s3_etag": "fdc514845934d034918521bb96f1ee24",
"sha1": "ba3c05024bdd18dd054ef07cbd2c6594fe64e168",
"sha256": "2ff1fab2dac6ac35b866a437f7bd720066212998432ab8eb05f6039b748c5225",
"size": 379,
"uuid": "91f475ec-be51-4f1e-a904-74b10b7259f1",
"version": "2019-05-14T121053.274000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/process\"",
"crc32c": "ad459007",
"indexed": true,
"name": "process_1.json",
"s3_etag": "b067d9b22cf3d999069bdc42a56fabbe",
"sha1": "23ed66f1f47ecb29cf5354d6fb675784804df817",
"sha256": "a3948a06685954b8f5d21113621e53cea384aea12aa07d9bb19796ccf4e7300d",
"size": 378,
"uuid": "e521d67d-c134-4dbd-9555-29e23f0463c5",
"version": "2019-05-14T122720.135000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/process\"",
"crc32c": "4352f3bf",
"indexed": true,
"name": "process_2.json",
"s3_etag": "f94d4ee8d194714e8db01f7f1d9ad66e",
"sha1": "db413a2532ec0d9d235aa56fd69ba27fc96bc70d",
"sha256": "cf1d4be9c9a62590b38014d092b67a981ec516013af924d30870a18cd6a060ed",
"size": 377,
"uuid": "453a352c-94fb-4d3b-b609-df1e7abf8c09",
"version": "2019-05-14T122652.545000Z"
},
{
"content-type": "application/json; dcp-type=\"metadata/links\"",
"crc32c": "2c25a3c2",
"indexed": true,
"name": "links.json",
"s3_etag": "1f7c2cbccc797da2d885bb52de52232d",
"sha1": "39ba144902ba5d1095af5b8d5dace8dd19a4b08d",
"sha256": "33052247612f39f6ea48568a12c761842af30030e1242943bb2c5eb238722488",
"size": 2083,
"uuid": "51168054-6dad-45aa-916a-ef71135651b2",
"version": "2019-05-16T015324.197421Z"
},
{
"content-type": "application/gzip; dcp-type=data",
"crc32c": "86192092",
"indexed": false,
"name": "21784_6#51_1.fastq.gz",
"s3_etag": "8ef4064fd5c94502b0d42c0dbecc74ca",
"sha1": "0c0a36b8e8e8bf53db8e5eed5688546f0d23f863",
"sha256": "1975336c7071ac70caef3ff833089b0dc26962b4d6fc5159aece28eaa4324052",
"size": 7591723,
"uuid": "e4d9ebe5-2e62-47cf-bc35-1fb8ef7c1ef7",
"version": "2019-05-16T015324.490566Z"
},
{
"content-type": "application/gzip; dcp-type=data",
"crc32c": "56a9e200",
"indexed": false,
"name": "21784_6#51_2.fastq.gz",
"s3_etag": "86b0f2604a0aa1dbe3bd72d316f077f3",
"sha1": "d138b7a22c6fd4834f03564921a6436ebed5076e",
"sha256": "6a95e7f0792fe70dfcf174ee585c6900f2ed5fb401c5c8d0d15bef5e95271726",
"size": 7553715,
"uuid": "60f17b79-74d9-49b5-a6da-48580a67f11f",
"version": "2019-05-16T015324.739985Z"
},
{
"content-type": "application/pdf; dcp-type=data",
"crc32c": "847325b6",
"indexed": false,
"name": "TissueDissociationProtocol.pdf",
"s3_etag": "7e892bf8f6aa489ccb08a995c7f017e1",
"sha1": "f2237ad0a776fd7057eb3d3498114c85e2f521d7",
"sha256": "6929799f227ae5f0b3e0167a6cf2bd683db097848af6ccde6329185212598779",
"size": 32748,
"uuid": "6578c322-7060-4c82-8469-9e54100e6b44",
"version": "2019-05-16T015325.007527Z"
},
{
"content-type": "application/pdf; dcp-type=data",
"crc32c": "b9364bfa",
"indexed": false,
"name": "SmartSeq2_RTPCR_protocol.pdf",
"s3_etag": "846fd9e6b98041df46a1ddb94e85b6b9",
"sha1": "89d9eb3f1b94f78a33d46c0288c2e81d4002049b",
"sha256": "2f6866c4ede92123f90dd15fb180fac56e33309b8fd3f4f52f263ed2f8af2f16",
"size": 29230,
"uuid": "cd8e02d1-d0f9-4094-9a31-329931df60dc",
"version": "2019-05-16T015325.251968Z"
},
{
"content-type": "application/pdf; dcp-type=data",
"crc32c": "3658ec51",
"indexed": false,
"name": "SmartSeq2_sequencing_protocol.pdf",
"s3_etag": "2742e1e78f6d4663bf41d3080396695c",
"sha1": "9ec6ee2b6e2093681c1fed694b3a8c78a2aa3438",
"sha256": "9c93a354a8636c041a31ba6f3fb00ef20352e1b853d8080d63a654221cb35673",
"size": 61134,
"uuid": "bf92ef4a-c422-44fb-bfc1-c2f86528b86b",
"version": "2019-05-16T015325.498431Z"
}
]
Loading

0 comments on commit e8d97db

Please sign in to comment.