From f80eddd486bfd59e07f0619cf156fb9c0efbf6ab Mon Sep 17 00:00:00 2001 From: Hannes Schmidt Date: Mon, 12 Jul 2021 13:42:43 -0700 Subject: [PATCH 1/3] Prepare beta 32 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5a6d5cf..ff5c52a 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="hca-metadata-api", - version="1.0b31.dev1", + version="1.0b32.dev1", license='MIT', install_requires=[ "dataclasses >= 0.6;python_version<'3.7'" From 2769a8172c6a170cd64dcba562bdfa8b68909518 Mon Sep 17 00:00:00 2001 From: Daniel Sotirhos Date: Wed, 18 Aug 2021 08:39:40 -0700 Subject: [PATCH 2/3] Add Project.estimated_cell_count property (#53) --- src/humancellatlas/data/metadata/api.py | 2 ++ test/test.py | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py index 5b9f3fc..a215414 100644 --- a/src/humancellatlas/data/metadata/api.py +++ b/src/humancellatlas/data/metadata/api.py @@ -292,6 +292,7 @@ class Project(Entity): array_express_accessions: Set[str] insdc_study_accessions: Set[str] supplementary_links: Set[str] + estimated_cell_count: Optional[int] def __init__(self, json: JSON, @@ -311,6 +312,7 @@ def __init__(self, self.array_express_accessions = set(content.get('array_express_accessions', [])) self.insdc_study_accessions = set(content.get('insdc_study_accessions', [])) self.supplementary_links = set(content.get('supplementary_links', [])) + self.estimated_cell_count = content.get('estimated_cell_count') @property def laboratory_names(self) -> set: diff --git a/test/test.py b/test/test.py index 5c3b665..9399c32 100644 --- a/test/test.py +++ b/test/test.py @@ -520,7 +520,7 @@ def _assert_bundle(self, uuid, version, manifest, metadata_files, return bundle def test_canned_staging_area(self): - ref = 'de355cad77ea7988040b6f1f5f2eafae58f686a8' + ref = '55628953e4b3a24a7d7798569b6082032bd07a6b' url = f'https://github.com/HumanCellAtlas/schema-test-data/tree/{ref}/tests' factory = GitHubStagingAreaFactory.from_url(url) staging_area = factory.load_staging_area() @@ -529,8 +529,9 @@ def test_canned_staging_area(self): with self.subTest(link_id=link_id): version, manifest, metadata_files = staging_area.get_bundle(link_id) bundle = Bundle(link_id, version, manifest, metadata_files) - bundle_json = as_json(bundle) - self.assertEqual(link_id, bundle_json['uuid']) + self.assertEqual(bundle.uuid, UUID(link_id)) + project = bundle.projects[UUID('90bf705c-d891-5ce2-aa54-094488b445c6')] + self.assertEqual(project.estimated_cell_count, 10000) def test_analysis_protocol(self): uuid = 'ffee7f29-5c38-461a-8771-a68e20ec4a2e' From 10e55c34a488c5b3f168fd1c9ba8e5227e359e90 Mon Sep 17 00:00:00 2001 From: amar jandu Date: Tue, 17 Aug 2021 17:58:06 -0700 Subject: [PATCH 3/3] Generic support for project accessions (#54) --- src/humancellatlas/data/metadata/api.py | 46 +++++++++++++++++-- .../2019-03-17T220646.332108Z/metadata.json | 3 +- test/test.py | 14 +++++- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py index a215414..8589b88 100644 --- a/src/humancellatlas/data/metadata/api.py +++ b/src/humancellatlas/data/metadata/api.py @@ -280,6 +280,12 @@ def contact_name(self) -> str: return self.name +@dataclass(eq=True, frozen=True) +class Accession: + domain: str + value: str + + @dataclass(init=False) class Project(Entity): project_short_name: str @@ -287,6 +293,7 @@ class Project(Entity): project_description: Optional[str] # optional up to core/project/5.2.2/project_core publications: Set[ProjectPublication] contributors: Set[ProjectContact] + accessions: Set[Accession] insdc_project_accessions: Set[str] geo_series_accessions: Set[str] array_express_accessions: Set[str] @@ -307,12 +314,43 @@ def __init__(self, self.publications = set(ProjectPublication.from_json(publication) for publication in content.get('publications', [])) self.contributors = {ProjectContact.from_json(contributor) for contributor in content.get('contributors', [])} - self.insdc_project_accessions = set(content.get('insdc_project_accessions', [])) - self.geo_series_accessions = set(content.get('geo_series_accessions', [])) - self.array_express_accessions = set(content.get('array_express_accessions', [])) - self.insdc_study_accessions = set(content.get('insdc_study_accessions', [])) self.supplementary_links = set(content.get('supplementary_links', [])) self.estimated_cell_count = content.get('estimated_cell_count') + accessions = set() + for name, value in content.items(): + prefix, _, suffix = name.rpartition('_') + if suffix == 'accessions': + assert prefix, name + assert isinstance(value, list) + accessions.update(Accession(domain=prefix, value=v) for v in value) + self.accessions = accessions + + def _accessions(self, domain: str) -> Set[str]: + return {a.value for a in self.accessions if a.domain == domain} + + @property + def insdc_project_accessions(self) -> Set[str]: + warnings.warn("Project.insdc_project_accessions is deprecated. " + "Use Project.accessions instead.", DeprecationWarning) + return self._accessions('insdc_project') + + @property + def geo_series_accessions(self) -> Set[str]: + warnings.warn("Project.geo_series_accessions is deprecated. " + "Use Project.accessions instead.", DeprecationWarning) + return self._accessions('geo_series') + + @property + def array_express_accessions(self) -> Set[str]: + warnings.warn("Project.array_express_accessions is deprecated. " + "Use Project.accessions instead.", DeprecationWarning) + return self._accessions('array_express') + + @property + def insdc_study_accessions(self) -> Set[str]: + warnings.warn("Project.insdc_study_accessions is deprecated. " + "Use Project.accessions instead.", DeprecationWarning) + return self._accessions('insdc_study') @property def laboratory_names(self) -> set: diff --git a/test/cans/staging/eca05046-3dad-4e45-b86c-8720f33a5dde/2019-03-17T220646.332108Z/metadata.json b/test/cans/staging/eca05046-3dad-4e45-b86c-8720f33a5dde/2019-03-17T220646.332108Z/metadata.json index 91847c7..7b2d26f 100644 --- a/test/cans/staging/eca05046-3dad-4e45-b86c-8720f33a5dde/2019-03-17T220646.332108Z/metadata.json +++ b/test/cans/staging/eca05046-3dad-4e45-b86c-8720f33a5dde/2019-03-17T220646.332108Z/metadata.json @@ -258,7 +258,8 @@ } ], "insdc_project_accessions": [ - "SRP000000" + "SRP000000", + "SRP000001" ], "geo_series_accessions": [ "GSE00000" diff --git a/test/test.py b/test/test.py index 9399c32..a582b00 100644 --- a/test/test.py +++ b/test/test.py @@ -23,6 +23,7 @@ from more_itertools import one from humancellatlas.data.metadata.api import ( + Accession, AgeRange, AnalysisFile, AnalysisProtocol, @@ -313,10 +314,17 @@ def test_accessions_fields(self): project_roles={'principal investigator'}, age_range=AgeRange(630720000.0, 630720000.0), library_construction_methods={'10X v2 sequencing'}, - insdc_project_accessions={'SRP000000'}, + insdc_project_accessions={'SRP000000', 'SRP000001'}, geo_series_accessions={'GSE00000'}, array_express_accessions={'E-AAAA-00'}, - insdc_study_accessions={'PRJNA000000'}) + insdc_study_accessions={'PRJNA000000'}, + accessions={ + Accession('insdc_project', 'SRP000000'), + Accession('insdc_project', 'SRP000001'), + Accession('geo_series', 'GSE00000'), + Accession('array_express', 'E-AAAA-00'), + Accession('insdc_study', 'PRJNA000000') + }) def test_imaging_bundle(self): self._test_bundle(uuid='94f2ba52-30c8-4de0-a78e-f95a3f8deb9c', @@ -396,6 +404,7 @@ def _assert_bundle(self, uuid, version, manifest, metadata_files, preservation_methods=frozenset({None}), library_construction_methods=frozenset(), selected_cell_types=frozenset(), + accessions=frozenset(), insdc_project_accessions=frozenset(), geo_series_accessions=frozenset(), array_express_accessions=frozenset(), @@ -450,6 +459,7 @@ def _assert_bundle(self, uuid, version, manifest, metadata_files, self.assertEqual(geo_series_accessions, project.geo_series_accessions) self.assertEqual(array_express_accessions, project.array_express_accessions) self.assertEqual(insdc_study_accessions, project.insdc_study_accessions) + self.assertEqual(accessions, project.accessions) root_entities = bundle.root_entities().values() root_entity_types = {type(e) for e in root_entities}