diff --git a/Makefile b/Makefile
index 11b28cc..53d623d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 SHELL=/bin/bash
 
-ifneq ($(shell python -c "import sys; print(hasattr(sys, 'real_prefix'))"),True)
+ifndef VIRTUAL_ENV
 $(error Looks like no virtualenv is active)
 endif
 
diff --git a/setup.py b/setup.py
index 08d14f3..6e41dd4 100644
--- a/setup.py
+++ b/setup.py
@@ -30,6 +30,6 @@
     package_dir={'': 'src'},
     packages=find_namespace_packages('src'),
     project_urls={
-        "Source Code": "https://github.com/HumanCellAtlas/metadata-api",
+        "Source Code": "https://github.com/DataBiosphere/hca-metadata-api",
     }
 )
diff --git a/src/humancellatlas/data/metadata/api.py b/src/humancellatlas/data/metadata/api.py
index e4233ed..660a80e 100644
--- a/src/humancellatlas/data/metadata/api.py
+++ b/src/humancellatlas/data/metadata/api.py
@@ -16,6 +16,7 @@
     Type,
     TypeVar,
     Union,
+    Dict,
 )
 from uuid import UUID
 import warnings
@@ -35,10 +36,10 @@
 # A few helpful type aliases
 #
 UUID4 = UUID
-AnyJSON2 = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]
-AnyJSON1 = Union[str, int, float, bool, None, Mapping[str, AnyJSON2], List[AnyJSON2]]
-AnyJSON = Union[str, int, float, bool, None, Mapping[str, AnyJSON1], List[AnyJSON1]]
-JSON = Mapping[str, AnyJSON]
+AnyJSON2 = Union[str, int, float, bool, None, Dict[str, Any], List[Any]]
+AnyJSON1 = Union[str, int, float, bool, None, Dict[str, AnyJSON2], List[AnyJSON2]]
+AnyJSON = Union[str, int, float, bool, None, Dict[str, AnyJSON1], List[AnyJSON1]]
+JSON = Dict[str, AnyJSON]
 
 
 @dataclass(init=False)
@@ -626,6 +627,10 @@ class ManifestEntry:
     version: str
 
     def __init__(self, json: JSON):
+        # '/' was once forbidden in file paths and was encoded with '!'. Now
+        # '/' is allowed and we force it in the metadata so that backwards
+        # compatibility is simplified downstream.
+        json['name'] = json['name'].replace('!', '/')
         self.json = json
         self.content_type = json['content-type']
         self.uuid = UUID4(json['uuid'])
@@ -649,7 +654,11 @@ class File(LinkedEntity):
     def __init__(self, json: JSON, manifest: Mapping[str, ManifestEntry]):
         super().__init__(json)
         content = json.get('content', json)
+        # '/' was once forbidden in file paths and was encoded with '!'. Now
+        # '/' is allowed and we force it in the metadata so that backwards
+        # compatibility is simplified downstream.
         core = content['file_core']
+        core['file_name'] = core['file_name'].replace('!', '/')
         self.format = lookup(core, 'format', 'file_format')
         self.manifest_entry = manifest[core['file_name']]
         self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])}
diff --git a/test/test.py b/test/test.py
index b31e770..9ddc2f5 100644
--- a/test/test.py
+++ b/test/test.py
@@ -24,6 +24,7 @@
 
 from humancellatlas.data.metadata.api import (
     AgeRange,
+    AnalysisFile,
     AnalysisProtocol,
     Biomaterial,
     Bundle,
@@ -770,8 +771,49 @@ def test_missing_mandatory_checksums(self):
                 checksums.append(cm.exception.args[1])
         self.assertEqual(['crc32c', 'crc32c', 'sha256', 'sha256'], checksums)
 
+    def test_name_substitution(self):
+        uuid = 'ffee7f29-5c38-461a-8771-a68e20ec4a2e'
+        version = '2019-02-02T065454.662896Z'
+        manifest, metadata_files = self._load_bundle(uuid, version, replica='aws', deployment='prod')
+
+        files_before = [f['name'] for f in manifest]
+        with_bang_before = set(f for f in files_before if '!' in f)
+        expected_bang_before = {
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!.zattrs',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!.zgroup',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_id!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_id!0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric!0.0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric_name!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric_name!0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string!0.0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string_name!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string_name!0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!expression!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!expression!0.0',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!gene_id!.zarray',
+            '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!gene_id!0',
+        }
+        self.assertEqual(expected_bang_before, with_bang_before)
+        with_slash_before = set(f for f in files_before if '/' in f)
+        self.assertEqual(set(), with_slash_before)
+
+        bundle = Bundle(uuid, version, manifest, metadata_files)
+
+        expected_slash_after = set(f1.replace('!', '/') for f1 in with_bang_before)
+        entity_json_file_names = set(e.json['file_core']['file_name']
+                                     for e in bundle.entities.values()
+                                     if isinstance(e, (AnalysisFile, SequenceFile)))
+        for files_after in set(bundle.manifest.keys()), entity_json_file_names:
+            with_bang_after = set(f1 for f1 in files_after if '!' in f1)
+            self.assertEqual(set(), with_bang_after)
+            with_slash_after = set(f1 for f1 in files_after if '/' in f1)
+            self.assertEqual(expected_slash_after, with_slash_after)
+
 
-def load_tests(loader, tests, ignore):
+def load_tests(_loader, tests, _ignore):
     tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.age_range'))
     tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.lookup'))
     tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.api'))