Skip to content
This repository has been archived by the owner on Oct 11, 2022. It is now read-only.

Commit

Permalink
Replace '!' with '/' in file names (#22, PR #23)
Browse files Browse the repository at this point in the history
  • Loading branch information
hannes-ucsc committed Nov 2, 2020
2 parents d0d439c + 8a6e1b2 commit fb4e10d
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 7 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SHELL=/bin/bash

ifneq ($(shell python -c "import sys; print(hasattr(sys, 'real_prefix'))"),True)
ifndef VIRTUAL_ENV
$(error Looks like no virtualenv is active)
endif

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
package_dir={'': 'src'},
packages=find_namespace_packages('src'),
project_urls={
"Source Code": "https://github.com/HumanCellAtlas/metadata-api",
"Source Code": "https://github.com/DataBiosphere/hca-metadata-api",
}
)
17 changes: 13 additions & 4 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Type,
TypeVar,
Union,
Dict,
)
from uuid import UUID
import warnings
Expand All @@ -35,10 +36,10 @@
# A few helpful type aliases
#
UUID4 = UUID
AnyJSON2 = Union[str, int, float, bool, None, Mapping[str, Any], List[Any]]
AnyJSON1 = Union[str, int, float, bool, None, Mapping[str, AnyJSON2], List[AnyJSON2]]
AnyJSON = Union[str, int, float, bool, None, Mapping[str, AnyJSON1], List[AnyJSON1]]
JSON = Mapping[str, AnyJSON]
AnyJSON2 = Union[str, int, float, bool, None, Dict[str, Any], List[Any]]
AnyJSON1 = Union[str, int, float, bool, None, Dict[str, AnyJSON2], List[AnyJSON2]]
AnyJSON = Union[str, int, float, bool, None, Dict[str, AnyJSON1], List[AnyJSON1]]
JSON = Dict[str, AnyJSON]


@dataclass(init=False)
Expand Down Expand Up @@ -626,6 +627,10 @@ class ManifestEntry:
version: str

def __init__(self, json: JSON):
# '/' was once forbidden in file paths and was encoded with '!'. Now
# '/' is allowed and we force it in the metadata so that backwards
# compatibility is simplified downstream.
json['name'] = json['name'].replace('!', '/')
self.json = json
self.content_type = json['content-type']
self.uuid = UUID4(json['uuid'])
Expand All @@ -649,7 +654,11 @@ class File(LinkedEntity):
def __init__(self, json: JSON, manifest: Mapping[str, ManifestEntry]):
super().__init__(json)
content = json.get('content', json)
# '/' was once forbidden in file paths and was encoded with '!'. Now
# '/' is allowed and we force it in the metadata so that backwards
# compatibility is simplified downstream.
core = content['file_core']
core['file_name'] = core['file_name'].replace('!', '/')
self.format = lookup(core, 'format', 'file_format')
self.manifest_entry = manifest[core['file_name']]
self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])}
Expand Down
44 changes: 43 additions & 1 deletion test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from humancellatlas.data.metadata.api import (
AgeRange,
AnalysisFile,
AnalysisProtocol,
Biomaterial,
Bundle,
Expand Down Expand Up @@ -770,8 +771,49 @@ def test_missing_mandatory_checksums(self):
checksums.append(cm.exception.args[1])
self.assertEqual(['crc32c', 'crc32c', 'sha256', 'sha256'], checksums)

def test_name_substitution(self):
    """
    Check that '!' in file names is replaced with '/' when a Bundle is built.

    The raw manifest is inspected first: the names containing '!' must match
    a known set and no name may contain '/'. After constructing the Bundle,
    neither the manifest keys nor the ``file_core.file_name`` values of the
    AnalysisFile/SequenceFile entities may contain '!', and the names that
    contain '/' must be exactly the former '!' names with '!' swapped for '/'.
    """
    uuid = 'ffee7f29-5c38-461a-8771-a68e20ec4a2e'
    version = '2019-02-02T065454.662896Z'
    manifest, metadata_files = self._load_bundle(uuid, version, replica='aws', deployment='prod')

    # Capture the raw names before the Bundle is constructed; the assertions
    # below compare this snapshot against the state after construction.
    names_before = [entry['name'] for entry in manifest]
    bang_names_before = {name for name in names_before if '!' in name}
    slash_names_before = {name for name in names_before if '/' in name}

    expected_bang_before = {
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!.zattrs',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!.zgroup',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_id!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_id!0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric!0.0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric_name!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_numeric_name!0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string!0.0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string_name!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!cell_metadata_string_name!0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!expression!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!expression!0.0',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!gene_id!.zarray',
        '9ea49dd1-7511-48f8-be12-237e3d0690c0.zarr!gene_id!0',
    }
    self.assertEqual(expected_bang_before, bang_names_before)
    self.assertEqual(set(), slash_names_before)

    bundle = Bundle(uuid, version, manifest, metadata_files)

    expected_slash_after = {name.replace('!', '/') for name in bang_names_before}
    manifest_names_after = set(bundle.manifest.keys())
    entity_names_after = {
        entity.json['file_core']['file_name']
        for entity in bundle.entities.values()
        if isinstance(entity, (AnalysisFile, SequenceFile))
    }

    # Both views of the file names must agree after the substitution.
    for names_after in (manifest_names_after, entity_names_after):
        bang_names_after = {name for name in names_after if '!' in name}
        self.assertEqual(set(), bang_names_after)
        slash_names_after = {name for name in names_after if '/' in name}
        self.assertEqual(expected_slash_after, slash_names_after)


def load_tests(loader, tests, ignore):
def load_tests(_loader, tests, _ignore):
tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.age_range'))
tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.lookup'))
tests.addTests(doctest.DocTestSuite('humancellatlas.data.metadata.api'))
Expand Down

0 comments on commit fb4e10d

Please sign in to comment.