Skip to content
This repository has been archived by the owner on Oct 11, 2022. It is now read-only.

Commit

Permalink
Release beta 29
Browse files Browse the repository at this point in the history
  • Loading branch information
hannes-ucsc committed May 21, 2021
2 parents d460591 + e4811a5 commit a291ea7
Show file tree
Hide file tree
Showing 9 changed files with 506 additions and 115 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ install_flake8:
pip install -U flake8==3.7.8

install:
pip install -e .[dss,test,coverage,examples]
pip install -e .[dss,staging_area,test,coverage,examples]

travis_install:
pip install -U setuptools>=40.1.0
pip install -e .[dss,test,coverage]
pip install -e .[dss,staging_area,test,coverage]

test: install
coverage run -m unittest discover -vs test
Expand Down
22 changes: 19 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[![Build Status](https://travis-ci.org/HumanCellAtlas/metadata-api.svg?branch=develop)](https://travis-ci.org/HumanCellAtlas/metadata-api)
[![Coverage Status](https://coveralls.io/repos/github/HumanCellAtlas/metadata-api/badge.svg?branch=develop)](https://coveralls.io/github/HumanCellAtlas/metadata-api?branch=develop)
[![Build Status](https://travis-ci.com/DataBiosphere/hca-metadata-api.svg?branch=develop)](https://travis-ci.com/DataBiosphere/hca-metadata-api)
[![Coverage Status](https://coveralls.io/repos/github/DataBiosphere/hca-metadata-api/badge.svg?branch=develop)](https://coveralls.io/github/DataBiosphere/hca-metadata-api?branch=develop)

## The HumanCellAtlas metadata API

Expand All @@ -16,9 +16,25 @@ Version 1.0 will be on PyPI but until then we need to install from GitHub:
```
virtualenv -p python3 foo
source foo/bin/activate
pip install "git+git://github.com/HumanCellAtlas/metadata-api@master#egg=hca-metadata-api[dss]"
pip install "git+git://github.com/DataBiosphere/hca-metadata-api@master#egg=hca-metadata-api[dss]"
```

You can omit `[dss]` at the end of the `pip` invocation if you don't need
the download helper this library provides and don't want to pull in the HCA CLI
distribution the helper depends on.

## GitHub credentials

GitHub credentials in the form of a personal access token are required to run
test cases that pull files from the canned staging area in the
[schema-test-data](https://github.com/HumanCellAtlas/schema-test-data)
repository.

Use the
[Creating a personal access token](https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token)
guide to create your token. No additional scopes or permissions should be
granted to this token as it will only be used to read from the canned staging
area repository.

Copy the token and use it as the value of an environment variable named
`GITHUB_TOKEN`.
10 changes: 8 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="hca-metadata-api",
version="1.0b28",
version="1.0b29",
license='MIT',
install_requires=[
"dataclasses >= 0.6;python_version<'3.7'"
Expand All @@ -14,6 +14,12 @@
'urllib3 >= 1.23',
'requests >= 2.19.1'
],
"staging_area": [
'attrs == 20.3.0',
'furl == 2.1.2',
'jsonschema == 3.2.0',
'PyGithub == 1.54.1'
],
"examples": [
'jupyter >= 1.0.0'
],
Expand All @@ -30,6 +36,6 @@
package_dir={'': 'src'},
packages=find_namespace_packages('src'),
project_urls={
"Source Code": "https://github.com/HumanCellAtlas/metadata-api",
"Source Code": "https://github.com/DataBiosphere/hca-metadata-api",
}
)
18 changes: 16 additions & 2 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class Entity:
document_id: UUID4
submitter_id: Optional[str]
metadata_manifest_entry: Optional[ManifestEntry]
submission_date: str
update_date: Optional[str]

@property
def is_stitched(self):
Expand Down Expand Up @@ -120,6 +122,8 @@ def __init__(self,
if False and self.metadata_manifest_entry is not None:
assert self.document_id == self.metadata_manifest_entry.uuid
self.submitter_id = provenance.get('submitter_id')
self.submission_date = lookup(provenance, 'submission_date', 'submissionDate')
self.update_date = lookup(provenance, 'update_date', 'updateDate', default=None)

@property
def address(self):
Expand Down Expand Up @@ -740,6 +744,7 @@ class File(LinkedEntity):
to_processes: MutableMapping[UUID4, Process]
manifest_entry: ManifestEntry
content_description: Set[str]
file_source: str

def __init__(self,
json: JSON,
Expand All @@ -755,6 +760,7 @@ def __init__(self,
self.format = lookup(core, 'format', 'file_format')
self.manifest_entry = manifest[core['file_name']]
self.content_description = {ontology_label(cd) for cd in core.get('content_description', [])}
self.file_source = core.get('file_source')
self.from_processes = {}
self.to_processes = {}

Expand Down Expand Up @@ -796,7 +802,15 @@ class SupplementaryFile(File):

@dataclass(init=False)
class AnalysisFile(File):
    """
    A file entity that additionally exposes the `matrix_cell_count` property
    of its metadata document.
    """
    # None when the metadata document has no `matrix_cell_count` property,
    # since the value is read with `.get()` and no default.
    matrix_cell_count: Optional[int]

    def __init__(self,
                 json: JSON,
                 metadata_manifest_entry,
                 manifest: Mapping[str, ManifestEntry]):
        """
        :param json: the parsed metadata document describing the file
        :param metadata_manifest_entry: passed through unchanged to the
                                        superclass constructor
        :param manifest: manifest entries keyed by string, passed through
                         unchanged to the superclass constructor
        """
        super().__init__(json, metadata_manifest_entry, manifest)
        # Use the document itself when it has no `content` wrapper.
        content = json.get('content', json)
        self.matrix_cell_count = content.get('matrix_cell_count')


@dataclass(init=False)
Expand Down Expand Up @@ -843,7 +857,7 @@ def from_json(cls, json: JSON, schema_version: Tuple[int]) -> Iterable['Link']:
source_type='process',
destination_id=UUID4(protocol['protocol_id']),
destination_type=lookup(protocol, 'type', 'protocol_type'))
elif schema_version[0] == 2:
elif schema_version[0] in (2, 3):
# DCP/2 (current)
link_type = json['link_type']
if link_type == 'process_link':
Expand Down
36 changes: 36 additions & 0 deletions src/humancellatlas/data/metadata/helpers/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copied from https://github.com/DataBiosphere/azul/blob/develop/src/azul/__init__.py


class RequirementError(RuntimeError):
    """
    Raised when a requirement is not satisfied. In contrast to a failed
    assertion, an unsatisfied requirement does not indicate a bug in the
    program.
    """


def require(condition: bool, *args, exception: type = RequirementError):
    """
    Ensure that the given condition holds, raising an exception if it does not.

    :param condition: the boolean condition that must be true
    :param args: optional positional arguments forwarded to the exception
                 constructor. Usually a single string describing the
                 requirement.
    :param exception: the exception class to instantiate and raise when the
                      condition is false. Defaults to RequirementError.
    """
    if not condition:
        raise exception(*args)


def reject(condition: bool, *args, exception: type = RequirementError):
    """
    Ensure that the given condition does not hold, raising an exception if it
    does.

    :param condition: the boolean condition that must be false
    :param args: optional positional arguments forwarded to the exception
                 constructor. Usually a single string describing the rejected
                 condition.
    :param exception: the exception class to instantiate and raise when the
                      condition is true. Defaults to RequirementError.
    """
    if not condition:
        return
    raise exception(*args)
42 changes: 42 additions & 0 deletions src/humancellatlas/data/metadata/helpers/schema_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from functools import (
lru_cache,
)
import json
import logging

from jsonschema import (
FormatChecker,
ValidationError,
validate,
)
import requests

from humancellatlas.data.metadata.api import (
JSON,
)
from humancellatlas.data.metadata.helpers.exception import (
RequirementError,
)

logger = logging.getLogger(__name__)


class SchemaValidator:
    """
    Validates JSON documents against the JSON schema that each document
    references in its `describedBy` property.
    """

    # Seconds to wait for the schema server. Without an explicit timeout,
    # `requests` may block indefinitely on an unresponsive server.
    schema_download_timeout = 60

    def validate_json(self, file_json: JSON, file_name: str):
        """
        Validate the given document against the schema it declares.

        :param file_json: the parsed JSON document. Its `describedBy` property
                          is used as the URL of the schema to validate against.
        :param file_name: a name for the document, only used in the arguments
                          of the raised exception
        :raises RequirementError: if the schema response cannot be parsed as
                                  JSON or if the document fails validation
        """
        schema_url = file_json['describedBy']
        try:
            schema = self._download_schema(schema_url)
        except json.decoder.JSONDecodeError as e:
            raise RequirementError('Failed to parse schema JSON',
                                   file_name, schema_url) from e
        try:
            validate(file_json, schema, format_checker=FormatChecker())
        except ValidationError as e:
            raise RequirementError(*e.args, file_name) from e

    # The cache wraps a static method so that it is keyed on the URL alone.
    # Caching an instance method would key on `self` as well, keeping every
    # validator instance alive and downloading each schema once per instance.
    @staticmethod
    @lru_cache(maxsize=None)
    def _download_schema(schema_url: str) -> JSON:
        """
        Fetch and parse the schema at the given URL. Results are cached per
        URL for the lifetime of the process.

        :param schema_url: the URL to download the schema from
        :return: the parsed JSON body of the response
        """
        response = requests.get(schema_url,
                                allow_redirects=False,
                                timeout=SchemaValidator.schema_download_timeout)
        response.raise_for_status()
        return response.json()
Loading

0 comments on commit a291ea7

Please sign in to comment.