-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #201 from jmchilton/idc_2
Enhancements to the IDC scripts
- Loading branch information
Showing
16 changed files
with
929 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from pathlib import Path | ||
from typing import ( | ||
Dict, | ||
List, | ||
Optional, | ||
Union, | ||
) | ||
|
||
import yaml | ||
from pydantic import ( | ||
BaseModel, | ||
Extra, | ||
) | ||
|
||
StrOrPath = Union[Path, str] | ||
|
||
|
||
class RepositoryInstallTarget(BaseModel):
    """A single tool shed repository to install.

    Instances are the elements of ``RepositoryInstallTargets.tools``. Only
    ``name`` and ``owner`` are required; every other field may be omitted
    and is then ``None``.
    """

    name: str
    owner: str
    # The ``None`` defaults below are spelled out on purpose: pydantic v1
    # supplies them implicitly for ``Optional`` annotations, but being
    # explicit keeps the "may be omitted" contract visible to readers and
    # independent of that implicit behaviour.
    tool_shed_url: Optional[str] = None
    tool_panel_section_id: Optional[str] = None
    tool_panel_section_label: Optional[str] = None
    revisions: Optional[List[str]] = None
    install_tool_dependencies: Optional[bool] = None
    install_repository_dependencies: Optional[bool] = None
    install_resolver_dependencies: Optional[bool] = None
|
||
|
||
class RepositoryInstallTargets(BaseModel):
    """Top-level model for a list of repositories to install.

    ``tools`` is required; ``api_key`` and ``galaxy_instance`` may be
    omitted and are then ``None``.
    """

    # Explicit ``None`` defaults instead of relying on pydantic v1's implicit
    # default for ``Optional`` annotations.
    api_key: Optional[str] = None
    galaxy_instance: Optional[str] = None
    tools: List[RepositoryInstallTarget]
|
||
|
||
class DataManager(BaseModel):
    """One data manager entry: the tool to run and the tags attached to it.

    Keys other than ``tags`` and ``tool_id`` are rejected at validation time.
    """

    tags: List[str]
    tool_id: str

    class Config:
        # Same effect as passing ``extra=Extra.forbid`` as a class keyword.
        extra = Extra.forbid
|
||
|
||
class DataManagers(BaseModel):
    """Custom-root model: a mapping from string key to ``DataManager``."""

    __root__: Dict[str, DataManager]

    class Config:
        # Same effect as passing ``extra=Extra.forbid`` as a class keyword.
        extra = Extra.forbid
|
||
|
||
class Genome(BaseModel):
    """Description of one genome and the indexers to run for it.

    ``id`` and ``description`` are required. All remaining fields may be
    omitted and are then ``None``; the defaults are written out explicitly
    rather than relying on pydantic v1's implicit default for ``Optional``
    annotations.
    """

    id: str  # The unique id of the data in Galaxy
    description: str  # The description of the data, including its taxonomy, version and date
    dbkey: Optional[str] = None
    # The source of the data. Can be: 'ucsc', an NCBI accession number or a
    # URL to a fasta file.
    source: Optional[str] = None

    # The following fields are currently purely for human consumption and
    # unused by IDC infrastructure.
    doi: Optional[str] = None  # Any DOI associated with the data
    blob: Optional[str] = None  # A blob for any other pertinent information
    checksum: Optional[str] = None  # A SHA256 checksum of the original
    version: Optional[str] = None  # Any version information associated with the data

    # Description of actions (data managers) to run on target genome.
    # Indexers to run - keyed on repository name - see data_managers.yml for
    # how to resolve these to tools.
    indexers: Optional[List[str]] = None
    # Unimplemented: but if we implement classes of indexers, these will be
    # ones to skip.
    skiplist: Optional[List[str]] = None
|
||
|
||
class Genomes(BaseModel):
    """Container model holding the list of ``Genome`` entries."""

    # Required: validation fails when the ``genomes`` key is absent.
    genomes: List[Genome]
|
||
|
||
def _read_yaml(path: StrOrPath):
    """Open ``path`` and return its contents parsed with ``yaml.safe_load``."""
    with open(path) as stream:
        document = yaml.safe_load(stream)
    return document
|
||
|
||
def read_data_managers(path: StrOrPath) -> DataManagers:
    """Read the YAML file at ``path`` and validate it as a ``DataManagers`` model.

    The parsed document is used as the model's root value.
    """
    raw_mapping = _read_yaml(path)
    return DataManagers(__root__=raw_mapping)
|
||
|
||
def read_genomes(path: StrOrPath) -> Genomes:
    """Read the YAML file at ``path`` and validate it as a ``Genomes`` model.

    The top-level keys of the parsed document become the model's fields.
    """
    raw_document = _read_yaml(path)
    return Genomes(**raw_document)
|
||
|
||
def read_tools(path: StrOrPath) -> RepositoryInstallTargets:
    """Read the YAML file at ``path`` and validate it as ``RepositoryInstallTargets``.

    The top-level keys of the parsed document become the model's fields.
    """
    raw_document = _read_yaml(path)
    return RepositoryInstallTargets(**raw_document)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
#!/usr/bin/env python | ||
"""Helper script for IDC - not yet meant for public consumption. | ||
This script takes a data_managers.yml configuration describing the | ||
set of data managers the IDC configuration targets and builds | ||
a tools.yml file from it for use with shed_tools. | ||
""" | ||
import argparse | ||
import logging | ||
from typing import ( | ||
Dict, | ||
List, | ||
NamedTuple, | ||
) | ||
|
||
import yaml | ||
|
||
from ._config_models import ( | ||
read_data_managers, | ||
RepositoryInstallTargets, | ||
) | ||
from .common_parser import ( | ||
add_log_file_argument, | ||
add_verbosity_argument, | ||
) | ||
from .ephemeris_log import ( | ||
disable_external_library_logging, | ||
setup_global_logger, | ||
) | ||
|
||
|
||
class DataManager(NamedTuple):
    """Immutable record describing one configured data manager."""

    # Field order is part of the tuple layout and must not change.
    tool_id: str
    repository_name: str
    tags: List[str]
|
||
|
||
def read_data_managers_configuration(path: str) -> Dict[str, DataManager]:
    """Load the data managers file at ``path`` into ``DataManager`` records.

    Returns a dict keyed by the same keys as the file's top-level mapping;
    each key is also stored on the record as ``repository_name``. A falsy
    ``tags`` value is replaced by an empty list.
    """
    configured = read_data_managers(path).__root__
    return {
        name: DataManager(
            tool_id=settings.tool_id,
            repository_name=name,
            tags=settings.tags or [],
        )
        for name, settings in configured.items()
    }
|
||
|
||
def build_shed_install_conf(path: str) -> dict:
    """Build a shed install configuration from a data managers file.

    Args:
        path: Location of the data managers YAML configuration.

    Returns:
        A dict of the form ``{"tools": [...]}`` holding one entry per
        configured data manager, in configuration order.

    Raises:
        ValueError: If a data manager's tool ID does not contain an owner
            and a repository name as its third and fourth ``/``-separated
            parts.
    """
    data_managers = read_data_managers_configuration(path)
    tools = []
    for data_manager in data_managers.values():
        tool_id = data_manager.tool_id
        tool_id_parts = tool_id.split("/")
        # The owner and repository name are read from parts 2 and 3 of the
        # tool ID. Fail with a message naming the offending ID instead of a
        # bare IndexError (or silently emitting empty values).
        if len(tool_id_parts) < 4 or not tool_id_parts[2] or not tool_id_parts[3]:
            raise ValueError(
                f"Cannot determine repository owner and name from tool ID {tool_id!r} "
                f"of data manager {data_manager.repository_name!r}"
            )
        repo_owner, repo_name = tool_id_parts[2:4]
        tools.append(
            {
                "name": repo_name,
                "owner": repo_owner,
                "tool_panel_section_label": None,
                "tool_shed_url": "toolshed.g2.bx.psu.edu",
            }
        )
    return {"tools": tools}
|
||
|
||
def write_shed_install_conf(data_manager_conf_path: str, output_path: str) -> None:
    """Generate the shed install configuration and write it out as YAML.

    The configuration is built from ``data_manager_conf_path``, validated,
    and then dumped to ``output_path`` (overwriting any existing file).
    """
    install_conf = build_shed_install_conf(data_manager_conf_path)

    # The model instance is discarded: constructing it is only a check that
    # the generated dict is valid before anything is written to disk.
    RepositoryInstallTargets(**install_conf)

    with open(output_path, "w") as output:
        yaml.safe_dump(install_conf, output)
|
||
|
||
def _parser():
    """Create the command line parser for this script and return it."""
    parser = argparse.ArgumentParser(add_help=False)

    general = parser.add_argument_group("General options")
    add_verbosity_argument(general)
    add_log_file_argument(general)

    # Input and output locations, each with a default file name.
    path_options = (
        ("--data-managers-conf", "data_managers.yml"),
        ("--shed-install-output-conf", "tools.yml"),
    )
    for flag, default_path in path_options:
        parser.add_argument(flag, default=default_path)
    return parser
|
||
|
||
def main():
    """Script entry point: parse arguments, configure logging, write the output file."""
    disable_external_library_logging()
    args = _parser().parse_args()
    log = setup_global_logger(name=__name__, log_file=args.log_file)
    # Verbose runs log at DEBUG, everything else at INFO.
    log.setLevel(logging.DEBUG if args.verbose else logging.INFO)
    write_shed_install_conf(args.data_managers_conf, args.shed_install_output_conf)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import os | ||
from pathlib import Path | ||
|
||
import yaml | ||
|
||
from ._config_models import ( | ||
read_data_managers, | ||
read_genomes, | ||
) | ||
|
||
|
||
def read_yaml(path: Path):
    """Open ``path`` and return its contents parsed with ``yaml.safe_load``."""
    with open(path) as stream:
        document = yaml.safe_load(stream)
    return document
|
||
|
||
def lint_idc_directory(directory: Path):
    """Check the IDC configuration files found in ``directory``.

    Verifies that ``genomes.yml`` and ``data_managers.yml`` exist, that
    every data manager tool ID starts with ``toolshed.g2.bx.psu.edu/``, and
    that every indexer referenced by a genome is a key of the data managers
    file. Each genome is printed as it is checked.

    Args:
        directory: Directory containing the two configuration files.

    Raises:
        FileNotFoundError: If either configuration file is missing.
        Exception: If a tool ID has an unexpected prefix or a genome
            references an unknown data manager.
    """
    genomes_path = directory / "genomes.yml"
    data_managers_path = directory / "data_managers.yml"
    # Raise explicitly rather than ``assert``: assertions are removed when
    # Python runs with -O and carry no message about which file is missing.
    for required_path in (genomes_path, data_managers_path):
        if not required_path.exists():
            raise FileNotFoundError(f"Expected IDC configuration file {required_path} does not exist")
    data_managers = read_data_managers(data_managers_path).__root__
    genomes = read_genomes(genomes_path)

    for data_manager in data_managers.values():
        data_manager_tool_id = data_manager.tool_id
        if not data_manager_tool_id.startswith("toolshed.g2.bx.psu.edu/"):
            raise Exception(
                f"Expected a data manager repository from main Galaxy tool shed but discovered tool ID {data_manager_tool_id}"
            )

    for genome in genomes.genomes:
        print(genome)
        # ``indexers`` may be None when the genome lists no indexers.
        for indexer in genome.indexers or []:
            if indexer not in data_managers:
                raise Exception(f"Failed to find data manager {indexer} referenced for genome {genome}")
|
||
|
||
def main():
    """Script entry point: lint the IDC configuration in the current directory."""
    current_directory = Path(os.curdir)
    lint_idc_directory(current_directory)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Oops, something went wrong.