From c29d18673f0c2bd5b7c9de9d9082ec905d47b359 Mon Sep 17 00:00:00 2001
From: maxibor
Date: Mon, 8 Apr 2024 11:13:08 +0200
Subject: [PATCH] feat: add download subcommand

---
 AMDirT/cli.py                  | 43 ++++++++++++++++++++++-
 AMDirT/core/__init__.py        | 30 +++++++++++++++++++++++++++++-
 AMDirT/download/__init__.py    | 62 ++++++++++++++++++++++++++++++++++
 docs/source/how_to/download.md | 21 ++++++++++++
 tests/test_download.py         | 10 ++++++
 5 files changed, 164 insertions(+), 2 deletions(-)
 create mode 100644 AMDirT/download/__init__.py
 create mode 100644 docs/source/how_to/download.md
 create mode 100644 tests/test_download.py

diff --git a/AMDirT/cli.py b/AMDirT/cli.py
index c642a07..d6374fd 100644
--- a/AMDirT/cli.py
+++ b/AMDirT/cli.py
@@ -4,9 +4,10 @@
 from AMDirT.validate import run_validation
 from AMDirT.viewer import run_app
 from AMDirT.convert import run_convert
-from AMDirT.core import get_json_path
+from AMDirT.core import get_json_path, get_amdir_tags, get_latest_tag
 from AMDirT.autofill import run_autofill
 from AMDirT.merge import merge_new_df
+from AMDirT.download import download as download_amdir
 from json import load
 
 
@@ -294,5 +295,45 @@ def merge(ctx, no_args_is_help=True, **kwargs):
     merge_new_df(**kwargs, **ctx.obj)
 
 
+@cli.command()
+@click.option(
+    "-t",
+    "--table",
+    help="AncientMetagenomeDir table to download",
+    type=click.Choice(get_table_list()),
+    default="ancientmetagenome-hostassociated",
+    show_default=True,
+)
+@click.option(
+    "-y",
+    "--table_type",
+    help="Type of table to download",
+    type=click.Choice(["samples", "libraries"]),
+    default="samples",
+    show_default=True,
+)
+@click.option(
+    "-r",
+    "--release",
+    help="Release tag to download",
+    type=click.Choice(get_amdir_tags()),
+    default=get_latest_tag(get_amdir_tags()),
+    show_default=True,
+)
+@click.option(
+    "-o",
+    "--output",
+    help="Output directory",
+    type=click.Path(writable=True),
+    default=".",
+    show_default=True,
+)
+def download(no_args_is_help=True, **kwargs):
+    """\b
+    Download a table from the AMDirT repository
+    """
+    download_amdir(**kwargs)
+
+
 if __name__ == "__main__":
     cli()
diff --git a/AMDirT/core/__init__.py b/AMDirT/core/__init__.py
index 4eb3c3f..43f69d4 100644
--- a/AMDirT/core/__init__.py
+++ b/AMDirT/core/__init__.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import streamlit as st
 from packaging import version
+from packaging.version import InvalidVersion
 from importlib.resources import files as get_module_dir
 import os
 import logging
@@ -65,7 +66,34 @@ def get_amdir_tags():
             if version.parse(tag["name"]) >= version.parse("v22.09")
         ]
     else:
-        return []
+        logger.warning(
+            "Could not fetch tags from AncientMetagenomeDir. Defaulting to master"
+        )
+        return ["master"]
+
+
+@st.cache_data
+def get_latest_tag(tags):
+    try:
+        return sorted(tags, key=lambda x: version.Version(x))[-1]
+    except InvalidVersion:
+        if "master" in tags:
+            return "master"
+        else:
+            raise InvalidVersion("No valid tags found")
+
+
+def check_allowed_values(ref: list, test: str):
+    """
+    Check if test is in ref
+    Args:
+        ref(list): List of allowed values
+        test(str): value to check
+    """
+
+    if test in ref:
+        return True
+    return False
 
 
 def get_colour_chemistry(instrument: str) -> int:
diff --git a/AMDirT/download/__init__.py b/AMDirT/download/__init__.py
new file mode 100644
index 0000000..62baaa6
--- /dev/null
+++ b/AMDirT/download/__init__.py
@@ -0,0 +1,62 @@
+from AMDirT.core import (
+    logger,
+    get_amdir_tags,
+    get_remote_resources,
+    check_allowed_values,
+)
+from pathlib import Path
+import requests
+
+
+def download(table: str, table_type: str, release: str, output: str = ".") -> str:
+    """
+    Download a table from the AMDirT repository.
+
+    Parameters
+    ----------
+    table : str
+        The table to download.
+    table_type : str
+        The type of table to download. Allowed values are ['samples', 'libraries'].
+    release : str
+        The release of the table to download. Must be a valid release tag.
+    output: str
+        The output directory to save the table. Default is the current directory.
+
+    Returns
+    -------
+    str:
+        The path to the downloaded table.
+
+    Raises
+    ------
+    ValueError
+        If an invalid table, table type, or release is provided.
+    requests.HTTPError
+        If the remote repository does not return the requested table.
+    """
+
+    resources = get_remote_resources()
+    tags = get_amdir_tags()
+    # The tag list is only ["master"] when the tags could not be fetched
+    if tags != ["master"] and not check_allowed_values(tags, release):
+        raise ValueError(f"Invalid release: {release}. Allowed values are {tags}")
+
+    tables = resources["samples"]
+    if not check_allowed_values(tables, table):
+        raise ValueError(f"Invalid table: {table}. Allowed values are {tables}")
+
+    if not check_allowed_values(["samples", "libraries"], table_type):
+        raise ValueError(
+            f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries']"
+        )
+    table_path = Path(output) / f"{table}_{table_type}_{release}.tsv"
+    logger.info(
+        f"Downloading {table} {table_type} table from {release} release, saving to {table_path}"
+    )
+    t = requests.get(resources[table_type][table].replace("master", release))
+    # Fail loudly instead of saving an HTTP error page as the table
+    t.raise_for_status()
+    table_path.write_text(t.text, encoding="utf-8")
+
+    return str(table_path)
diff --git a/docs/source/how_to/download.md b/docs/source/how_to/download.md
new file mode 100644
index 0000000..2f6ea48
--- /dev/null
+++ b/docs/source/how_to/download.md
@@ -0,0 +1,21 @@
+# download
+
+## What
+
+Download a copy of an AncientMetagenomeDir table.
+
+## When
+
+This command would be used when you want to download an AncientMetagenomeDir table locally.
+
+You typically do this if you're planning to use the `convert` command later.
+
+## How
+
+```bash
+AMDirT download --table ancientsinglegenome-hostassociated --table_type samples -r v23.12.0 -o .
+```
+
+## Output
+
+This command will download the `ancientsinglegenome-hostassociated` `samples` table from the `v23.12.0` AncientMetagenomeDir release, and save it to `ancientsinglegenome-hostassociated_samples_v23.12.0.tsv` in the output directory given with `-o` (here, the current directory).
diff --git a/tests/test_download.py b/tests/test_download.py
new file mode 100644
index 0000000..5923212
--- /dev/null
+++ b/tests/test_download.py
@@ -0,0 +1,10 @@
+from AMDirT.download import download
+
+
+def test_download(tmp_path):
+    table = "ancientmetagenome-hostassociated"
+    table_type = "samples"
+    release = "v23.12.0"
+    expected = tmp_path / f"{table}_{table_type}_{release}.tsv"
+    assert download(table, table_type, release, output=str(tmp_path)) == str(expected)
+    assert expected.is_file()