From c29d18673f0c2bd5b7c9de9d9082ec905d47b359 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 8 Apr 2024 11:13:08 +0200 Subject: [PATCH 1/6] feat: add download subcommand --- AMDirT/cli.py | 43 ++++++++++++++++++++++- AMDirT/core/__init__.py | 30 +++++++++++++++- AMDirT/download/__init__.py | 62 ++++++++++++++++++++++++++++++++++ docs/source/how_to/download.md | 21 ++++++++++++ tests/test_download.py | 10 ++++++ 5 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 AMDirT/download/__init__.py create mode 100644 docs/source/how_to/download.md create mode 100644 tests/test_download.py diff --git a/AMDirT/cli.py b/AMDirT/cli.py index c642a07..d6374fd 100644 --- a/AMDirT/cli.py +++ b/AMDirT/cli.py @@ -4,9 +4,10 @@ from AMDirT.validate import run_validation from AMDirT.viewer import run_app from AMDirT.convert import run_convert -from AMDirT.core import get_json_path +from AMDirT.core import get_json_path, get_amdir_tags, get_latest_tag from AMDirT.autofill import run_autofill from AMDirT.merge import merge_new_df +from AMDirT.download import download as download_amdir from json import load @@ -294,5 +295,45 @@ def merge(ctx, no_args_is_help=True, **kwargs): merge_new_df(**kwargs, **ctx.obj) +@cli.command() +@click.option( + "-t", + "--table", + help="AncientMetagenomeDir table to download", + type=click.Choice(get_table_list()), + default="ancientmetagenome-hostassociated", + show_default=True, +) +@click.option( + "-y", + "--table_type", + help="Type of table to download", + type=click.Choice(["samples", "libraries"]), + default="samples", + show_default=True, +) +@click.option( + "-r", + "--release", + help="Release tag to download", + type=click.Choice(get_amdir_tags()), + default=get_latest_tag(get_amdir_tags()), + show_default=True, +) +@click.option( + "-o", + "--output", + help="Output directory", + type=click.Path(writable=True), + default=".", + show_default=True, +) +def download(no_args_is_help=True, **kwargs): + """\b + Download a table from the AMDirT repository + """ + download_amdir(**kwargs) + + if __name__ == "__main__": cli() diff --git a/AMDirT/core/__init__.py b/AMDirT/core/__init__.py index 4eb3c3f..43f69d4 100644 --- a/AMDirT/core/__init__.py +++ b/AMDirT/core/__init__.py @@ -5,6 +5,7 @@ import pandas as pd import streamlit as st from packaging import version +from packaging.version import InvalidVersion from importlib.resources import files as get_module_dir import os import logging @@ -65,7 +66,34 @@ def get_amdir_tags(): if version.parse(tag["name"]) >= version.parse("v22.09") ] else: - return [] + logger.warning( + "Could not fetch tags from AncientMetagenomeDir. Defaulting to master" + ) + return ["master"] + + +@st.cache_data +def get_latest_tag(tags): + try: + return sorted(tags, key=lambda x: version.Version(x))[-1] + except InvalidVersion: + if "master" in tags: + return "master" + else: + raise InvalidVersion("No valid tags found") + + +def check_allowed_values(ref: list, test: str): + """ + Check if test is in ref + Args: + ref(list): List of allowed values + test(str): value to check + """ + + if test in ref: + return True + return False def get_colour_chemistry(instrument: str) -> int: diff --git a/AMDirT/download/__init__.py b/AMDirT/download/__init__.py new file mode 100644 index 0000000..62baaa6 --- /dev/null +++ b/AMDirT/download/__init__.py @@ -0,0 +1,62 @@ +from AMDirT.core import ( + logger, + get_amdir_tags, + get_remote_resources, + check_allowed_values, +) +import requests + + +def download(table: str, table_type: str, release: str, output: str = ".") -> str: + """ + Download a table from the AMDirT repository. + + Parameters + ---------- + table : str + The table to download. + table_type : str + The type of table to download. Allowed values are ['samples', 'libraries']. + release : str + The release of the table to download. Must be a valid release tag. + output: str + The output directory to save the table. Default is the current directory. + + Returns + ------- + str: + The path to the downloaded table. + + Raises + ------ + ValueError + If an invalid table is provided. + ValueError + If an invalid table type is provided. + ValueError + If an invalid release is provided. + """ + + resources = get_remote_resources() + tags = get_amdir_tags() + if tags != ["master"]: + if check_allowed_values(tags, release) is False: + raise ValueError(f"Invalid release: {release}. Allowed values are {tags}") + + tables = resources["samples"] + if check_allowed_values(tables, table) is False: + raise ValueError(f"Invalid table: {table}. Allowed values are {tables}") + + if check_allowed_values(["samples", "libraries"], table_type) is False: + raise ValueError( + f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries']" + ) + table_filename = f"{table}_{table_type}_{release}.tsv" + logger.info( + f"Downloading {table} {table_type} table from {release} release, saving to {output}/{table_filename}" + ) + t = requests.get(resources[table_type][table].replace("master", release)) + with open(table_filename, "w") as fh: + fh.write(t.text) + + return table_filename diff --git a/docs/source/how_to/download.md b/docs/source/how_to/download.md new file mode 100644 index 0000000..2f6ea48 --- /dev/null +++ b/docs/source/how_to/download.md @@ -0,0 +1,21 @@ +# download + +## What + +Download a copy of an AncientMetagenomeDir table. + +## When + +This command would be used when you want to download an AncientMetagenomeDir table locally. + +You typically do this if you're planning to use the `convert` command later. + +## How + +```bash +AMDirT download --table ancientsinglegenome-hostassociated --table_type samples -r v23.12.0 -o . +``` + +## Output + +This command will download the `ancientsinglegenome-hostassociated` `sample` table from the `v23.12.0` AncientMetagenomeDir release, and save it locally to `ancientmetagenome-hostassociated_samples_v23.12.0.tsv` diff --git a/tests/test_download.py b/tests/test_download.py new file mode 100644 index 0000000..5923212 --- /dev/null +++ b/tests/test_download.py @@ -0,0 +1,10 @@ +from AMDirT.download import download + + +def test_download(): + table = "ancientmetagenome-hostassociated" + table_type = "samples" + release = "v23.12.0" + + d = download(table, table_type, release, output=".") + assert d == "ancientmetagenome-hostassociated_samples_v23.12.0.tsv" From 90f8462ad4a0fda3b4ae21063e5e15c15e4d41cd Mon Sep 17 00:00:00 2001 From: Maxime Borry Date: Mon, 8 Apr 2024 11:20:56 +0200 Subject: [PATCH 2/6] Update AMDirT/core/__init__.py Co-authored-by: James A. Fellows Yates --- AMDirT/core/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AMDirT/core/__init__.py b/AMDirT/core/__init__.py index 43f69d4..e4545b4 100644 --- a/AMDirT/core/__init__.py +++ b/AMDirT/core/__init__.py @@ -67,7 +67,7 @@ def get_amdir_tags(): ] else: logger.warning( - "Could not fetch tags from AncientMetagenomeDir. Defaulting to master" + "Could not fetch tags from AncientMetagenomeDir. Defaulting to master. Metadata may not yet be officially released." ) return ["master"] From e3b6c7fac2521556b7dca5ff442752cf4730083f Mon Sep 17 00:00:00 2001 From: Maxime Borry Date: Mon, 8 Apr 2024 11:21:01 +0200 Subject: [PATCH 3/6] Update AMDirT/download/__init__.py Co-authored-by: James A. Fellows Yates --- AMDirT/download/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AMDirT/download/__init__.py b/AMDirT/download/__init__.py index 62baaa6..22623ef 100644 --- a/AMDirT/download/__init__.py +++ b/AMDirT/download/__init__.py @@ -14,7 +14,7 @@ def download(table: str, table_type: str, release: str, output: str = ".") -> st Parameters ---------- table : str - The table to download. + The AncientMetagenomeDir table to download. table_type : str The type of table to download. Allowed values are ['samples', 'libraries']. release : str From 960420f18596b1c4da16d9af51a8a663bdf410f9 Mon Sep 17 00:00:00 2001 From: Maxime Borry Date: Mon, 8 Apr 2024 11:21:06 +0200 Subject: [PATCH 4/6] Update docs/source/how_to/download.md Co-authored-by: James A. Fellows Yates --- docs/source/how_to/download.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/how_to/download.md b/docs/source/how_to/download.md index 2f6ea48..68e16c2 100644 --- a/docs/source/how_to/download.md +++ b/docs/source/how_to/download.md @@ -18,4 +18,4 @@ AMDirT download --table ancientsinglegenome-hostassociated --table_type samples ## Output -This command will download the `ancientsinglegenome-hostassociated` `sample` table from the `v23.12.0` AncientMetagenomeDir release, and save it locally to `ancientmetagenome-hostassociated_samples_v23.12.0.tsv` +This example command above will download the `ancientsinglegenome-hostassociated` `sample` table from the `v23.12.0` AncientMetagenomeDir release, and save it locally to `ancientmetagenome-hostassociated_samples_v23.12.0.tsv` From c0fc8dc7840319b588a41b3a94777a3c3bd1930c Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 8 Apr 2024 11:28:00 +0200 Subject: [PATCH 5/6] doc: update convert tutorial --- docs/source/tutorials/convert.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/tutorials/convert.md b/docs/source/tutorials/convert.md index 80dc270..78b8fe7 100644 --- a/docs/source/tutorials/convert.md +++ b/docs/source/tutorials/convert.md @@ -15,8 +15,7 @@ We will take use one of the previous releases of AncientMetagenomeDir as an exam ```bash mkdir amdirt-convert-tutorial cd amdirt-convert-tutorial -curl -LO https://github.com/SPAAM-community/AncientMetagenomeDir/releases/download/v23.09.0/AncientMetagenomeDir_v23.09.0.zip -unzip AncientMetagenomeDir_v23.09.0.zip +AMDirT download --table ancientmetagenome-hostassociated --table_type samples -r v23.09.0 ``` ## Filter a sample metadata table @@ -24,7 +23,7 @@ unzip AncientMetagenomeDir_v23.09.0.zip Next we can filter the ancient metagenome 'host-associated' sample sheet for all dental calculus tables from Germany. ```bash -cat ancientmetagenome-hostassociated/samples/ancientmetagenome-hostassociated_samples.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv +cat ancientmetagenome-hostassociated_samples_v23.09.0.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv ``` > ⚠ _The command above is not robust and is only used for system portability and demonstration purposes. For example the `Germany` string could be in a site name. In practice, you should use more robust filtering methods such more specific `grep` expressions or in R_. From 1ed1e3d472ce5baedfb2cb454320bd072732f218 Mon Sep 17 00:00:00 2001 From: maxibor Date: Mon, 8 Apr 2024 11:29:42 +0200 Subject: [PATCH 6/6] chore: version bump --- AMDirT/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AMDirT/__init__.py b/AMDirT/__init__.py index 5b60188..e4adfb8 100644 --- a/AMDirT/__init__.py +++ b/AMDirT/__init__.py @@ -1 +1 @@ -__version__ = "1.5.0" +__version__ = "1.6.0"