Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add download subcommand #149

Merged
merged 7 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AMDirT/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.5.0"
__version__ = "1.6.0"
43 changes: 42 additions & 1 deletion AMDirT/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from AMDirT.validate import run_validation
from AMDirT.viewer import run_app
from AMDirT.convert import run_convert
from AMDirT.core import get_json_path
from AMDirT.core import get_json_path, get_amdir_tags, get_latest_tag
from AMDirT.autofill import run_autofill
from AMDirT.merge import merge_new_df
from AMDirT.download import download as download_amdir
from json import load


Expand Down Expand Up @@ -294,5 +295,45 @@ def merge(ctx, no_args_is_help=True, **kwargs):
merge_new_df(**kwargs, **ctx.obj)


# Register `download` as a subcommand of the `cli` object.
@cli.command()
# Which table to fetch; the accepted names come from get_table_list().
@click.option(
    "-t",
    "--table",
    help="AncientMetagenomeDir table to download",
    type=click.Choice(get_table_list()),
    default="ancientmetagenome-hostassociated",
    show_default=True,
)
# Whether to fetch the samples table or the libraries table.
@click.option(
    "-y",
    "--table_type",
    help="Type of table to download",
    type=click.Choice(["samples", "libraries"]),
    default="samples",
    show_default=True,
)
# NOTE: get_amdir_tags() is called twice here while the decorators are
# evaluated (once for the choices, once for the default), i.e. when this
# module is imported rather than when the command runs.
@click.option(
    "-r",
    "--release",
    help="Release tag to download",
    type=click.Choice(get_amdir_tags()),
    default=get_latest_tag(get_amdir_tags()),
    show_default=True,
)
# Destination directory, forwarded to download_amdir as `output`.
@click.option(
    "-o",
    "--output",
    help="Output directory",
    type=click.Path(writable=True),
    default=".",
    show_default=True,
)
# NOTE(review): `no_args_is_help` is declared as a function parameter and is
# never read in the body; confirm whether it was meant to be passed to
# `@cli.command(...)` instead.
def download(no_args_is_help=True, **kwargs):
    """\b
    Download a table from the AMDirT repository
    """
    # The docstring above doubles as the command's help text in click, so it
    # is part of the user-facing output.
    # kwargs holds the four option values: table, table_type, release, output.
    download_amdir(**kwargs)


# Invoke `cli` only when this file is executed directly, not when imported.
if __name__ == "__main__":
    cli()
30 changes: 29 additions & 1 deletion AMDirT/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import streamlit as st
from packaging import version
from packaging.version import InvalidVersion
from importlib.resources import files as get_module_dir
import os
import logging
Expand Down Expand Up @@ -65,7 +66,34 @@ def get_amdir_tags():
if version.parse(tag["name"]) >= version.parse("v22.09")
]
else:
return []
logger.warning(
"Could not fetch tags from AncientMetagenomeDir. Defaulting to master. Metadata may not yet be officially released."
)
return ["master"]


@st.cache_data
def get_latest_tag(tags):
    """
    Return the most recent release tag from a list of tag names.

    Args:
        tags(list): Tag names to choose from, e.g. the output of
            get_amdir_tags()
    Returns:
        str: The tag that sorts last by version order, or "master" when a
            tag cannot be parsed as a version and "master" is in tags
    Raises:
        InvalidVersion: If tags is empty, or if a tag cannot be parsed as a
            version and "master" is not available as a fallback
    """
    if not tags:
        # sorted([])[-1] would surface as a bare IndexError; report the same
        # error as the other "nothing usable" case instead.
        raise InvalidVersion("No valid tags found")
    try:
        return sorted(tags, key=version.Version)[-1]
    except InvalidVersion as err:
        # Any unparsable tag (including "master" itself) lands here.
        if "master" in tags:
            return "master"
        raise InvalidVersion("No valid tags found") from err


def check_allowed_values(ref: list, test: str):
    """
    Report whether a value is one of the allowed values.

    Args:
        ref(list): Allowed values
        test(str): Value to look up in ref
    Returns:
        bool: True when test is found in ref, False otherwise
    """
    return test in ref


def get_colour_chemistry(instrument: str) -> int:
Expand Down
62 changes: 62 additions & 0 deletions AMDirT/download/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from AMDirT.core import (
logger,
get_amdir_tags,
get_remote_resources,
check_allowed_values,
)
import requests


def download(table: str, table_type: str, release: str, output: str = ".") -> str:
    """
    Download a table from the AncientMetagenomeDir repository.

    Parameters
    ----------
    table : str
        The AncientMetagenomeDir table to download.
    table_type : str
        The type of table to download. Allowed values are ['samples', 'libraries'].
    release : str
        The release of the table to download. Must be a valid release tag.
    output: str
        The output directory to save the table. Default is the current directory.

    Returns
    -------
    str:
        The path to the downloaded table. With the default output directory
        this is just the file name.

    Raises
    ------
    ValueError
        If an invalid table is provided.
    ValueError
        If an invalid table type is provided.
    ValueError
        If an invalid release is provided.
    requests.HTTPError
        If the server answers the download request with an error status.
    """
    from pathlib import Path

    resources = get_remote_resources()
    tags = get_amdir_tags()
    # When the tag list is exactly ["master"] the release check is skipped.
    if tags != ["master"] and not check_allowed_values(tags, release):
        raise ValueError(f"Invalid release: {release}. Allowed values are {tags}")

    tables = resources["samples"]
    if not check_allowed_values(tables, table):
        raise ValueError(f"Invalid table: {table}. Allowed values are {tables}")

    if not check_allowed_values(["samples", "libraries"], table_type):
        raise ValueError(
            f"Invalid table type: {table_type}. Allowed values are ['samples', 'libraries']"
        )

    table_filename = f"{table}_{table_type}_{release}.tsv"
    # Honour the requested output directory. Path(".") / name collapses to
    # the bare name, so the default still returns just the file name.
    table_path = Path(output) / table_filename
    logger.info(
        f"Downloading {table} {table_type} table from {release} release, saving to {output}/{table_filename}"
    )
    # The resource URL has "master" swapped for the requested release.
    response = requests.get(resources[table_type][table].replace("master", release))
    # Fail loudly rather than saving an HTTP error page as a .tsv file.
    response.raise_for_status()
    with open(table_path, "w", encoding="utf-8") as fh:
        fh.write(response.text)

    return str(table_path)
21 changes: 21 additions & 0 deletions docs/source/how_to/download.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# download

## What

Download a copy of an AncientMetagenomeDir table.

## When

This command would be used when you want to download an AncientMetagenomeDir table locally.

You typically do this if you're planning to use the `convert` command later.

## How

```bash
AMDirT download --table ancientsinglegenome-hostassociated --table_type samples -r v23.12.0 -o .
```

## Output

The example command above will download the `ancientsinglegenome-hostassociated` `samples` table from the `v23.12.0` AncientMetagenomeDir release, and save it locally to `ancientsinglegenome-hostassociated_samples_v23.12.0.tsv`
5 changes: 2 additions & 3 deletions docs/source/tutorials/convert.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,15 @@ We will take use one of the previous releases of AncientMetagenomeDir as an exam
```bash
mkdir amdirt-convert-tutorial
cd amdirt-convert-tutorial
curl -LO https://github.com/SPAAM-community/AncientMetagenomeDir/releases/download/v23.09.0/AncientMetagenomeDir_v23.09.0.zip
unzip AncientMetagenomeDir_v23.09.0.zip
AMDirT download --table ancientmetagenome-hostassociated --table_type samples -r v23.09.0
```

## Filter a sample metadata table

Next we can filter the ancient metagenome 'host-associated' sample sheet for all dental calculus samples from Germany.

```bash
cat ancientmetagenome-hostassociated/samples/ancientmetagenome-hostassociated_samples.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv
cat ancientmetagenome-hostassociated_samples_v23.09.0.tsv | grep -e '^project_name' -e 'dental calculus' | grep -e '^project_name' -e 'Germany' > germany_dentalcalculus.tsv
```

> ⚠ _The command above is not robust and is only used for system portability and demonstration purposes. For example, the `Germany` string could be in a site name. In practice, you should use more robust filtering methods, such as more specific `grep` expressions, or filter in R_.
Expand Down
10 changes: 10 additions & 0 deletions tests/test_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from AMDirT.download import download


def test_download():
    """Download one released samples table and check the returned file name."""
    expected_name = "ancientmetagenome-hostassociated_samples_v23.12.0.tsv"
    result = download(
        "ancientmetagenome-hostassociated",
        "samples",
        "v23.12.0",
        output=".",
    )
    assert result == expected_name
Loading