Skip to content

Commit

Permalink
feat: add download with sratoolkit/fasterq-dump
Browse files Browse the repository at this point in the history
  • Loading branch information
maxibor committed Dec 18, 2023
1 parent 758ea6b commit 54b01f2
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 67 deletions.
2 changes: 1 addition & 1 deletion AMDirT/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.4.6"
__version__ = "1.5.0"
48 changes: 25 additions & 23 deletions AMDirT/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,19 @@ def viewer(ctx, no_args_is_help=True, **kwargs):
help="Generate bash script with Aspera-based download commands for all libraries of samples in input table",
)
@click.option(
"--eager",
"--fetchngs",
is_flag=True,
help="Convert filtered samples and libraries tables to eager input tables",
help="Convert filtered samples and libraries tables to nf-core/fetchngs input tables",
)
@click.option(
"--fetchngs",
"--sratoolkit",
is_flag=True,
help="Convert filtered samples and libraries tables to nf-core/fetchngs input tables",
help="Generate bash script with SRA Toolkit fasterq-dump based download commands for all libraries of samples in input table",
)
@click.option(
"--eager",
is_flag=True,
help="Convert filtered samples and libraries tables to eager input tables",
)
@click.option(
"--ameta",
Expand Down Expand Up @@ -178,26 +183,27 @@ def convert(ctx, no_args_is_help=True, **kwargs):
# Autofill tool #
#################


@cli.command()
@click.argument("accession", type=str, nargs=-1)
@click.option(
"-n",
"--table_name",
"--table_name",
type=click.Choice(get_table_list()),
default='ancientmetagenome-hostassociated',
show_default=True
default="ancientmetagenome-hostassociated",
show_default=True,
)
@click.option(
"-l",
"--library_output",
type=click.Path(writable=True),
help="path to library output table file"
help="path to library output table file",
)
@click.option(
"-s",
"--sample_output",
type=click.Path(writable=True),
help="path to sample output table file"
help="path to sample output table file",
)
@click.pass_context
def autofill(ctx, no_args_is_help=True, **kwargs):
Expand All @@ -219,31 +225,26 @@ def autofill(ctx, no_args_is_help=True, **kwargs):
@click.argument("dataset", type=click.Path(exists=True))
@click.option(
"-n",
"--table_name",
"--table_name",
type=click.Choice(get_table_list()),
default='ancientmetagenome-hostassociated',
show_default=True
default="ancientmetagenome-hostassociated",
show_default=True,
)
@click.option(
"-t",
"--table_type",
type=click.Choice(['samples', 'libraries']),
default='libraries',
show_default=True
)
@click.option(
"-m",
"--markdown",
is_flag=True,
help="Output is in markdown format"
"--table_type",
type=click.Choice(["samples", "libraries"]),
default="libraries",
show_default=True,
)
@click.option("-m", "--markdown", is_flag=True, help="Output is in markdown format")
@click.option(
"-o",
"--outdir",
type=click.Path(writable=True),
default=".",
show_default=True,
help="path to sample output table file"
help="path to sample output table file",
)
@click.pass_context
def merge(ctx, no_args_is_help=True, **kwargs):
Expand All @@ -255,5 +256,6 @@ def merge(ctx, no_args_is_help=True, **kwargs):
"""
merge_new_df(**kwargs, **ctx.obj)


if __name__ == "__main__":
cli()
96 changes: 55 additions & 41 deletions AMDirT/convert/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def run_convert(
aspera=False,
eager=False,
fetchngs=False,
sratoolkit=False,
ameta=False,
taxprofiler=False,
mag=False,
Expand Down Expand Up @@ -65,13 +66,21 @@ def run_convert(
supported_archives=supported_archives,
)

accession_table = prepare_accession_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)

logger.warning(
"We provide no warranty to the accuracy of the generated input sheets."
)

if bibliography == True:
logger.info("Preparing Bibtex citation file")
with open(f"{output}/AncientMetagenomeDir_bibliography.bib", "w") as fw:
bibfile = f"{output}/AncientMetagenomeDir_bibliography.bib"
logger.info(f"Writing Bibtex citation file to {bibfile}")
with open(bibfile, "w") as fw:
fw.write(prepare_bibtex_file(samples))

if table_name in ["ancientmetagenome-environmental"]:
Expand All @@ -80,86 +89,78 @@ def run_convert(
col_drop = ["archive_accession", "sample_host"]

if librarymetadata == True:
logger.info("Writing filtered libraries table")
tbl_file = f"{output}/AncientMetagenomeDir_filtered_libraries.tsv"
logger.info(f"Writing filtered libraries table to {tbl_file}")
librarymetadata = selected_libraries.drop(col_drop, axis=1)
librarymetadata.to_csv(
f"{output}/AncientMetagenomeDir_filtered_libraries.tsv",
tbl_file,
sep="\t",
index=False,
)

if curl == True:
logger.info("Writing curl download script")
accession_table = prepare_accession_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
with open(f"{output}/AncientMetagenomeDir_curl_download_script.sh", "w") as fw:
dl_file = f"{output}/AncientMetagenomeDir_curl_download_script.sh"
logger.info(f"Writing curl download script to {dl_file}")
with open(dl_file, "w") as fw:
fw.write(accession_table["curl_script"])

if aspera == True:
logger.info("Writing Aspera download script")
dl_file = f"{output}/AncientMetagenomeDir_aspera_download_script.sh"
logger.info(f"Writing Aspera download script to {dl_file}")
logger.warning(
"You will need to set the ${ASPERA_PATH} environment variable. See https://amdirt.readthedocs.io for more information."
)
accession_table = prepare_accession_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
with open(
f"{output}/AncientMetagenomeDir_aspera_download_script.sh", "w"
) as fw:
with open(dl_file, "w") as fw:
fw.write(accession_table["aspera_script"])

if fetchngs == True:
logger.info("Preparing nf-core/fetchngs table")
accession_table = prepare_accession_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
dl_file = f"{output}/AncientMetagenomeDir_nf_core_fetchngs_download_script.sh"
logger.info(f"Writing nf-core/fetchngs table to {dl_file}")
accession_table["df"]["archive_data_accession"].to_csv(
f"{output}/AncientMetagenomeDir_nf_core_fetchngs_input_table.tsv",
dl_file,
sep="\t",
header=False,
index=False,
)
if sratoolkit == True:
dl_file = f"{output}/AncientMetagenomeDir_sratoolkit_download_script.sh"
logger.info(f"Writing sratoolkit/fasterq-dump download script to {dl_file}")
with open(dl_file, "w") as fw:
fw.write(accession_table["fasterq_dump_script"])

if eager == True:
logger.info("Preparing nf-core/eager table")
tbl_file = f"{output}/AncientMetagenomeDir_nf_core_eager_input_table.tsv"
logger.info(f"Writing nf-core/eager table to {tbl_file}")
eager_table = prepare_eager_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
eager_table.to_csv(
f"{output}/AncientMetagenomeDir_nf_core_eager_input_table.tsv",
tbl_file,
sep="\t",
index=False,
)

if taxprofiler == True:
logger.info("Preparing nf-core/taxprofiler table")
accession_table = prepare_taxprofiler_table(
tbl_file = f"{output}/AncientMetagenomeDir_nf_core_taxprofiler_input_table.csv"
logger.info(f"Writing nf-core/taxprofiler table to {tbl_file}")
taxprofiler_table = prepare_taxprofiler_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
accession_table.to_csv(
f"{output}/AncientMetagenomeDir_nf_core_taxprofiler_input_table.csv",
taxprofiler_table.to_csv(
tbl_file,
header=False,
index=False,
)

if ameta == True:
logger.info("Preparing aMeta table")
tbl_file = f"{output}/AncientMetagenomeDir_aMeta_input_table.tsv"
logger.info(f"Writing aMeta table to {tbl_file}")
logger.warning(
"aMeta does not support pairs. You must manually merge pair-end data before using samplesheet."
)
Expand All @@ -169,27 +170,40 @@ def run_convert(
table_name=table_name,
supported_archives=supported_archives,
)

aMeta_table.to_csv(
f"{output}/AncientMetagenomeDir_aMeta_input_table.tsv",
tbl_file,
sep="\t",
index=False,
)

if mag == True:
logger.info("Preparing nf-core/mag table")
logger.info("Preparing nf-core/mag table(s)")
mag_table_single, mag_table_paired = prepare_mag_table(
samples=samples,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
if not mag_table_single.empty:
mag_tbl_single_file = (
f"{output}/AncientMetagenomeDir_nf_core_mag_input_single_table.csv"
)
logger.info(
f"Writing nf-core/mag single-end table to {mag_tbl_single_file}"
)
mag_table_single.to_csv(
f"{output}/AncientMetagenomeDir_nf_core_mag_input_single_table.csv",
mag_tbl_single_file,
index=False,
)
if not mag_table_paired.empty:
mag_tbl_paired_file = (
f"{output}/AncientMetagenomeDir_nf_core_mag_input_paired_table.csv"
)
logger.info(
f"Writing nf-core/mag paired-end table to {mag_tbl_paired_file}"
)
mag_table_paired.to_csv(
f"{output}/AncientMetagenomeDir_nf_core_mag_input_paired_table.csv",
mag_tbl_paired_file,
index=False,
)
5 changes: 5 additions & 0 deletions AMDirT/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ def prepare_accession_table(

# Downloading with curl, aspera or sratoolkit/fasterq-dump instead of fetchngs
urls = set(libraries["download_links"])
accessions = set(libraries["archive_data_accession"])
links = set()
for u in urls:
for s in u.split(";"):
Expand All @@ -321,11 +322,15 @@ def prepare_accession_table(
)
+ "\n"
)
fasterq_dump_script = (
"\n".join([f"fasterq-dump --split-files -p {a}" for a in accessions]) + "\n"
)

return {
"df": libraries[["archive_data_accession", "download_sizes"]].drop_duplicates(),
"curl_script": dl_script_header + curl_script,
"aspera_script": dl_script_header + aspera_script,
"fasterq_dump_script": dl_script_header + fasterq_dump_script,
}


Expand Down
17 changes: 15 additions & 2 deletions AMDirT/viewer/streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,11 @@ def parse_args():
options = ["No table selected"] + list(samples.keys())
st.session_state.table_name = st.selectbox(label="Select a table", options=options)
st.session_state.height = st.selectbox(
"Number of rows to display", (10, 20, 50, 100, 200), index=2
"Number of rows to display", (10, 20, 50, 100, 200), index=1
)
st.session_state.dl_method = st.selectbox(
label="Data download method", options=["curl", "nf-core/fetchngs", "aspera"]
label="Data download method",
options=["curl", "nf-core/fetchngs", "aspera", "sratookit"],
)
if st.session_state.dl_method == "aspera":
st.warning(
Expand Down Expand Up @@ -319,6 +320,18 @@ def parse_args():
)["aspera_script"],
file_name="AncientMetagenomeDir_aspera_download_script.sh",
)
elif st.session_state.dl_method == "sratookit":
st.download_button(
label="Download SRAtoolkit/fasterq-dump sample download script",
help=f"approx. {total_size_str} of sequencing data selected",
data=prepare_accession_table(
pd.DataFrame(df_mod["selected_rows"]),
lib_mod,
st.session_state.table_name,
supported_archives,
)["fasterq_dump_script"],
file_name="AncientMetagenomeDir_sratoolkit_download_script.sh",
)
else:
st.download_button(
label="Download Curl sample download script",
Expand Down

0 comments on commit 54b01f2

Please sign in to comment.