fix convert and remove mag files always written
maxibor committed Dec 18, 2023
1 parent 2cbda82 commit 758ea6b
Showing 7 changed files with 74 additions and 53 deletions.
42 changes: 23 additions & 19 deletions AMDirT/convert/__init__.py
@@ -58,7 +58,16 @@ def run_convert(
samples = pd.read_csv(samples, sep="\t")
libraries = pd.read_csv(tables["libraries"][table_name], sep="\t")

logger.warning("We provide no warranty to the accuracy of the generated input sheets.")
selected_libraries = get_libraries(
samples=samples,
libraries=libraries,
table_name=table_name,
supported_archives=supported_archives,
)

logger.warning(
"We provide no warranty to the accuracy of the generated input sheets."
)

if bibliography == True:
logger.info("Preparing Bibtex citation file")
@@ -72,14 +81,7 @@ def run_convert(

if librarymetadata == True:
logger.info("Writing filtered libraries table")
librarymetadata = get_libraries(
samples=samples,
libraries=libraries,
table_name=table_name,
supported_archives=supported_archives,
).drop(
col_drop, axis=1
)
librarymetadata = selected_libraries.drop(col_drop, axis=1)
librarymetadata.to_csv(
f"{output}/AncientMetagenomeDir_filtered_libraries.tsv",
sep="\t",
@@ -90,7 +92,7 @@ def run_convert(
logger.info("Writing curl download script")
accession_table = prepare_accession_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
@@ -104,7 +106,7 @@ def run_convert(
)
accession_table = prepare_accession_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
@@ -117,11 +119,11 @@ def run_convert(
logger.info("Preparing nf-core/fetchngs table")
accession_table = prepare_accession_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
accession_table["df"]['archive_accession'].to_csv(
accession_table["df"]["archive_data_accession"].to_csv(
f"{output}/AncientMetagenomeDir_nf_core_fetchngs_input_table.tsv",
sep="\t",
header=False,
@@ -132,7 +134,7 @@ def run_convert(
logger.info("Preparing nf-core/eager table")
eager_table = prepare_eager_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
@@ -146,22 +148,24 @@ def run_convert(
logger.info("Preparing nf-core/taxprofiler table")
accession_table = prepare_taxprofiler_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
accession_table["df"].to_csv(
accession_table.to_csv(
f"{output}/AncientMetagenomeDir_nf_core_taxprofiler_input_table.csv",
header=False,
index=False,
)

if ameta == True:
logger.info("Preparing aMeta table")
logger.warning("aMeta does not support pairs. You must manually merge pair-end data before using samplesheet.")
logger.warning(
"aMeta does not support pairs. You must manually merge pair-end data before using samplesheet."
)
aMeta_table = prepare_aMeta_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
@@ -175,7 +179,7 @@ def run_convert(
logger.info("Preparing nf-core/mag table")
mag_table_single, mag_table_paired = prepare_mag_table(
samples=samples,
libraries=libraries,
libraries=selected_libraries,
table_name=table_name,
supported_archives=supported_archives,
)
3 changes: 1 addition & 2 deletions AMDirT/core/__init__.py
@@ -301,7 +301,6 @@ def prepare_accession_table(
# )

# Downloading with curl or aspera instead of fetchngs
print(libraries.columns)
urls = set(libraries["download_links"])
links = set()
for u in urls:
Expand All @@ -324,7 +323,7 @@ def prepare_accession_table(
)

return {
"df": libraries[["archive_accession", "download_sizes"]].drop_duplicates(),
"df": libraries[["archive_data_accession", "download_sizes"]].drop_duplicates(),
"curl_script": dl_script_header + curl_script,
"aspera_script": dl_script_header + aspera_script,
}
69 changes: 42 additions & 27 deletions AMDirT/viewer/streamlit.py
@@ -1,7 +1,7 @@
from numpy import ALLOW_THREADS
import streamlit as st
import pandas as pd

import os
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode, JsCode
import argparse
import zipfile
@@ -265,10 +265,10 @@ def parse_args():
data=(
pd.DataFrame(df_mod["selected_rows"])
.drop("_selectedRowNodeInfo", axis=1)
.to_csv(sep=",", index=False)
.to_csv(sep="\t", index=False)
.encode("utf-8")
),
file_name="AncientMetagenomeDir_filtered_samples.csv",
file_name="AncientMetagenomeDir_filtered_samples.tsv",
)

###################
@@ -286,7 +286,7 @@ def parse_args():
data=(
lib_mod.drop(col_drop, axis=1).to_csv(sep="\t", index=False)
).encode("utf-8"),
file_name="AncientMetagenomeDir_filtered_libraries.csv",
file_name="AncientMetagenomeDir_filtered_libraries.tsv",
)

############################
@@ -349,37 +349,52 @@ def parse_args():
file_name="AncientMetagenomeDir_nf_core_eager_input_table.tsv",
)

#######################
## NF-CORE/MAG TABLE ##
#######################
mag_table_single, mag_table_paired = prepare_mag_table(
pd.DataFrame(df_mod["selected_rows"]),
lib_mod,
st.session_state.table_name,
supported_archives,
)
zip_file = zipfile.ZipFile(
"ancientMetagenomeDir_mag_input.zip", mode="w"
)
if not mag_table_single.empty:
mag_table_single.to_csv(
"nf_core_mag_input_single_table.csv", index=False
#######################
## NF-CORE/MAG TABLE ##
#######################
with button_samplesheet_mag:
mag_table_single, mag_table_paired = prepare_mag_table(
pd.DataFrame(df_mod["selected_rows"]),
lib_mod,
st.session_state.table_name,
supported_archives,
)
zip_file.write("nf_core_mag_input_single_table.csv")
if not mag_table_paired.empty:
mag_table_paired.to_csv(
"nf_core_mag_input_paired_table.csv", index=False
zip_file = zipfile.ZipFile(
"AncientMetagenomeDir_nf_core_mag_input.zip", mode="w"
)
zip_file.write("nf_core_mag_input_paired_table.csv")
zip_file.close()
with open("ancientMetagenomeDir_mag_input.zip", "rb") as zip_file:
with button_samplesheet_mag:
if not mag_table_single.empty:
mag_table_single.to_csv(
"AncientMetagenomeDir_nf_core_mag_input_single_table.csv",
index=False,
)
zip_file.write(
"AncientMetagenomeDir_nf_core_mag_input_single_table.csv"
)
os.remove(
"AncientMetagenomeDir_nf_core_mag_input_single_table.csv"
)
if not mag_table_paired.empty:
mag_table_paired.to_csv(
"AncientMetagenomeDir_nf_core_mag_input_paired_table.csv",
index=False,
)
zip_file.write(
"AncientMetagenomeDir_nf_core_mag_input_paired_table.csv"
)
os.remove(
"AncientMetagenomeDir_nf_core_mag_input_paired_table.csv"
)
zip_file.close()
with open(
"AncientMetagenomeDir_nf_core_mag_input.zip", "rb"
) as zip_file:
st.download_button(
label="Download nf-core/mag input CSV",
data=zip_file,
file_name="AncientMetagenomeDir_nf_core_mag_input.zip",
mime="application/zip",
)
os.remove("AncientMetagenomeDir_nf_core_mag_input.zip")

#######################
## TAXPROFILER TABLE ##
3 changes: 0 additions & 3 deletions tests/data/data_test_convert.tsv

This file was deleted.

3 changes: 3 additions & 0 deletions tests/data/libraries_test_convert.tsv
@@ -0,0 +1,3 @@
project_name publication_year data_publication_doi sample_name archive archive_project archive_sample_accession library_name strand_type library_polymerase library_treatment library_concentration instrument_model library_layout library_strategy read_count archive_data_accession download_links download_md5s download_sizes
Warinner2014 2014 10.1038/ng.2906 B61 SRA PRJNA216965 SRS473742 S1-Shot-B61-calc double Phusion HS II DNA none Illumina HiSeq 2000 SINGLE WGS 13228381 SRR957738 ftp.sra.ebi.ac.uk/vol1/fastq/SRR957/SRR957738/SRR957738.fastq.gz 9c40c43b5d455e760ae8db924347f0b2 953396663
Weyrich2017 2017 10.1038/nature21674 ElSidron1 SRA PRJNA685265 SRS7890498 ElSidron1_12056 double Unknown none Illumina HiSeq 2500 PAIRED WGS 53186534 SRR13263123 ftp.sra.ebi.ac.uk/vol1/fastq/SRR132/023/SRR13263123/SRR13263123_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/SRR132/023/SRR13263123/SRR13263123_2.fastq.gz e70fb68658e6e66a60ae4d289666bc03;15a9cbc5c9a1234978c220df8e91c4e7 3533496739;4013394642
3 changes: 3 additions & 0 deletions tests/data/samples_test_convert.tsv
@@ -0,0 +1,3 @@
project_name publication_year publication_doi site_name latitude longitude geo_loc_name sample_name sample_host sample_age sample_age_doi community_type material archive archive_project archive_accession
Warinner2014 2014 10.1038/ng.2906 Dalheim 51.565 8.84 Germany B61 Homo sapiens 900 10.1038/ng.2906 oral dental calculus SRA PRJNA216965 SRS473742,SRS473743,SRS473744,SRS473745
Weyrich2017 2017 10.1038/nature21674 El Sidrón Cave 43.386 -5.328 Spain ElSidron1 Homo sapiens neanderthalensis 49000 10.1038/nature21674 oral dental calculus SRA PRJNA685265 SRS7890498
4 changes: 2 additions & 2 deletions tests/test_convert.py
@@ -9,8 +9,8 @@
def test_convert(test_data_dir):
assert (
run_convert(
samples=os.path.join(test_data_dir, "data_test_convert.tsv"),
table_name="ancientmetagenome-environmental",
samples=os.path.join(test_data_dir, "samples_test_convert.tsv"),
table_name="ancientmetagenome-hostassociated",
eager=True,
fetchngs=True,
ameta=True,
