From 9b301a6e55bda368cca6909af594da5b0eed7da6 Mon Sep 17 00:00:00 2001 From: axelwalter Date: Wed, 27 Sep 2023 17:10:02 +0200 Subject: [PATCH] seperate file lists for workflows --- assets/default-params.json | 3 +- "pages/0_\360\237\223\201_File_Upload.py" | 5 - ...37\224\215_Extracted_Ion_Chromatograms.py" | 48 ++-- ...60\237\247\252_Untargeted_Metabolomics.py" | 271 +++++++++--------- src/common.py | 19 +- src/fileupload.py | 26 +- src/xic.py | 11 +- 7 files changed, 176 insertions(+), 207 deletions(-) diff --git a/assets/default-params.json b/assets/default-params.json index 1fe0149..463a035 100644 --- a/assets/default-params.json +++ b/assets/default-params.json @@ -1,8 +1,8 @@ { - "selected-mzML-files": [], "image-format": "svg", "2D-map-intensity-cutoff": 5000, + "eic_selected_mzML": [], "eic_mz_unit": "ppm", "eic_tolerance_ppm": 10, "eic_tolerance_da": 0.2, @@ -11,6 +11,7 @@ "eic_combine": false, "eic_peak_width": 20, + "umetaflow_selected_mzML": [], "ffm_mass_error": 10.0, "ffm_noise": 100.0, "ffm_peak_width": 10.0, diff --git "a/pages/0_\360\237\223\201_File_Upload.py" "b/pages/0_\360\237\223\201_File_Upload.py" index 6ca3df6..f36be66 100644 --- "a/pages/0_\360\237\223\201_File_Upload.py" +++ "b/pages/0_\360\237\223\201_File_Upload.py" @@ -10,9 +10,6 @@ st.title("File Upload") -if "selected-mzML-files" not in st.session_state: - st.session_state["selected-mzML-files"] = params["selected-mzML-files"] - tabs = ["File Upload", "Example Data"] if st.session_state.location == "local": tabs.append("Files from local folder") @@ -61,12 +58,10 @@ c1, c2 = st.columns(2) if c2.button("Remove **selected**", type="primary", disabled=not any(to_remove)): remove_selected_mzML_files(to_remove) - save_params(params) st.experimental_rerun() if c1.button("⚠️ Remove **all**", disabled=not any(mzML_dir.iterdir())): remove_all_mzML_files() - save_params(params) st.experimental_rerun() save_params(params) diff --git "a/pages/2_\360\237\224\215_Extracted_Ion_Chromatograms.py" "b/pages/2_\360\237\224\215_Extracted_Ion_Chromatograms.py" index fc1e8d7..4c5ffdb 100644 --- "a/pages/2_\360\237\224\215_Extracted_Ion_Chromatograms.py" +++ "b/pages/2_\360\237\224\215_Extracted_Ion_Chromatograms.py" @@ -13,7 +13,7 @@ with st.expander("📖 Help"): st.markdown(HELP) - + default_df = pd.DataFrame({"name": [""], "mz": [np.nan], "RT (seconds)": [np.nan], "peak width (seconds)": [np.nan]}) if "workspace" in st.session_state: @@ -28,13 +28,11 @@ # Load a default example df df = default_df -with st.expander("Settings", expanded=True): - st.markdown("**Table with metabolites for chromatogram extraction**") +with st.expander("**Mass table with metabolites for chromatogram extraction**", True): c1, c2 = st.columns(2) # Uploader for XIC input table c1.file_uploader("Upload XIC input table", type="tsv", label_visibility="collapsed", - key="xic-table-uploader", accept_multiple_files=False, on_change=upload_xic_table, args=[df]) - + key="xic-table-uploader", accept_multiple_files=False, on_change=upload_xic_table, args=[df]) # def update_mass_table() edited = st.data_editor(df, use_container_width=True, num_rows="dynamic") @@ -44,6 +42,7 @@ edited.to_csv(sep="\t", index=False).encode("utf-8"), "XIC-input-table.tsv", ) + st.markdown("**Calculate mass and add to table**") c1, c2, c3 = st.columns(3) name = c1.text_input( @@ -72,6 +71,10 @@ else: st.warning("Invalid formula.") +with st.form("eic_form"): + st.multiselect("mzML files", [f.stem for f in Path(st.session_state.workspace, "mzML-files").glob("*.mzML")], + params["eic_selected_mzML"], key="eic_selected_mzML") + # Retention time settings st.markdown("**Parameters for chromatogram extraction**") c1, c2, c3 = st.columns(3) @@ -94,27 +97,28 @@ "mass tolerance Da", 0.01, 10.0, params["eic_tolerance_da"], 0.05, key="eic_tolerance_da" ) - mzML_files = [str(Path(st.session_state.workspace, - "mzML-files", f+".mzML")) for f in st.session_state["selected-mzML-files"]] results_dir = Path(st.session_state.workspace, "extracted-ion-chromatograms") - v_space(1) _, c2, _ = st.columns(3) - if c2.button("Extract chromatograms", type="primary"): - if not mzML_files: - st.warning("Upload/select some mzML files first!") - else: - extract_chromatograms(results_dir, - mzML_files, - edited, - st.session_state["eic_mz_unit"], - st.session_state["eic_tolerance_ppm"], - st.session_state["eic_tolerance_da"], - st.session_state["eic_time_unit"], - st.session_state["eic_peak_width"], - st.session_state["eic_baseline"]) + submitted = c2.form_submit_button("Extract chromatograms", type="primary") + +if submitted: + mzML_files = [str(Path(st.session_state.workspace, + "mzML-files", f+".mzML")) for f in st.session_state["eic_selected_mzML"]] + if not mzML_files: + st.warning("Upload/select some mzML files first!") + else: + extract_chromatograms(results_dir, + mzML_files, + edited, + st.session_state["eic_mz_unit"], + st.session_state["eic_tolerance_ppm"], + st.session_state["eic_tolerance_da"], + st.session_state["eic_time_unit"], + st.session_state["eic_peak_width"], + st.session_state["eic_baseline"]) # Display summary table @@ -207,4 +211,4 @@ "xic-meta-data.tsv", ) -save_params(params) +save_params(params) \ No newline at end of file diff --git "a/pages/3_\360\237\247\252_Untargeted_Metabolomics.py" "b/pages/3_\360\237\247\252_Untargeted_Metabolomics.py" index f133f98..be0f58d 100644 --- "a/pages/3_\360\237\247\252_Untargeted_Metabolomics.py" +++ "b/pages/3_\360\237\247\252_Untargeted_Metabolomics.py" @@ -12,7 +12,9 @@ with st.expander("📖 Help"): st.markdown(HELP) -with st.expander("Settings", expanded=True): +with st.form("umetaflow-form"): + st.multiselect("mzML files", [f.stem for f in Path(st.session_state.workspace, "mzML-files").glob("*.mzML")], + params["umetaflow_selected_mzML"], key="umetaflow_selected_mzML") st.markdown("#### 1. Pre-Processing") st.markdown("**Feature Detection**") c1, c2, c3, c4 = st.columns(4) @@ -43,71 +45,71 @@ ) if not ( st.session_state["ffm_min_fwhm"] <= st.session_state["ffm_peak_width"] <= st.session_state["ffm_max_fwhm"] - ): + ): c4.warning("Check your peak width settings.") v_space(1) st.checkbox( "**Blank Removal**", params["remove_blanks"], key="remove_blanks", help="Useful to filter out features which are present in blank sample/s or e.g. for differential feature detection to remove features which are present in control, but not in treatment samples." ) - if st.session_state["remove_blanks"]: - c1, c2 = st.columns(2) - c1.multiselect( - "select blank samples", - st.session_state["selected-mzML-files"], - key="blank_files", - help="The selected samples will be used to calculate avarage feature blank intensities and will not be further processed.", - ) - c2.number_input( - "ratio blank/sample average intensity cutoff", - 0.05, - 0.9, - params["blank_cutoff"], - 0.05, - key="blank_cutoff", - help="Features that have an intensity ratio below (avagera blank) to (average samples) will be removed. Set low for strict blank removal.", - ) + # if st.session_state["remove_blanks"]: + c1, c2 = st.columns(2) + c1.multiselect( + "select blank samples", + st.session_state["umetaflow_selected_mzML"], + key="blank_files", + help="The selected samples will be used to calculate avarage feature blank intensities and will not be further processed.", + ) + c2.number_input( + "ratio blank/sample average intensity cutoff", + 0.05, + 0.9, + params["blank_cutoff"], + 0.05, + key="blank_cutoff", + help="Features that have an intensity ratio below (avagera blank) to (average samples) will be removed. Set low for strict blank removal.", + ) v_space(1) st.checkbox("**Map Alignment**", params["use_ma"], key="use_ma") - if st.session_state["use_ma"]: - c1, c2, c3 = st.columns(3) - c1.number_input( - "**mz max difference**", - 0.01, - 1000.0, - params["ma_mz_max"], - step=1.0, - format="%.2f", - key="ma_mz_max" - ) - c2.radio( - "mz distance unit", - ["ppm", "Da"], - ["ppm", "Da"].index(params["ma_mz_unit"]), - key="ma_mz_unit" - ) + # if st.session_state["use_ma"]: + c1, c2, c3 = st.columns(3) + c1.number_input( + "**mz max difference**", + 0.01, + 1000.0, + params["ma_mz_max"], + step=1.0, + format="%.2f", + key="ma_mz_max" + ) + c2.radio( + "mz distance unit", + ["ppm", "Da"], + ["ppm", "Da"].index(params["ma_mz_unit"]), + key="ma_mz_unit" + ) - c3.number_input( - "RT max difference", 1, 1000, int(params["ma_rt_max"]), 10, key="ma_rt_max" - ) + c3.number_input( + "RT max difference", 1, 1000, int(params["ma_rt_max"]), 10, key="ma_rt_max" + ) v_space(1) st.checkbox("**Adduct Detection**", params["use_ad"], key="use_ad") - if st.session_state["use_ad"]: - c1, c2, c3, c4 = st.columns(4) - c1.radio( - "ionization mode", - ["positive", "negative"], - ["positive", "negative"].index(params["ad_ion_mode"]), - key="ad_ion_mode", - help="Carefully adjust settings for each mode. Especially potential adducts and negative min/max charges for negative mode.", - ) - c2.text_area( - "potential adducts", - params["ad_adducts"], - key="ad_adducts", - help=""" + # if st.session_state["use_ad"]: + c1, c2, c3, c4 = st.columns(4) + c1.radio( + "ionization mode", + ["positive", "negative"], + ["positive", "negative"].index(params["ad_ion_mode"]), + key="ad_ion_mode", + help="Carefully adjust settings for each mode. Especially potential adducts and negative min/max charges for negative mode.", + ) + c2.text_area( + "potential adducts", + params["ad_adducts"], + key="ad_adducts", + help=""" Specify adducts and neutral additions/losses.\n Format (each in a new line): adducts:charge:probability.\n The summed up probability for all charged entries needs to be 1.0.\n @@ -123,31 +125,31 @@ CH2O2:0:0.5 """ ) - c3.number_input( - "charge min", - -3, - 3, - params["ad_charge_min"], - key="ad_charge_min", - help="e.g. for negative mode -3, for positive mode 1" - ) - c3.number_input( - "charge max", - -3, - 3, - params["ad_charge_max"], - key="ad_charge_max", - help="e.g. for negative mode -1, for positive mode 3", - ) + c3.number_input( + "charge min", + -3, + 3, + params["ad_charge_min"], + key="ad_charge_min", + help="e.g. for negative mode -3, for positive mode 1" + ) + c3.number_input( + "charge max", + -3, + 3, + params["ad_charge_max"], + key="ad_charge_max", + help="e.g. for negative mode -1, for positive mode 3", + ) - c4.number_input( - "RT max difference", - 1, - 60, - int(params["ad_rt_max_diff"]), - key="ad_rt_max_diff", - help="Groups features with slightly different RT.", - ) + c4.number_input( + "RT max difference", + 1, + 60, + int(params["ad_rt_max_diff"]), + key="ad_rt_max_diff", + help="Groups features with slightly different RT.", + ) v_space(1) st.markdown("**Feature Linking**") @@ -194,13 +196,13 @@ key="use_gnps", help="Run GNPS Feature Based Molecular Networking and Ion Identity Molecular Networking with these files, can be found in results -> GNPS.", ) - if st.session_state["use_gnps"]: - st.checkbox( - "annotate features with GNPS library", - params["annotate_gnps_library"], - key="annotate_gnps_library", - help="UmetaFlow contains the complete GNPS library in mgf file format. Check to annotate.", - ) + # if st.session_state["use_gnps"]: + st.checkbox( + "annotate features with GNPS library", + params["annotate_gnps_library"], + key="annotate_gnps_library", + help="UmetaFlow contains the complete GNPS library in mgf file format. Check to annotate.", + ) v_space(1) st.markdown("#### 4. Annotation via in-house library") @@ -210,32 +212,34 @@ key="annotate_ms1", help="Annotate features on MS1 level with known m/z and retention times values.", ) - if st.session_state["annotate_ms1"]: - ms1_annotation_file_upload = st.file_uploader( - "Select library for MS1 annotations.", type=["tsv"] - ) - if ms1_annotation_file_upload: - path = Path(st.session_state.workspace, - ms1_annotation_file_upload.name) - with open(path, "wb") as f: - f.write(ms1_annotation_file_upload.getbuffer()) - params["ms1_annotation_file"] = str(path) - elif params["ms1_annotation_file"]: - st.info( - f"Currently selected MS1 library: {Path(params['ms1_annotation_file']).name}") - else: - st.warning("No MS1 library selected.") - params["ms1_annotation_file"] = "" - c1, c2 = st.columns(2) - c1.number_input( - "retention time window for annotation in seconds", - 1, 240, params["annoation_rt_window_sec"], 10, - key="annoation_rt_window_sec", - help="Checks around peak apex, e.g. window of 60 s will check left and right 30 s.", - ) - params["annotation_mz_window_ppm"] = c2.number_input( - "mz window for annotation in ppm", 1, 100, params["annotation_mz_window_ppm"], 1, key="annotation_mz_window_ppm" - ) + c1, c2 = st.columns(2) + v_space(1, c2) + # if st.session_state["annotate_ms1"]: + ms1_annotation_file_upload = c1.file_uploader( + "Select library for MS1 annotations.", type=["tsv"] + ) + if ms1_annotation_file_upload: + path = Path(st.session_state.workspace, + ms1_annotation_file_upload.name) + with open(path, "wb") as f: + f.write(ms1_annotation_file_upload.getbuffer()) + params["ms1_annotation_file"] = str(path) + elif params["ms1_annotation_file"]: + c2.info( + f"Currently selected MS1 library: {Path(params['ms1_annotation_file']).name}") + else: + c2.warning("No MS1 library selected.") + params["ms1_annotation_file"] = "" + c1, c2 = st.columns(2) + c1.number_input( + "retention time window for annotation in seconds", + 1, 240, params["annoation_rt_window_sec"], 10, + key="annoation_rt_window_sec", + help="Checks around peak apex, e.g. window of 60 s will check left and right 30 s.", + ) + params["annotation_mz_window_ppm"] = c2.number_input( + "mz window for annotation in ppm", 1, 100, params["annotation_mz_window_ppm"], 1, key="annotation_mz_window_ppm" + ) st.checkbox( "**MS2 annotation via fragmentation patterns**", @@ -243,38 +247,43 @@ key="annotate_ms2", help="Annotate features on MS2 level based on their fragmentation patterns. The library has to be in mgf file format.", ) - if st.session_state["annotate_ms2"]: - ms2_annotation_file_upload = st.file_uploader( - "Select library for MS2 annotations", type=["mgf"] - ) - if ms2_annotation_file_upload: - path = Path(st.session_state.workspace, - ms2_annotation_file_upload.name) - with open(path, "wb") as f: - f.write(ms2_annotation_file_upload.getbuffer()) - params["ms2_annotation_file"] = str(path) - elif params["ms2_annotation_file"]: - st.info( - f"Currently selected MS2 library: {Path(params['ms2_annotation_file']).name}") - else: - st.warning("No MS2 library selected.") - params["ms2_annotation_file"] = "" + c1, c2 = st.columns(2) + v_space(1, c2) + # if st.session_state["annotate_ms2"]: + ms2_annotation_file_upload = c1.file_uploader( + "Select library for MS2 annotations", type=["mgf"] + ) + if ms2_annotation_file_upload: + path = Path(st.session_state.workspace, + ms2_annotation_file_upload.name) + with open(path, "wb") as f: + f.write(ms2_annotation_file_upload.getbuffer()) + params["ms2_annotation_file"] = str(path) + elif params["ms2_annotation_file"]: + c2.info( + f"Currently selected MS2 library: {Path(params['ms2_annotation_file']).name}") + else: + c2.warning("No MS2 library selected.") + params["ms2_annotation_file"] = "" v_space(1) _, c2, _ = st.columns(3) - run_button = c2.button("Run UmetaFlow", type="primary") + run_button = c2.form_submit_button("Run UmetaFlow", type="primary") results_dir = Path(st.session_state.workspace, "umetaflow-results") if run_button: + save_params(params) umetaflow_params = load_params() # Modify paramters to have float values if necessary for key in ("fl_rt_tol", "ad_rt_max_diff", "ma_rt_max", "ffm_noise"): umetaflow_params[key] = float(umetaflow_params[key]) mzML_files = [str(Path(st.session_state.workspace, - "mzML-files", f+".mzML")) for f in st.session_state["selected-mzML-files"]] - - reset_directory(results_dir) - run_umetaflow(umetaflow_params, mzML_files, results_dir) + "mzML-files", f+".mzML")) for f in st.session_state["umetaflow_selected_mzML"]] + if mzML_files: + reset_directory(results_dir) + run_umetaflow(umetaflow_params, mzML_files, results_dir) + else: + st.warning("No mzML files selected!") if results_dir.exists(): v_space(1) diff --git a/src/common.py b/src/common.py index fdf6d7b..f9b9e59 100644 --- a/src/common.py +++ b/src/common.py @@ -3,13 +3,11 @@ import shutil import sys import uuid -import base64 import json from typing import Any from pathlib import Path import streamlit as st -import streamlit.components.v1 as components import pandas as pd # set these variables according to your project @@ -45,12 +43,6 @@ def load_params(default: bool = False) -> dict[str, Any]: with open("assets/default-params.json", "r") as f: params = json.load(f) - # Check if any parameters have been modified during the current session and update the parameter dictionary - if not default: - for key, value in st.session_state.items(): - if key in params.keys(): - params[key] = value - # Return the parameter dictionary return params @@ -238,12 +230,7 @@ def change_workspace(): ) st.experimental_rerun() - # Workflow pages have mzML file selector, there can be multiple workflow pages which share mzML file selection - if page == "workflow": - st.markdown("📁 **mzML files**") - options = [Path(f).stem for f in Path(st.session_state.workspace, "mzML-files").glob("*.mzML")] - st.multiselect("mzML files", options, params["selected-mzML-files"], key="selected-mzML-files", label_visibility="collapsed") - # All pages have logo and settings + # All pages have settings, workflow indicator and logo with st.expander("⚙️ **Settings**"): img_formats = ["svg", "png", "jpeg", "webp"] st.selectbox( @@ -251,13 +238,9 @@ def change_workspace(): img_formats, img_formats.index(params["image-format"]), key="image-format" ) - - # Indicator for current workspace if page != "main": st.info( f"**{Path(st.session_state['workspace']).stem}**") - - # Logo st.image("assets/pyopenms_transparent_background.png", "powered by") return params diff --git a/src/fileupload.py b/src/fileupload.py index 6427b93..4f7f585 100644 --- a/src/fileupload.py +++ b/src/fileupload.py @@ -9,22 +9,6 @@ # Specify mzML file location in workspace mzML_dir: Path = Path(st.session_state.workspace, "mzML-files") - -def add_to_selected_mzML(filename: str): - """ - Add the given filename to the list of selected mzML files. - - Args: - filename (str): The filename to be added to the list of selected mzML files. - - Returns: - None - """ - # Check if file in params selected mzML files, if not add it - if filename not in st.session_state["selected-mzML-files"]: - st.session_state["selected-mzML-files"].append(filename) - - @st.cache_data def save_uploaded_mzML(uploaded_files: list[bytes]) -> None: """ @@ -48,11 +32,9 @@ def save_uploaded_mzML(uploaded_files: list[bytes]) -> None: if f.name not in [f.name for f in mzML_dir.iterdir()] and f.name.endswith("mzML"): with open(Path(mzML_dir, f.name), "wb") as fh: fh.write(f.getbuffer()) - add_to_selected_mzML(Path(f.name).stem) st.success("Successfully added uploaded files!") -@st.cache_data def copy_local_mzML_files_from_directory(local_mzML_directory: str) -> None: """ Copies local mzML files from a specified directory to the mzML directory. @@ -70,9 +52,7 @@ def copy_local_mzML_files_from_directory(local_mzML_directory: str) -> None: # Copy all mzML files to workspace mzML directory, add to selected files files = Path(local_mzML_directory).glob("*.mzML") for f in files: - if f.name not in mzML_dir.iterdir(): - shutil.copy(f, mzML_dir) - add_to_selected_mzML(f.stem) + shutil.copy(f, Path(mzML_dir, f.name)) st.success("Successfully added local files!") @@ -89,7 +69,6 @@ def load_example_mzML_files() -> None: # Copy files from example-data/mzML to workspace mzML directory, add to selected files for f in Path("example-data", "mzML").glob("*.mzML"): shutil.copy(f, mzML_dir) - add_to_selected_mzML(f.stem) st.success("Example mzML files loaded!") @@ -106,7 +85,6 @@ def remove_selected_mzML_files(to_remove: list[str]) -> None: # remove all given files from mzML workspace directory and selected files for f in to_remove: Path(mzML_dir, f+".mzML").unlink() - st.session_state["selected-mzML-files"].remove(f) st.success("Selected mzML files removed!") @@ -122,6 +100,4 @@ def remove_all_mzML_files() -> None: """ # reset (delete and re-create) mzML directory in workspace reset_directory(mzML_dir) - # reset selected mzML list - st.session_state["selected-mzML-files"] = [] st.success("All mzML files removed!") diff --git a/src/xic.py b/src/xic.py index 11aeed5..807dba1 100644 --- a/src/xic.py +++ b/src/xic.py @@ -47,6 +47,8 @@ Feel free to explore the different features and options on this page to extract and analyze your chromatogram data efficiently. """ +path = Path(st.session_state.workspace, "XIC-input-table.tsv") + def upload_xic_table(df): if not st.session_state["xic-table-uploader"]: return @@ -72,7 +74,7 @@ def upload_xic_table(df): def extract_chromatograms(results_dir, mzML_files, df_input, mz_unit, mz_ppm, mz_da, time_unit, default_peak_width, baseline): - with st.spinner("Extracting chromatograms..."): + with st.status("Extracting chromatograms..."): # Save edited xic input table to tsv file df_input.to_csv(path, sep="\t", index=False) @@ -102,6 +104,7 @@ def extract_chromatograms(results_dir, mzML_files, df_input, mz_unit, mz_ppm, mz # Iterate over the files and extract chromatograms in a single dataframe per file for file in mzML_files: + st.write(f"Extracting chromatograms from {Path(file).name} ...") # Load mzML file into exp exp = MSExperiment() MzMLFile().load(str(file), exp) @@ -165,7 +168,6 @@ def extract_chromatograms(results_dir, mzML_files, df_input, mz_unit, mz_ppm, mz else: # Find the highest peak in spec within mz window (ppm) ppm_window = float((mz_ppm / 1000000) * mass) - print(ppm_window) index_highest_peak_within_window = ( spec.findHighestInWindow( mass, ppm_window, ppm_window) @@ -233,9 +235,8 @@ def extract_chromatograms(results_dir, mzML_files, df_input, mz_unit, mz_ppm, mz # Save AUC to text file with open(Path(results_dir, "run-params.txt"), "w") as f: f.write(f"{baseline}\n{time_unit}") - - # Re-run to prevent tab jumping back to first tab upon first widget change (streamlit bug) - st.experimental_rerun() + # # Re-run to prevent tab jumping back to first tab upon first widget change (streamlit bug) + # st.experimental_rerun() @st.cache_resource