axelwalter · axelwalter · Dec 14, 2023 · Dec 14, 2023
diff --git a/Home.py b/Home.py
@@ -1,6 +1,6 @@
 import streamlit as st
 from src.common import *
-from src.common import page_setup
+from src.common import page_setup, save_params
 
 params = page_setup(page="main")
 
@@ -11,7 +11,7 @@
 
 This app is based on the [UmetaFlow](https://chemrxiv.org/engage/chemrxiv/article-details/634fb68fdfbd2b6abc5c5fcd) workflow for LC-MS data analysis. UmetaFlow is implemented as a [snakemake pipeline](https://github.com/NBChub/snakemake-UmetaFlow) and as a Python version in [Jupyter notebooks](https://github.com/eeko-kon/pyOpenMS_UmetaFlow) based on [pyOpenMS](https://pyopenms.readthedocs.io/en/latest/index.html).
 
-Here, we take the powerful UmetFlow algorithms in a simple and easy graphical user interface. In contrast to the pipeline for automatic data processing,
+Here, we take the powerful UmetaFlow algorithms in a simple and easy graphical user interface. In contrast to the pipeline for automatic data processing,
 this app is tweaked a bit to be used with smaller to medium sample sets and some manual data interpretation. For example the automatic annotation of features via SIRIUS is omitted.
 Instead we export all the files necessary to run in the SIRIUS GUI tool and manually annotate the result tables via a unique identifier. This method of curated annotation can be interesting if you really want to be confident in your annotations.
 The same applies for GNPS, here you can export all the files required for Feature Based Molecular Networking and Ion Identity Networking.
@@ -53,6 +53,12 @@
 Result files are available via specified download buttons or, if run locally, within the workspace directory.
 ### Workflows
 
+#### 📟 m/z Calculator
+
+The m/z calculator facilitates the calculation of mass-to-charge ratios (m/z) for metabolites and includes a method to easily combine metabolites into large molecules.
+
+This table can be used as input for the Extracted Ion Chromatograms workflow.
+
 #### 🔍 Extracted Ion Chromatograms
 
 Simple workflow for the extraction of chromatograms by `m/z` (and optionally `RT` range) value. Produces a **Feature Matrix** file with area under the curve intensities as well as a **Meta Data** template and the chromatogram data for each file.
@@ -81,20 +87,11 @@
 For an advanced and complete workflow visit the [app for statistical analysis of metabolomics data](https://axelwalter-streamlit-metabol-statistics-for-metabolomics-3ornhb.streamlit.app/).
     """)
 
-# # Check if the script is run in local mode (e.g., "streamlit run app.py local")
-# if "local" in sys.argv:
-#     # In local mode, run the main function without applying captcha
-#     main()
-
-# # If not in local mode, assume it's hosted/online mode
-# else:
-
-#     # WORK LIKE MULTIPAGE APP
-#     if 'controllo' not in st.session_state or st.session_state['controllo'] == False:
-
-#         # Apply captcha control to verify the user
-#         captcha_control()
+# make sure new default params are saved in workspace params
+with open("assets/default-params.json", "r") as f:
+    default_params = json.load(f)
+for key, value in default_params.items():
+    if key not in params.keys():
+        params[key] = value
 
-#     else:     
-#         # Run the main function
-#         main()
+save_params(params)
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@
 
 This app is based on the [UmetaFlow](https://chemrxiv.org/engage/chemrxiv/article-details/634fb68fdfbd2b6abc5c5fcd) workflow for LC-MS data analysis. UmetaFlow is implemented as a [snakemake pipeline](https://github.com/NBChub/snakemake-UmetaFlow) and as a Python version in [Jupyter notebooks](https://github.com/eeko-kon/pyOpenMS_UmetaFlow) based on [pyOpenMS](https://pyopenms.readthedocs.io/en/latest/index.html).
 
-Here, we take the powerful UmetFlow algorithms in a simple and easy graphical user interface. In contrast to the pipeline for automatic data processing,
+Here, we take the powerful UmetaFlow algorithms in a simple and easy graphical user interface. In contrast to the pipeline for automatic data processing,
 this app is tweaked a bit to be used with smaller to medium sample sets and some manual data interpretation. For example the automatic annotation of features via SIRIUS is omitted.
 Instead we export all the files necessary to run in the SIRIUS GUI tool and manually annotate the result tables via a unique identifier. This method of curated annotation can be interesting if you really want to be confident in your annotations.
 The same applies for GNPS, here you can export all the files required for Feature Based Molecular Networking and Ion Identity Networking.
@@ -54,6 +54,12 @@ Your uploaded files will be shown in the sidebar of all tabs dealing with the fi
 Result files are available via specified download buttons or, if run locally, within the workspace directory.
 ### Workflows
 
+#### 📟 m/z Calculator
+
+The m/z calculator facilitates the calculation of mass-to-charge ratios (m/z) for metabolites and includes a method to easily combine metabolites into larger molecules.
+
+The resulting metabolite table can be used as input for the Extracted Ion Chromatograms workflow.
+
 #### 🔍 Extracted Ion Chromatograms
 
 Simple workflow for the extraction of chromatograms by `m/z` (and optionally `RT` range) value. Produces a **Feature Matrix** file with area under the curve intensities as well as a **Meta Data** template and the chromatogram data for each file.

diff --git a/assets/default-params.json b/assets/default-params.json
@@ -10,6 +10,7 @@
     "eic_baseline": 1000,
     "eic_combine": false,
     "eic_peak_width": 20,
+    "eic_use_mz_table": false,
 
     "umetaflow_selected_mzML": [],
     "ffm_mass_error": 10.0,

diff --git a/pages/2_📟_Mass_to_Charge_Calculator.py b/pages/2_📟_Mass_to_Charge_Calculator.py
@@ -0,0 +1,94 @@
+import streamlit as st
+import pandas as pd
+import numpy as np
+from pathlib import Path
+from src.common import *
+from src.masscalculator import create_compound, build_compound, save_df, HELP
+
+params = page_setup(page="workflow")
+
+st.title("m/z calculator")
+
+with st.expander("📖 Help"):
+    st.markdown(HELP)
+
+input_table_path = Path(st.session_state.workspace, "mass-calculator.csv")
+
+if not input_table_path.exists():
+    pd.DataFrame(columns=["name", "sum formula", "adduct", "mz", "RT",
+                 "peak width", "comment"]).to_csv(input_table_path, index=False)
+
+tabs = st.tabs(["➕ New", "📟 Combine metabolites", "📁 View only"])
+
+with tabs[0]:
+    with st.form("new-metabolite-form"):
+        c1, c2 = st.columns(2)
+        formula = c1.text_input(
+            "**sum formula**", "", help="Enter sum formula for new metabolite.")
+        name = c1.text_input("metabolite name (optional)", "",
+                             help="Will be created automatically if omitted.")
+        neutral_loss = c1.text_input(
+            "neutral losses (optional)", "", help="Sum formula of neutral losses (e.g. H2O).")
+        charge = c2.number_input(
+            "**charge**", -50, 50, 1, help="Enter charge. Negative numbers for negative ion mode, positive numbers for positive ion mode.")
+        c2.markdown("adducts", help="Specify adducts except for protons (H) up the number of charges in total, the remaing will be filled with protons (positive mode). In negative mode as the absolute charge number of protons will be removed regardless of specified additional adducts.")
+        adducts = c2.data_editor(pd.DataFrame({"adduct": ["Na", "K", "HCOOH"], "number": [
+            0, 0, 0]}), hide_index=True, use_container_width=True)
+        _, c2, _ = st.columns(3)
+        create_compound_button = c2.form_submit_button(
+            "Add new metabolite", use_container_width=True,
+            help="Calculate m/z from sum formula and adduct and add metabolite to table.")
+    if create_compound_button:
+        save_df(create_compound(
+            formula, charge, adducts, neutral_loss, name),
+            input_table_path)
+
+with tabs[1]:
+    with st.form("build-metabolite-form", clear_on_submit=True):
+        st.markdown("**metabolites to combine**",
+                    help="Select from metabolites which are already in the table to combine them into larger molecules from the given numbers.")
+        column_types = {'metabolite': 'str', 'number': 'int'}
+        builder = st.data_editor(pd.DataFrame(columns=column_types.keys()).astype(column_types), hide_index=True, use_container_width=True, num_rows="dynamic", column_config={
+            "metabolite": st.column_config.SelectboxColumn(
+                "metabolite",
+                width="large",
+                options=[x for x in pd.read_csv(input_table_path)[["name", "sum formula"]].dropna()["name"].tolist()],
+                required=True,
+            ),
+            "number": st.column_config.NumberColumn(
+                "number",
+                width="small",
+                min_value=-100,
+                max_value=100,
+                step=1,
+                required=True,
+                default=1
+            )}, key="builder")
+        c1, c2 = st.columns(2)
+        charge = c1.number_input(
+            "**charge**", -50, 50, 1, help="Enter charge. Negative numbers for negative ion mode, positive numbers for positive ion mode.")
+        name = c1.text_input("metabolite name (optional)", "",
+                                help="Will be created automatically if omitted.")
+        elimination = c1.text_input("elimination product (optional)", "H2O", help="Remove elemination product when combining two metabolites.")
+        c2.markdown("adducts", help="Specify adducts except for protons (H) up the number of charges in total, the remaing will be filled with protons (positive mode). In negative mode as the absolute charge number of protons will be removed regardless of specified additional adducts.")
+        adducts = c2.data_editor(pd.DataFrame({"adduct": ["Na", "K", "HCOOH"], "number": [
+            0, 0, 0]}), hide_index=True, use_container_width=True)
+        _, c2, _ = st.columns(3)
+        build_compound_button = c2.form_submit_button(
+        "Calculate metabolite", use_container_width=True,
+        help="Calculate m/z from sum formula and adduct and add metabolite to table.")
+
+    if build_compound_button:
+        save_df(build_compound(st.session_state["builder"]["added_rows"], charge, adducts, name, pd.read_csv(input_table_path), elimination), input_table_path)
+
+edited = st.data_editor(pd.read_csv(input_table_path, dtype={"name": str, "sum formula": str, "adduct": str, "mz": float, "RT": float, "peak width": float, "comment": str}), use_container_width=True, hide_index=True,
+                        key="mass-table", disabled=("sum formula", "adduct", "mz"), num_rows="dynamic")
+
+if (st.session_state["mass-table"]["edited_rows"] or st.session_state["mass-table"]["deleted_rows"] or st.session_state["mass-table"]["added_rows"]):
+    if edited["name"].duplicated().any():
+        st.error("Metabolite names need to be unique.")
+    else:
+        edited.to_csv(input_table_path, index=False)
+        st.rerun()
+
+save_params(params)
diff --git a/pages/2_🔍_Extracted_Ion_Chromatograms.py → pages/3_🔍_Extracted_Ion_Chromatograms.py b/pages/2_🔍_Extracted_Ion_Chromatograms.py → pages/3_🔍_Extracted_Ion_Chromatograms.py
@@ -43,7 +43,7 @@
 
 To paste a data table from Excel simply select all the cells in Excel, select the top left cell in the metabolite table (turns red) and paste with **Ctrl-V**.    
 """)
-
+    use_mz_calculator_table = st.toggle("Use table from mass-to-charge calculator and **ignore table below**.", params["eic_use_mz_table"], key="eic_use_mz_table")
     edited = st.data_editor(df, use_container_width=True, num_rows="dynamic")
     c1, c2, c3 = st.columns(3)
     formula = c1.text_input(
@@ -53,7 +53,6 @@
 
     c3.markdown("###")
     add_compound_button = c3.form_submit_button("Add Metabolite", use_container_width=True, help="Calculate m/z from sum formula and adduct and add metabolite to table.")
-
     st.markdown("**Parameters**")
     c1, c2, c3 = st.columns(3)
     c1.radio(
@@ -111,33 +110,39 @@
     if not mzML_files:
         st.warning("Upload/select some mzML files first!")
     else:
-        extract_chromatograms(results_dir,
+        data = edited
+        if use_mz_calculator_table:
+            data = pd.read_csv(Path(st.session_state.workspace, "mass-calculator.csv"))[["name", "mz", "RT", "peak width"]]
+
+        if not data.empty:
+            extract_chromatograms(results_dir,
                                 mzML_files,
-                                edited,
+                                data,
                                 st.session_state["eic_mz_unit"],
                                 st.session_state["eic_tolerance_ppm"],
                                 st.session_state["eic_tolerance_da"],
                                 st.session_state["eic_time_unit"],
                                 st.session_state["eic_peak_width"],
                                 st.session_state["eic_baseline"])
-
+        else:
+            st.error("No input m/z values provided.")
 
 # Display summary table
 path = Path(results_dir, "summary.tsv")
 if path.exists():
+    st.checkbox(
+        "combine metabolite variants",
+        params["eic_combine"],
+        help="Combines different variants (e.g. adducts or neutral losses) of a metabolite. Put a `#` with the name first and variant second (e.g. `glucose#[M+H]+` and `glucose#[M+Na]+`)",
+        key="eic_combine"
+    )
     tabs = st.tabs(["📊 Summary", "📈 Samples", "📈 Metabolites",
                     "📁 Chromatogram data", "📁 Meta data"])
     with open(Path(results_dir, "run-params.txt"), "r") as f:
         baseline = int(f.readline())
         time_unit = f.readline()
-
+    
     with tabs[0]:
-        st.checkbox(
-            "combine metabolite variants",
-            params["eic_combine"],
-            help="Combines different variants (e.g. adducts or neutral losses) of a metabolite. Put a `#` with the name first and variant second (e.g. `glucose#[M+H]+` and `glucose#[M+Na]+`)",
-            key="eic_combine"
-        )
         if st.session_state["eic_combine"]:
             file_name = "summary-combined.tsv"
         else:
@@ -166,20 +171,6 @@
         fig = get_sample_plot(df, file, time_unit)
         show_fig(fig, file)
 
-        # if df_auc.shape[1] > 1:
-        #     file2_options = df_auc.columns.tolist()
-        #     file2_options.remove(file1)
-        #     file2 = c2.selectbox(
-        #         f"select file 2", file2_options)
-        #     df = pd.read_feather(Path(results_dir, file2[:-4] + "ftr"))
-        #     if show_baseline:
-        #         df["AUC baseline"] = [baseline] * df.shape[0]
-        #     if not show_bpc:
-        #         df.drop(columns=["BPC"], inplace=True)
-        #     fig = get_sample_plot(df, file2, time_unit)
-        #     with c2:
-        #         show_fig(fig, file2)
-
     with tabs[2]:
         # overlayed EICs for each sample
         metabolite = st.selectbox("select metabolite", df_auc.index)

diff --git a/pages/3_🧪_Untargeted_Metabolomics.py → pages/4_🧪_Untargeted_Metabolomics.py b/pages/3_🧪_Untargeted_Metabolomics.py → pages/4_🧪_Untargeted_Metabolomics.py
diff --git a/pages/4_📈_Statistics.py → pages/5_📈_Statistics.py b/pages/4_📈_Statistics.py → pages/5_📈_Statistics.py
diff --git a/src/common.py b/src/common.py
@@ -96,7 +96,7 @@ def page_setup(page: str = "") -> dict[str, Any]:
         page_icon="assets/icon.png",
         layout="wide",
         initial_sidebar_state="auto",
-        menu_items=None,
+        menu_items=None
     )
 
     st.markdown("""

diff --git a/src/eic.py b/src/eic.py
@@ -244,7 +244,7 @@ def get_metabolite_fig(df_auc, metabolite, time_unit):
             go.Scattergl(
                 name=sample[:-5],
                 x=df["time"],
-                y=df[[col for col in df if metabolite in col][0]],
+                y=df[[metabolite][0]],
             )
         )
     fig.update_layout(