
Commit

add MS2 annotation and logo
axelwalter committed Apr 24, 2024
1 parent c06608c commit bf45b36
Showing 11 changed files with 171 additions and 71 deletions.
2 changes: 1 addition & 1 deletion Home.py
@@ -4,9 +4,9 @@

params = page_setup(page="main")

st.image("assets/umetaflow-logo.png", width=300)
st.markdown(
"""
# UmetaFlow
## A universal metabolomics tool
This app is based on the [UmetaFlow](https://chemrxiv.org/engage/chemrxiv/article-details/634fb68fdfbd2b6abc5c5fcd) workflow for LC-MS data analysis. UmetaFlow is implemented as a [snakemake pipeline](https://github.com/NBChub/snakemake-UmetaFlow) and as a Python version in [Jupyter notebooks](https://github.com/eeko-kon/pyOpenMS_UmetaFlow) based on [pyOpenMS](https://pyopenms.readthedocs.io/en/latest/index.html).
16 changes: 13 additions & 3 deletions README.md
@@ -1,6 +1,16 @@
# UmetaFlow GUI [Open!](https://abi-services.cs.uni-tuebingen.de/umetaflow/)

<img src="assets/pyopenms_transparent_background.png" width=20%>
<table>
<tr>
<td>
<img src="assets/umetaflow-logo.png" alt="UmetaFlow logo">
</td>
<td>
<img src="assets/pyopenms_transparent_background.png" alt="pyOpenMS logo">
</td>
<td>
<a href="https://abi-services.cs.uni-tuebingen.de/umetaflow/">Open app</a>
</td>
</tr>
</table>

This app is based on the [UmetaFlow](https://chemrxiv.org/engage/chemrxiv/article-details/634fb68fdfbd2b6abc5c5fcd) workflow for LC-MS data analysis. UmetaFlow is implemented as a [snakemake pipeline](https://github.com/NBChub/snakemake-UmetaFlow) and as a Python version in [Jupyter notebooks](https://github.com/eeko-kon/pyOpenMS_UmetaFlow) based on [pyOpenMS](https://pyopenms.readthedocs.io/en/latest/index.html).

Binary file added assets/umetaflow-logo.png
1 change: 1 addition & 0 deletions pages/4_🧪_UmetaFlow.py
@@ -11,6 +11,7 @@
c1.title("UmetaFlow")
v_space(1, c2)
results_only = c2.toggle("view results only")
st.markdown("💡**Minimal interface with pyOpenMS only. For more advanced use cases and fast execution times use UmetaFlow TOPP. Requires OpenMS TOPP tools to be installed.**")

results_dir = Path(st.session_state.workspace, "umetaflow-results")

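The hint above notes that UmetaFlow TOPP requires the OpenMS TOPP tools to be installed. For reference, a minimal sketch (outside this diff) that checks whether the TOPP tools invoked by this workflow are on PATH; the tool list is taken from the `input_TOPP`/`run_topp` calls in `src/UmetaFlowTOPPWorkflow.py`, everything else is standard library.

```python
import shutil

# TOPP tools invoked by the UmetaFlow TOPP workflow in this commit.
REQUIRED_TOPP_TOOLS = [
    "FeatureFinderMetaboIdent",
    "MetaboliteSpectralMatcher",
    "SiriusExport",
    "GNPSExport",
    "FileFilter",
]

missing = [tool for tool in REQUIRED_TOPP_TOOLS if shutil.which(tool) is None]
if missing:
    print("Missing OpenMS TOPP tools:", ", ".join(missing))
else:
    print("All required TOPP tools found on PATH.")
```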
4 changes: 3 additions & 1 deletion pages/6_🚧_Testing.py
@@ -5,5 +5,7 @@
import pyopenms as poms

from pathlib import Path
import zipfile
from pyteomics.mztab import MzTab

params = {"in": []}

82 changes: 41 additions & 41 deletions src/UmetaFlowTOPPWorkflow.py
@@ -27,8 +27,9 @@ def configure(self) -> None:
[
"**Pre-Processing**",
"Re-Quantification",
"Input files for SIRIUS & GNPS",
"Annotation by in-house library",
"SIRIUS",
"GNPS FBMN"
]
)
with tabs[0]:
@@ -100,50 +101,38 @@ def configure(self) -> None:
)
self.ui.input_TOPP("FeatureFinderMetaboIdent")
with tabs[2]:
st.markdown("**Export input files**")
t = st.tabs(["MS1", "MS2"])
with t[0]:
self.ui.input_widget("annotate-ms1", False, "annotate consensus features", help="Based on m/z and RT")
self.ui.simple_file_uploader("ms1-library", "tsv", "MS1 library in tsv format")
self.ui.input_python("annotate-ms1", num_cols=2)
with t[1]:
self.ui.input_widget("annotate-ms2", False, "annotate consensus features", help="Based on MS2 spectrum similarity.")
self.ui.simple_file_uploader("ms2-library", "mgf", "MS2 library in mgf format")
self.ui.input_TOPP("MetaboliteSpectralMatcher")
with tabs[3]:
self.ui.input_widget(
"export-sirius",
False,
"export files for SIRIUS",
help="Generate input files for SIRIUS from raw data and feature information using the OpenMS TOPP tool *SiriusExport*.",
)
self.ui.input_TOPP("SiriusExport")
# st.markdown("**Run SIRIUS and annotate features**")
# self.ui.input_widget("run-sirius", False, "run SIRIUS and annotate features")
# self.ui.input_widget
# t = st.tabs(["Formula prediction: SIRIUS", "Structure prediction: CSI : FingerID", "CANOPUS"])
with tabs[4]:
self.ui.input_widget(
"export-gnps",
False,
"export files for GNPS FBMN and IIMN",
help="Generate input files for GNPS feature based molecular networking (FBMN) and ion identity molecular networking (IIMN) from raw data and feature information using the OpenMS TOPP tool *GNPSExport*.",
)
t = st.tabs(["**SIRIUS**", "**GNPS**"])
with t[0]:
self.ui.input_TOPP("SiriusExport")
with t[1]:
self.ui.input_TOPP("GNPSExport")
st.markdown("**Run SIRIUS and annotate features**")
self.ui.input_widget("run-sirius", False, "run SIRIUS and annotate features")
self.ui.input_widget
t = st.tabs(["Formula prediction: SIRIUS", "Structure prediction: CSI : FingerID", "CANOPUS"])
with tabs[3]:
t = st.tabs(["MS1", "MS2"])
with t[0]:
self.ui.input_widget("annotate-ms1", False, "annotate consensus features", help="Based on m/z and RT")
self.ui.simple_file_uploader("ms1-library", "tsv", "MS1 library in tsv format")
self.ui.input_python("annotate-ms1", num_cols=2)
with t[1]:
self.ui.input_widget("annotate-ms2", False, "annotate consensus features", help="Based on MS2 spectrum similarity.")
self.ui.simple_file_uploader("ms2-library", "mgf", "MS2 library in mgf format")

self.ui.input_TOPP("GNPSExport")

def execution(self) -> None:
# Set log levels from st.session_state
if st.session_state["log_level"] in [
"commands and execution times",
"show all",
]:
self.executor.log_commands = True
if st.session_state["log_level"] in ["tool outputs", "show all"]:
self.executor.log_tool_outputs = True


# # Get mzML files
# Get mzML files
df_path = Path(st.session_state.workspace, "mzML-files.tsv")

if not df_path.exists():
@@ -157,7 +146,7 @@ def execution(self) -> None:
# Construct full file paths
mzML = [str(Path(st.session_state.workspace, "mzML-files", file_name)) for file_name in selected_files]

if len(mzML) <= 1:
if len(mzML) == 0:
self.logger.log("ERROR: Select at leat two mzML files to run this workflow.")
return

@@ -353,7 +342,7 @@ def execution(self) -> None:
"out": self.file_manager.get_files(mzML, "ms", "sirius-export"),
},
)
if self.params["export-gnps"]:
if self.params["export-gnps"] or self.params["annotate-ms2"]:
self.logger.log("Exporting input files for GNPS.")
# Map MS2 specs to features
self.executor.run_topp(
@@ -383,6 +372,8 @@ def execution(self) -> None:
"export_consensus_df",
{"in": gnps_consensus, "out": consensus_df},
)

if self.params["export-gnps"]:
# Filter consensus features which have missing values
self.executor.run_topp(
"FileFilter",
@@ -417,10 +408,19 @@ def execution(self) -> None:
if files:
self.logger.log("Annotating consensus features on MS1 level.")
self.executor.run_python("annotate-ms1", {"in": consensus_df, "in_lib": str(files[0])})


if self.params["annotate-ms2"]:
dir_path = Path(self.workflow_dir, "input-files", "ms2-library")
if dir_path.exists():
files = [p for p in dir_path.iterdir()]
if files:
self.logger.log("Annotating consensus features on MS2 level.")
ms2_matches = self.file_manager.get_files(mzML, "mzTab", "ms2-matches")
self.executor.run_topp("MetaboliteSpectralMatcher", {"in": mzML, "database": self.file_manager.get_files(str(files[0])), "out": ms2_matches})
self.executor.run_python("annotate-ms2", {"in": consensus_df})

# ZIP all relevant files for Download
self.executor.run_python("zip-result-files", {"in": consensus_df})


def results(self) -> None:
def load_parquet(file):
@@ -486,6 +486,7 @@ def quality_colors(value):
"intensity",
df_matrix.apply(lambda row: [row[col] for col in sample_cols], axis=1),
)
df_matrix.set_index("metabolite", inplace=True)
st.dataframe(
df_matrix,
column_order=[
@@ -495,7 +496,7 @@ def quality_colors(value):
"charge",
"adduct",
"MS1 annotation",
"metabolite",
"MS2 annotation",
],
hide_index=False,
column_config={
@@ -517,12 +518,11 @@ def quality_colors(value):

with tabs[1]:
c1, c2 = st.columns(2)
metabolite = c1.selectbox("Select metabolite", df_matrix["metabolite"])
metabolite = c1.selectbox("Select metabolite", df_matrix.index)

@st.cache_data
def get_chroms_for_each_sample(metabolite):
# Get index of row in df_matrix where "metabolite" is equal to metabolite
index = df_matrix[df_matrix["metabolite"] == metabolite].index[0]
all_samples = [
col.replace(".mzML_IDs", "")
for col in df_matrix.columns
@@ -532,7 +532,7 @@ def get_chroms_for_each_sample(metabolite):
samples = []
for sample in all_samples:
# Get feature ID for sample
fid = df_matrix.loc[index, sample + ".mzML_IDs"]
fid = df_matrix.loc[metabolite, sample + ".mzML_IDs"]
path = Path(feature_df_dir, sample + ".parquet")
f_df = load_parquet(path)
if fid in f_df.index:
@@ -639,7 +639,7 @@ def get_feature_intensity_plot(df):
),

with tabs[3]:
if st.button("Prepare result files for download", type="primary"):
if st.button("Prepare result files for download"):
with open(Path(self.workflow_dir, "results", "results.zip"), "rb") as fp:
st.download_button(
label="Download Results",
6 changes: 4 additions & 2 deletions src/common.py
@@ -93,7 +93,7 @@ def page_setup(page: str = "", help_text: str = "") -> dict[str, Any]:
# Set Streamlit page configurations
st.set_page_config(
page_title=APP_NAME,
page_icon="assets/icon.png",
page_icon="assets/umetaflow-logo.png",
layout="wide",
initial_sidebar_state="auto",
menu_items=None
@@ -243,7 +243,9 @@ def change_workspace():
)
if help_text:
st.info(help_text)
st.image("assets/pyopenms_transparent_background.png", "powered by")
c1, c2 = st.columns(2)
c1.image("assets/pyopenms_transparent_background.png")
c2.image("assets/umetaflow-logo.png")
return params

def v_space(n: int, col=None) -> None:
68 changes: 68 additions & 0 deletions src/python-tools/annotate-ms2.py
@@ -0,0 +1,68 @@
import json
import sys
from pathlib import Path
import pandas as pd
from pyteomics.mztab import MzTab

############################
# default parameter values #
############################
#
# Mandatory keys for each parameter
# key: a unique identifier
# value: the default value
#
# Optional keys for each parameter
# name: the name of the parameter
# hide: don't show the parameter in the parameter section (e.g. for input/output files)
# options: a list of valid options for the parameter
# min: the minimum value for the parameter (int and float)
# max: the maximum value for the parameter (int and float)
# step_size: the step size for the parameter (int and float)
# help: a description of the parameter
# widget_type: the type of widget to use for the parameter (default: auto)
# advanced: whether or not the parameter is advanced (default: False)

DEFAULTS = [
{"key": "in", "value": [], "help": "feature matrix parquet file", "hide": True},
]

def get_params():
if len(sys.argv) > 1:
with open(sys.argv[1], "r") as f:
return json.load(f)
else:
return {}

if __name__ == "__main__":
params = get_params()

df = pd.read_parquet(params["in"][0])

df.insert(3, "MS2 annotation", "")

if not df["MS2_native_specs"].any():
print("No MS2 native spectra found for MS2 annotation.")

def annotate(x, mztab_file, df_mztab):
# get spec ids for the matching mztab file
spec_ids = [spec[1] for spec in [spec.split("_") for spec in x["MS2_native_specs"].split(";")] if Path(spec[0]).stem == Path(mztab_file).stem]
ann = x["MS2 annotation"]
if spec_ids:
for sid in spec_ids:
if sid in df_mztab["native id"].values:
tmp_ann = df_mztab[df_mztab["native id"] == sid]["description"].values[0]
if ann:
ann += "; "
ann += tmp_ann
return ann

mztab_dir = Path(Path(params["in"][0]).parent.parent, "ms2-matches")
for file in mztab_dir.iterdir():
spectralmatch = MzTab(str(file), encoding="UTF8", table_format="df")
spectralmatch.metadata
df_mztab = spectralmatch.small_molecule_table
df_mztab["native id"] = df_mztab["opt_spec_native_id"].str.split("=").str.get(-1)
df["MS2 annotation"] = df.apply(annotate, args=(str(file), df_mztab), axis=1)

df.to_parquet(params["in"][0])
15 changes: 3 additions & 12 deletions src/python-tools/export_consensus_df.py
@@ -89,24 +89,15 @@ def get_params():
for i, f in enumerate(fnames):
df[f"{fnames[i]}_IDs"] = ids[i]

# annotate spectrum IDs for MS2 specs associated with feature
df["MS2_native_specs"] = [";".join([f"{fnames[p.getMetaValue('map_index')]}_{p.getMetaValue('spectrum_index')+1}" for p in f.getPeptideIdentifications()]) for f in consensus_map]

# Rename columns to not show full file path
df = df.rename(columns={col: Path(col).name for col in df.columns if Path(col).exists()})

df = df.reset_index(drop=True)
df.index = df.index + 1
df.index.name = "id"

# # Get info if requantified or not from one (first) of the feature maps
# df["re-quantified"] = df[[c for c in df.columns if c.endswith("_IDs")]].apply(lambda x: x.isna().any(), axis=1)

# # Function to generate evenly spaced ranks
# def generate_ranks(group):
# group = group.sort_values("quality") # Sort by quality
# group["quality ranked"] = np.linspace(0, 1, len(group)) # Generate ranks
# return group

# # Apply the function to each group and concatenate the results
# df = df.groupby("re-quantified").apply(generate_ranks).sort_values("quality ranked", ascending=False)

path = Path(params["out"][0])
df.to_parquet(path)
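A minimal sketch of how the added `MS2_native_specs` column is encoded, using hypothetical `(map_index, spectrum_index)` pairs in place of the meta values the script reads from the ConsensusMap's PeptideIdentifications via pyOpenMS.

```python
# File names backing the consensus map, as used for the per-sample ID columns above.
fnames = ["sampleA.mzML", "sampleB.mzML"]

# Hypothetical (map_index, spectrum_index) meta values for one consensus feature;
# in the real script these come from pyOpenMS PeptideIdentification objects.
peptide_id_meta = [(0, 41), (1, 12)]

# Same encoding as the added line: "<file name>_<1-based spectrum index>", joined by ";".
ms2_native_specs = ";".join(
    f"{fnames[map_index]}_{spectrum_index + 1}"
    for map_index, spectrum_index in peptide_id_meta
)
print(ms2_native_specs)  # -> "sampleA.mzML_42;sampleB.mzML_13"
```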
3 changes: 1 addition & 2 deletions src/python-tools/export_ffmid_df.py
@@ -24,7 +24,7 @@
# advanced: whether or not the parameter is advanced (default: False)

DEFAULTS = [
{"key": "in", "value": [], "help": "ffm featureXML dir", "hide": True},
{"key": "in", "value": [], "help": "ffmid featureXML dir", "hide": True},
]

def get_params():
@@ -41,7 +41,6 @@ def get_params():
if not out_path.exists():
out_path.mkdir(exist_ok=True)
for file in Path(params["in"][0]).parent.glob("*.featureXML"):
print(file)
fm = poms.FeatureMap()
poms.FeatureXMLFile().load(str(file), fm)
# Get DataFrame with meta values
