Merge pull request #23 from hgb-bin-proteomics/develop

Support for xiFDR input
hgb-bin-proteomics · Dec 6, 2024 · 0d7fc8b · 0d7fc8b
2 parents 4b92a23 + 59c822e
commit 0d7fc8b
Show file tree

Hide file tree

Showing 8 changed files with 975 additions and 14 deletions.
diff --git a/.github/workflows/python-app-xi.yml b/.github/workflows/python-app-xi.yml
@@ -0,0 +1,49 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# Reference workflow provided by (c) GitHub
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: msannika_spectral_library_xi
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']  # quoted so 3.10 is not parsed as 3.1
+
+    steps:
+    - uses: actions/checkout@v4  # v3 runs on the deprecated Node 16 runtime
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5  # v3 runs on the deprecated Node 16 runtime
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Copy scripts and data to "/tests"
+      run: |
+        rm -f config.py
+        cp data/config_xi.py .
+        mv config_xi.py config.py
+        cp create_spectral_library.py tests
+        cp config.py tests
+        cp data/XLpeplib_Beveridge_QEx-HFX_DSS_R1.mgf .
+        cp data/example_CSM_xiFDR2.2.1.csv .
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        python -m pip install flake8 pytest
+        if [ -f requirements.txt ]; then python -m pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest tests/tests-xi.py
diff --git a/README.md b/README.md
@@ -23,6 +23,12 @@ Generate a spectral library for [Spectronaut](https://biognosys.com/software/spe
 - The full spectral library including all target and decoy annotations is created with extension `_spectralLibraryFULL.csv`.
   - This spectral library should be used with [Spectronaut](https://biognosys.com/software/spectronaut/)!
 
+## Usage with xiSearch + xiFDR
+
+Starting with version [1.1.6](https://github.com/hgb-bin-proteomics/MSAnnika_Spectral_Library_exporter/releases/tag/v1.1.6) this script also supports input from
+[xiSearch](https://www.rappsilberlab.org/software/xisearch/) with [xiFDR](https://www.rappsilberlab.org/software/xifdr/). Simply use the validated CSMs file from
+xiFDR (usually ending with `CSM_xiFDR*.*.*.csv`, where `*.*.*` denotes the xiFDR version) as input for the `CSMS_FILE` parameter in the `config.py` file!
+
 ## GUI
 
 ![Screenshot](gui/screenshot.png)
@@ -61,6 +67,8 @@ The following parameters need to be adjusted for your needs in the `config.py` f
 
 # name of the mgf file containing the MS2 spectra
 SPECTRA_FILE = ["20220215_Eclipse_LC6_PepMap50cm-cartridge_mainlib_DSSO_3CV_stepHCD_OT_001.mgf"]
+# you can process multiple files like this:
+# SPECTRA_FILE = ["20220215_Eclipse_LC6_PepMap50cm-cartridge_mainlib_DSSO_3CV_stepHCD_OT_001.mgf", "20220215_Eclipse_LC6_PepMap50cm-cartridge_mainlib_DSSO_3CV_stepHCD_OT_002.mgf"]
 # name of the CSM file exported from Proteome Discoverer
 CSMS_FILE = "20220215_Eclipse_LC6_PepMap50cm-cartridge_mainlib_DSSO_3CV_stepHCD_OT_001.xlsx"
 # name of the experiment / run (any descriptive text is allowed)
@@ -74,6 +82,10 @@ MODIFICATIONS = \
     {"Oxidation": [15.994915],
      "Carbamidomethyl": [57.021464],
      "DSSO": [54.01056, 85.98264, 103.99320]}
+# modifications mapping for xiFDR sequences
+MODIFICATIONS_XI = \
+    {"Ccm": ["C", "Carbamidomethyl"],
+     "Mox": ["M", "Oxidation"]}
 # expected ion types (any of a, b, c, x, y, z)
 ION_TYPES = ("b", "y")
 # maximum expected charge of fragment ions
@@ -82,6 +94,8 @@ MAX_CHARGE = 4
 MATCH_TOLERANCE = 0.02
 # parameters for calculating iRT
 iRT_PARAMS = {"iRT_m": 1.3066, "iRT_t": 29.502}
+# regex pattern used for parsing scan number from the spectrum title
+PARSER_PATTERN = "\\.\\d+\\."
 ```
 
 In case you have more than one `SPECTRA_FILE` you can specify that like this:
@@ -105,6 +119,10 @@ MODIFICATIONS = \
     {"Oxidation": [15.994915],
      "Carbamidomethyl": [57.021464],
      "DSSO": [54.01056, 85.98264, 103.99320]}
+# modifications mapping for xiFDR sequences
+MODIFICATIONS_XI = \
+    {"Ccm": ["C", "Carbamidomethyl"],
+     "Mox": ["M", "Oxidation"]}
 # expected ion types (any of a, b, c, x, y, z)
 ION_TYPES = ("b", "y")
 # maximum expected charge of fragment ions
@@ -113,6 +131,8 @@ MAX_CHARGE = 4
 MATCH_TOLERANCE = 0.02
 # parameters for calculating iRT
 iRT_PARAMS = {"iRT_m": 1.3066, "iRT_t": 29.502}
+# regex pattern used for parsing scan number from the spectrum title
+PARSER_PATTERN = "\\.\\d+\\."
 ```
 
 ## Known Issues

diff --git a/config.py b/config.py
@@ -17,6 +17,10 @@
     {"Oxidation": [15.994915],
      "Carbamidomethyl": [57.021464],
      "DSSO": [54.01056, 85.98264, 103.99320]}
+# modifications mapping for xiFDR sequences
+MODIFICATIONS_XI = \
+    {"Ccm": ["C", "Carbamidomethyl"],
+     "Mox": ["M", "Oxidation"]}
 # expected ion types (any of a, b, c, x, y, z)
 ION_TYPES = ("b", "y")
 # maximum expected charge of fragment ions
@@ -25,3 +29,5 @@
 MATCH_TOLERANCE = 0.02
 # parameters for calculating iRT
 iRT_PARAMS = {"iRT_m": 1.3066, "iRT_t": 29.502}
+# regex pattern used for parsing scan number from the spectrum title
+PARSER_PATTERN = "\\.\\d+\\."