Merge pull request #44 from margotbligh/dev

Dev
margotbligh · Jan 31, 2024 · 756d6f8 · 756d6f8
2 parents aacad61 + edeab37
commit 756d6f8
Show file tree

Hide file tree

Showing 33 changed files with 12,752 additions and 39 deletions.
diff --git a/.github/.gitignore b/.github/.gitignore
@@ -0,0 +1 @@
+*.html
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -0,0 +1,48 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+#
+# Builds the pkgdown site for the package and, for every event except pull
+# requests, deploys the built site to the gh-pages branch.
+on:
+  # Run for pushes and pull requests that target main/master, for published
+  # releases, and when started manually (workflow_dispatch).
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+  release:
+    types: [published]
+  workflow_dispatch:
+
+name: pkgdown
+
+jobs:
+  pkgdown:
+    runs-on: ubuntu-latest
+    # Only restrict concurrency for non-PR jobs
+    # (for pull requests the group name falls through to the run id)
+    concurrency:
+      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
+    env:
+      # Expose the workflow token to R under the GITHUB_PAT variable name
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    permissions:
+      # Presumably required so the deploy step can push to gh-pages -- confirm
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: r-lib/actions/setup-pandoc@v2
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      # Requests pkgdown and the package in this repository (local::.) on top
+      # of the dependencies selected by `needs: website`
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::pkgdown, local::.
+          needs: website
+
+      # The run line is R code executed through Rscript. install = FALSE is
+      # presumably safe because the previous step already requested local::.
+      - name: Build site
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+        shell: Rscript {0}
+
+      # Skipped on pull requests; publishes the docs folder to the gh-pages
+      # branch. NOTE(review): clean: false presumably leaves files already on
+      # the branch in place -- confirm against the action's documentation.
+      - name: Deploy to GitHub pages 🚀
+        if: github.event_name != 'pull_request'
+        uses: JamesIves/github-pages-deploy-action@v4.4.1
+        with:
+          clean: false
+          branch: gh-pages
+          folder: docs
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -6,7 +6,7 @@ Authors@R:
 Maintainer: Margot Bligh <mbligh@mpi-bremen.de>
 Description: This package is intended for "calculation" of all possible sugars / glycans within a set of constraining parameters. The function "builds" and returns names, formulas and masses. The user also provides parameters related to mass spectrometry.
 License: GPL-2
-URL: https://margotbligh.github.io/GlycoAnnotateR, https://github.com/margotbligh/GlycoAnnotateR
+URL: https://margotbligh.github.io/GlycoAnnotateR, https://github.com/margotbligh/GlycoAnnotateR, https://margotbligh.github.io/GlycoAnnotateR/
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
@@ -16,7 +16,10 @@ Imports:
 	magrittr,
 	dplyr,
 	stringr,
-	data.table
+	data.table,
+	xcms
+biocViews:
+	xcms
 Config/reticulate:
   list(
     packages = list(
@@ -32,3 +35,4 @@ Collate:
     'setClass.R'
     'glycoPredict.R'
     'glycoAnnotate.R'
+    'glycoMS2Extract.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(glycoAnnotate)
 export(glycoAnnotationsCollapse)
+export(glycoMS2Extract)
 export(glycoPredict)
 export(glycoPredictParam)
 exportClasses(glycoPredictParam)
diff --git a/R/glycoAnnotate.R b/R/glycoAnnotate.R
@@ -151,7 +151,7 @@ glycoAnnotate <- function(data,
 
     if(isTRUE(collapse)){
       if(!is.null(collapse_columns)){
-        if(!collapse_columns %in% names(pred_table)){
+        if(!all(collapse_columns %in% names(pred_table))){
           stop("collapse_columns are not columns in the generated prediction table.",
                "either remove collapse_columns or ensure they match columns!")}
 

diff --git a/R/glycoMS2Extract.R b/R/glycoMS2Extract.R
@@ -0,0 +1,45 @@
+#' MS/MS Spectra Extraction from annotated features
+#'
+#' @description This function extracts MS/MS spectra associated with
+#' features annotated by the \link[GlycoAnnotateR]{glycoAnnotate} function.
+#'
+#' @details Features are matched to annotations by comparing the `mzmed` and
+#' `rtmed` values of the feature definitions in `data_features` with the `mz`
+#' and `rt` columns of `annotations` as text, so `annotations` must carry the
+#' feature values unchanged (not rounded). Spectra are then retrieved with
+#' [xcms::featureSpectra()] using `msLevel = 2` and `expandMz = 0.005`.
+#'
+#' The function stops if any file in `data_ms2` has no MS level 2 spectra or
+#' if `annotations` lacks the "mz" or "rt" column, and warns when none of the
+#' features match the annotations.
+#'
+#' @param data_ms2 [MSnbase::MSnExp()], [MSnbase::OnDiskMSnExp()] or [xcms::XCMSnExp()]
+#'  object with MS/MS spectra.
+#' @param data_features [xcms::XCMSnExp()] with MS1 features defined by XCMS processing
+#' that were annotated by \link[GlycoAnnotateR]{glycoAnnotate}.
+#' @param annotations Output of \link[GlycoAnnotateR]{glycoAnnotate}. It needs to be a
+#'  `data.frame` with numerical columns named "mz" and "rt" (the mz column contains
+#'  the *m/z* of the features, not of the annotations!).
+#'
+#' @return It returns a MSpectra object with all msLevel=2 spectra whose
+#' precursors are the features annotated by \link[GlycoAnnotateR]{glycoAnnotate} function.
+#'
+#' @export
+#'
+#' @seealso [GlycoAnnotateR::glycoPredict()]
+#' @seealso [GlycoAnnotateR::glycoPredictParam()]
+#'
+glycoMS2Extract <- function(data_ms2, data_features, annotations){
+
+  # Count spectra per MS level (rows) and per file (columns).
+  feature_data <- data_ms2@featureData@data
+  spectra_counts <- table(feature_data$msLevel, feature_data$fileIdx)
+
+  # Look the MS level 2 row up by name rather than by position: row 2 is only
+  # MS level 2 when level 1 is also present, and there is no row 2 at all when
+  # the data holds a single MS level.
+  if (!"2" %in% rownames(spectra_counts) ||
+      any(spectra_counts["2", ] == 0)) {
+    stop("Error: Some of the files do not have MS level 2 data.")
+  }
+
+  # Without these columns every key below collapses to "_" and nothing matches,
+  # so fail early with a clear message instead.
+  if (!all(c("mz", "rt") %in% names(annotations))) {
+    stop("annotations must contain columns named 'mz' and 'rt'.")
+  }
+
+  # Change "data_ms2" into "XCMSnExp"
+  data_ms2 <- methods::as(data_ms2, "XCMSnExp")
+
+  # Overwrite "data_ms2" peaks with the ones in "data_features". The
+  # replacement functions are namespace-qualified so that they resolve even
+  # when xcms is not attached.
+  xcms::chromPeaks(data_ms2) <- xcms::chromPeaks(data_features)
+
+  # Keep only the features whose (mzmed, rtmed) pair occurs in the annotations.
+  # NOTE(review): the comparison is on the text form of the numbers, so it
+  # relies on annotations$mz / annotations$rt being the unmodified feature
+  # values.
+  feature_defs <- xcms::featureDefinitions(data_features)
+  feature_keys <- paste0(feature_defs$mzmed, "_", feature_defs$rtmed)
+  annotation_keys <- paste0(annotations$mz, "_", annotations$rt)
+  annotated_defs <- feature_defs[feature_keys %in% annotation_keys, ]
+
+  if (nrow(annotated_defs) == 0) {
+    warning("No features in data_features match the mz and rt values in annotations!")
+  }
+
+  xcms::featureDefinitions(data_ms2) <- annotated_defs
+
+  # Get msLevel=2 spectra that are related to the retained features.
+  MS2Spectra <- xcms::featureSpectra(data_ms2, msLevel = 2, expandMz = 0.005)
+
+  return(MS2Spectra)
+}
diff --git a/R/glycoPredict.R b/R/glycoPredict.R
@@ -159,5 +159,9 @@ glycoPredict <- function(param){
       dplyr::select(!matches("delta_|^[[:upper:]][a,c]?$|_effect"))
 
   }
+
+  if (nrow(df) == 0){
+    warning('Output has zero rows! Check your scan range, adducts/polarity and DP range are sensible')
+  }
   return(df)
 }
diff --git a/README.md b/README.md
@@ -38,19 +38,102 @@ The 'prediction' or 'calculation' of glycan compositions is the core utility of
 
 * Label, `label`
 
-  Are sugars labelled by reductive amination? Current supported labels are: "none", "procainamide","2-aminobenzoic acid", "2-aminobenzamide", "1-phenyl-3-methyl-5-pyrazolone". Common abbreviations or notations for these labels are generally accepted (e.g. 'pmp' or 'PMP' for the latter).
+  Are sugars labelled by reductive amination? Currently supported labels are `none` (default) and those given in the table below:
+
+| **Label**                      | **Accepted names**                                  |
+|--------------------------------|-----------------------------------------------------|
+| procainamide                   | "procainamide", "proca", "procA", "ProA"            |
+| 2-aminopyridine                | "2-ap", "2-AP", "pa", "PA", "2-aminopyridine"       |
+| 2-aminobenzoic acid            | "2-aa", "2-AA", "aba", "ABA", "2-aminobenzoic acid" |
+| 2-aminobenzamide               | "2-ab", "2-AB", "ab", "AB", "2-aminobenzamide"      |
+| 1-phenyl-3-methyl-5-pyrazolone | "pmp", "PMP", "1-phenyl-3-methyl-5-pyrazolone"      |
+
+* Double sulphate, `double_sulphate`
+
+  Can monomers be disulphated? Logical option required. To work `sulphate` must be in modifications and `nmod_max` at least 2.
+
+* Glycan linkage, `glycan_linkage`
+
+  By default `none`. When `oglycan` or `nglycan` the limits described by Cooper et al. (2021) for the GlycoMod software are implemented. Rules are listed here: https://web.expasy.org/glycomod/glycomod-doc.html
+
+* Modification limits, `modification_limits`
+
+  User provided limits on monomers or modifications. Provide as a named list.
 
 * Modifications, `modifications`
 
   By default, each modification can occur once per monomer, and it is possible to have all modifications selected present on one monomer. After calculation of modified monomers they are filtered by the `nmod_max` term before output is returned. So, for example, for `modifications = c('deoxy', 'sulphate', 'carboxylicacid')`, the program will generate as one possible composition all three modifications on one monomer (i.e. 'DeoxyHex1 CarboxylicAcid1 Sulphate1'). If `nmod_max` is at the default 1, this composition will be filtered out before output is returned (as the `nmod` = 3). Sulphate is the only modification which is allowed to occur twice per monomer. For this, you need to set `double_sulphate=TRUE` and `nmod_max` to be at least 2.
 
   The different modifications and their namings are summarised below:
 
-
 
-
-  * Sulphate, `'sulphate'`
-  * Phosphate, `'phosphate'`
+| **Modification**  | **Definition / description**                                                                                                                                                                                  | **IUPAC naming** | **GlycoCT naming** | **Oxford naming** |
+|-------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|--------------------|-------------------|
+| carboxylicacid    | Effective loss of two hydrogens and gain of one oxygen to form a carboxylic acid group on C6. The modified monomer is commonly called a 'uronic acid'                                                         | CarboxylicAcid   | COOH               | A                 |
+| sialicacid        | Effective addition of C11H19N1O9 to hexose. Here, sialic acid only refers to N-Acetylneuraminic acid (Neu5Ac), the most common sialic acid. Predominantly found in complex mammalian glycans.                 | NeuAc            | SIA                | SA                |
+| phosphate         |                                                                                                                                                                                                               | Phosphate        | PO4                | P                 |
+| sulphate          | Addition of SO3. Only modification allowed to occur twice per monomer (see options for `double_sulphate`)                                                                                                     | Sulfate          | SO4                | S                 |
+| amino             | Gain of NH and loss of O - result of replacing a hydroxyl group with an amino group.                                                                                                                          | Amino            | NH2                | Am                |
+| deoxy             | One hydroxyl group is replaced by an H atom. Fucose and rhamnose are two common deoxyhexoses. NB: GlycoAnnotateR currently only considers deoxyhexoses and not deoxypentoses.                                 | DeoxyHex         | DHEX               | D                 |
+| nacetyl           | Addition of an N-acetyl group (net change = +C2H3N) . Common example of N-acetylated hexose is N-acetylglucosamine. Note that here, N-acetylglucosamine would be termed in e.g. IUPAC naming Hex1 N-Acetyl1.  | N-Acetyl         | NAc                | N                 |
+| oacetyl           | Acetylation of a hydroxyl group (net change = +C2H2O).                                                                                                                                                        | O-Acetyl         | Ac                 | Ac                |
+| omethyl           | Addition of CH2 to an hydroxyl group. Natural modification, but can also be generated by permethylation.                                                                                                      | O-Methyl         | OMe                | M                 |
+| anhydrobridge     | Water loss formed by bridge between two hydroxyl groups. Occurs from C6 to C3, C2 or C1. Seen in e.g. carrageenans.                                                                                           | AnhydroBridge    | ANH                | B                 |
+| unsaturated       | Water loss to form a C-C double bond inside a ring. Seen for example in ulvans and are the target of polysaccharide lyases.                                                                                   | Unsaturated      | UNS                | U                 |
+| dehydrated        | Water loss that occurs during ionisation or other reactions.                                                                                                                                                  | Dehydrated       | Y                  | Y                 |
+| alditol           | Reducing end monomer is opened and the aldehyde reduced to an alcohol. Commonly done before PGC-LC to reduce anomer splitting of peaks. Refers to an alditol 'modification' not a monomer here.               | Alditol          | ALD                | o                 |
+| aminopentyllinker | Functional group used in synthetic chemistry. Can occur once per composition.                                                                                                                                 | NH2Pent1         | NH2Pent1           | NH2Pent1          |
+
+### Mass spec parameters
+
+* Scan range, `scan_range`
+
+  Scan range (*m/z*) used during acquisition. For prediction/computation purposes only this can be set very wide. Compositions with no adduct with an *m/z* value inside the scan range will be filtered out.
+
+* Polarity, `polarity`
+
+  Negative (`neg`) and/or positive (`pos`) ionisation polarity used during acquisition. Changes the adducts returned. See below for specific adducts generated.
+
+* Ionisation type, `ion_type`
+
+  ESI (`ESI`) and/or MALDI (`MALDI`) ionisation used. Changes the adducts returned (MALDI has only singly charged ions, ESI can have multiply charged). See below for specific adducts generated.
+
+
+### Output and other parameters
+
+* Naming, `naming`
+
+  How should compositions be named? Options are `IUPAC`, `GlycoCT` and `Oxford`. As only compositions and not structures are given, conventions could not be followed closely, but common abbreviations from the conventions are used (see modifications table above).
+
+* Adducts, `adducts`
+
+  Options are: `H`, `Na`, `NH4`, `K`, `Cl` and `CHOO`. The adducts generated depend on `adducts`, `polarity` and `ion_type`. The resulting adducts are summarised in the table below:
+
+__NB:__ *n* is the number of anionic groups. Where relevant, ions will be generated with *n* values from 2 to *n*. For example, in negative mode with MALDI and Na adducts, for a composition with four sulphate groups (*n* = 4) the adducts will include [M-2H+1Na]<sup>-</sup>, [M-3H+2Na]<sup>-</sup> and [M-4H+3Na]<sup>-</sup>.
+
+| **Adduct** | **Ion type** | **Polarity** | **Adducts generated**      |
+|------------|--------------|--------------|----------------------------|
+| H          | ESI          | Positive     | [M+H]<sup>+</sup>                     |
+|            |              | Negative     | [M-H]<sup>-</sup>, [M-*n*H]<sup>-*n*</sup>           |
+|            | MALDI        | Positive     | [M+H]<sup>+</sup>                     |
+|            |              | Negative     | [M-H]<sup>-</sup>                     |
+| Na         | ESI          | Positive     | [M+Na]<sup>+</sup>, [M-*n*H+(*n*+1)Na]<sup>+</sup>   |
+|            |              | Negative     | [M-*n*H+(*n*-1)Na]<sup>-</sup>            |
+|            | MALDI        | Positive     | [M+Na]<sup>+</sup>, [M-*n*H+(*n*+1)Na]<sup>+</sup>   |
+|            |              | Negative     | [M-*n*H+(*n*-1)Na]<sup>-</sup>            |
+| NH4        | ESI          | Positive     | [M+NH4]<sup>+</sup>, [M-*n*H+(*n*+1)NH4]<sup>+</sup> |
+|            | MALDI        | Positive     | [M+NH4]<sup>+</sup>, [M-*n*H+(*n*+1)NH4]<sup>+</sup> |
+|            |              | Negative     | [M-*n*H+(*n*-1)NH4]<sup>-</sup>           |
+| K          | ESI          | Positive     | [M+K]<sup>+</sup>, [M-*n*H+(*n*+1)K]<sup>+</sup>     |
+|            | MALDI        | Positive     | [M+K]<sup>+</sup>, [M-*n*H+(*n*+1)K]<sup>+</sup>     |
+|            |              | Negative     | [M-*n*H+(*n*-1)K]<sup>-</sup>             |
+| Cl         | ESI          | Negative     | [M+Cl]<sup>-</sup>                    |
+|            | MALDI        | Negative     | [M+Cl]<sup>-</sup>                    |
+| CHOO       | ESI          | Negative     | [M+CHOO]<sup>-</sup>                  |
+|            | MALDI        | Negative     | [M+CHOO]<sup>-</sup>                  |
+
+
+
 
 
 
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -0,0 +1,4 @@
+url: https://margotbligh.github.io/GlycoAnnotateR/
+template:
+  bootstrap: 5
+