Merge pull request #45 from margotbligh/dev

Dev
margotbligh · Feb 5, 2024 · c95d13f · c95d13f
2 parents 756d6f8 + 0953022
commit c95d13f
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 90 deletions.
diff --git a/R/glycoAnnotate.R b/R/glycoAnnotate.R
@@ -5,12 +5,12 @@
 #'
 #' @description \code{glycoAnnotate()} annotates peaks or features in MS data,
 #' using either a pre-generated table by \link[GlycoAnnotateR]{glycoPredict} or
-#' by generating a new table. 
+#' by generating a new table.
 #'
 #' @export
-#' 
+#'
 #' @slot data Dataframe containing data to be annotated. For example,
-#' feature dataframe from XCMS pre-processing (LC-MS or direct inject) 
+#' feature dataframe from XCMS pre-processing (LC-MS or direct inject)
 #' or features from Cardinal (MALDI).
 #' @slot mz_column Name of column containing m/z values.
 #' @slot mzmin_column OPTIONAL: Name of column containing minimum m/z data values.
@@ -21,38 +21,38 @@
 #' If supplied, will do overlap-overlap matching. Generally only used if mzmin and mzmax
 #' values were generated during peak picking. If not provided, an mz value will be annotated
 #' if it is within the range of theoretical mz +- error.
-#' @slot pred_table Table generated previously by \link[GlycoAnnotateR]{glycoPredict}. 
+#' @slot pred_table Table generated previously by \link[GlycoAnnotateR]{glycoPredict}.
 #' MUST BE LONG FORMAT - select \code{format='long'} when running prediction.
 #' @slot param \link[GlycoAnnotateR]{glycoPredictParam} object for generation of table
 #' of theoretical mz values for annotation.
 #' @slot collapse Logical. If \code{TRUE}, annotations will be 'collapsed' so that multiple
 #' annotations for one mz will be in the same row, comma separated (nrow of output is in
 #' this case equal to nrow of input data). If \code{FALSE} (default), it is possible
 #' that rows in the input dataframe are repeated with different annotations. The
-#' information on annotations is more detailed in this case. Collapsing can also be done 
+#' information on annotations is more detailed in this case. Collapsing can also be done
 #' afterwards on the output using \link[GlycoAnnotateR]{glycoAnnotationsCollapse}.
 #' @slot collapse_columns Columns to be pasted together before collapsing.
 #' Only needed if \code{collapse=TRUE} and non-default columns wanted - default is
-#' molecule name and ion. If prediction table provided to \code{pred_table} instead of 
-#' \code{param}, column names are required. 
+#' molecule name and ion. If prediction table provided to \code{pred_table} instead of
+#' \code{param}, column names are required.
 #' @slot error Numeric value - error used to create window for matching. mz values
 #' will be matched against theoretical mzs +- error.
 #' @slot error_units Units for error - can be 'ppm' or 'Da'
-#' 
+#'
 #' @examples
-#' 
+#'
 #' #with prediction parameters
 #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos", naming = "IUPAC")
 #' annotated_data <- glycoAnnotate(data = data, param = gpp, error = 1.5, error_units = 'ppm', collapse = TRUE)
-#' 
+#'
 #' #with prediction table
 #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos",  naming = "IUPAC")
 #' pred_table <- glycoPredict(param = gpp)
 #' annotated_data <- glycoAnnotate(data = data, pred_table = pred_table, error = 1.5, error_units = 'ppm', collapse = TRUE, collapse_columns = c("IUPAC name", "ion"))
-#' 
+#'
 #' @seealso GlycoAnnotateR::glycoPredict()
 #' @seealso GlycoAnnotateR::glycoPredictParam()
-#' 
+#'
 
 glycoAnnotate <- function(data,
                           mz_column = 'mz',
@@ -85,11 +85,11 @@ glycoAnnotate <- function(data,
     }
   }
   if (!is.null(pred_table) & !is.null(param)){
-    stop("pred_table and param supplied.", 
+    stop("pred_table and param supplied.",
          " please provide ONE type of input for annotation")
   }
   if (is.null(pred_table) & is.null(param)){
-    stop("no glycoPredictParam supplied to 'param' AND no prediction table", 
+    stop("no glycoPredictParam supplied to 'param' AND no prediction table",
          " supplied to 'pred_table'. please provide one type of input for annotation")
   }
   if (!is.null(pred_table)){
@@ -121,7 +121,7 @@ glycoAnnotate <- function(data,
     if(!all(collapse_columns %in% names(pred_table))){
       stop("collapse_columns are not column names in pred_table!")
     }
-  } 
+  }
   if (!is.null(collapse_columns) & is.null(pred_table)){
     message("warning: collapse_columns provided but no pred_table...",
             "these must correspond to columns in the table newly generated",
@@ -133,48 +133,48 @@ glycoAnnotate <- function(data,
           " before collapsing (and collapsed) - for example the annotation name",
           " and ion column names")
   }
-  
+
   if(!is.null(collapse_columns) & isFALSE(collapse)){
     message('collapse_columns provided but collapse is FALSE, no collapse',
             'will be performed')
   }
-  
+
   if(!is.null(param)){
     if(param@format != "long"){
       message('change "format" to long in param!')}
   }
-  
+
   #run glycoPredict
   if (!is.null(param)){
     message("Starting glycoPredict to generate possible annotations")
     pred_table <- GlycoAnnotateR::glycoPredict(param = param)
-    
+
     if(isTRUE(collapse)){
       if(!is.null(collapse_columns)){
         if(!all(collapse_columns %in% names(pred_table))){
           stop("collapse_columns are not columns in the generated prediction table.",
                "either remove collapse_columns or ensure they match columns!")}
-        
+
       }
     }
   }
-  
+
   #generate mzmin and mzmax columns in pred_table
   if(error_units == 'ppm'){
     ppm_to_mz = function(mz, noise){
       ppm = mz / 1000000 * noise
       return(ppm)
     }
-    pred_table <- pred_table %>% 
+    pred_table <- pred_table %>%
       dplyr::mutate(mzmin = mz - ppm_to_mz(mz, error),
                     mzmax = mz + ppm_to_mz(mz, error))
   }
   if(error_units == 'Da'){
-    pred_table <- pred_table %>% 
+    pred_table <- pred_table %>%
       dplyr::mutate(mzmin = mz - error,
                     mzmax = mz + error)
   }
-  
+
   #run annotation
   message("Starting annotation with predictions against data")
   if(!is.null(mzmin_column) & !is.null(mzmax_column)){
@@ -187,26 +187,26 @@ glycoAnnotate <- function(data,
     data.table::setDT(data)
     data.table::setDT(pred_table)
     data.table::setkey(pred_table, mzmin, mzmax)
-    
+
     data_annot <- data.table::foverlaps(data, pred_table)
   }
   if(is.null(mzmin_column) & is.null(mzmax_column)){
-    data <- data %>% 
+    data <- data %>%
       dplyr::mutate(mzmin = get(mz_column),
                     mzmax = get(mz_column))
 
     data.table::setDT(data)
     data.table::setDT(pred_table)
     data.table::setkey(pred_table, mzmin, mzmax)
-    
+
     data_annot <- data.table::foverlaps(data, pred_table)
   }
-  
+
   #collapse annotations
   data.table::setDF(data_annot)
   if(isTRUE(collapse) & nrow(data_annot) > nrow(data)){
     message("Collapsing annotations")
-    
+
     #add annotation column that is pasted together for collapsing
     if (is.null(collapse_columns)){
       if(length(param@naming) == 1){
@@ -216,59 +216,57 @@ glycoAnnotate <- function(data,
         collapse_columns = c(paste(param@naming[1], "name"), "ion")
       }
     }
-    
-    data_annot <- data_annot %>% 
-      dplyr::mutate(annotations = paste0(apply(data_annot[collapse_columns], 1, 
+
+    data_annot <- data_annot %>%
+      dplyr::mutate(annotations = paste0(apply(data_annot[collapse_columns], 1,
                                                paste, collapse=':')))
     group_column_names <- setdiff(names(data_annot), names(pred_table))
     group_column_names <- group_column_names[group_column_names != "annotations"]
     data_annot <- data_annot %>%
       dplyr::group_by(across(all_of(group_column_names))) %>%
       dplyr::summarise(annotations = toString(annotations)) %>%
       dplyr::ungroup() %>%
-      dplyr::distinct(across(all_of(c(group_column_names, "annotations")))) %>% 
+      dplyr::distinct(across(all_of(c(group_column_names, "annotations")))) %>%
       dplyr::mutate(annotations = sub('NA:NA', NA, annotations))
 
   }
-  
+
   #format final df
   if(isFALSE(collapse)){
-    data_annot <- data_annot %>% 
+    data_annot <- data_annot %>%
       dplyr::select(!c('mzmin', 'mzmax'))
   }
   if('mz' %in% names(pred_table) & 'mz' %in% names(data)){
     if(isFALSE(collapse)){
-      data_annot <-  data_annot %>% 
+      data_annot <-  data_annot %>%
         dplyr::rename(mz_pred = mz)
     }
-    data_annot <-  data_annot %>% 
-      dplyr::rename(mz = `i.mz`)
   }
   if(!is.null(mzmin_column)){
     if ("i.mzmin" %in% names(data_annot)){
       names(data_annot)[names(data_annot) == "i.mzmin"] <-  mzmin_column
     }
-  } 
+  }
   if(!is.null(mzmax_column)){
     if("i.mzmax" %in% names(data_annot)){
       names(data_annot)[names(data_annot) == "i.mzmax"] <-  mzmax_column
-      
+
     }
   }
-  
+
   if(is.null(mzmin_column)){
     if ("i.mzmin" %in% names(data_annot)){
-      data_annot <- data_annot %>% 
+      data_annot <- data_annot %>%
         dplyr::select(!'i.mzmin')
     }
-  } 
+  }
   if(is.null(mzmax_column)){
     if("i.mzmax" %in% names(data_annot)){
-      data_annot <- data_annot %>% 
+      data_annot <- data_annot %>%
         dplyr::select(!'i.mzmax')
     }
   }
-  
+
   return(data_annot)
 }
 
@@ -280,26 +278,26 @@ glycoAnnotate <- function(data,
 #' @description \code{glycoAnnotationsCollapse()} collapses the output of
 #' \link[GlycoAnnotateR]{glycoAnnotate} in the case of multiple annotations
 #' per peak or feature so that there is one row per peak/feature with
-#' multiple annotations comma-separated. 
+#' multiple annotations comma-separated.
 #'
 #' @export
-#' 
-#' @slot annotated_data Dataframe annotated by \link[GlycoAnnotateR]{glycoAnnotate} 
+#'
+#' @slot annotated_data Dataframe annotated by \link[GlycoAnnotateR]{glycoAnnotate}
 #' that has NOT been collapsed and has multiple annotations per peak/feature.
 #' @slot collapse_columns Names of columns to be pasted together before collapsing.
-#' Suggested is molecule name and ion. 
+#' Suggested is molecule name and ion.
 #' @slot noncollapse_columns Names of columns that uniquely identify peaks and
 #' that should be retained after collapsing - these are generally the column
 #' names of your input dataframe before annotation.
-#' 
+#'
 #' @examples
 #' #annotate dataframe
 #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos", naming = "IUPAC")
 #' annotated_data <- glycoAnnotate(data = data, param = gpp, error = 1.5, error_units = 'ppm', collapse = FALSE)
-#' 
+#'
 #' #collapse multiple annotations
 #' annotated_data_collapsed <- glycoAnnotationsCollapse(annotated_data = annotated_data, collapse_columns = c('IUPAC name', 'ion'), noncollapse_columns = c('mz', 'rt', 'sampleA', 'sampleB'))
-#' 
+#'
 #' @seealso GlycoAnnotateR::glycoPredict()
 #' @seealso GlycoAnnotateR::glycoPredictParam()
 #' @seealso GlycoAnnotateR::glycoAnnotate()
@@ -315,8 +313,8 @@ glycoAnnotationsCollapse <- function(annotated_data,
   if(!all(collapse_columns %in% names(annotated_data))){
       stop("collapse_columns are not column names in annotated_data!")
   }
-  nrow_distinct = dplyr::distinct(annotated_data, 
-                                  dplyr::across(dplyr::all_of(noncollapse_columns))) %>% 
+  nrow_distinct = dplyr::distinct(annotated_data,
+                                  dplyr::across(dplyr::all_of(noncollapse_columns))) %>%
     nrow()
   nrow = nrow(annotated_data)
   if(nrow_distinct == nrow){
@@ -326,19 +324,19 @@ glycoAnnotationsCollapse <- function(annotated_data,
   #collapse annotations
   data.table::setDF(annotated_data)
   message("Collapsing annotations")
-  
-  annotated_data_collapsed <- annotated_data %>% 
-    dplyr::mutate(annotations = paste0(apply(annotated_data[collapse_columns], 1, 
-                                             paste, collapse=':'))) %>% 
+
+  annotated_data_collapsed <- annotated_data %>%
+    dplyr::mutate(annotations = paste0(apply(annotated_data[collapse_columns], 1,
+                                             paste, collapse=':'))) %>%
     dplyr::group_by(across(all_of(noncollapse_columns))) %>%
     dplyr::summarise(annotations = toString(annotations)) %>%
     dplyr::ungroup() %>%
     dplyr::distinct(across(all_of(c(noncollapse_columns, "annotations"))))
-  
+
   return(annotated_data_collapsed)
-  
+
 }
-  
+
 
 
 

diff --git a/R/glycoMS2Extract.R b/R/glycoMS2Extract.R
@@ -1,45 +1,45 @@
 #' MS/MS Spectra Extraction from annotated features
 #'
-#' @description This function extracts MS/MS spectra associated with 
+#' @description This function extracts MS/MS spectra associated with
 #' features annotated by \link[GlycoAnnotateR]{glycoAnnotate} function.
 
 #' @param data_ms2 [MSnbase::MSnExp()], [MSnbase::OnDiskMSnExp()] or [xcms::XCMSnExp()]
 #'  object with MS/MS spectra.
 #' @param data_features [xcms::XCMSnExp()] with MS1 features defined by XCMS processing
 #' that were annotated by \link[GlycoAnnotateR]{glycoAnnotate}.
 #' @param annotations Output of \link[GlycoAnnotateR]{glycoAnnotate}. It needs to be a
-#'  `data.frame` with numerical columns named "mz" and "rt" (mz columns contains *m/z* 
+#'  `data.frame` with numerical columns named "mz" and "rt" (mz columns contains *m/z*
 #'  of the features, not of the annotations!).
 #'
-#' @return It returns a MSpectra object with all msLevel=2 spectra whose 
+#' @return It returns a MSpectra object with all msLevel=2 spectra whose
 #' precursors are the features annotated by \link[GlycoAnnotateR]{glycoAnnotate} function.
 #'
 #' @export
-#' 
+#'
 #' @seealso [GlycoAnnotateR::glycoPredict()]
 #' @seealso [GlycoAnnotateR::glycoPredictParam()]
 #'
 
 glycoMS2Extract = function(data_ms2, data_features, annotations){
-  
+
   # Check if "data_ms2" has msLevel = 2 spectra.
-  if(any(table(data_ms2@featureData@data$msLevel, data_ms2@featureData@data$fileIdx)[2,]==0)){
-    stop("Error: Some of the files do not have MS level 2 data.")
+  if(all(table(data_ms2@featureData@data$msLevel, data_ms2@featureData@data$fileIdx)[2,]==0)){
+    stop("Error: All of the files do not have MS level 2 data.")
   }
-  
+
   # Change "data_ms2" into "XCMSnExp"
   data_ms2 = as(data_ms2, "XCMSnExp")
-  
+
   # Overwrite "data_ms2" peaks and features for the ones in "data_features"
   chromPeaks(data_ms2) = xcms::chromPeaks(data_features)
   featureDefinition = xcms::featureDefinitions(data_features)
   featureDefinition_filtered = featureDefinition[paste0(featureDefinition$mzmed,"_", featureDefinition$rtmed) %in%
                                                    paste0(annotations$mz,"_",annotations$rt),]
-  
+
   featureDefinitions(data_ms2) = featureDefinition_filtered
-  
+
   # Get msLevel=2 spectra that are related to the defined features.
   MS2Spectra = xcms::featureSpectra(data_ms2, msLevel = 2, expandMz = 0.005)
-  
+
   return(MS2Spectra)
-}
+}