diff --git a/R/glycoAnnotate.R b/R/glycoAnnotate.R index e60d0ef..654b1ee 100644 --- a/R/glycoAnnotate.R +++ b/R/glycoAnnotate.R @@ -5,12 +5,12 @@ #' #' @description \code{glycoAnnotate()} annotates peaks or features in MS data, #' using either a pre-generated table by \link[GlycoAnnotateR]{glycoPredict} or -#' by generating a new table. +#' by generating a new table. #' #' @export -#' +#' #' @slot data Dataframe containing data to be annotated. For example, -#' feature dataframe from XCMS pre-processing (LC-MS or direct inject) +#' feature dataframe from XCMS pre-processing (LC-MS or direct inject) #' or features from Cardinal (MALDI). #' @slot mz_column Name of column containing m/z values. #' @slot mzmin_column OPTIONAL: Name of column containing minimum m/z data values. @@ -21,7 +21,7 @@ #' If supplied, will do overlap-overlap matching. Generally only if mzmin and mzmax #' values generated during peak picking.If not provided, mz value will be annotated #' if within range of theoretical mz +- error. -#' @slot pred_table Table generated previously by \link[GlycoAnnotateR]{glycoPredict}. +#' @slot pred_table Table generated previously by \link[GlycoAnnotateR]{glycoPredict}. #' MUST BE LONG FORMAT - select \code{format='long'} when running prediction. #' @slot param \link[GlycoAnnotateR]{glycoPredictParam} object for generation of table #' of theoretical mz values for annotation. @@ -29,30 +29,30 @@ #' annotations for one mz will be in the same row, comma separated (nrow of output is in #' this case equal to nrow of input data). If \code{FALSE} (default), it is possible #' that rows in the input dataframe are repeated with different annotations. The -#' information on annotations is more detailed in this case. Collapsing can also be done +#' information on annotations is more detailed in this case. Collapsing can also be done #' afterwards on the output using \link[GlycoAnnotateR]{glycoAnnotationsCollapse}. #' @slot collapse_columns Columns to be pasted together before collapsing. #' Only needed if \code{collapse=TRUE} and non-default columns wanted - default is -#' molecule name and ion. If prediction table provided to \code{pred_table} instead of -#' \code{param}, column names are required. +#' molecule name and ion. If prediction table provided to \code{pred_table} instead of +#' \code{param}, column names are required. #' @slot error Numeric value - error used to create window for matching. mz values #' will be matched against theoretical mzs +- error. #' @slot error_units Units for error - can be 'ppm' or 'Da' -#' +#' #' @examples -#' +#' #' #with prediction parameters #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos", naming = "IUPAC") #' annotated_data <- glycoAnnotate(data = data, param = gpp, error = 1.5, units = 'ppm', collapse = T) -#' +#' #' #with prediction table #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos", naming = "IUPAC") #' pred_table <- glycoPredict(param = gpp) #' annotated_data <- glycoAnnotate(data = data, pred_table = pred_table, error = 1.5, units = 'ppm', collapse = T, collapse_columns = c("IUPAC name", "ion")) -#' +#' #' @seealso glycoAnnotateR::glycoPredict() #' @seealso glycoAnnotateR::glycoPredictParam() -#' +#' glycoAnnotate <- function(data, mz_column = 'mz', @@ -85,11 +85,11 @@ glycoAnnotate <- function(data, } } if (!is.null(pred_table) & !is.null(param)){ - stop("pred_table and param supplied.", + stop("pred_table and param supplied.", " please provide ONE type of input for annotation") } if (is.null(pred_table) & is.null(param)){ - stop("no glycoPredictParam supplied to 'param' AND no prediction table", + stop("no glycoPredictParam supplied to 'param' AND no prediction table", " supplied to 'pred_table'. please provide one type of input for annotation") } if (!is.null(pred_table)){ @@ -121,7 +121,7 @@ glycoAnnotate <- function(data, if(!all(collapse_columns %in% names(pred_table))){ stop("collapse_columns are not column names in pred_table!") } - } + } if (!is.null(collapse_columns) & is.null(pred_table)){ message("warning: collapse_columns provided but no pred_table...", "these must correspond to columns in the table newly generated", @@ -133,48 +133,48 @@ glycoAnnotate <- function(data, " before collapsing (and collapsed) - for example the annotation name", " and ion column names") } - + if(!is.null(collapse_columns) & isFALSE(collapse)){ message('collapse_columns provided but collapse is FALSE, no collapse', 'will be performed') } - + if(!is.null(param)){ if(param@format != "long"){ message('change "format" to long in param!')} } - + #run glycoPredict if (!is.null(param)){ message("Starting glycoPredict to generate possible annotations") pred_table <- GlycoAnnotateR::glycoPredict(param = param) - + if(isTRUE(collapse)){ if(!is.null(collapse_columns)){ if(!all(collapse_columns %in% names(pred_table))){ stop("collapse_columns are not columns in the generated prediction table.", "either remove collapse_columns or ensure they match columns!")} - + } } } - + #generate mzmin and mzmax columns in pred_table if(error_units == 'ppm'){ ppm_to_mz = function(mz, noise){ ppm = mz / 1000000 * noise return(ppm) } - pred_table <- pred_table %>% + pred_table <- pred_table %>% dplyr::mutate(mzmin = mz - ppm_to_mz(mz, error), mzmax = mz + ppm_to_mz(mz, error)) } if(error_units == 'Da'){ - pred_table <- pred_table %>% + pred_table <- pred_table %>% dplyr::mutate(mzmin = mz - error, mzmax = mz + error) } - + #run annotation message("Starting annotation with predictions against data") if(!is.null(mzmin_column) & !is.null(mzmax_column)){ @@ -187,26 +187,26 @@ glycoAnnotate <- function(data, data.table::setDT(data) data.table::setDT(pred_table) data.table::setkey(pred_table, mzmin, mzmax) - + data_annot <- data.table::foverlaps(data, pred_table) } if(is.null(mzmin_column) & is.null(mzmax_column)){ - data <- data %>% + data <- data %>% dplyr::mutate(mzmin = get(mz_column), mzmax = get(mz_column)) data.table::setDT(data) data.table::setDT(pred_table) data.table::setkey(pred_table, mzmin, mzmax) - + data_annot <- data.table::foverlaps(data, pred_table) } - + #collapse annotations data.table::setDF(data_annot) if(isTRUE(collapse) & nrow(data_annot) > nrow(data)){ message("Collapsing annotations") - + #add annotation column that is pasted together for collapsing if (is.null(collapse_columns)){ if(length(param@naming) == 1){ @@ -216,9 +216,9 @@ glycoAnnotate <- function(data, collapse_columns = c(paste(param@naming[1], "name"), "ion") } } - - data_annot <- data_annot %>% - dplyr::mutate(annotations = paste0(apply(data_annot[collapse_columns], 1, + + data_annot <- data_annot %>% + dplyr::mutate(annotations = paste0(apply(data_annot[collapse_columns], 1, paste, collapse=':'))) group_column_names <- setdiff(names(data_annot), names(pred_table)) group_column_names <- group_column_names[group_column_names != "annotations"] @@ -226,49 +226,47 @@ glycoAnnotate <- function(data, dplyr::group_by(across(all_of(group_column_names))) %>% dplyr::summarise(annotations = toString(annotations)) %>% dplyr::ungroup() %>% - dplyr::distinct(across(all_of(c(group_column_names, "annotations")))) %>% + dplyr::distinct(across(all_of(c(group_column_names, "annotations")))) %>% dplyr::mutate(annotations = sub('NA:NA', NA, annotations)) } - + #format final df if(isFALSE(collapse)){ - data_annot <- data_annot %>% + data_annot <- data_annot %>% dplyr::select(!c('mzmin', 'mzmax')) } if('mz' %in% names(pred_table) & 'mz' %in% names(data)){ if(isFALSE(collapse)){ - data_annot <- data_annot %>% + data_annot <- data_annot %>% dplyr::rename(mz_pred = mz) } - data_annot <- data_annot %>% - dplyr::rename(mz = `i.mz`) } if(!is.null(mzmin_column)){ if ("i.mzmin" %in% names(data_annot)){ names(data_annot)[names(data_annot) == "i.mzmin"] <- mzmin_column } - } + } if(!is.null(mzmax_column)){ if("i.mzmax" %in% names(data_annot)){ names(data_annot)[names(data_annot) == "i.mzmax"] <- mzmax_column - + } } - + if(is.null(mzmin_column)){ if ("i.mzmin" %in% names(data_annot)){ - data_annot <- data_annot %>% + data_annot <- data_annot %>% dplyr::select(!'i.mzmin') } - } + } if(is.null(mzmax_column)){ if("i.mzmax" %in% names(data_annot)){ - data_annot <- data_annot %>% + data_annot <- data_annot %>% dplyr::select(!'i.mzmax') } } - + return(data_annot) } @@ -280,26 +278,26 @@ glycoAnnotate <- function(data, #' @description \code{glycoAnnotationsCollapse()} collapses the output of #' \link[GlycoAnnotateR]{glycoAnnotate} in the case of multiple annotations #' per peak or feature so that there is one row per peak/feature with -#' multiple annotations comma-separated. +#' multiple annotations comma-separated. #' #' @export -#' -#' @slot annotated_data Dataframe annotated by \link[GlycoAnnotateR]{glycoAnnotate} +#' +#' @slot annotated_data Dataframe annotated by \link[GlycoAnnotateR]{glycoAnnotate} #' that has NOT been collapsed and has multiple annotations per peak/feature. #' @slot collapse_columns Names of columns to be pasted together before collapsing. -#' Suggested is molecule name and ion. +#' Suggested is molecule name and ion. #' @slot noncollapse_columns Names of columns that uniquely identify peaks and #' that should be retained after collapsing - these are generally the column #' names of your input dataframe before annotation. -#' +#' #' @examples #' #annotate dataframe #' gpp <- glycoPredictParam(dp = c(1, 8), modifications = "deoxy", polarity = "pos", naming = "IUPAC") #' annotated_data <- glycoAnnotate(data = data, param = gpp, error = 1.5, units = 'ppm', collapse = F) -#' +#' #' #collapse multiple annotations #' annotated_data_collapsed <- glycoAnnotationsCollapse(annotated_data = annotated_data, collapse_columns = c('IUPAC name', 'ion'), noncollapse_columns = c('mz', 'rt', 'sampleA', 'sampleB')) -#' +#' #' @seealso glycoAnnotateR::glycoPredict() #' @seealso glycoAnnotateR::glycoPredictParam() #' @seealso glycoAnnotateR::glycoAnnotate() @@ -315,8 +313,8 @@ glycoAnnotationsCollapse <- function(annotated_data, if(!all(collapse_columns %in% names(annotated_data))){ stop("collapse_columns are not column names in annotated_data!") } - nrow_distinct = dplyr::distinct(annotated_data, - dplyr::across(dplyr::all_of(noncollapse_columns))) %>% + nrow_distinct = dplyr::distinct(annotated_data, + dplyr::across(dplyr::all_of(noncollapse_columns))) %>% nrow() nrow = nrow(annotated_data) if(nrow_distinct == nrow){ @@ -326,19 +324,19 @@ glycoAnnotationsCollapse <- function(annotated_data, #collapse annotations data.table::setDF(annotated_data) message("Collapsing annotations") - - annotated_data_collapsed <- annotated_data %>% - dplyr::mutate(annotations = paste0(apply(annotated_data[collapse_columns], 1, - paste, collapse=':'))) %>% + + annotated_data_collapsed <- annotated_data %>% + dplyr::mutate(annotations = paste0(apply(annotated_data[collapse_columns], 1, + paste, collapse=':'))) %>% dplyr::group_by(across(all_of(noncollapse_columns))) %>% dplyr::summarise(annotations = toString(annotations)) %>% dplyr::ungroup() %>% dplyr::distinct(across(all_of(c(noncollapse_columns, "annotations")))) - + return(annotated_data_collapsed) - + } - +