Skip to content

Commit

Permalink
Merge pull request #46 from margotbligh/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
margotbligh authored Feb 29, 2024
2 parents c95d13f + b0ba692 commit e5f1e67
Show file tree
Hide file tree
Showing 8 changed files with 1,082 additions and 142 deletions.
74 changes: 27 additions & 47 deletions R/glycoAnnotate.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,42 +166,52 @@ glycoAnnotate <- function(data,
return(ppm)
}
pred_table <- pred_table %>%
dplyr::mutate(mzmin = mz - ppm_to_mz(mz, error),
mzmax = mz + ppm_to_mz(mz, error))
dplyr::mutate(mzmin_match = mz - ppm_to_mz(mz, error),
mzmax_match = mz + ppm_to_mz(mz, error))
}
if(error_units == 'Da'){
pred_table <- pred_table %>%
dplyr::mutate(mzmin = mz - error,
mzmax = mz + error)
dplyr::mutate(mzmin_match = mz - error,
mzmax_match = mz + error)
}

#rename mz column in pred_table
names(pred_table)[names(pred_table) == 'mz'] <- 'mz_pred'

#run annotation
message("Starting annotation with predictions against data")
if(!is.null(mzmin_column) & !is.null(mzmax_column)){
if (mzmin_column != "mzmin"){
names(data)[names(data) == mzmin_column] <- "mzmin"
}
if (mzmax_column != "mzmax"){
names(data)[names(data) == mzmax_column] <- "mzmax"
}
data$mzmin_match <- data[, mzmin_column]
data$mzmax_match <- data[, mzmax_column]

data.table::setDT(data)
data.table::setDT(pred_table)
data.table::setkey(pred_table, mzmin, mzmax)
data.table::setkey(pred_table, mzmin_match, mzmax_match)

data_annot <- data.table::foverlaps(data, pred_table)
}
if(is.null(mzmin_column) & is.null(mzmax_column)){
data <- data %>%
dplyr::mutate(mzmin = get(mz_column),
mzmax = get(mz_column))
dplyr::mutate(mzmin_match = get(mz_column),
mzmax_match = get(mz_column))

data.table::setDT(data)
data.table::setDT(pred_table)
data.table::setkey(pred_table, mzmin, mzmax)
data.table::setkey(pred_table, mzmin_match, mzmax_match)

data_annot <- data.table::foverlaps(data, pred_table)
}

#calculate mass error
if(mz_column == 'mz'){
data_annot <- data_annot %>%
dplyr::mutate(mass_error = abs(mz - mz_pred))
}
if(mz_column != 'mz'){
data_annot <- data_annot %>%
dplyr::mutate(mass_error = abs(get(mz_column) - mz_pred))
}

#collapse annotations
data.table::setDF(data_annot)
if(isTRUE(collapse) & nrow(data_annot) > nrow(data)){
Expand Down Expand Up @@ -232,40 +242,10 @@ glycoAnnotate <- function(data,
}

#format final df
if(isFALSE(collapse)){
data_annot <- data_annot %>%
dplyr::select(!c('mzmin', 'mzmax'))
}
if('mz' %in% names(pred_table) & 'mz' %in% names(data)){
if(isFALSE(collapse)){
data_annot <- data_annot %>%
dplyr::rename(mz_pred = mz)
}
}
if(!is.null(mzmin_column)){
if ("i.mzmin" %in% names(data_annot)){
names(data_annot)[names(data_annot) == "i.mzmin"] <- mzmin_column
}
}
if(!is.null(mzmax_column)){
if("i.mzmax" %in% names(data_annot)){
names(data_annot)[names(data_annot) == "i.mzmax"] <- mzmax_column
data_annot <- data_annot %>%
dplyr::select(!any_of(c('mzmin_match', 'mzmax_match',
'i.mzmin_match', 'i.mzmax_match')))

}
}

if(is.null(mzmin_column)){
if ("i.mzmin" %in% names(data_annot)){
data_annot <- data_annot %>%
dplyr::select(!'i.mzmin')
}
}
if(is.null(mzmax_column)){
if("i.mzmax" %in% names(data_annot)){
data_annot <- data_annot %>%
dplyr::select(!'i.mzmax')
}
}

return(data_annot)
}
Expand Down
155 changes: 85 additions & 70 deletions R/glycoPredict.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#'
#' @include setClass.R
#'
#' @description \code{glycoPredict()} predicts all possible glycans within the
#' @description \code{glycoPredict()} predicts all possible glycans within the
#' constraints set by the \code{glycoPredictParam} object.
#' @param param A \code{glycoPredictParam} object. See \link[GlycoAnnotateR]{glycoPredictParam}
#'
#' @export
#'
#' @export
#'
#' @examples
#' gpp <- glycoPredictParam()
#' gpp@@dp <- c(1,7)
Expand All @@ -16,19 +16,19 @@
#' gpp@@modifications <- c('sulphate', 'carboxylicacid')
#' gpp@@double_sulphate <- TRUE
#' predicted.df <- glycoPredict(param = gpp)
#'
#' @details
#' \code{glycoPredict()} is used to predict masses and mass to charge ratios of all theoretically
#' possible glycans within a set of constraining parameters (defined in the
#' \code{glycoPredictParam} object). This package was written
#' for annotation of mass spec data (especially LC-MS), but if it is used for
#' other purposes, either ionisation mode and a very wide scan range can be given.
#' The function works by sourcing a Python file and then using the function
#'
#' @details
#' \code{glycoPredict()} is used to predict masses and mass to charge ratios of all theoretically
#' possible glycans within a set of constraining parameters (defined in the
#' \code{glycoPredictParam} object). This package was written
#' for annotation of mass spec data (especially LC-MS), but if it is used for
#' other purposes, either ionisation mode and a very wide scan range can be given.
#' The function works by sourcing a Python file and then using the function
#' encoded in the Python script.
#'
#' @seealso
#'
#' @seealso
#' GlycoAnnotateR::glycoPredictParam()
#'
#'

glycoPredict <- function(param){
path <- paste(system.file(package="GlycoAnnotateR"), "sugarMassesPredict.py", sep="/")
Expand Down Expand Up @@ -56,9 +56,9 @@ glycoPredict <- function(param){
naming = as.list(param@naming)
glycan_linkage = as.list(param@glycan_linkage)
modification_limits = param@modification_limits

message(paste("Glycans will be predicted according to the following glycoPredictParam() object:\n", str(param)))

df <- predict_sugars(dp = dp, polarity = polarity,
scan_range = scan_range,
pent_option = pent_option, modifications = modifications,
Expand All @@ -77,89 +77,104 @@ glycoPredict <- function(param){
x[na] = NA_real_
x
}
df.l <- df %>%
df.l <- df %>%
#make long
tidyr::pivot_longer(cols = starts_with("[M"),
names_to = "ion",
values_to = "mz") %>%
values_to = "mz") %>%
#remove ions outside scan range
tidyr::drop_na(mz) %>%
tidyr::drop_na(mz) %>%
#calculate ion formula
dplyr::mutate(C = stringr::str_split_i(formula, "C", 2) %>%
sub("\\D.*", "", .) %>%
dplyr::mutate(C = stringr::str_split_i(formula, "C", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
H = stringr::str_split_i(formula, "H", 2) %>%
sub("\\D.*", "", .) %>%
H = stringr::str_split_i(formula, "H", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
N = stringr::str_split_i(formula, "N", 2) %>%
sub("\\D.*", "", .) %>%
N = stringr::str_split_i(formula, "N", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
N = dplyr::case_when(grepl("N", formula) & is.na(N) ~ 1,
TRUE ~ N),
O = stringr::str_split_i(formula, "O", 2) %>%
sub("\\D.*", "", .) %>%
O = stringr::str_split_i(formula, "O", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
P = stringr::str_split_i(formula, "P", 2) %>%
sub("\\D.*", "", .) %>%
P = stringr::str_split_i(formula, "P", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
P = dplyr::case_when(grepl("P", formula) & is.na(P) ~ 1,
TRUE ~ P),
S = stringr::str_split_i(formula, "S", 2) %>%
sub("\\D.*", "", .) %>%
S = stringr::str_split_i(formula, "S", 2) %>%
sub("\\D.*", "", .) %>%
as.num(),
S = dplyr::case_when(grepl("S", formula) & is.na(S) ~ 1,
TRUE ~ S),
ion_effect = gsub("\\[M|\\].*", "", ion),
delta_H = sub(".*([+-]\\d*H).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^H].*|[-+][A-G, I-Z].*", "", .) %>%
sub("H", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num())
df.l$delta_H[df.l$ion_effect == "+NH4"] <- 4
df.l <- df.l %>%
dplyr::mutate(delta_N = sub(".*([+-]\\d*N[^a]).*", "\\1", ion_effect) %>%
sub("[+-]Na", "", .) %>%
sub("[-+]\\d[^N].*|[-+][A-M, O-Z].*|[A-M, O-Z]", "", .) %>%
sub("N", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
delta_H = sub(".*([+-]\\d*H).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^H].*|[-+][A-G, I-Z].*", "", .) %>%
sub("H", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num(),
multiple_ammonium = dplyr::case_when(grepl('\\dNH4', ion) ~
stringr::str_split_i(ion_effect,
'\\+|N', 2) %>%
as.numeric(),
TRUE ~ NA),
delta_H = dplyr::case_when(grepl('\\dNH4', ion) ~
delta_H + (multiple_ammonium*4),
grepl('\\+NH4', ion) ~ 4,
grepl('\\+CHOO', ion) ~ 1,
TRUE ~ delta_H),
delta_N = sub(".*([+-]\\d*N[^a]).*", "\\1", ion_effect) %>%
sub("[+-]Na", "", .) %>%
sub("[-+]\\d[^N].*|[-+][A-M, O-Z].*|[A-M, O-Z]", "", .) %>%
sub("N", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num(),
delta_Cl = sub(".*([+-]\\d*Cl).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^Cl].*|[-+][A-B, D-Z].*", "", .) %>%
sub("Cl", "", .) %>%
sub("^-$", "-1", .) %>%
sub("^\\+$", "1", .) %>%
delta_Cl = sub(".*([+-]\\d*Cl).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^Cl].*|[-+][A-B, D-Z].*", "", .) %>%
sub("Cl", "", .) %>%
sub("^-$", "-1", .) %>%
sub("^\\+$", "1", .) %>%
as.num(na.strings = "+CHOO"),
delta_Na = sub(".*([+-]\\d*Na).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^Na].*|[-+][A-M, O-Z].*", "", .) %>%
sub("Na", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
delta_Na = sub(".*([+-]\\d*Na).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^Na].*|[-+][A-M, O-Z].*", "", .) %>%
sub("Na", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num(na.strings = "+NH4"),
delta_K = sub(".*([+-]\\d*K).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^K].*|[-+][A-J, L-Z].*", "", .) %>%
sub("K", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num())
delta_K = sub(".*([+-]\\d*K).*", "\\1", ion_effect) %>%
sub("[-+]\\d[^K].*|[-+][A-J, L-Z].*", "", .) %>%
sub("K", "", .) %>%
sub("^-$", -1, .) %>%
sub("^\\+$", 1, .) %>%
as.num(),
delta_C = dplyr::case_when(grepl('\\+CHOO', ion) ~ 1,
TRUE ~ 0),
delta_O = dplyr::case_when(grepl('\\+CHOO', ion) ~ 2,
TRUE ~ 0))
df.l[is.na(df.l)] <- 0
df.l <- df.l %>%
dplyr::mutate(ion_formula = paste0("C", C,
df.l <- df.l %>%
dplyr::mutate(ion_formula = paste0("C", C + delta_C,
"Cl", delta_Cl,
"H", H + delta_H,
"K", delta_K,
"N", N + delta_N,
"Na", delta_Na,
"O", O,
"S", S, "P", P) %>%
gsub("[A-Z]0|Na0|Cl0", "", .) %>%
"O", O + delta_O,
"S", S, "P", P) %>%
gsub("[A-Z]0|Na0|Cl0", "", .) %>%
gsub("(\\b|\\D)1(\\b|\\D)", "\\1\\2", .))
df <- df.l %>%
dplyr::select(!matches("delta_|^[[:upper:]][a,c]?$|_effect"))

df <- df.l %>%
dplyr::select(!matches("delta_|^[[:upper:]][a,c]?$|_effect|multiple_")) %>%
dplyr::mutate(charge = stringr::str_split_i(ion, '\\]', 2) %>%
sub('\\+$', '+1', .) %>%
sub('\\-$', '-1', .))

}

if (nrow(df) == 0){
warning('Output has zero rows! Check your scan range, adducts/polarity and DP range are sensible')
}
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit e5f1e67

Please sign in to comment.