MINVAL 1.0 rc

MINVAL update
gibbslab · Mar 31, 2017 · 7a68179 · 7a68179
2 parents 6e6c088 + 3f44dca
commit 7a68179
Show file tree

Hide file tree

Showing 21 changed files with 491 additions and 178 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,10 +2,14 @@ Package: minval
 Type: Package
 Title: MINimal VALidation for Stoichiometric Reactions
 Version: 1.0
-Author: c(person("Daniel","Osorio",email="dcosorioh@unal.edu.co",role=c("aut","cre")), person("Janneth","Gonzalez",role=c("aut","ths")), person("Andr\'es","Pinz\'on",role=c("aut","ths")))
+Author: c(person("Daniel","Osorio",email="dcosorioh@unal.edu.co",role=c("aut","cre")), person("Janneth","Gonzalez",role=c("aut","ths")), person("Andres","Pinzon",role=c("aut","ths")))
 Maintainer: Daniel Osorio <dcosorioh@unal.edu.co>
 Suggests:
-    testthat
+    testthat,
+    knitr,
+    rmarkdown,
+    sybil,
+    sybilSBML
 Description: For a given set of stoichiometric reactions, this package
     evaluates the mass and charge balance, extracts all reactants, products, orphan
     metabolites, metabolite names and compartments. Also are included some options
@@ -16,3 +20,4 @@ NeedsCompilation: no
 Depends:
     R (>= 2.10)
 RoxygenNote: 6.0.1
+VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(characterizeRXNs)
 export(checkBalance)
 export(compartments)
 export(downloadChEBI)

diff --git a/R/characterizeRXNs.R b/R/characterizeRXNs.R
@@ -0,0 +1,49 @@
+#' @export characterizeRXNs
+#' @author Daniel Camilo Osorio <dcosorioh@unal.edu.co>
+#  Bioinformatics and Systems Biology Lab      | Universidad Nacional de Colombia
+#  Experimental and Computational Biochemistry | Pontificia Universidad Javeriana
+#' @title Characterize stoichiometric reactions by compartments and reaction type
+#' @description For a given set of stoichiometric reactions, this function: \itemize{\item Counts the number of reactions, \item Compute the relative frequency of each reaction type (transport, exchange and compartmentalized), \item Compute the relative frequency of reactions by compartment, \item Count the number of unique metabolites, \item Compute the relative frequency of metabolites by compartment.}
+#' @param reactionList A set of stoichiometric reaction with the following characteristics: \itemize{
+#' \item Arrows symbols must be given in the form \code{'=>'} or \code{'<=>'}
+#' \item Inverse arrow symbols \code{'<='} or other types as: \code{'-->'}, \code{'<==>'}, \code{'->'} will not be parsed and will lead to errors.
+#' \item Arrow symbols and plus signs (\code{+}) must be surrounded by a space character
+#' \item Stoichiometric coefficients must be surrounded by a space character and not by parentheses.
+#' \item Each metabolite must have only one stoichiometric coefficient, substituents must be joined to metabolite name by a hyphen (\code{-}) symbol.
+#' \item Exchange reactions have only one metabolite before arrow symbol
+#' \item Compartments must be given between square brackets ([compartment]) joined at the end of metabolite name
+#' }
+#' Some examples of valid stoichiometric reactions are: \itemize{
+#' \item \code{H2O[c] + Urea-1-Carboxylate[c] <=> 2 CO2[c] + 2 NH3[c]}
+#' \item \code{ADP[c] + Phosphoenolpyruvate[c] => ATP[c] + Pyruvate[c]}
+#' \item \code{CO2[c] <=> }
+#' }
+#' @param rawOutput A boolean value \code{'TRUE'} or \code{'FALSE'} if computed values should be returned instead of raw data
+#' @examples 
+#' # Loading a set of stoichiometric reactions
+#' glycolysis <- read.csv(system.file("extdata/glycolysisModel.csv",package = "minval"), sep='\t')
+#'
+#' # Characterizing the reactions
+#' characterizeRXNs(reactionList = glycolysis$REACTION)
+characterizeRXNs <- function (reactionList, rawOutput = FALSE){
+  reactionList <- as.vector(reactionList)[validateSyntax(reactionList = reactionList)]
+  model <- list ()
+  model$nReactions <- length(unique(reactionList))
+  model$rType <- reactionType(reactionList = reactionList)
+  model$cReaction <- unlist(sapply(reactionList[model$rType == "Compartmentalized reaction"], function(reaction){compartments(reactionList = reaction, uniques = TRUE)},USE.NAMES = FALSE))
+  model$rCompartments <- compartments(reactionList = reactionList)
+  model$nMetabolites <- metabolites(reactionList = reactionList, uniques = TRUE)
+  model$cMetabolites <- compartments(reactionList = model$nMetabolites, uniques = FALSE)
+  model$nMetabolites <- length(model$nMetabolites)
+  if (rawOutput == TRUE){
+    return(model)
+  } else {
+    summary <- list ()
+    summary$nReactions <- model$nReactions
+    summary$rType <- (table(model$rType) / model$nReactions) * 100
+    summary$cReaction <- (table(model$cReaction)/ model$nReactions) * 100
+    summary$nMetabolites <- model$nMetabolites
+    summary$cMetabolites <- (table(model$cMetabolites)/ model$nMetabolites) * 100
+    return(summary)
+  }
+}
diff --git a/R/checkBalance.R b/R/checkBalance.R
@@ -24,6 +24,7 @@
 #' @param mFormula An optional ID of molecular formula column in the referenceData
 #' @param mWeight An optional ID of molecular weight column in the referenceData
 #' @param mCharge An optional ID of net charge column in the referenceData
+#' @param woCompartment A boolean value \code{'TRUE'} or \code{'FALSE'} to indicate if compartment label should be removed of stoichiometric reactions
 #' @return This function returns a boolean value \code{'TRUE'} if reaction is balanced.
 #' @examples
 #' # Loading a set of stoichiometric reactions
@@ -47,13 +48,14 @@ checkBalance <-
            ids,
            mFormula = NULL,
            mWeight = NULL,
-           mCharge = NULL) {
+           mCharge = NULL,
+           woCompartment = TRUE) {
     # Validate syntax
     reactionList <-
       as.vector(x = reactionList[validateSyntax(reactionList = reactionList)])
 
     # Extract unique metabolites
-    metabolites <- metabolites(reactionList, woCompartment = TRUE)
+    metabolites <- metabolites(reactionList, woCompartment = woCompartment)
 
     # Validate options
     if (!any(colnames(referenceData) %in% ids)) {
@@ -97,41 +99,17 @@ checkBalance <-
 
     # Split reaction
     reactants <- getLeft(reactionList)
-    reactants <-
-      lapply(reactants, function(reactants) {
-        rep(
-          metabolites(reactionList = reactants, woCompartment = TRUE),
-          coefficients(reactants)
-        )
-      })
     products <- getRight(reactionList)
-    products <-
-      lapply(products, function(products) {
-        rep(
-          metabolites(reactionList = products, woCompartment = TRUE),
-          coefficients(products)
-        )
-      })
-
-    # Extract chemical information by reaction
-    reactants <-
-      lapply(reactants, function(reactants) {
-        referenceData[reactants, 2]
-      })
-    products <-
-      lapply(products, function(products) {
-        referenceData[products, 2]
-      })
 
     # If molecular formula is given, split formulas and add atoms by type
     if (!is.null(mFormula)) {
       reactants <-
         lapply(reactants, function(reactants) {
-          splitFormula(reactants)
+          splitFormula(coefficients(reactants), referenceData[metabolites(reactionList = reactants, woCompartment = woCompartment),2])
         })
       products <-
         lapply(products, function(products) {
-          splitFormula(products)
+          splitFormula(coefficients(products), referenceData[metabolites(reactionList = products, woCompartment = woCompartment),2])
         })
       balanced <-
         sapply(seq_along(reactants), function(reaction) {
@@ -141,11 +119,11 @@ checkBalance <-
       # If other type is given, add the associated values
       reactants <-
         lapply(reactants, function(reactants) {
-          sum(as.numeric(reactants))
+          sum((referenceData[metabolites(reactionList = reactants, woCompartment = woCompartment),2]) * coefficients(reactants))
         })
       products <-
         lapply(products, function(products) {
-          sum(as.numeric(products))
+          sum((referenceData[metabolites(reactionList = products, woCompartment = woCompartment),2]) * coefficients(products))
         })
       # Check balance
       balanced <-

diff --git a/R/compartments.R b/R/compartments.R
@@ -51,6 +51,10 @@ compartments <- function(reactionList, uniques = TRUE) {
     ))
   # Remove brackets
   compartments <- gsub("\\[|\\]", "", compartments)
+  # NA return
+  if (length(compartments) == 0) {
+    compartments <- NA
+  }
   # Return compartments
   if (uniques == TRUE) {
     compartments <- unique(compartments)

diff --git a/R/downloadChEBI.R b/R/downloadChEBI.R
@@ -37,7 +37,6 @@
 #' ChEBI <- downloadChEBI(release = '142', woAssociations = TRUE)
 #'  }
 #' @seealso The ChEBI database webpage: https://www.ebi.ac.uk/chebi/
-#' @keywords Download ChEBI check mass charge balance
 downloadChEBI <- function(release = "latest",
                           woAssociations = FALSE) {
   # Download folder

diff --git a/R/internalFunctions.R b/R/internalFunctions.R
@@ -70,17 +70,19 @@ getRight <- function(reactionList) {
 }
 
 # Split chemical formula
-splitFormula <- function(chemicalFormula) {
-  splitAtoms <-
-    unlist(regmatches(
-      chemicalFormula,
-      gregexpr("([A-Z]{1}[a-z]?)([0-9]*)", chemicalFormula)
-    ))
-  atoms <- sub("([A-Z]{1}[a-z]?)([0-9]*)", '\\1', splitAtoms)
+splitFormula <- function(coefficient, chemicalFormula) {
+  splitAtoms <-regmatches(chemicalFormula,gregexpr("([A-Z]{1}[a-z]?)([0-9]*)", chemicalFormula))
+  atoms <- lapply(splitAtoms,function(splitAtoms){
+    sub("([A-Z]{1}[a-z]?)([0-9]*)", '\\1', splitAtoms)
+  })
   atomsNumber <-
-    as.numeric(regmatches(x = splitAtoms, m = gregexpr('[0-9]+', splitAtoms)))
-  atomsNumber[is.na(atomsNumber)] <- 1
-  tapply(atomsNumber, atoms, sum)
+    lapply(splitAtoms,function(splitAtoms){
+      number <- as.numeric(regmatches(x = splitAtoms, m = gregexpr('[0-9]+', splitAtoms)))
+      number[is.na(number)] <- 1
+      return(number)
+    })
+  atomsNumber <- sapply(seq_along(atomsNumber),function(molecule){atomsNumber[[molecule]]*coefficient[[molecule]]})
+  tapply(unlist(atomsNumber), unlist(atoms), sum)
 }
 
 # Identify the reaction type
@@ -103,16 +105,17 @@ reactionType <- function(reactionList) {
     })
   # Define type of reaction
   sapply(seq_along(reactionList), function(reaction) {
-    if (length(right[[reaction]]) > 0) {
-      if (length(left[[reaction]]) > 1 | length(right[[reaction]]) > 1) {
-        return("Transport reaction")
-      } else if (all.equal(target = left[[reaction]], current = right[[reaction]]) == TRUE) {
-        return("Compartmentalized reaction")
-      } else {
-        return("Transport reaction")
-      }
+    if (all(is.na(left[[reaction]])) && all(is.na(right[[reaction]]))) {
+      return("No compartmentalized reaction")
+    } else if (all(is.na(right[[reaction]]))) {
+      return("Exchange reaction")
+    } else if (length(left[[reaction]]) > 1 ||
+               length(right[[reaction]]) > 1) {
+      return("Transport reaction")
+    } else if (all.equal(target = left[[reaction]], current = right[[reaction]]) == TRUE) {
+      return("Compartmentalized reaction")
     } else {
-      return ("Exchange reaction")
+      return("Transport reaction")
     }
   })
 }
@@ -148,7 +151,7 @@ validateData <- function(modelData) {
 removeComments <- function(modelData) {
   comments <- grepl("^#", modelData[, 1])
   if (any(comments)) {
-    modelData <- modelData[!comments, ]
+    modelData <- modelData[!comments,]
   }
   return (modelData)
 }
@@ -231,13 +234,33 @@ rearmReactions <-
     } else if (type == "TSV") {
       unlist(lapply(seq_len(dim(S)[2]), function(reaction) {
         met = S[, reaction] < 0
-        reactants = paste0("(", abs(S[which(met == TRUE), reaction]), ") ", gsub("\\+","_ChargedP",gsub("[[:space:]]+","_",rownames(S)[which(met == TRUE)])), collapse = " + ")
+        reactants = paste0("(",
+                           abs(S[which(met == TRUE), reaction]),
+                           ") ",
+                           gsub(
+                             "\\+",
+                             "_ChargedP",
+                             gsub("[[:space:]]+", "_", rownames(S)[which(met == TRUE)])
+                           ),
+                           collapse = " + ")
         if (any(S[, reaction] > 0)) {
           met = S[, reaction] > 0
-          produts = paste0("(", abs(S[which(met == TRUE), reaction]), ") ", gsub("\\+","_ChargedP",gsub("[[:space:]]+","_",rownames(S)[which(met == TRUE)])), collapse = " + ")
+          produts = paste0("(",
+                           abs(S[which(met == TRUE), reaction]),
+                           ") ",
+                           gsub(
+                             "\\+",
+                             "_ChargedP",
+                             gsub("[[:space:]]+", "_", rownames(S)[which(met == TRUE)])
+                           ),
+                           collapse = " + ")
         } else {
           met = S[, reaction] > 0
-          produts = paste0(abs(S[which(met == TRUE), reaction]), " ", gsub("\\+","_ChargedP",gsub("[[:space:]]+","_",rownames(S)[which(met == TRUE)])), collapse = " + ")
+          produts = paste0(abs(S[which(met == TRUE), reaction]), " ", gsub(
+            "\\+",
+            "_ChargedP",
+            gsub("[[:space:]]+", "_", rownames(S)[which(met == TRUE)])
+          ), collapse = " + ")
         }
 
         paste(reactants,

diff --git a/R/orphanMetabolites.R b/R/orphanMetabolites.R
@@ -75,7 +75,11 @@ orphanMetabolites <-
           orphan[grep(paste0("\\[", compartment, "\\]"), orphan)]
         }, simplify = FALSE)
     }
-    return(orphan)
+    if (length(orphan) == 0){
+      return (NA)
+    } else {
+      return(orphan)
+    }
   }
 
 #' @describeIn orphanMetabolites Identify the orphan reactants of a set of stoichometric reactions

diff --git a/R/products.R b/R/products.R
@@ -20,26 +20,41 @@
 #' }
 #' @return A vector with the identified products in the reaction, or a list with the identified products in each reaction if a set of stoichiometric reactions was given.
 
-products <- function(reactionList){
+products <- function(reactionList) {
   # Convert to a vector
   reactionList <- as.vector(reactionList)
   # Remove reaction with invalid syntax
   reactionList <- reactionList[validateSyntax(reactionList)]
   # Extract reactants for irreversible reactions
-  reaction <- strsplit(reactionList,"[[:blank:]]+=>[[:blank:]]+")
-  reaction[lengths(reaction)>1] <- lapply(reaction[lengths(reaction)>1],function(reaction){reaction[[2]]})
+  reaction <- strsplit(reactionList, "[[:blank:]]+=>[[:blank:]]*")
+  reaction[lengths(reaction) > 1] <-
+    lapply(reaction[lengths(reaction) > 1], function(reaction) {
+      reaction[[2]]
+    })
   # Extract metabolites for reversible reactions
-  reaction <- strsplit(unlist(reaction), "[[:blank:]]+<=>[[:blank:]]+")
+  reaction <-
+    strsplit(unlist(reaction), "[[:blank:]]+<=>[[:blank:]]*")
   # Split independient reactants
-  reaction <- lapply(reaction, function(reaction){strsplit(reaction,"[[:blank:]]+\\+[[:blank:]]+")})
+  reaction <-
+    lapply(reaction, function(reaction) {
+      strsplit(reaction, "[[:blank:]]+\\+[[:blank:]]+")
+    })
   # Remove spaces and report uniques
-  reaction <- lapply(reaction, function(reaction){unique(removeSpaces(unlist(reaction)))})
+  reaction <-
+    lapply(reaction, function(reaction) {
+      unique(removeSpaces(unlist(reaction)))
+    })
   # Use a regex to extract stoichiometric coefficients and separate the metabolite name
-  products <- lapply(reaction, function(reaction){unique(removeCoefficients(reaction))})
-  if (length(products)==1){
+  products <-
+    lapply(reaction, function(reaction) {
+      unique(removeCoefficients(reaction))
+    })
+  # Return
+  if (length(products) == 0) {
+    return(NA)
+  } else if (length(products) == 1) {
     return(unlist(products))
   } else {
     return(products)
   }
 }
-
diff --git a/R/reactants.R b/R/reactants.R
@@ -20,22 +20,36 @@
 #' }
 #' @return A vector with the identified reactants in the reaction, or a list with the identified reactats in each reaction if a set of stoichiometric reactions was given.
 
-reactants <- function(reactionList){
+reactants <- function(reactionList) {
   # Convert to a vector
   reactionList <- as.vector(reactionList)
   # Remove reaction with invalid syntax
   reactionList <- reactionList[validateSyntax(reactionList)]
   # Extract reactants for irreversible reactions
-  reaction <- unlist(lapply(strsplit(reactionList,"[[:space:]]+=>[[:space:]]+"),function(reactionList){reactionList[[1]]}))
+  reaction <-
+    unlist(lapply(strsplit(reactionList, "[[:space:]]+=>[[:space:]]*"), function(reactionList) {
+      reactionList[[1]]
+    }))
   # Extract metabolites for reversible reactions
-  reaction <- strsplit(reaction, "[[:space:]]+<=>[[:space:]]+")
+  reaction <- strsplit(reaction, "[[:space:]]+<=>[[:space:]]*")
   # Split independient reactants
-  reaction <- lapply(reaction, function(reaction){strsplit(reaction,"[[:space:]]+\\+[[:space:]]+")})
+  reaction <-
+    lapply(reaction, function(reaction) {
+      strsplit(reaction, "[[:space:]]+\\+[[:space:]]+")
+    })
   # Remove spaces and report uniques
-  reaction <- lapply(reaction, function(reaction){unique(removeSpaces(unlist(reaction)))})
+  reaction <-
+    lapply(reaction, function(reaction) {
+      unique(removeSpaces(unlist(reaction)))
+    })
   # Use a regex to extract stoichiometric coefficients and separate the metabolite name
-  reactants <- lapply(reaction, function(reaction){unique(removeCoefficients(reaction))})
-  if (length(reactionList)==1){
+  reactants <-
+    lapply(reaction, function(reaction) {
+      unique(removeCoefficients(reaction))
+    })
+  if (length(reactants) == 0) {
+    return(NA)
+  } else if (length(reactants) == 1) {
     return(unlist(reactants))
   } else {
     return(reactants)