Merge pull request #220 from ldecicco-USGS/master

Vignettes and other doc
DOI-USGS · Apr 3, 2018 · 85d3dd2 · 85d3dd2
2 parents a596e4c + 4e373bc
commit 85d3dd2
Show file tree

Hide file tree

Showing 125 changed files with 2,257 additions and 9,600 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: toxEval
 Type: Package
 Title: ToxCast Evaluations
-Version: 0.3.8.9009
+Version: 0.3.8.9010
 Date: 2018-01-04
 Authors@R: c( person("Steven", "Corsi", role = c("aut"),
  email = "srcorsi@usgs.gov"),

diff --git a/R/clean_endPoint_info.R b/R/clean_endPoint_info.R
@@ -1,9 +1,18 @@
 #' clean_endPoint_info
 #' 
-#' Clean up the endPointInfo table from toxCast. Filtering and cleaning based on ES&T (cite Dan/Brett's paper)
+#' Clean up the endPointInfo table from ToxCast. Filtering based on 
+#' \url{https://pubs.acs.org/doi/10.1021/acs.est.7b01613}. Specifically, 
+#' this function hard-codes in the removal of endPoints that are ATG 
+#' sources with signal loss, and NVS with signal gain. Also, this function 
+#' adds some additional categories to intended_target_family and 
+#' intended_target_family_sub as described in the paper linked above.
 #' 
 #' @param endPointInfo data frame Endpoint information from ToxCast
 #' @export
+#' @return data frame based on endPointInfo, but with some endPoints
+#' filtered out, some additional categories in intended_target_family and
+#' intended_target_family_sub. Also, the names in intended_target_family
+#' are cleaned up to look good in graphs and tables.
 #' @importFrom stringi stri_trans_totitle
 #' @examples 
 #' endPointInfo <- endPointInfo

diff --git a/R/create_toxEval.R b/R/create_toxEval.R
@@ -1,5 +1,49 @@
-#' create_toxEval
+#' Load and check toxEval data
 #' 
+#' This function requires a path to a single Excel file. The Excel
+#' file should include 3 mandatory tabs named "Data", "Chemicals", and "Sites".
+#' Additionally there are 2 optional tabs: "Exclude" and "Benchmarks". This function
+#' will load each sheet, creating a data frame for each sheet. It will
+#' perform basic checks on the data to make sure there are the required columns in
+#' each tab. 
+#' 
+#' The Data tab needs to have columns "CAS", "SiteID", "Value", "Sample Date".
+#' The "Value" column is assumed to be concentration measurements in ug/L. "Sample Date" 
+#' can be either a date or date/time or an integer. Any other column can be included, 
+#' but won't be used in general toxEval functions.
+#' 
+#' The Chemicals tab needs to have columns "CAS", "Class". The "CAS" in this
+#' tab must exactly match the "CAS" in the Data tab. The "Class" designation
+#' allows the data to be grouped in a user-specified way. For example, you
+#' may want to explore the difference between pesticides and herbicides. 
+#' 
+#' The Sites tab needs to have the columns "SiteID", "Short Name", and for the Shiny application 
+#' "dec_lat","dec_lon". The "SiteID" column in this tab must match exactly
+#' the "SiteID" column in the Data tab.
+#' 
+#' The optional tab Exclude needs to have the columns "CAS", "endPoint". These
+#' are used to exclude particular chemicals (via CAS), ToxCast endpoints (via endPoint),
+#' or a unique chemical/endpoint combination.
+#' 
+#' The optional tab Benchmarks needs to have columns "CAS", "endPoint","ACC_value","chnm". This
+#' tab is used to over-ride the functions using ToxCast endpoints, allowing the user
+#' to import endpoint information from potentially other sources. It
+#' could also be useful for reproducing results in the future (for example,
+#' if ToxCast updates their data, you could use this tab to run the analysis
+#' on the older "v2" version).
+#' 
+#' 
+#' For more information, see the "User Guide" vignette.
+#' 
+#' All remaining toxEval functions will expect the data to be supplied
+#' via the list that is returned from this function.
+#' 
+#' @return list of 3 data frames, potentially up to 5. The guaranteed data
+#' frames are chem_data (containing at least the columns: "CAS", "SiteID", "Value", "Sample Date"),
+#' chem_info (containing at least the columns: "CAS", "Class"),
+#' chem_site (containing at least the columns: "SiteID", "Short Name", would need "dec_lat" and "dec_lon" for shiny app).
+#' The optional data frames are exclusions (containing at least the columns: "CAS", "endPoint"),
+#' and benchmarks (containing at least the columns: "CAS", "endPoint","ACC_value","chnm")
 #' 
 #' @param excel_file_path Path to Excel file that contains at least 3 tabs: Data, Chemicals, and Sites, 
 #' and could optionally contain Exclude and Benchmarks

diff --git a/R/explore_endpoints.R b/R/explore_endpoints.R
@@ -1,6 +1,10 @@
-#' Explore endpoint groupings
+#' Explore data in the Shiny Application
 #' 
-#' Open an interactive app
+#' Open an interactive app in a browser. See the vignette 'User Guide'
+#' for more details. Using this function is a quick and convenient way
+#' to explore your data. For more customization, the R-code to 
+#' produce each graph and table is displayed in the app. That is 
+#' a good starting-point for a custom analysis.
 #' 
 #' @param browse use browser for map rendering
 #' @export

diff --git a/R/filter_endPoint_info.R b/R/filter_endPoint_info.R
@@ -1,11 +1,19 @@
-#' filter_groups
+#' Filter endPoints based on groups and assays.
 #' 
-#' Clean up the endPointInfo table from toxCast. Filtering and cleaning based on ES&T (cite Dan/Brett's paper)
+#' This function takes the data frame from \code{\link{endPointInfo}} 
+#' and filters the endpoints in 3 steps. First, the user specifies
+#' the "groupCol" which is a column header from \code{\link{endPointInfo}}. 
+#' The default category is intended_target_family. Second, the user specifies the assays to use. By default, the BioSeek
+#' set of assays are removed. Finally, the user can also choose to remove
+#' specific groups from the category. The default is to remove "Background Measurement"
+#' and "Undefined", but it is a good idea to check if other groups may 
+#' not be relevant to the study.
 #' 
 #' @param ep data frame Endpoint information from ToxCast
 #' @param groupCol character name of column to use as a group category
 #' @param assays vector of assays to use. Possible values are "ATG","NVS","OT","TOX21","CEETOX", "APR", "BSK",
-#' "CLD","TANGUAY","NHEERL_PADILLA","NCCT_SIMMONS","ACEA" 
+#' "CLD","TANGUAY","NHEERL_PADILLA","NCCT_SIMMONS","ACEA". By default, the 
+#' "BSK" (BioSeek) assay is removed.
 #' @param remove_groups vector of groups to remove
 #' @export
 #' @importFrom stringi stri_trans_totitle

diff --git a/R/get_ACC.R b/R/get_ACC.R
@@ -1,8 +1,10 @@
-#' get_ACC
+#' Get the ACC values for a selection of chemicals
 #' 
-#' Get ACC values for vector of CAS's
+#' Data from ToxCast are included in the toxEval package. These data are
+#' called and filtered based on the provided vector of CAS values.
 #' 
-#' @param CAS vector of CAS
+#' @param CAS vector of CAS. 
+#' @return data frame with columns CAS, chnm, flags, endPoint, ACC, MlWt, and ACC_value
 #' @export
 #' @importFrom tidyr gather
 #' @importFrom dplyr select filter right_join mutate

diff --git a/R/get_chemical_summary.R b/R/get_chemical_summary.R
@@ -1,19 +1,32 @@
-#' get_chemical_summary
+#' Create a chemical summary of the data.
+#' 
+#' This function takes the measured user data from the output of \code{\link{create_toxEval}},
+#' and joins the data with the endPoint information provided by ToxCast.
+#' Data from ToxCast is included with this package, but alternative 
+#' benchmark data can be provided to perform the same "toxEval" analysis.
+#' 
+#' To use the data provided by the package, a sample workflow is shown below
+#' in the examples. It includes getting the ToxCast (ACC) values that will
+#' be used to calculate the EAR, filtering out the endPoints that should
+#' be ignored based on "flags" in the data, and filtering out any groups
+#' that may not be important to the analysis at hand.
+#' 
 #' 
-#' Get ACC values for vector of CAS's
 #' @param tox_list list with data frames for chem_data, chem_info, chem_site, 
 #' and optionally exclusions and benchmarks. Created with \code{\link{create_toxEval}}
-#' @param ACClong data frame with at least columns: CAS, chnm, endPoint, ACC_value
+#' @param ACClong data frame with at least columns: CAS, chnm, endPoint, ACC_value. To use data
+#' provided by this package from ToxCast, use the \code{\link{get_ACC}} function. You may wish
+#' to remove endPoints with specific flags using the \code{\link{remove_flags}} function.
 #' @param filtered_ep data frame with columns: endPoints, groupCol. Default is \code{"All"}, where no
 #' filtering occurs.
 #' @param chem.data OPTIONAL data frame with (at least) columns: CAS, SiteID, Value. Default is \code{NULL}. 
-#' Will over-ride what is in tox_list.
+#' The argument will over-ride what is in tox_list.
 #' @param chem.site OPTIONAL data frame with (at least) columns: SiteID, Short Name. Default is \code{NULL}. 
-#' Will over-ride what is in tox_list.
+#' The argument will over-ride what is in tox_list.
 #' @param chem.info OPTIONAL data frame with (at least) columns: CAS, class. Default is \code{NULL}. 
-#' Will over-ride what is in tox_list.
+#' The argument will over-ride what is in tox_list.
 #' @param exclusion OPTIONAL data frame with (at least) columns: CAS and endPoint. Default is \code{NULL}. 
-#' Will over-ride what is in tox_list.
+#' The argument will over-ride what is in tox_list.
 #' @export
 #' @importFrom tidyr gather
 #' @importFrom dplyr full_join filter mutate select left_join right_join anti_join
@@ -23,15 +36,15 @@
 #' full_path <- file.path(path_to_tox, file_name)
 #' 
 #' tox_list <- create_toxEval(full_path)
-#' 
+#' \dontrun{
 #' ACClong <- get_ACC(tox_list$chem_info$CAS)
 #' ACClong <- remove_flags(ACClong)
 #' 
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' 
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#'   
+#' } 
 get_chemical_summary <- function(tox_list, ACClong = NULL, filtered_ep = "All", 
  chem.data=NULL, chem.site=NULL, 
  chem.info=NULL, exclusion=NULL){
@@ -159,10 +172,21 @@ orderChem <- function(graphData, orderClass_df){
  return(orderChem_df)
 }
 
-#' remove_flags
-#' 
 #' Remove endpoints with specific flags from data
 #' 
+#' Remove endpoints with specific flags associated with the ACC values. The set
+#' of flags that are included are: 
+#' \tabular{ll}{
+#' Flag \tab flagsShort\cr
+#' Borderline active \tab Borderline \cr
+#' Only highest conc above baseline, active \tab OnlyHighest \cr
+#' Only one conc above baseline, active \tab OneAbove \cr
+#' Noisy data \tab Noisy \cr
+#' Hit-call potentially confounded by overfitting \tab HitCall \cr
+#' Gain AC50 < lowest conc & loss AC50 < mean conc \tab GainAC50 \cr
+#' Biochemical assay with < 50% efficacy \tab Biochemical \cr
+#' }
+#' 
 #' @param ACClong data frame with columns: casn, chnm, endPoint, ACC_value
 #' @param flagsShort vector of flags to TAKE OUT. Possible values are 
 #' "Borderline", "OnlyHighest", "OneAbove","Noisy", "HitCall", "GainAC50", "Biochemical"
@@ -240,14 +264,11 @@ remove_flags <- function(ACClong, flagsShort = c("Borderline",
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' exclusion <- data.frame(CAS = c("134-62-3","486-56-6"),
 #' endPoint = c("", "TOX21_p53_BLA_p3_viability"),
 #' stringsAsFactors = FALSE)
 #' chemicalSummary <- exclude_points(chemicalSummary, exclusion)
+#' }
 exclude_points <- function(chemicalSummary, exclusion){
 
  CAS <- endPoint <- casrn <- ".dplyr"

diff --git a/R/makeMap.R b/R/makeMap.R
@@ -28,11 +28,8 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' mapData <- getMapInfo(chemicalSummary, tox_list$chem_site, "Biological") 
+#' }
 getMapInfo <- function(chemicalSummary,
  chem_site,
  category = "Biological",
@@ -119,19 +116,18 @@ getMapInfo <- function(chemicalSummary,
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
 #' 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' makeMap(chemicalSummary, tox_list$chem_site, "Biological") 
 #' makeMap(chemicalSummary, tox_list$chem_site, "Chemical Class")
 #' makeMap(chemicalSummary, tox_list$chem_site, "Chemical") 
+#' }
 makeMap <- function(chemicalSummary,
  chem_site,
  category = "Biological",
  mean_logic = FALSE){
 
+ SiteID <- ".dplyr
+ "
  maxEARWords <- ifelse(mean_logic,"meanEAR","maxEAR")
 
  mapDataList <- getMapInfo(chemicalSummary, 

diff --git a/R/plot_chemical_boxplots.R b/R/plot_chemical_boxplots.R
@@ -25,12 +25,9 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' 
 #' plot_chemical_boxplots(chemicalSummary)
+#' }
 plot_chemical_boxplots <- function(chemicalSummary, 
  manual_remove=NULL,
  mean_logic = FALSE,
@@ -155,11 +152,9 @@ plot_chemical_boxplots <- function(chemicalSummary,
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
+#'
 #' graphData <- graph_chem_data(chemicalSummary)
+#' }
 graph_chem_data <- function(chemicalSummary, 
  manual_remove=NULL,
  mean_logic = FALSE){

diff --git a/R/plot_group_boxplots.R b/R/plot_group_boxplots.R
@@ -26,14 +26,10 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' plot_tox_boxplots(chemicalSummary, "Biological") 
 #' plot_tox_boxplots(chemicalSummary, "Chemical Class")
 #' plot_tox_boxplots(chemicalSummary, "Chemical") 
+#' }
 plot_tox_boxplots <- function(chemicalSummary, 
  category = "Biological",
  manual_remove = NULL,

diff --git a/R/plot_heat_chemical.R b/R/plot_heat_chemical.R
@@ -22,11 +22,6 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' 
-#' chemicalSummary <- ex_chemSum #loading example data
 #' 
 #' graphData <- graph_chem_data(chemicalSummary)
 #' plot_heat_chemicals(graphData, tox_list$chem_site)
@@ -59,6 +54,7 @@
 #' levels = sitesOrdered)
 #' 
 #' plot_heat_chemicals(graphData, tox_list$chem_site)
+#' }
 plot_heat_chemicals <- function(graphData, chem_site){
 
  SiteID <- site_grouping <- `Short Name` <- chnm <- maxEAR <- ".dplyr"
@@ -115,7 +111,7 @@ plot_heat_chemicals <- function(graphData, chem_site){
 #' path_to_tox <- system.file("extdata", package="toxEval")
 #' file_name <- "OWC_data_fromSup.xlsx"
 #' full_path <- file.path(path_to_tox, file_name)
-#' 
+#' \dontrun{
 #' tox_list <- create_toxEval(full_path)
 #' 
 #' ACClong <- get_ACC(tox_list$chem_info$CAS)
@@ -157,6 +153,7 @@ plot_heat_chemicals <- function(graphData, chem_site){
 #' manual_remove = "Undefined")
 #' plot_tox_heatmap(chemicalSummary, tox_list$chem_site, category = "Chemical Class")
 #' plot_tox_heatmap(chemicalSummary, tox_list$chem_site, category = "Chemical")
+#' }
 plot_tox_heatmap <- function(chemicalSummary, 
  chem_site, 
  category = "Biological",

diff --git a/R/plot_tox_endpoints.R b/R/plot_tox_endpoints.R
@@ -27,13 +27,11 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
+#'
+#' plot_tox_endpoints(chemicalSummary, filterBy = "Cell Cycle")
+#' plot_tox_endpoints(chemicalSummary, category = "Chemical Class", filterBy = "PAHs")
+#' plot_tox_endpoints(chemicalSummary, category = "Chemical", filterBy = "Atrazine")
 #' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
-#' plot_tox_endpoints(chemicalSummary, filterBy = "Cell Cycle")
-#' plot_tox_endpoints(chemicalSummary, category = "Chemical Class", filterBy = "PAHs")
-#' plot_tox_endpoints(chemicalSummary, category = "Chemical", filterBy = "Atrazine")
 plot_tox_endpoints <- function(chemicalSummary, 
  category = "Biological",
  filterBy = "All",

diff --git a/R/plot_tox_stacks.R b/R/plot_tox_stacks.R
@@ -27,14 +27,11 @@
 #' cleaned_ep <- clean_endPoint_info(endPointInfo)
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
-#' }
-#' # The example workflow takes a bit of time to load and compute, 
-#' # so an example chemicalSummary is included pre-calculated in the package. 
-#' chemicalSummary <- ex_chemSum #loading example data
-#' 
+#' 
 #' plot_tox_stacks(chemicalSummary, tox_list$chem_site, "Biological") 
 #' plot_tox_stacks(chemicalSummary, tox_list$chem_site, "Chemical Class")
 #' plot_tox_stacks(chemicalSummary, tox_list$chem_site, "Chemical", include_legend = FALSE) 
+#' }
 plot_tox_stacks <- function(chemicalSummary, 
  chem_site,
  category = "Biological",
@@ -45,7 +42,7 @@ plot_tox_stacks <- function(chemicalSummary,
  match.arg(category, c("Biological","Chemical Class","Chemical"))
 
  site <- EAR <- sumEAR <- meanEAR <- groupCol <- nonZero <- ".dplyr"
- SiteID <- site_grouping <- `Short Name` <- count <- ".dplyr"
+ SiteID <- site_grouping <- index <- `Short Name` <- count <- ".dplyr"
 
  if(!("site_grouping" %in% names(chem_site))){
  chem_site$site_grouping <- ""