Merge pull request #227 from ldecicco-USGS/master

Docs and bug fixes
DOI-USGS · Apr 16, 2018 · 5705b93 · 5705b93
2 parents 4637f14 + 81da028
commit 5705b93
Show file tree

Hide file tree

Showing 26 changed files with 591 additions and 321 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -7,12 +7,14 @@ export(createLink)
 export(create_toxEval)
 export(endPointInfo)
 export(endpoint_hits)
-export(endpoint_table)
+export(endpoint_hits_DT)
 export(explore_endpoints)
 export(filter_groups)
 export(get_ACC)
 export(get_chemical_summary)
 export(graph_chem_data)
+export(hits_by_groupings)
+export(hits_by_groupings_DT)
 export(hits_summary)
 export(hits_summary_DT)
 export(make_tox_map)
@@ -26,8 +28,6 @@ export(plot_tox_stacks)
 export(rank_sites)
 export(rank_sites_DT)
 export(remove_flags)
-export(table_endpoint_hits)
-export(table_tox_endpoint)
 export(tox_boxplot_data)
 export(tox_chemicals)
 import(DT)

diff --git a/R/table_endpoint_hits.R → R/endpoint_hits.R b/R/table_endpoint_hits.R → R/endpoint_hits.R
@@ -1,13 +1,20 @@
-#' table_endpoint_hits
+#' Rank endpoints by category
+#' 
+#' These functions create a table with one row per endPoint, and one 
+#' column per category("Biological", "Chemical", or "Chemical Class"). 
+#' The values in the table are number of sites with hits in that endpoint/category 
+#' combination, based on a user-specified threshold (defined by hit_threshold).
+#' 
+#' The tables show slightly different results for a single site. Instead of the 
+#' number of sites with hits above a threshold, it is now the number of samples with hits.
 #' 
-#' Table of ranks
 #' @param chemicalSummary data frame from \code{get_chemical_summary}
 #' @param mean_logic logical \code{TRUE} is mean, \code{FALSE} is maximum
 #' @param category either "Biological", "Chemical Class", or "Chemical"
 #' @param hit_threshold numeric threshold defining a "hit"
 #' @export
 #' @import DT
-#' @rdname table_endpoint_hits
+#' @rdname endpoint_hits_DT
 #' @importFrom stats median
 #' @importFrom tidyr spread unite
 #' @importFrom dplyr full_join filter mutate select left_join right_join
@@ -28,11 +35,11 @@
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
 #' 
 #' hits_df <- endpoint_hits(chemicalSummary, category = "Biological") 
-#' table_endpoint_hits(chemicalSummary, category = "Biological")
-#' table_endpoint_hits(chemicalSummary, category = "Chemical Class")
-#' table_endpoint_hits(chemicalSummary, category = "Chemical")
+#' endpoint_hits_DT(chemicalSummary, category = "Biological")
+#' endpoint_hits_DT(chemicalSummary, category = "Chemical Class")
+#' endpoint_hits_DT(chemicalSummary, category = "Chemical")
 #' }
-table_endpoint_hits <- function(chemicalSummary, 
+endpoint_hits_DT <- function(chemicalSummary, 
  category = "Biological",
  mean_logic = FALSE,
  hit_threshold = 0.1){
@@ -45,35 +52,72 @@ table_endpoint_hits <- function(chemicalSummary,
  hit_threshold = hit_threshold)
 
  if(category == "Chemical"){
+ orig_names <- names(fullData)
+
  casKey <- select(chemicalSummary, chnm, CAS) %>%
  distinct()
-
+
+ numeric_hits <- fullData
  hits <- sapply(fullData, function(x) as.character(x))
 
  for(k in 1:nrow(fullData)){
  for(z in 2:ncol(fullData)){
  if(!is.na(fullData[k,z])){
- hits[k,z] <- createLink(cas = casKey$CAS[casKey$chnm == names(fullData)[z]],
- endpoint = fullData[k,1],
- hits = fullData[k,z])
+ if(fullData[k,z] < 10){
+ hit_char <- paste0("0",fullData[k,z])
+ } else{
+ hit_char <- as.character(fullData[k,z])
+ }
+ hits[k,z] <- paste(hit_char,createLink(cas = casKey$CAS[casKey$chnm == names(fullData)[z]],
+ endpoint = fullData[k,1]))
  }
  }
  }
 
- fullData <- hits
+ fullData <- data.frame(hits, stringsAsFactors = FALSE)
+ names(fullData) <- orig_names
+ }
+
+ n <- ncol(fullData)-1
+
+ if(n > 20 & n<30){
+ colors <- c(brewer.pal(n = 12, name = "Set3"),
+ brewer.pal(n = 8, name = "Set2"),
+ brewer.pal(n = max(c(3,n-20)), name = "Set1"))
+ } else if (n <= 20){
+ colors <- c(brewer.pal(n = 12, name = "Set3"),
+ brewer.pal(n = max(c(3,n-12)), name = "Set2")) 
+ } else {
+ colors <- colorRampPalette(brewer.pal(11,"Spectral"))(n)
  }
 
- fullData <- DT::datatable(fullData, extensions = 'Buttons',
+ fullData_dt <- DT::datatable(fullData, extensions = 'Buttons',
  escape = FALSE,
  rownames = FALSE,
  options = list(dom = 'Bfrtip',
  buttons = list('colvis'),
  scrollX = TRUE,
- order=list(list(2,'desc'))))
- return(fullData)
+ order=list(list(1,'desc'))))
+
+ for(i in 2:ncol(fullData)){
+ fullData_dt <- formatStyle(fullData_dt,
+ names(fullData)[i],
+ backgroundColor = colors[i])
+
+ if(category != "Chemical"){
+ fullData_dt <- formatStyle(fullData_dt, names(fullData)[i],
+ background = styleColorBar(range(fullData[,names(fullData)[i]],na.rm = TRUE), 'goldenrod'),
+ backgroundSize = '100% 90%',
+ backgroundRepeat = 'no-repeat',
+ backgroundPosition = 'center' ) 
+ } 
+
+ }
+
+ return(fullData_dt)
 }
 
-#' @rdname table_endpoint_hits
+#' @rdname endpoint_hits_DT
 #' @export
 endpoint_hits <- function(chemicalSummary, 
  category = "Biological",
@@ -112,9 +156,7 @@ endpoint_hits <- function(chemicalSummary,
  group_by(category, endPoint, date) %>%
  summarize(sumEAR = sum(EAR)) %>%
  group_by(category, endPoint) %>%
- summarize(meanEAR = ifelse(mean_logic, mean(sumEAR),max(sumEAR))) %>%
- group_by(category, endPoint) %>%
- summarize(nSites = sum(meanEAR > hit_threshold)) %>%
+ summarise(nSites = sum(sumEAR > hit_threshold)) %>%
  spread(category, nSites)
 
  }
@@ -143,6 +185,6 @@ endpoint_hits <- function(chemicalSummary,
 #' @param hits character
 #' @export
 #' @keywords internal
-createLink <- function(cas, endpoint, hits) {
- paste0('<a href="http://actor.epa.gov/dashboard/#selected/',cas,"+",endpoint,'" target="_blank" >',hits,'</a>')
+createLink <- function(cas, endpoint) {
+ paste0('<a href="http://actor.epa.gov/dashboard/#selected/',cas,"+",endpoint,'" target="_blank">&#9432;</a>')
 }
diff --git a/R/table_tox_endpoint.R → R/hits_by_groupings.R b/R/table_tox_endpoint.R → R/hits_by_groupings.R
@@ -1,12 +1,21 @@
-#' rank_sites_DT
+#' Biological hits per category
+#' 
+#' These functions create a table with one row per category("Biological", 
+#' "Chemical", or "Chemical Class"). The columns are always the "Biological" 
+#' groupings. The values in the table are how many sites have hits (based on 
+#' hit_threshold), for that particular "Biological"/category combination. Note, 
+#' if the user chooses "Biological", it is a simple 2-column table of 
+#' "Biological" groupings and number of sites (nSites).
+#' 
+#' The tables show slightly different results for a single site, showing the 
+#' number of samples with hits (instead of number of sites).
 #' 
-#' Table of ranks
 #' @param chemicalSummary data frame from \code{get_chemical_summary}
 #' @param mean_logic logical \code{TRUE} is mean, \code{FALSE} is maximum
 #' @param category either "Biological", "Chemical Class", or "Chemical"
 #' @param hit_threshold numeric threshold defining a "hit"
 #' @export
-#' @rdname table_tox_endpoint
+#' @rdname hits_by_groupings_DT
 #' @import DT
 #' @importFrom stats median
 #' @importFrom tidyr spread unite
@@ -27,20 +36,20 @@
 #' filtered_ep <- filter_groups(cleaned_ep)
 #' chemicalSummary <- get_chemical_summary(tox_list, ACClong, filtered_ep)
 #'
-#' site_df <- endpoint_table(chemicalSummary, category = "Biological")
-#' table_tox_endpoint(chemicalSummary, category = "Biological")
-#' table_tox_endpoint(chemicalSummary, category = "Chemical Class")
-#' table_tox_endpoint(chemicalSummary, category = "Chemical")
+#' site_df <- hits_by_groupings(chemicalSummary, category = "Biological")
+#' hits_by_groupings_DT(chemicalSummary, category = "Biological")
+#' hits_by_groupings_DT(chemicalSummary, category = "Chemical Class")
+#' hits_by_groupings_DT(chemicalSummary, category = "Chemical")
 #' }
-table_tox_endpoint <- function(chemicalSummary, 
+hits_by_groupings_DT <- function(chemicalSummary, 
  category = "Biological",
  mean_logic = FALSE,
  hit_threshold = 0.1){
 
  match.arg(category, c("Biological","Chemical Class","Chemical"))
 
 
- tableData <- endpoint_table(chemicalSummary=chemicalSummary, 
+ tableData <- hits_by_groupings(chemicalSummary=chemicalSummary, 
  category = category,
  mean_logic = mean_logic,
  hit_threshold = hit_threshold)
@@ -68,8 +77,8 @@ table_tox_endpoint <- function(chemicalSummary,
 }
 
 #' @export
-#' @rdname table_tox_endpoint
-endpoint_table <- function(chemicalSummary, category, mean_logic=FALSE, hit_threshold = 0.1){
+#' @rdname hits_by_groupings_DT
+hits_by_groupings <- function(chemicalSummary, category, mean_logic=FALSE, hit_threshold = 0.1){
 
  Bio_category <- Class <- EAR <- sumEAR <- value <- calc <- chnm <- choice_calc <- n <- nHits <- site <- ".dplyr"
  meanEAR <- nSites <- ".dplyr"

diff --git a/R/table_tox_sum.R → R/hits_summary.R b/R/table_tox_sum.R → R/hits_summary.R
@@ -1,6 +1,17 @@
-#' hits_summary_DT
+#' Summary of hits per site/category
+#' 
+#' These functions create a table with several rows per site depending on which 
+#' categories get hits. A "hit" is based on a user specified hit_threshold. For example, 
+#' if "Biological" is the category, and a site has hits above a threshold for 
+#' "DNA Binding" and "Nuclear Receptors", that site will have 2 rows of data in this table.
+#'
+#' For each row, there are 4 columns. Site and category (as defined by the category argument) 
+#' define the row. "Hits per Sample" are how many samples at that site had hits (based on 
+#' hit_threshold) for the category. "Number of Samples" is number of samples.
+#' 
+#' The tables show slightly different results for a single site. Instead of one row per 
+#' site/category, there is one row per category.
 #' 
-#' Table of sums
 #' @param chemicalSummary data frame from \code{get_chemical_summary}
 #' @param mean_logic logical \code{TRUE} is mean, \code{FALSE} is maximum
 #' @param category either "Biological", "Chemical Class", or "Chemical"
@@ -37,7 +48,8 @@ hits_summary_DT <- function(chemicalSummary,
  mean_logic = FALSE,
  hit_threshold = 0.1){
 
- chnm <- Class <- Bio_category <- site <- EAR <- sumEAR <- hits <- n <- ".dplyr"
+ chnm <- Class <- Bio_category <- site <- EAR <- sumEAR <- hits <- n <- `Samples with
+ hits` <- ".dplyr"
 
  match.arg(category, c("Biological","Chemical Class","Chemical"))
 
@@ -58,12 +70,6 @@ hits_summary_DT <- function(chemicalSummary,
  dom = 'Bfrtip',
  buttons = list('colvis')))
 
- # tableGroup <- formatStyle(tableGroup, names(hits_summaryOrdered)[maxChem],
- # background = styleColorBar(range(hits_summaryOrdered[,maxChem],na.rm=TRUE), 'goldenrod'),
- # backgroundSize = '100% 90%',
- # backgroundRepeat = 'no-repeat',
- # backgroundPosition = 'center' )
-
  tableGroup <- formatStyle(tableGroup, names(hits_summaryOrdered)[meanChem],
  background = styleColorBar(range(hits_summaryOrdered[,meanChem],na.rm=TRUE), 'wheat'),
  backgroundSize = '100% 90%',
@@ -80,7 +86,7 @@ hits_summary_DT <- function(chemicalSummary,
 hits_summary <- function(chemicalSummary, category, 
  hit_threshold = 0.1, mean_logic = FALSE){
 
- Class <- Bio_category <- `Hits per Sample` <- site <- EAR <- sumEAR <- chnm <- n <- hits <- ".dplyr"
+ Class <- Bio_category <- `Samples with hits` <- nSamples <- site <- EAR <- sumEAR <- chnm <- n <- hits <- ".dplyr"
 
  siteToFind <- unique(chemicalSummary$site)
 
@@ -109,5 +115,11 @@ hits_summary <- function(chemicalSummary, category,
  nSamples = n()) %>%
  arrange(desc(`Samples with hits`))
 
+ if(length(siteToFind) == 1){
+ hits_summary <- hits_summary[,c("category","Samples with hits","nSamples")]
+ }
+
+ hits_summary <- rename(hits_summary, `Number of Samples`=nSamples)
+
  return(hits_summary)
 }