From 8d7bc026bbaeb432c34256b3724cd677d6cf84ed Mon Sep 17 00:00:00 2001 From: Nicholas Tierney Date: Wed, 21 Mar 2018 10:08:13 +1100 Subject: [PATCH] add note + examples for downsampling data for plotting - see #32. --- R/vis_dat.R | 20 ++++++++++++++++++++ R/vis_miss.R | 20 ++++++++++++++++++++ man/vis_dat.Rd | 21 +++++++++++++++++++++ man/vis_miss.Rd | 21 +++++++++++++++++++++ 4 files changed, 82 insertions(+) diff --git a/R/vis_dat.R b/R/vis_dat.R index 9fadb51..5c49c37 100644 --- a/R/vis_dat.R +++ b/R/vis_dat.R @@ -26,6 +26,12 @@ #' #' @seealso [vis_miss()] #' +#' @note Some datasets might be too large to plot, sometimes creating a blank +#' plot - if this happens, I would recommend downsampling the data, either +#' looking at the first 1,000 rows or by taking a random sample. This means +#' that you won't get the same "look" at the data, but it is better than +#' a blank plot! See example code for suggestions on doing this. +#' #' @examples #' #' vis_dat(airquality) @@ -34,6 +40,20 @@ #' vis_dat(airquality, palette = "cb_safe") #' vis_dat(airquality, palette = "qual") #' +#' \dontrun{ +#' # if you have a large dataset, you might want to try downsampling: +#' library(nycflights13) +#' library(dplyr) +#' flights %>% +#' sample_n(1000) %>% +#' vis_dat() +#' +#' flights %>% +#' slice(1:1000) %>% +#' vis_dat() +#' +#' } +#' #' @export vis_dat <- function(x, sort_type = TRUE, diff --git a/R/vis_miss.R b/R/vis_miss.R index 399c021..f0fef38 100644 --- a/R/vis_miss.R +++ b/R/vis_miss.R @@ -28,6 +28,12 @@ #' #' @seealso [vis_dat()] #' +#' @note Some datasets might be too large to plot, sometimes creating a blank +#' plot - if this happens, I would recommend downsampling the data, either +#' looking at the first 1,000 rows or by taking a random sample. This means +#' that you won't get the same "look" at the data, but it is better than +#' a blank plot! See example code for suggestions on doing this.
+#' #' @examples #' #' vis_miss(airquality) @@ -36,6 +42,20 @@ #' #' vis_miss(airquality, sort_miss = TRUE) #' +#' \dontrun{ +#' # if you have a large dataset, you might want to try downsampling: +#' library(nycflights13) +#' library(dplyr) +#' flights %>% +#' sample_n(1000) %>% +#' vis_miss() +#' +#' flights %>% +#' slice(1:1000) %>% +#' vis_miss() +#' +#' } +#' #' @export vis_miss <- function(x, cluster = FALSE, diff --git a/man/vis_dat.Rd b/man/vis_dat.Rd index a06628d..65dace4 100644 --- a/man/vis_dat.Rd +++ b/man/vis_dat.Rd @@ -34,6 +34,13 @@ dataframe. Cells are coloured according to what class they are and whether the values are missing. As \code{vis_dat} returns a ggplot object, it is very easy to customize and change labels, and customize the plot } +\note{ +Some datasets might be too large to plot, sometimes creating a blank +plot - if this happens, I would recommend downsampling the data, either +looking at the first 1,000 rows or by taking a random sample. This means +that you won't get the same "look" at the data, but it is better than +a blank plot! See example code for suggestions on doing this. +} \examples{ vis_dat(airquality) @@ -42,6 +49,20 @@ vis_dat(airquality) vis_dat(airquality, palette = "cb_safe") vis_dat(airquality, palette = "qual") +\dontrun{ +# if you have a large dataset, you might want to try downsampling: +library(nycflights13) +library(dplyr) +flights \%>\% + sample_n(1000) \%>\% + vis_dat() + +flights \%>\% + slice(1:1000) \%>\% + vis_dat() + +} + } \seealso{ \code{\link[=vis_miss]{vis_miss()}} diff --git a/man/vis_miss.Rd b/man/vis_miss.Rd index 8013051..b1e849c 100644 --- a/man/vis_miss.Rd +++ b/man/vis_miss.Rd @@ -36,6 +36,13 @@ dataframe, colouring cells according to missingness, where black indicates a missing cell and grey indicates a present cell. As it returns a ggplot object, it is very easy to customize and change labels.
} +\note{ +Some datasets might be too large to plot, sometimes creating a blank +plot - if this happens, I would recommend downsampling the data, either +looking at the first 1,000 rows or by taking a random sample. This means +that you won't get the same "look" at the data, but it is better than +a blank plot! See example code for suggestions on doing this. +} \examples{ vis_miss(airquality) @@ -44,6 +51,20 @@ vis_miss(airquality, cluster = TRUE) vis_miss(airquality, sort_miss = TRUE) +\dontrun{ +# if you have a large dataset, you might want to try downsampling: +library(nycflights13) +library(dplyr) +flights \%>\% + sample_n(1000) \%>\% + vis_miss() + +flights \%>\% + slice(1:1000) \%>\% + vis_miss() + +} + } \seealso{ \code{\link[=vis_dat]{vis_dat()}}