From 8d7bc026bbaeb432c34256b3724cd677d6cf84ed Mon Sep 17 00:00:00 2001
From: Nicholas Tierney <nicholas.tierney@gmail.com>
Date: Wed, 21 Mar 2018 10:08:13 +1100
Subject: [PATCH] add note  + examples  for downsampling data for plotting -
 see #32.

---
 R/vis_dat.R     | 20 ++++++++++++++++++++
 R/vis_miss.R    | 20 ++++++++++++++++++++
 man/vis_dat.Rd  | 21 +++++++++++++++++++++
 man/vis_miss.Rd | 21 +++++++++++++++++++++
 4 files changed, 82 insertions(+)

diff --git a/R/vis_dat.R b/R/vis_dat.R
index 9fadb51..5c49c37 100644
--- a/R/vis_dat.R
+++ b/R/vis_dat.R
@@ -26,6 +26,12 @@
 #'
 #' @seealso [vis_miss()]
 #'
+#' @note Some datasets might be too large to plot, sometimes creating a blank
+#'   plot - if this happens, I would recommend downsampling the data, either
+#'   looking at the first 1,000 rows or by taking a random sample. This means
+#'   that you won't get the same "look" at the data, but it is better than
+#'   a blank plot! See example code for suggestions on doing this.
+#'
 #' @examples
 #'
 #' vis_dat(airquality)
@@ -34,6 +40,20 @@
 #' vis_dat(airquality, palette = "cb_safe")
 #' vis_dat(airquality, palette = "qual")
 #'
+#' \dontrun{
+#' # if you have a large dataset, you might want to try downsampling:
+#' library(nycflights13)
+#' library(dplyr)
+#' flights %>%
+#'   sample_n(1000) %>%
+#'   vis_dat()
+#'
+#' flights %>%
+#'   slice(1:1000) %>%
+#'   vis_dat()
+#'
+#' }
+#'
 #' @export
 vis_dat <- function(x,
                     sort_type = TRUE,
diff --git a/R/vis_miss.R b/R/vis_miss.R
index 399c021..f0fef38 100644
--- a/R/vis_miss.R
+++ b/R/vis_miss.R
@@ -28,6 +28,12 @@
 #'
 #' @seealso [vis_dat()]
 #'
+#' @note Some datasets might be too large to plot, sometimes creating a blank
+#'   plot - if this happens, I would recommend downsampling the data, either
+#'   looking at the first 1,000 rows or by taking a random sample. This means
+#'   that you won't get the same "look" at the data, but it is better than
+#'   a blank plot! See example code for suggestions on doing this.
+#'
 #' @examples
 #'
 #' vis_miss(airquality)
@@ -36,6 +42,20 @@
 #'
 #' vis_miss(airquality, sort_miss = TRUE)
 #'
+#' \dontrun{
+#' # if you have a large dataset, you might want to try downsampling:
+#' library(nycflights13)
+#' library(dplyr)
+#' flights %>%
+#'   sample_n(1000) %>%
+#'   vis_miss()
+#'
+#' flights %>%
+#'   slice(1:1000) %>%
+#'   vis_miss()
+#'
+#' }
+#'
 #' @export
 vis_miss <- function(x,
                      cluster = FALSE,
diff --git a/man/vis_dat.Rd b/man/vis_dat.Rd
index a06628d..65dace4 100644
--- a/man/vis_dat.Rd
+++ b/man/vis_dat.Rd
@@ -34,6 +34,13 @@ dataframe. Cells are coloured according to what class they are and whether
 the values are missing. As \code{vis_dat} returns a ggplot object, it is very
 easy to customize and change labels, and customize the plot
 }
+\note{
+Some datasets might be too large to plot, sometimes creating a blank
+plot - if this happens, I would recommend downsampling the data, either
+looking at the first 1,000 rows or by taking a random sample. This means
+that you won't get the same "look" at the data, but it is better than
+a blank plot! See example code for suggestions on doing this.
+}
 \examples{
 
 vis_dat(airquality)
@@ -42,6 +49,20 @@ vis_dat(airquality)
 vis_dat(airquality, palette = "cb_safe")
 vis_dat(airquality, palette = "qual")
 
+\dontrun{
+# if you have a large dataset, you might want to try downsampling:
+library(nycflights13)
+library(dplyr)
+flights \%>\%
+  sample_n(1000) \%>\%
+  vis_dat()
+
+flights \%>\%
+  slice(1:1000) \%>\%
+  vis_dat()
+
+}
+
 }
 \seealso{
 \code{\link[=vis_miss]{vis_miss()}}
diff --git a/man/vis_miss.Rd b/man/vis_miss.Rd
index 8013051..b1e849c 100644
--- a/man/vis_miss.Rd
+++ b/man/vis_miss.Rd
@@ -36,6 +36,13 @@ dataframe, colouring cells according to missingness, where black indicates
 a missing cell and grey indicates a present cell. As it returns a ggplot
 object, it is very easy to customize and change labels.
 }
+\note{
+Some datasets might be too large to plot, sometimes creating a blank
+plot - if this happens, I would recommend downsampling the data, either
+looking at the first 1,000 rows or by taking a random sample. This means
+that you won't get the same "look" at the data, but it is better than
+a blank plot! See example code for suggestions on doing this.
+}
 \examples{
 
 vis_miss(airquality)
@@ -44,6 +51,20 @@ vis_miss(airquality, cluster = TRUE)
 
 vis_miss(airquality, sort_miss = TRUE)
 
+\dontrun{
+# if you have a large dataset, you might want to try downsampling:
+library(nycflights13)
+library(dplyr)
+flights \%>\%
+  sample_n(1000) \%>\%
+  vis_miss()
+
+flights \%>\%
+  slice(1:1000) \%>\%
+  vis_miss()
+
+}
+
 }
 \seealso{
 \code{\link[=vis_dat]{vis_dat()}}