From df1b901b15439a670dbf38e668c16094e867b01f Mon Sep 17 00:00:00 2001 From: Teun van den Brand <49372158+teunbrand@users.noreply.github.com> Date: Mon, 7 Aug 2023 20:26:52 +0200 Subject: [PATCH] Discarding boxplot outliers (#5379) * Add `outliers` param to boxplot * Add test * Redocument * Add news bullet --- NEWS.md | 6 ++++++ R/geom-boxplot.R | 21 ++++++++++++++------- man/geom_boxplot.Rd | 16 +++++++++------- tests/testthat/test-geom-boxplot.R | 9 +++++++++ 4 files changed, 38 insertions(+), 14 deletions(-) diff --git a/NEWS.md b/NEWS.md index b62ba9eba0..099a3142dc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # ggplot2 (development version) +* `geom_boxplot()` gains an `outliers` argument to switch outliers on or off, + in a manner that does affect the scale range. For hiding outliers that does + not affect the scale range, you can continue to use `outlier.shape = NA` + (@teunbrand, #4892). + * Binned scales now treat `NA`s in limits the same way continuous scales do (#5355). @@ -9,6 +14,7 @@ deprecated. The `hjust` setting of the `legend.text` and `legend.title` elements continues to fulfil the role of text alignment (@teunbrand, #5347). + * Integers are once again valid input to theme arguments that expect numeric input (@teunbrand, #5369) diff --git a/R/geom-boxplot.R b/R/geom-boxplot.R index 6b4160dd88..33528157f3 100644 --- a/R/geom-boxplot.R +++ b/R/geom-boxplot.R @@ -33,6 +33,12 @@ #' @inheritParams geom_bar #' @param geom,stat Use to override the default connection between #' `geom_boxplot()` and `stat_boxplot()`. +#' @param outliers Whether to display (`TRUE`) or discard (`FALSE`) outliers +#' from the plot. Hiding or discarding outliers can be useful when, for +#' example, raw data points need to be displayed on top of the boxplot. +#' By discarding outliers, the axis limits will adapt to the box and whiskers +#' only, not the full data range. 
If outliers need to be hidden and the axes +#' need to show the full data range, please use `outlier.shape = NA` instead. #' @param outlier.colour,outlier.color,outlier.fill,outlier.shape,outlier.size,outlier.stroke,outlier.alpha #' Default aesthetics for outliers. Set to `NULL` to inherit from the #' aesthetics used for the box. @@ -40,12 +46,6 @@ #' In the unlikely event you specify both US and UK spellings of colour, the #' US spelling will take precedence. #' -#' Sometimes it can be useful to hide the outliers, for example when overlaying -#' the raw data points on top of the boxplot. Hiding the outliers can be achieved -#' by setting `outlier.shape = NA`. Importantly, this does not remove the outliers, -#' it only hides them, so the range calculated for the y-axis will be the -#' same with outliers shown and outliers hidden. -#' #' @param notch If `FALSE` (default) make a standard box plot. If #' `TRUE`, make a notched box plot. Notches are used to compare groups; #' if the notches of two boxes do not overlap, this suggests that the medians @@ -109,6 +109,7 @@ geom_boxplot <- function(mapping = NULL, data = NULL, stat = "boxplot", position = "dodge2", ..., + outliers = TRUE, outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL, @@ -133,6 +134,7 @@ geom_boxplot <- function(mapping = NULL, data = NULL, position$preserve <- "single" } } + check_bool(outliers) layer( data = data, @@ -143,6 +145,7 @@ geom_boxplot <- function(mapping = NULL, data = NULL, show.legend = show.legend, inherit.aes = inherit.aes, params = list2( + outliers = outliers, outlier.colour = outlier.color %||% outlier.colour, outlier.fill = outlier.fill, outlier.shape = outlier.shape, @@ -167,7 +170,7 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom, # need to declare `width` here in case this geom is used with a stat that # doesn't have a `width` parameter (e.g., `stat_identity`). 
- extra_params = c("na.rm", "width", "orientation"), + extra_params = c("na.rm", "width", "orientation", "outliers"), setup_params = function(data, params) { params$flipped_aes <- has_flipped_aes(data, params) @@ -180,6 +183,10 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom, data$width <- data$width %||% params$width %||% (resolution(data$x, FALSE) * 0.9) + if (isFALSE(params$outliers)) { + data$outliers <- NULL + } + if (!is.null(data$outliers)) { suppressWarnings({ out_min <- vapply(data$outliers, min, numeric(1)) diff --git a/man/geom_boxplot.Rd b/man/geom_boxplot.Rd index 86ea238000..5948e6b2c4 100644 --- a/man/geom_boxplot.Rd +++ b/man/geom_boxplot.Rd @@ -11,6 +11,7 @@ geom_boxplot( stat = "boxplot", position = "dodge2", ..., + outliers = TRUE, outlier.colour = NULL, outlier.color = NULL, outlier.fill = NULL, @@ -71,17 +72,18 @@ often aesthetics, used to set an aesthetic to a fixed value, like \code{colour = "red"} or \code{size = 3}. They may also be parameters to the paired geom/stat.} +\item{outliers}{Whether to display (\code{TRUE}) or discard (\code{FALSE}) outliers +from the plot. Hiding or discarding outliers can be useful when, for +example, raw data points need to be displayed on top of the boxplot. +By discarding outliers, the axis limits will adapt to the box and whiskers +only, not the full data range. If outliers need to be hidden and the axes +need to show the full data range, please use \code{outlier.shape = NA} instead.} + \item{outlier.colour, outlier.color, outlier.fill, outlier.shape, outlier.size, outlier.stroke, outlier.alpha}{Default aesthetics for outliers. Set to \code{NULL} to inherit from the aesthetics used for the box. In the unlikely event you specify both US and UK spellings of colour, the -US spelling will take precedence. - -Sometimes it can be useful to hide the outliers, for example when overlaying -the raw data points on top of the boxplot. Hiding the outliers can be achieved -by setting \code{outlier.shape = NA}. 
Importantly, this does not remove the outliers, -it only hides them, so the range calculated for the y-axis will be the -same with outliers shown and outliers hidden.} +US spelling will take precedence.} \item{notch}{If \code{FALSE} (default) make a standard box plot. If \code{TRUE}, make a notched box plot. Notches are used to compare groups; diff --git a/tests/testthat/test-geom-boxplot.R b/tests/testthat/test-geom-boxplot.R index 5414b3fdfa..f7ec35ef4c 100644 --- a/tests/testthat/test-geom-boxplot.R +++ b/tests/testthat/test-geom-boxplot.R @@ -8,6 +8,15 @@ test_that("geom_boxplot range includes all outliers", { expect_true(miny <= min(dat$y)) expect_true(maxy >= max(dat$y)) + + # Unless specifically directed not to + p <- ggplot_build(ggplot(dat, aes(x, y)) + geom_boxplot(outliers = FALSE)) + + miny <- p$layout$panel_params[[1]]$y.range[1] + maxy <- p$layout$panel_params[[1]]$y.range[2] + + expect_lte(maxy, max(dat$y)) + expect_gte(miny, min(dat$y)) }) test_that("geom_boxplot works in both directions", {