From 3cf424332d3ecea618ec92c80f684ac6de7eb2a8 Mon Sep 17 00:00:00 2001 From: Teun van den Brand Date: Wed, 28 Aug 2024 14:44:38 +0200 Subject: [PATCH] draft summarise stat --- DESCRIPTION | 1 + NAMESPACE | 2 + R/stat-summarise.R | 129 +++++++++++++++++++++++++++++++ man/ggplot2-ggproto.Rd | 5 +- man/stat_summarise.Rd | 171 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 R/stat-summarise.R create mode 100644 man/stat_summarise.Rd diff --git a/DESCRIPTION b/DESCRIPTION index b4cd9ec950..ab1ffe9fd1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -262,6 +262,7 @@ Collate: 'stat-smooth-methods.R' 'stat-smooth.R' 'stat-sum.R' + 'stat-summarise.R' 'stat-summary-2d.R' 'stat-summary-bin.R' 'stat-summary-hex.R' diff --git a/NAMESPACE b/NAMESPACE index f0ccf3bec1..31c870a5aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -274,6 +274,7 @@ export(StatSf) export(StatSfCoordinates) export(StatSmooth) export(StatSum) +export(StatSummarise) export(StatSummary) export(StatSummary2d) export(StatSummaryBin) @@ -686,6 +687,7 @@ export(stat_sf_coordinates) export(stat_smooth) export(stat_spoke) export(stat_sum) +export(stat_summarise) export(stat_summary) export(stat_summary2d) export(stat_summary_2d) diff --git a/R/stat-summarise.R b/R/stat-summarise.R new file mode 100644 index 0000000000..183097b742 --- /dev/null +++ b/R/stat-summarise.R @@ -0,0 +1,129 @@ +#' Arbitrary summaries by group +#' +#' `stat_summarise()` handles arbitrary summaries by group. Unlike +#' [`stat_summary()`], it does not automatically summarise by `x`. +#' +#' @inheritParams stat_identity +#' @param fun A function that is given the complete data per group and should +#' return a data.frame with variables compatible with the layer's `geom` +#' argument. Defaults to `express_group()` which will evaluate +#' [data-masked][rlang::args_data_masking] expressions passed to `fun.args`. +#' @param fun.args Optional additional arguments to pass to `fun`. When +#' `fun = express_group` (default), can be a list of expressions created by +#' `vars()`. +#' @eval rd_aesthetics("stat", "summarise") +#' @export +#' +#' @examples +#' # Using a helper summary function +#' my_summary_fun <- function(data) { +#' data.frame( +#' x = min(data$x) - 0.5, +#' xend = max(data$x) + 0.5, +#' y = mean(data$y) +#' ) +#' } +#' +#' ggplot(mpg, aes(drv, displ)) + +#' geom_jitter(aes(colour = drv)) + +#' stat_summarise( +#' geom = "segment", +#' fun = my_summary_fun +#' ) +#' +#' # Making the same summary using the `exprs` argument +#' ggplot(mpg, aes(drv, displ)) + +#' geom_jitter(aes(colour = drv)) + +#' stat_summarise( +#' geom = "segment", +#' fun.args = vars( +#' # Note: we use `xend` first to not mutate `x` too soon +#' xend = max(x) + 0.5, +#' x = min(x) - 0.5, +#' y = mean(y) +#' ) +#' ) +#' +#' # You can use a temporary variable that needn't conform to data.frame columns +#' # as long as it is removed afterwards +#' ggplot(mpg, aes(displ, colour = drv)) + +#' stat_summarise( +#' geom = "line", +#' fun.args = vars( +#' dens = density(x), # non-vector temporary variable +#' x = dens$x, +#' y = dens$y, +#' dens = NULL # deleting the temporary variable +#' ) +#' ) +stat_summarise <- function( + mapping = NULL, + data = NULL, + geom = "point", + position = "identity", + ..., + fun = express_group, + fun.args = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) { + + layer( + data = data, + mapping = mapping, + stat = StatSummarise, + geom = geom, + position = position, + show.legend = show.legend, + inherit.aes = inherit.aes, + params = list2( + na.rm = na.rm, + fun = fun, + fun.args = fun.args, + ... + ) + ) +} + +#' @rdname ggplot2-ggproto +#' @format NULL +#' @usage NULL +#' @export +StatSummarise <- ggproto( + "StatSummarise", Stat, + + setup_params = function(data, params) { + params$fun <- allow_lambda(params$fun) + check_function(params$fun, arg = "fun") + if (!is_named2(params$fun.args)) { + cli::cli_abort("Every element in {.arg fun.args} must be named.") + } + if (identical(params$fun, express_group) && length(params$fun.args) > 0) { + check_object( + params$fun.args, is_quosures, arg = "fun.args", + "generated by `vars()`", allow_null = TRUE + ) + } + params + }, + + compute_group = function(data, scales, fun = express_group, fun.args = list()) { + inject(fun(data, !!!fun.args)) + } +) + +express_group <- function(data, ...) { + exprs <- list2(...) + if (length(exprs) < 1) { + return(data) + } + data <- unclass(data) + # We append each evaluated expression to `data`, so that the next expression + # can use the result of all previously evaluated expressions + nms <- names(exprs) + for (i in seq_along(exprs)) { + data[[nms[[i]]]] <- eval_tidy(exprs[[i]], data) + } + data_frame0(!!!data[unique0(names(exprs))]) +} diff --git a/man/ggplot2-ggproto.Rd b/man/ggplot2-ggproto.Rd index c3384f1e45..ae36223102 100644 --- a/man/ggplot2-ggproto.Rd +++ b/man/ggplot2-ggproto.Rd @@ -25,8 +25,8 @@ % R/stat-count.R, R/stat-density-2d.R, R/stat-density.R, R/stat-ecdf.R, % R/stat-ellipse.R, R/stat-function.R, R/stat-identity.R, R/stat-qq-line.R, % R/stat-qq.R, R/stat-quantilemethods.R, R/stat-smooth.R, R/stat-sum.R, -% R/stat-summary-2d.R, R/stat-summary-bin.R, R/stat-summary-hex.R, -% R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R +% R/stat-summarise.R, R/stat-summary-2d.R, R/stat-summary-bin.R, +% R/stat-summary-hex.R, R/stat-summary.R, R/stat-unique.R, R/stat-ydensity.R \docType{data} \name{ggplot2-ggproto} \alias{ggplot2-ggproto} @@ -144,6 +144,7 @@ \alias{StatQuantile} \alias{StatSmooth} \alias{StatSum} +\alias{StatSummarise} \alias{StatSummary2d} \alias{StatSummaryBin} \alias{StatSummaryHex} diff --git a/man/stat_summarise.Rd b/man/stat_summarise.Rd new file mode 100644 index 0000000000..7002194126 --- /dev/null +++ b/man/stat_summarise.Rd @@ -0,0 +1,171 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stat-summarise.R +\name{stat_summarise} +\alias{stat_summarise} +\title{Arbitrary summaries} +\usage{ +stat_summarise( + mapping = NULL, + data = NULL, + geom = "point", + position = "identity", + ..., + fun = express_group, + fun.args = list(), + na.rm = FALSE, + show.legend = NA, + inherit.aes = TRUE +) +} +\arguments{ +\item{mapping}{Set of aesthetic mappings created by \code{\link[=aes]{aes()}}. If specified and +\code{inherit.aes = TRUE} (the default), it is combined with the default mapping +at the top level of the plot. You must supply \code{mapping} if there is no plot +mapping.} + +\item{data}{The data to be displayed in this layer. There are three +options: + +If \code{NULL}, the default, the data is inherited from the plot +data as specified in the call to \code{\link[=ggplot]{ggplot()}}. + +A \code{data.frame}, or other object, will override the plot +data. All objects will be fortified to produce a data frame. See +\code{\link[=fortify]{fortify()}} for which variables will be created. + +A \code{function} will be called with a single argument, +the plot data. The return value must be a \code{data.frame}, and +will be used as the layer data. A \code{function} can be created +from a \code{formula} (e.g. \code{~ head(.x, 10)}).} + +\item{geom}{The geometric object to use to display the data for this layer. +When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument +can be used to override the default coupling between stats and geoms. The +\code{geom} argument accepts the following: +\itemize{ +\item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. +\item A string naming the geom. To give the geom as a string, strip the +function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, +give the geom as \code{"point"}. +\item For more information and other ways to specify the geom, see the +\link[=layer_geoms]{layer geom} documentation. +}} + +\item{position}{A position adjustment to use on the data for this layer. This +can be used in various ways, including to prevent overplotting and +improving the display. The \code{position} argument accepts the following: +\itemize{ +\item The result of calling a position function, such as \code{position_jitter()}. +This method allows for passing extra arguments to the position. +\item A string naming the position adjustment. To give the position as a +string, strip the function name of the \code{position_} prefix. For example, +to use \code{position_jitter()}, give the position as \code{"jitter"}. +\item For more information and other ways to specify the position, see the +\link[=layer_positions]{layer position} documentation. +}} + +\item{...}{Other arguments passed on to \code{\link[=layer]{layer()}}'s \code{params} argument. These +arguments broadly fall into one of 4 categories below. Notably, further +arguments to the \code{position} argument, or aesthetics that are required +can \emph{not} be passed through \code{...}. Unknown arguments that are not part +of the 4 categories below are ignored. +\itemize{ +\item Static aesthetics that are not mapped to a scale, but are at a fixed +value and apply to the layer as a whole. For example, \code{colour = "red"} +or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} +section that lists the available options. The 'required' aesthetics +cannot be passed on to the \code{params}. Please note that while passing +unmapped aesthetics as vectors is technically possible, the order and +required length is not guaranteed to be parallel to the input data. +\item When constructing a layer using +a \verb{stat_*()} function, the \code{...} argument can be used to pass on +parameters to the \code{geom} part of the layer. An example of this is +\code{stat_density(geom = "area", outline.type = "both")}. The geom's +documentation lists which parameters it can accept. +\item Inversely, when constructing a layer using a +\verb{geom_*()} function, the \code{...} argument can be used to pass on parameters +to the \code{stat} part of the layer. An example of this is +\code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation +lists which parameters it can accept. +\item The \code{key_glyph} argument of \code{\link[=layer]{layer()}} may also be passed on through +\code{...}. This can be one of the functions described as +\link[=draw_key]{key glyphs}, to change the display of the layer in the legend. +}} + +\item{fun}{A function that is given the complete data per group and should +return a data.frame with variables compatible with the layer's \code{geom} +argument. Defaults to \code{express_group()} which will evaluate +\link[rlang:args_data_masking]{data-masked} expressions passed to \code{fun.args}.} + +\item{fun.args}{Optional additional arguments to pass to \code{fun}. When +\code{fun = express_group} (default), can be a list of expressions created by +\code{vars()}.} + +\item{show.legend}{logical. Should this layer be included in the legends? +\code{NA}, the default, includes if any aesthetics are mapped. +\code{FALSE} never includes, and \code{TRUE} always includes. +It can also be a named logical vector to finely select the aesthetics to +display. To include legend keys for all levels, even +when no data exists, use \code{TRUE}. If \code{NA}, all levels are shown in legend, +but unobserved levels are omitted.} + +\item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, +rather than combining with them. This is most useful for helper functions +that define both data and aesthetics and shouldn't inherit behaviour from +the default plot specification, e.g. \code{\link[=borders]{borders()}}.} +} +\description{ +Arbitrary summaries +} +\section{Aesthetics}{ + +\code{stat_summarise()} understands the following aesthetics (required aesthetics are in bold): +\itemize{ +\item \code{\link[=aes_group_order]{group}} +} +Learn more about setting these aesthetics in \code{vignette("ggplot2-specs")}. +} + +\examples{ +# Using a helper summary function +my_summary_fun <- function(data) { + data.frame( + x = min(data$x) - 0.5, + xend = max(data$x) + 0.5, + y = mean(data$y) + ) +} + +ggplot(mpg, aes(drv, displ)) + + geom_jitter(aes(colour = drv)) + + stat_summarise( + geom = "segment", + fun = my_summary_fun + ) + +# Making the same summary using the `exprs` argument +ggplot(mpg, aes(drv, displ)) + + geom_jitter(aes(colour = drv)) + + stat_summarise( + geom = "segment", + fun.args = vars( + # Note: we use `xend` first to not mutate `x` too soon + xend = max(x) + 0.5, + x = min(x) - 0.5, + y = mean(y) + ) + ) + +# You can use a temporary variable that needn't conform to data.frame columns +# as long as it is removed afterwards +ggplot(mpg, aes(displ, colour = drv)) + + stat_summarise( + geom = "line", + fun.args = vars( + dens = density(x), # non-vector temporary variable + x = dens$x, + y = dens$y, + dens = NULL # deleting the temporary variable + ) + ) +}