diff --git a/NEWS.md b/NEWS.md index a4bce3f6be..85999a742e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # ggplot2 (development version) +* Default aesthetics in stats are now evaluated prior to computation + (@teunbrand, #3860). * The `summary()` method for ggplots is now more terse about facets (@teunbrand, #5989). * `guide_bins()`, `guide_colourbar()` and `guide_coloursteps()` gain an `angle` diff --git a/R/layer.R b/R/layer.R index 8acb438c9e..ae1ef23888 100644 --- a/R/layer.R +++ b/R/layer.R @@ -58,8 +58,8 @@ #' `NA`, the default, includes if any aesthetics are mapped. #' `FALSE` never includes, and `TRUE` always includes. #' It can also be a named logical vector to finely select the aesthetics to -#' display. To include legend keys for all levels, even -#' when no data exists, use `TRUE`. If `NA`, all levels are shown in legend, +#' display. To include legend keys for all levels, even +#' when no data exists, use `TRUE`. If `NA`, all levels are shown in legend, #' but unobserved levels are omitted. #' @param inherit.aes If `FALSE`, overrides the default aesthetics, #' rather than combining with them. This is most useful for helper functions @@ -362,6 +362,7 @@ Layer <- ggproto("Layer", NULL, return(data_frame0()) self$computed_stat_params <- self$stat$setup_params(data, self$stat_params) + data <- self$stat$use_defaults(data) data <- self$stat$setup_data(data, self$computed_stat_params) self$stat$compute_layer(data, self$computed_stat_params, layout) }, diff --git a/R/stat-.R b/R/stat-.R index 2d56937b06..f26090cf6c 100644 --- a/R/stat-.R +++ b/R/stat-.R @@ -215,6 +215,33 @@ Stat <- ggproto("Stat", required_aes <- unlist(strsplit(self$required_aes, '|', fixed = TRUE)) } c(union(required_aes, names(self$default_aes)), self$optional_aes, "group") - } + }, + + use_defaults = function(self, data, default_aes = NULL) { + default_aes <- default_aes %||% self$default_aes + if (length(default_aes) == 0) { + return(data) + } + + missing_aes <- setdiff(names(default_aes), names(data)) + default_aes <- default_aes[missing_aes] + + delayed <- is_calculated_aes(default_aes) | is_staged_aes(default_aes) | + is_scaled_aes(default_aes) + default_aes <- default_aes[!delayed] + + if (length(default_aes) == 0) { + return(data) + } + + evaled <- compact(lapply(default_aes, eval_tidy)) + if (empty(data)) { + data <- as_gg_data_frame(evaled) + } else { + data[names(evaled)] <- evaled + } + + data + } ) diff --git a/R/stat-bin2d.R b/R/stat-bin2d.R index 69f57ebee3..31166b0e6d 100644 --- a/R/stat-bin2d.R +++ b/R/stat-bin2d.R @@ -66,9 +66,6 @@ StatBin2d <- ggproto("StatBin2d", Stat, xbin <- cut(data$x, xbreaks, include.lowest = TRUE, labels = FALSE) ybin <- cut(data$y, ybreaks, include.lowest = TRUE, labels = FALSE) - if (is.null(data$weight)) - data$weight <- 1 - out <- tapply_df(data$weight, list(xbin = xbin, ybin = ybin), sum, drop = drop) xdim <- bin_loc(xbreaks, out$xbin) diff --git a/R/stat-bindot.R b/R/stat-bindot.R index 85eecc4d54..273e0e8962 100644 --- a/R/stat-bindot.R +++ b/R/stat-bindot.R @@ -5,8 +5,7 @@ StatBindot <- ggproto("StatBindot", Stat, required_aes = "x", non_missing_aes = "weight", - default_aes = aes(y = after_stat(count)), - dropped_aes = c("bin", "bincenter"), # these are temporary variables that are created and then removed by the stat + default_aes = aes(y = after_stat(count), weight = 1L), setup_params = function(data, params) { if (is.null(params$binwidth)) { @@ -126,6 +125,7 @@ StatBindot <- ggproto("StatBindot", Stat, return(data) }, + # these are temporary variables that are created and then removed by the stat dropped_aes = c("weight", "bin", "bincenter") ) diff --git a/R/stat-binhex.R b/R/stat-binhex.R index 0b5d3991c6..77c2839fc5 100644 --- a/R/stat-binhex.R +++ b/R/stat-binhex.R @@ -51,8 +51,7 @@ StatBinhex <- ggproto("StatBinhex", Stat, check_installed("hexbin", reason = "for `stat_bin_hex()`.") binwidth <- binwidth %||% hex_binwidth(bins, scales) - wt <- data$weight %||% rep(1L, nrow(data)) - out <- hexBinSummarise(data$x, data$y, wt, binwidth, sum) + out <- hexBinSummarise(data$x, data$y, data$weight, binwidth, sum) out$density <- as.vector(out$value / sum(out$value, na.rm = TRUE)) out$ndensity <- out$density / max(out$density, na.rm = TRUE) out$count <- out$value diff --git a/R/stat-boxplot.R b/R/stat-boxplot.R index 46ce14879f..c0f107a61d 100644 --- a/R/stat-boxplot.R +++ b/R/stat-boxplot.R @@ -48,13 +48,12 @@ stat_boxplot <- function(mapping = NULL, data = NULL, #' @export StatBoxplot <- ggproto("StatBoxplot", Stat, required_aes = c("y|x"), - non_missing_aes = "weight", + default_aes = aes(x = 0, y = 0), # either the x or y aesthetic will get dropped during # statistical transformation, depending on the orientation dropped_aes = c("x", "y", "weight"), setup_data = function(self, data, params) { data <- flip_data(data, params$flipped_aes) - data$x <- data$x %||% 0 data <- remove_missing( data, na.rm = params$na.rm, @@ -118,7 +117,7 @@ StatBoxplot <- ggproto("StatBoxplot", Stat, n <- sum(!is.na(data$y)) } else { # Sum up weights for non-NA positions of y and weight - n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)]) + n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)]) } df$notchupper <- df$middle + 1.58 * iqr / sqrt(n) diff --git a/R/stat-count.R b/R/stat-count.R index fd78d1beaa..e694f93713 100644 --- a/R/stat-count.R +++ b/R/stat-count.R @@ -70,15 +70,13 @@ StatCount <- ggproto("StatCount", Stat, compute_group = function(self, data, scales, width = NULL, flipped_aes = FALSE) { data <- flip_data(data, flipped_aes) - x <- data$x - weight <- data$weight %||% rep(1, length(x)) - count <- as.vector(rowsum(weight, x, na.rm = TRUE)) + count <- as.vector(rowsum(data$weight, data$x, na.rm = TRUE)) bars <- data_frame0( count = count, prop = count / sum(abs(count)), - x = sort(unique0(x)), + x = sort(unique0(data$x)), width = width, flipped_aes = flipped_aes, .size = length(count) diff --git a/R/stat-quantilemethods.R b/R/stat-quantilemethods.R index 9afb7e0b92..c7e6732124 100644 --- a/R/stat-quantilemethods.R +++ b/R/stat-quantilemethods.R @@ -46,6 +46,8 @@ stat_quantile <- function(mapping = NULL, data = NULL, StatQuantile <- ggproto("StatQuantile", Stat, required_aes = c("x", "y"), + default_aes = aes(weight = 1), + compute_group = function(data, scales, quantiles = c(0.25, 0.5, 0.75), formula = NULL, xseq = NULL, method = "rq", method.args = list(), lambda = 1, na.rm = FALSE) { @@ -66,8 +68,6 @@ StatQuantile <- ggproto("StatQuantile", Stat, cli::cli_inform("Smoothing formula not specified. Using: {deparse(formula)}") } - if (is.null(data$weight)) data$weight <- 1 - if (is.null(xseq)) { xmin <- min(data$x, na.rm = TRUE) xmax <- max(data$x, na.rm = TRUE) diff --git a/R/stat-smooth.R b/R/stat-smooth.R index 147bd06e41..fa1dcd69c7 100644 --- a/R/stat-smooth.R +++ b/R/stat-smooth.R @@ -157,6 +157,8 @@ StatSmooth <- ggproto("StatSmooth", Stat, extra_params = c("na.rm", "orientation"), + default_aes = aes(weight = 1), + compute_group = function(data, scales, method = NULL, formula = NULL, se = TRUE, n = 80, span = 0.75, fullrange = FALSE, xseq = NULL, level = 0.95, method.args = list(), @@ -167,8 +169,6 @@ StatSmooth <- ggproto("StatSmooth", Stat, return(data_frame0()) } - if (is.null(data$weight)) data$weight <- 1 - if (is.null(xseq)) { if (is.integer(data$x)) { if (fullrange) {