diff --git a/R/rdeephaven/R/aggregate_wrapper.R b/R/rdeephaven/R/aggregate_wrapper.R index ccfb0689124..daafae48361 100644 --- a/R/rdeephaven/R/aggregate_wrapper.R +++ b/R/rdeephaven/R/aggregate_wrapper.R @@ -1,95 +1,85 @@ -#' @export Aggregation <- R6Class("Aggregation", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, - initialize = function(aggregation) { - if (class(aggregation) != "Rcpp_INTERNAL_Aggregate") { - stop("'aggregation' should be an internal Deephaven Aggregation. If you're seeing this,\n you are trying to call the constructor of an Aggregation directly, which is not advised.\n Please use one of the provided aggregation functions instead.") + .internal_num_cols = NULL, + .internal_agg_name = NULL, + initialize = function(aggregation, agg_name, ...) { + self$.internal_agg_name <- agg_name + args <- list(...) + if (any(names(args) == "cols")) { + self$.internal_num_cols <- length(args$cols) } - self$.internal_rcpp_object <- aggregation + self$.internal_rcpp_object <- do.call(aggregation, args) } ) ) ### All of the functions below return an instance of the above class -#' @export agg_first <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_first(cols))) + return(Aggregation$new(INTERNAL_agg_first, "agg_first", cols=cols)) } -#' @export agg_last <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_last(cols))) + return(Aggregation$new(INTERNAL_agg_last, "agg_last", cols=cols)) } -#' @export agg_min <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_min(cols))) + return(Aggregation$new(INTERNAL_agg_min, "agg_min", cols=cols)) } -#' @export agg_max <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_max(cols))) + return(Aggregation$new(INTERNAL_agg_max, "agg_max", cols=cols)) } -#' @export agg_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_sum(cols))) + return(Aggregation$new(INTERNAL_agg_sum, "agg_sum", cols=cols)) } -#' @export agg_abs_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_abs_sum(cols))) + return(Aggregation$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols=cols)) } -#' @export agg_avg <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_avg(cols))) + return(Aggregation$new(INTERNAL_agg_avg, "agg_avg", cols=cols)) } -#' @export agg_w_avg <- function(wcol, cols = character()) { verify_string("wcol", wcol, TRUE) verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_w_avg(wcol, cols))) + return(Aggregation$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol=wcol, cols=cols)) } -#' @export agg_median <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_median(cols))) + return(Aggregation$new(INTERNAL_agg_median, "agg_median", cols=cols)) } -#' @export agg_var <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_var(cols))) + return(Aggregation$new(INTERNAL_agg_var, "agg_var", cols=cols)) } -#' @export agg_std <- function(cols = character()) { verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_std(cols))) + return(Aggregation$new(INTERNAL_agg_std, "agg_std", cols=cols)) } -#' @export agg_percentile <- function(percentile, cols = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("cols", cols, FALSE) - return(Aggregation$new(INTERNAL_agg_percentile(percentile, cols))) + return(Aggregation$new(INTERNAL_agg_percentile, "agg_percentile", percentile=percentile, cols=cols)) } -#' @export agg_count <- function(col) { verify_string("col", col, TRUE) - return(Aggregation$new(INTERNAL_agg_count(col))) -} + return(Aggregation$new(INTERNAL_agg_count, "agg_count", col=col)) +} \ No newline at end of file diff --git a/R/rdeephaven/R/helper_functions.R b/R/rdeephaven/R/helper_functions.R index f378f089408..165834a97cb 100644 --- a/R/rdeephaven/R/helper_functions.R +++ b/R/rdeephaven/R/helper_functions.R @@ -20,7 +20,7 @@ verify_type <- function(arg_name, candidate, required_type, message_type_name, i } else { stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got an object of class ", first_class(candidate), ".")) } - } else if (is_scalar && (length(candidate) != 1)) { + } else if (is_scalar && (length(c(candidate)) != 1)) { stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got a vector of length ", length(candidate), ".")) } } diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index 4e6c481279f..63c2b8071d9 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -107,9 +107,19 @@ TableHandle <- R6Class("TableHandle", verify_type("aggs", aggs, "Aggregation", "Deephaven Aggregation", FALSE) verify_string("by", by, FALSE) aggs <- c(aggs) + for (i in 1:length(aggs)) { + if (!is.null(aggs[[i]]$.internal_num_cols) && aggs[[i]]$.internal_num_cols == 0) { + stop(paste0("Aggregations with no columns cannot be used in 'agg_by'. Got '", aggs[[i]]$.internal_agg_name, "' at index ", i, " with an empty 'cols' argument.")) + } + } unwrapped_aggs <- lapply(aggs, strip_r6_wrapping) return(TableHandle$new(self$.internal_rcpp_object$agg_by(unwrapped_aggs, by))) }, + agg_all_by = function(agg, by = character()) { + verify_type("agg", agg, "Aggregation", "Deephaven Aggregation", TRUE) + verify_string("by", by, FALSE) + return(TableHandle$new(self$.internal_rcpp_object$agg_all_by(agg$.internal_rcpp_object, by))) + }, first_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$first_by(by))) @@ -170,7 +180,7 @@ TableHandle <- R6Class("TableHandle", verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$percentile_by(percentile, by))) }, - count_by = function(col = "n", by = character()) { + count_by = function(col, by = character()) { verify_string("col", col, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$count_by(col, by))) diff --git a/R/rdeephaven/inst/tests/testthat/test_agg_by.R b/R/rdeephaven/inst/tests/testthat/test_agg_by.R index 12f30300f53..4669f3440a5 100644 --- a/R/rdeephaven/inst/tests/testthat/test_agg_by.R +++ b/R/rdeephaven/inst/tests/testthat/test_agg_by.R @@ -55,6 +55,8 @@ setup <- function() { )) } +##### TESTING GOOD INPUTS ##### + test_that("agg_first behaves as expected", { data <- setup() @@ -105,6 +107,21 @@ test_that("agg_first behaves as expected", { agg_by(agg_first(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), first)) + new_th7 <- data$th5$ + agg_all_by(agg_first(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), first)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_first(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -159,6 +176,21 @@ test_that("agg_last behaves as expected", { agg_by(agg_last(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), last)) + new_th7 <- data$th5$ + agg_all_by(agg_last(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), last)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_last(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -213,6 +245,21 @@ test_that("agg_min behaves as expected", { agg_by(agg_min(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), min)) + new_th7 <- data$th5$ + agg_all_by(agg_min(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), min)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_min(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -267,6 +314,21 @@ test_that("agg_max behaves as expected", { agg_by(agg_max(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + group_by(X) %>% + summarise(across(everything(), max)) + new_th7 <- data$th5$ + agg_all_by(agg_max(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), max)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_max(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -321,6 +383,23 @@ test_that("agg_sum behaves as expected", { agg_by(agg_sum(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), sum)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_sum(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), sum)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_sum(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -375,6 +454,23 @@ test_that("agg_abs_sum behaves as expected", { agg_by(agg_abs_sum(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), ~ sum(abs(.x)))) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_abs_sum(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), ~ sum(abs(.x)))) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_abs_sum(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -429,6 +525,23 @@ test_that("agg_avg behaves as expected", { agg_by(agg_avg(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), mean)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_avg(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), mean)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_avg(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -493,6 +606,25 @@ test_that("agg_w_avg behaves as expected", { agg_by(agg_w_avg("weights", c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), ~ weighted.mean(.x, Number2))) %>% + select(-Number2) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_w_avg("Number2"), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), ~ weighted.mean(.x, Number2))) %>% + select(-Number2) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_w_avg("Number2"), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -547,6 +679,23 @@ test_that("agg_median behaves as expected", { agg_by(agg_median(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), median)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_median(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), median)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_median(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -601,6 +750,23 @@ test_that("agg_var behaves as expected", { agg_by(agg_var(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), var)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_var(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), var)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_var(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -655,6 +821,23 @@ test_that("agg_std behaves as expected", { agg_by(agg_std(c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_tb7 <- data$df5 %>% + select(-Y) %>% + group_by(X) %>% + summarise(across(everything(), sd)) + new_th7 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_std(), "X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb7)) + + new_th8 <- rbind(data$df5, data$df6, data$df6, data$df5) %>% + group_by(X, Y) %>% + summarise(across(everything(), sd)) + new_tb8 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_std(), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb8)) data$client$close() }) @@ -690,6 +873,26 @@ test_that("agg_percentile behaves as expected", { agg_by(agg_percentile(0.3, c("Number1", "Number2")), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th3), new_df3) + + new_df4 <- data.frame( + X = c("A", "B", "C"), + Number1 = c(50, -44, -70), + Number2 = c(-50, 76, 130) + ) + new_th4 <- data$th5$ + drop_columns("Y")$ + agg_all_by(agg_percentile(0.4), "X") + expect_equal(as.data.frame(new_th4), new_df4) + + new_df5 <- data.frame( + X = c("A", "B", "A", "C", "B", "B", "C", "B", "A", "C"), + Y = c("M", "N", "O", "N", "P", "O", "M", "M", "P", "P"), + Number1 = c(50, -44, 1, 11, -66, 99, -70, 86, -45, 0), + Number2 = c(-55, 76, 12, 4, 137, 45, 214, -6, 34, -76) + ) + new_th5 <- merge_tables(data$th5, data$th6, data$th6, data$th5)$ + agg_all_by(agg_percentile(0.4), c("X", "Y")) + expect_equal(as.data.frame(new_th4), new_df4) data$client$close() }) @@ -738,6 +941,33 @@ test_that("agg_count behaves as expected", { agg_by(agg_count("n"), c("X", "Y"))$ sort(c("X", "Y")) expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + new_th7 <- data$th5$ + agg_all_by(agg_count("n"), "X")$ + sort("X") + expect_equal(as.data.frame(new_th7), as.data.frame(new_tb5)) + + new_th8 <- data$th6$ + agg_all_by(agg_count("n"), c("X", "Y"))$ + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th8), as.data.frame(new_tb6)) + + data$client$close() +}) + +##### TESTING BAD INPUTS ##### + +test_that("agg_by behaves nicely when given bad input", { + data <- setup() + + expect_error(data$th1$agg_by(agg_first()), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_first' at index 1 with an empty 'cols' argument.") + + expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last())), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_last' at index 2 with an empty 'cols' argument.") + + expect_error(data$th1$agg_by(c(agg_first("int_col"), agg_last("int_col"), agg_count("n"), agg_avg())), + "Aggregations with no columns cannot be used in 'agg_by'. Got 'agg_avg' at index 4 with an empty 'cols' argument.") data$client$close() }) diff --git a/R/rdeephaven/src/client.cpp b/R/rdeephaven/src/client.cpp index df1f6a4360c..150ff74e966 100644 --- a/R/rdeephaven/src/client.cpp +++ b/R/rdeephaven/src/client.cpp @@ -152,7 +152,12 @@ class TableHandleWrapper { TableHandleWrapper* AggBy(Rcpp::List aggregations, std::vector group_by_columns) { std::vector converted_aggregations = convertRcppListToVectorOfTypeAggregate(aggregations); return new TableHandleWrapper(internal_tbl_hdl.By(deephaven::client::AggregateCombo::Create(converted_aggregations), group_by_columns)); - } + }; + + TableHandleWrapper* AggAllBy(AggregateWrapper &aggregation, std::vector group_by_columns) { + std::vector converted_aggregation = {aggregation.internal_agg_op}; + return new TableHandleWrapper(internal_tbl_hdl.By(deephaven::client::AggregateCombo::Create(converted_aggregation), group_by_columns)); + }; TableHandleWrapper* FirstBy(std::vector cols) { return new TableHandleWrapper(internal_tbl_hdl.FirstBy(cols)); @@ -404,7 +409,6 @@ class ClientWrapper { return new TableHandleWrapper(internal_tbl_hdl_mngr.TimeTable(period_ISO, start_time_ISO)); }; - TableHandleWrapper* MakeTableHandleFromTicket(std::string ticket) { return new TableHandleWrapper(internal_tbl_hdl_mngr.MakeTableHandleFromTicket(ticket)); } @@ -527,6 +531,8 @@ RCPP_MODULE(DeephavenInternalModule) { .method("ungroup", &TableHandleWrapper::Ungroup) .method("agg_by", &TableHandleWrapper::AggBy) + .method("agg_all_by", &TableHandleWrapper::AggAllBy) + .method("first_by", &TableHandleWrapper::FirstBy) .method("last_by", &TableHandleWrapper::LastBy) .method("head_by", &TableHandleWrapper::HeadBy)