From f3406b859afa6466eb9b9e2c5aee3bf148b8e035 Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 12:45:40 -0500 Subject: [PATCH 01/11] `usethis::use_tidy_upkeep_issue()` --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2e43e81c..5edb820f 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,3 +70,4 @@ LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.1 Config/testthat/edition: 3 +Config/usethis/last-upkeep: 2025-04-25 From d822b67a809275b87b70d51e20d3eaddf3f442bf Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 12:46:13 -0500 Subject: [PATCH 02/11] `usethis::use_air()` --- .Rbuildignore | 2 ++ .vscode/extensions.json | 5 +++++ .vscode/settings.json | 6 ++++++ air.toml | 0 4 files changed, 13 insertions(+) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json create mode 100644 air.toml diff --git a/.Rbuildignore b/.Rbuildignore index 7b7c1e1f..24a5b363 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -26,3 +26,5 @@ README_files/ ^\.github$ ^LICENSE\.md$ ^man-roxygen$ +^[\.]?air\.toml$ +^\.vscode$ diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..344f76eb --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,5 @@ +{ + "recommendations": [ + "Posit.air-vscode" + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..f2d0b79d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[r]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "Posit.air-vscode" + } +} diff --git a/air.toml b/air.toml new file mode 100644 index 00000000..e69de29b From e353e5d4ec3107e34dac5334af234cc7cc6b140e Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 12:46:34 -0500 Subject: [PATCH 03/11] `air format .` --- R/assume.R | 85 ++- R/calculate.R | 97 ++- R/deprecated.R | 10 +- R/fit.R | 17 +- R/generate.R | 129 ++-- R/get_confidence_interval.R | 73 ++- R/get_p_value.R | 46 +- R/hypothesize.R | 128 ++-- R/infer.R | 46 +- R/observe.R | 83 ++- R/rep_sample_n.R | 68 +- R/set_params.R | 25 +- R/shade_confidence_interval.R | 31 +- R/shade_p_value.R | 108 +++- R/specify.R | 104 +-- R/utils.R | 607 ++++++++++++------ R/visualize.R | 234 ++++--- R/wrappers.R | 377 ++++++----- data-raw/save_gss.R | 83 ++- tests/testthat/helper-data.R | 11 +- tests/testthat/test-aliases.R | 6 +- tests/testthat/test-assume.R | 78 ++- tests/testthat/test-calculate.R | 232 +++---- tests/testthat/test-fit.R | 17 +- tests/testthat/test-generate.R | 227 ++++--- tests/testthat/test-get_confidence_interval.R | 175 +++-- tests/testthat/test-get_p_value.R | 161 +++-- tests/testthat/test-hypothesize.R | 125 ++-- tests/testthat/test-observe.R | 39 +- tests/testthat/test-rep_sample_n.R | 65 +- .../testthat/test-shade_confidence_interval.R | 25 +- tests/testthat/test-shade_p_value.R | 100 +-- tests/testthat/test-specify.R | 34 +- tests/testthat/test-utils.R | 88 ++- tests/testthat/test-visualize.R | 203 +++--- tests/testthat/test-wrappers.R | 413 ++++++------ 36 files changed, 2633 insertions(+), 1717 deletions(-) diff --git a/R/assume.R b/R/assume.R index 07340410..f9bcd62a 100644 --- a/R/assume.R +++ b/R/assume.R @@ -169,8 +169,16 @@ assume <- function(x, distribution, df = NULL, ...) { # store df for easier passing to p* functions df = df, # store df in `specify`-esque format for use in `visualize` - distr_param = if (length(df) > 0) {df[1]} else {NULL}, - distr_param2 = if (length(df) == 2) {df[2]} else {NULL}, + distr_param = if (length(df) > 0) { + df[1] + } else { + NULL + }, + distr_param2 = if (length(df) == 2) { + df[2] + } else { + NULL + }, # bring along x attributes theory_type = attr(x, "theory_type"), params = attr(x, "params"), @@ -187,19 +195,23 @@ check_distribution <- function(x, distribution, df, ..., call = caller_env()) { dist <- tolower(distribution) if (!dist %in% c("f", "chisq", "t", "z")) { - cli_abort( + cli_abort( 'The distribution argument must be one of "Chisq", "F", "t", or "z".', call = call ) } - if ((dist == "f" && attr(x, "theory_type") != "ANOVA") || - (dist == "chisq" && !attr(x, "theory_type") %in% c("Chi-square test of indep", - "Chi-square Goodness of Fit")) || - (dist == "t" && !attr(x, "theory_type") %in% c("One sample t", - "Two sample t")) || - (dist == "z" && !attr(x, "theory_type") %in% c("One sample prop z", - "Two sample props z"))) { + if ( + (dist == "f" && attr(x, "theory_type") != "ANOVA") || + (dist == "chisq" && + !attr(x, "theory_type") %in% + c("Chi-square test of indep", "Chi-square Goodness of Fit")) || + (dist == "t" && + !attr(x, "theory_type") %in% c("One sample t", "Two sample t")) || + (dist == "z" && + !attr(x, "theory_type") %in% + c("One sample prop z", "Two sample props z")) + ) { if (has_explanatory(x)) { msg_tail <- glue( "a {get_stat_type_desc(attr(x, 'type_desc_explanatory'))} ", @@ -210,33 +222,36 @@ check_distribution <- function(x, distribution, df, ..., call = caller_env()) { msg_tail <- "no explanatory variable." } - cli_abort( + cli_abort( 'The supplied distribution {.val {distribution}} is not well-defined for a \\ {get_stat_type_desc(attr(x, "type_desc_response"))} response \\ - variable ({response_name(x)}) and {msg_tail}', call = call) + variable ({response_name(x)}) and {msg_tail}', + call = call + ) } if (!is.numeric(df) && !is.null(df)) { - cli_abort( + cli_abort( "{.fun assume} expects the {.arg df} argument to be a numeric vector, \\ but you supplied a {list(class(df))} object.", call = call - ) + ) } if (length(list(...)) != 0) { dots <- list(...) - cli_abort(c( - "{.fun assume} ignores the dots `...` argument, though the \\ + cli_abort( + c( + "{.fun assume} ignores the dots `...` argument, though the \\ {qty(dots)}argument{?s} {.field {names(dots)}} {?was/were} supplied. ", - i = "Did you forget to concatenate the {.arg df} argument with {.fun c}?"), + i = "Did you forget to concatenate the {.arg df} argument with {.fun c}?" + ), call = call ) } if (dist_df_length(distribution) != length(df) && !is.null(df)) { - cli_abort( '{distribution_desc(distribution)} distribution requires \\ {dist_df_length(distribution)} degrees of freedom argument{?s}, \\ @@ -266,7 +281,8 @@ dist_df_length <- function(distribution) { switch( tolower(distribution), `f` = 2, - `chisq` = , `t` = 1, + `chisq` = , + `t` = 1, `z` = 0 ) } @@ -291,16 +307,26 @@ df_desc <- function(df) { paste0( ' with ', - if (plural) {paste0(round(df), collapse = " and ")} else {round(df)}, + if (plural) { + paste0(round(df), collapse = " and ") + } else { + round(df) + }, ' degree', - if (!plural && df == 1) {''} else {'s'}, - ' of freedom') + if (!plural && df == 1) { + '' + } else { + 's' + }, + ' of freedom' + ) } } # process df for passing to p* functions process_df <- function(df) { - switch(as.character(length(df)), + switch( + as.character(length(df)), "0" = list(), "1" = list(df = df), "2" = list(df1 = df[1], df2 = df[2]) @@ -311,7 +337,6 @@ process_df <- function(df) { # hypothesize and, if it doesn't match the # supplied one, raise a message determine_df <- function(x, dist, df) { - if (!is.null(df) && !all(round(df) %in% round(acceptable_dfs(x)))) { cli_inform( "Message: The supplied {.arg df} argument does not match its \\ @@ -342,17 +367,19 @@ acceptable_dfs <- function(x) { unname( unlist( attr(x, "distr_param") <- - stats::t.test(response_variable(x) ~ - explanatory_variable(x))[["parameter"]] + stats::t.test(response_variable(x) ~ explanatory_variable(x))[[ + "parameter" + ]] ) ), # t.test param with var.equal = TRUE unname( unlist( attr(x, "distr_param") <- - stats::t.test(response_variable(x) ~ - explanatory_variable(x), - var.equal = TRUE)[["parameter"]] + stats::t.test( + response_variable(x) ~ explanatory_variable(x), + var.equal = TRUE + )[["parameter"]] ) ), # min(n1 - 1, n2 - 1) diff --git a/R/calculate.R b/R/calculate.R index 9bcff31a..846a8f83 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -86,15 +86,31 @@ #' @importFrom rlang !! sym quo enquo eval_tidy #' @family core functions #' @export -calculate <- function(x, - stat = c( - "mean", "median", "sum", "sd", "prop", "count", - "diff in means", "diff in medians", "diff in props", - "Chisq", "F", "slope", "correlation", "t", "z", - "ratio of props", "odds ratio", "ratio of means" - ), - order = NULL, - ...) { +calculate <- function( + x, + stat = c( + "mean", + "median", + "sum", + "sd", + "prop", + "count", + "diff in means", + "diff in medians", + "diff in props", + "Chisq", + "F", + "slope", + "correlation", + "t", + "z", + "ratio of props", + "odds ratio", + "ratio of means" + ), + order = NULL, + ... +) { check_type(x, tibble::is_tibble) check_if_mlr(x, "calculate") stat <- check_calculate_stat(stat) @@ -112,7 +128,10 @@ calculate <- function(x, # Use S3 method to match correct calculation result <- calc_impl( - structure(stat, class = gsub(" ", "_", stat)), x, order, ... + structure(stat, class = gsub(" ", "_", stat)), + x, + order, + ... ) result <- copy_attrs(to = result, from = x) @@ -129,7 +148,7 @@ calculate <- function(x, check_if_mlr <- function(x, fn, call = caller_env()) { if (fn == "calculate") { suggestion <- - "When working with multiple explanatory variables, use \\ + "When working with multiple explanatory variables, use \\ {.help [{.fun fit}](infer::fit.infer)} instead." } else { suggestion <- "" @@ -137,15 +156,16 @@ check_if_mlr <- function(x, fn, call = caller_env()) { if (is_mlr(x)) { cli_abort( - c("Multiple explanatory variables are not supported in {.fun {fn}}.", - i = suggestion), + c( + "Multiple explanatory variables are not supported in {.fun {fn}}.", + i = suggestion + ), call = call ) } } check_calculate_stat <- function(stat, call = caller_env()) { - check_type(stat, rlang::is_string, call = call) # Check for possible `stat` aliases @@ -199,8 +219,10 @@ check_input_vs_stat <- function(x, stat, call = caller_env()) { if (is_hypothesized(x)) { stat_nulls <- stat_hypotheses %>% - dplyr::filter(stat == !!stat & - hypothesis == attr(x, "null")) + dplyr::filter( + stat == !!stat & + hypothesis == attr(x, "null") + ) if (nrow(stat_nulls) == 0) { cli_abort( @@ -250,17 +272,19 @@ message_on_excessive_null <- function(x, stat = "mean", fn) { # User didn't supply "enough" information - no hypothesis for a theorized # statistic on a point estimate, so warn that a reasonable value was assumed. warn_on_insufficient_null <- function(x, stat, ...) { - if (!is_hypothesized(x) && - !has_explanatory(x) && + if ( + !is_hypothesized(x) && + !has_explanatory(x) && !stat %in% untheorized_stats && - !(stat == "t" && "mu" %in% names(list(...)))) { + !(stat == "t" && "mu" %in% names(list(...))) + ) { attr(x, "null") <- "point" attr(x, "params") <- assume_null(x, stat) cli_warn(c( "{get_stat_desc(stat)} requires a null \\ hypothesis to calculate the observed statistic.", - "Output assumes the following null value{print_params(x)}." + "Output assumes the following null value{print_params(x)}." )) } @@ -276,7 +300,7 @@ calc_impl_one_f <- function(f) { col <- base::setdiff(names(x), "replicate") if (!identical(dplyr::group_vars(x), "replicate")) { - x <- dplyr::group_by(x, replicate) + x <- dplyr::group_by(x, replicate) } res <- x %>% @@ -314,7 +338,7 @@ calc_impl_success_f <- function(f, output_name) { success <- attr(x, "success") if (!identical(dplyr::group_vars(x), "replicate")) { - x <- dplyr::group_by(x, replicate) + x <- dplyr::group_by(x, replicate) } res <- x %>% @@ -383,8 +407,8 @@ calc_impl_diff_f <- function(f, operator) { dplyr::group_by(replicate) %>% dplyr::summarize( stat = operator( - value[!!(explanatory_expr(x)) == order[1]], - value[!!(explanatory_expr(x)) == order[2]] + value[!!(explanatory_expr(x)) == order[1]], + value[!!(explanatory_expr(x)) == order[2]] ) ) @@ -468,11 +492,19 @@ calc_impl.Chisq <- function(type, x, order, ...) { } copy_attrs( - to = result, from = x, + to = result, + from = x, attrs = c( - "response", "success", "explanatory", "response_type", - "explanatory_type", "distr_param", "distr_param2", "theory_type", - "type_desc_response", "type_desc_explanatory" + "response", + "success", + "explanatory", + "response_type", + "explanatory_type", + "distr_param", + "distr_param2", + "theory_type", + "type_desc_response", + "type_desc_explanatory" ) ) } @@ -549,7 +581,8 @@ calc_impl.t <- function(type, x, order, ...) { df_out <- x %>% dplyr::summarize( stat = stats::t.test( - !!response_expr(x) ~ !!explanatory_expr(x), ... + !!response_expr(x) ~ !!explanatory_expr(x), + ... )[["statistic"]] ) } else if (theory_type(x) == "One sample t") { @@ -618,9 +651,9 @@ calc_impl.z <- function(type, x, order, ...) { df_out <- x %>% dplyr::summarize( - stat = ( - mean(rlang::eval_tidy(col) == rlang::eval_tidy(success), ...) - p0 - ) / sqrt((p0 * (1 - p0)) / num_rows) + stat = (mean(rlang::eval_tidy(col) == rlang::eval_tidy(success), ...) - + p0) / + sqrt((p0 * (1 - p0)) / num_rows) ) df_out diff --git a/R/deprecated.R b/R/deprecated.R index 3d5a9397..d6bd2fde 100644 --- a/R/deprecated.R +++ b/R/deprecated.R @@ -15,8 +15,12 @@ NULL #' @rdname deprecated #' @export -conf_int <- function(x, level = 0.95, type = "percentile", - point_estimate = NULL) { +conf_int <- function( + x, + level = 0.95, + type = "percentile", + point_estimate = NULL +) { lifecycle::deprecate_stop("0.4.0", "conf_int()", "get_confidence_interval()") } @@ -24,5 +28,5 @@ conf_int <- function(x, level = 0.95, type = "percentile", #' @rdname deprecated #' @export p_value <- function(x, obs_stat, direction) { - lifecycle::deprecate_stop("0.4.0", "conf_int()", "get_p_value()") + lifecycle::deprecate_stop("0.4.0", "conf_int()", "get_p_value()") } diff --git a/R/fit.R b/R/fit.R index 44876439..8384e653 100644 --- a/R/fit.R +++ b/R/fit.R @@ -162,10 +162,12 @@ check_family <- function(object, ..., call = caller_env()) { response_type <- attr(object, "type_desc_response") if (response_type == "mult") { - cli_abort(c( - "infer does not support fitting models for categorical response variables \\ + cli_abort( + c( + "infer does not support fitting models for categorical response variables \\ with more than two levels.", - i = "Please see {.fun multinom_reg} from the parsnip package."), + i = "Please see {.fun multinom_reg} from the parsnip package." + ), call = call ) } @@ -194,7 +196,6 @@ relevel_response <- function(x) { ) } - x } @@ -216,10 +217,10 @@ get_formula <- function(x) { fit_linear_model <- function(object, formula, ...) { stats::glm( - formula = formula, - data = object, - ... - ) %>% + formula = formula, + data = object, + ... + ) %>% broom::tidy() %>% dplyr::select( ., diff --git a/R/generate.R b/R/generate.R index 7860fe84..1f95891d 100755 --- a/R/generate.R +++ b/R/generate.R @@ -74,8 +74,13 @@ #' @importFrom dplyr group_by #' @family core functions #' @export -generate <- function(x, reps = 1, type = NULL, - variables = !!response_expr(x), ...) { +generate <- function( + x, + reps = 1, + type = NULL, + variables = !!response_expr(x), + ... +) { # Check type argument, warning if necessary type <- sanitize_generation_type(type) auto_type <- sanitize_generation_type(attr(x, "type")) @@ -113,14 +118,14 @@ sanitize_generation_type <- function(x, call = caller_env()) { 'The `type` argument should be one of "bootstrap", "permute", \\ or "draw". See {.help [{.fun generate}](infer::generate)} for more details.', call = call - ) + ) } if (x == "simulate") { - cli_inform( + cli_inform( 'The `"simulate"` generation type has been renamed to `"draw"`. \\ Use `type = "draw"` instead to quiet this message.' - ) + ) } x @@ -128,35 +133,37 @@ sanitize_generation_type <- function(x, call = caller_env()) { # Ensure that the supplied type matches what would be assumed from input compare_type_vs_auto_type <- function(type, auto_type, x) { - if(is.null(auto_type)) { + if (is.null(auto_type)) { return(type) } - if ((type == "bootstrap" && has_p_param(x)) || - (type != "bootstrap" && auto_type != type && + if ( + (type == "bootstrap" && has_p_param(x)) || + (type != "bootstrap" && + auto_type != type && # make sure auto_type vs type difference isn't just an alias (any(!c(auto_type, type) %in% c("draw", "simulate")))) - ) { - cli_warn( - "You have given `type = \"{type}\"`, but `type` is expected \\ + ) { + cli_warn( + "You have given `type = \"{type}\"`, but `type` is expected \\ to be `\"{auto_type}\"`. This workflow is untested and \\ the results may not mean what you think they mean." - ) + ) } type } has_p_param <- function(x) { - if (!has_attr(x, "params")) { - return(FALSE) - } + if (!has_attr(x, "params")) { + return(FALSE) + } - if (all(grepl("^p\\.", names(attr(x, "params"))))) { - return(TRUE) - } + if (all(grepl("^p\\.", names(attr(x, "params"))))) { + return(TRUE) + } - FALSE + FALSE } use_auto_type <- function(auto_type) { @@ -165,30 +172,39 @@ use_auto_type <- function(auto_type) { } check_permutation_attributes <- function(x, call = caller_env()) { - if (any(!has_attr(x, "response"), !has_attr(x, "explanatory")) && - !identical(attr(x, "null"), "paired independence")) { - cli_abort( - "Please {.fun specify} an explanatory and a response variable \\ + if ( + any(!has_attr(x, "response"), !has_attr(x, "explanatory")) && + !identical(attr(x, "null"), "paired independence") + ) { + cli_abort( + "Please {.fun specify} an explanatory and a response variable \\ when permuting.", - call = call - ) + call = call + ) } } -check_cols <- function(x, variables, type, missing, arg_name = "variables", call = caller_env()) { +check_cols <- function( + x, + variables, + type, + missing, + arg_name = "variables", + call = caller_env() +) { if (!rlang::is_symbolic(rlang::get_expr(variables)) && type == "permute") { - cli_abort( - "The {.arg {arg_name}} argument should be one or more unquoted variable names \\ + cli_abort( + "The {.arg {arg_name}} argument should be one or more unquoted variable names \\ (not strings in quotation marks).", - call = call - ) + call = call + ) } if (!missing && type != "permute") { - cli_warn( + cli_warn( 'The {.arg {arg_name}} argument is only relevant for the "permute" \\ generation type and will be ignored.' - ) + ) should_prompt <- FALSE } else { @@ -197,7 +213,6 @@ check_cols <- function(x, variables, type, missing, arg_name = "variables", call col_names <- process_variables(variables, should_prompt) - if (any(!col_names %in% colnames(x))) { bad_cols <- col_names[!col_names %in% colnames(x)] @@ -214,17 +229,16 @@ bootstrap <- function(x, reps = 1, ...) { if (is_hypothesized(x)) { # If so, shift the variable chosen to have a mean corresponding # to that specified in `hypothesize` - if (!is.null(attr(attr(x, "params"), "names"))){ + if (!is.null(attr(attr(x, "params"), "names"))) { if (identical(attr(attr(x, "params"), "names"), "mu")) { col <- response_name(x) x[[col]] <- x[[col]] - mean(x[[col]], na.rm = TRUE) + attr(x, "params") - } - - # Similarly for median + } # Similarly for median else if (identical(attr(attr(x, "params"), "names"), "med")) { col <- response_name(x) x[[col]] <- x[[col]] - - stats::median(x[[col]], na.rm = TRUE) + attr(x, "params") + stats::median(x[[col]], na.rm = TRUE) + + attr(x, "params") } } } @@ -239,7 +253,11 @@ bootstrap <- function(x, reps = 1, ...) { #' @importFrom dplyr bind_rows group_by permute <- function(x, reps = 1, variables, ..., call = caller_env()) { nrow_x <- nrow(x) - df_out <- replicate(reps, permute_once(x, variables, call = call), simplify = FALSE) %>% + df_out <- replicate( + reps, + permute_once(x, variables, call = call), + simplify = FALSE + ) %>% dplyr::bind_rows() %>% dplyr::mutate(replicate = rep(1:reps, each = !!nrow_x)) %>% group_by_replicate(reps, nrow_x) @@ -253,27 +271,29 @@ permute_once <- function(x, variables, ..., call = caller_env()) { dots <- list(...) null <- attr(x, "null") - if (!is_hypothesized(x) || - !null %in% c("independence", "paired independence")) { - cli_abort( - "Permuting should be done only when doing an independence \\ + if ( + !is_hypothesized(x) || + !null %in% c("independence", "paired independence") + ) { + cli_abort( + "Permuting should be done only when doing an independence \\ hypothesis test. See {.help [{.fun hypothesize}](infer::hypothesize)}.", - call = call - ) + call = call + ) } variables <- process_variables(variables, FALSE) if (null == "independence") { - # for each column, determine whether it should be permuted - needs_permuting <- colnames(x) %in% variables + # for each column, determine whether it should be permuted + needs_permuting <- colnames(x) %in% variables - # pass each to permute_column with its associated logical - out <- purrr::map2(x, needs_permuting, permute_column) - out <- tibble::new_tibble(out) + # pass each to permute_column with its associated logical + out <- purrr::map2(x, needs_permuting, permute_column) + out <- tibble::new_tibble(out) } else { - out <- x - signs <- sample(c(-1, 1), nrow(x), replace = TRUE, prob = c(.5, .5)) - out[[variables]] <- x[[variables]] * signs + out <- x + signs <- sample(c(-1, 1), nrow(x), replace = TRUE, prob = c(.5, .5)) + out[[variables]] <- x[[variables]] * signs } copy_attrs(out, x) @@ -292,7 +312,6 @@ process_variables <- function(variables, should_prompt) { out <- purrr::map(out, as.character) } - # drop c() out[out == "c"] <- NULL @@ -300,7 +319,7 @@ process_variables <- function(variables, should_prompt) { interactions <- purrr::map_lgl(out, `%in%`, x = "*") if (any(interactions) && should_prompt) { - cli_inform( + cli_inform( "Message: Please supply only data columns to the {.arg variables} argument. \\ Note that any derived effects that depend on these columns will also \\ be affected." diff --git a/R/get_confidence_interval.R b/R/get_confidence_interval.R index fceb1cbf..9848603a 100644 --- a/R/get_confidence_interval.R +++ b/R/get_confidence_interval.R @@ -143,11 +143,15 @@ #' @name get_confidence_interval #' @family auxillary functions #' @export -get_confidence_interval <- function(x, level = 0.95, type = NULL, - point_estimate = NULL) { +get_confidence_interval <- function( + x, + level = 0.95, + type = NULL, + point_estimate = NULL +) { # Inform if no `level` was explicitly supplied if (!("level" %in% rlang::call_args_names(match.call()))) { - cli_inform("Using `level = {level}` to compute confidence interval.") + cli_inform("Using `level = {level}` to compute confidence interval.") } if (is.null(type)) { @@ -210,11 +214,12 @@ get_confidence_interval <- function(x, level = 0.95, type = NULL, #' @rdname get_confidence_interval #' @export -get_ci <- function(x, level = 0.95, type = NULL, - point_estimate = NULL) { +get_ci <- function(x, level = 0.95, type = NULL, point_estimate = NULL) { get_confidence_interval( x, - level = level, type = type, point_estimate = point_estimate + level = level, + type = type, + point_estimate = point_estimate ) } @@ -232,8 +237,10 @@ remove_missing_estimates <- function(estimates) { na_estimates_n <- sum(na_estimates) if (na_estimates_n > 0) { - cli_warn("{na_estimates_n} estimates were missing and were removed when \\ - calculating the confidence interval.") + cli_warn( + "{na_estimates_n} estimates were missing and were removed when \\ + calculating the confidence interval." + ) } estimates[!na_estimates] @@ -312,66 +319,72 @@ check_ci_args <- function(x, level, type, point_estimate, call = caller_env()) { check_type(level, is.numeric, call = call) if ((level <= 0) || (level >= 1)) { - cli_abort( - "The value of {.arg level} must be between 0 and 1, non-inclusive.", - call = call - ) + cli_abort( + "The value of {.arg level} must be between 0 and 1, non-inclusive.", + call = call + ) } if (inherits(x, "infer_dist") && !is.null(type) && type != "se") { - cli_abort( + cli_abort( 'The only {.arg type} option for theory-based confidence intervals \\ is `type = "se"`.', - call = call - ) + call = call + ) } if (!(type %in% c("percentile", "se", "bias-corrected"))) { - cli_abort( + cli_abort( 'The options for `type` are "percentile", "se", or "bias-corrected".', call = call - ) + ) } if ((type %in% c("se", "bias-corrected")) && is.null(point_estimate)) { - cli_abort( + cli_abort( 'A numeric value needs to be given for {.arg point_estimate} \\ for `type` "se" or "bias-corrected".', call = call - ) + ) } if (inherits(x, "infer_dist")) { # theoretical CIs require the full point estimate infer object as they # contain the necessary standard error if (!inherits(point_estimate, "infer")) { - cli_abort( + cli_abort( 'For theoretical confidence intervals, the `point_estimate` argument \\ must be an `infer` object. Have you made sure to supply the output of \\ - {.fun calculate} as the `point_estimate` argument?', call = call) + {.fun calculate} as the `point_estimate` argument?', + call = call + ) } - if (!attr(point_estimate, "stat") %in% - c("mean", "prop", "diff in means", "diff in props")) { - cli_abort( + if ( + !attr(point_estimate, "stat") %in% + c("mean", "prop", "diff in means", "diff in props") + ) { + cli_abort( 'The only allowable statistics for theoretical confidence intervals \\ are "mean", "prop", "diff in means", and "diff in props". See \\ the "Details" section of \\ {.help [{.fun get_confidence_interval}](infer::get_confidence_interval)} \\ for more details.', call = call - ) + ) } - if ((attr(x, "distribution") == "t" && - !attr(point_estimate, "stat") %in% c("mean", "diff in means")) || + if ( + (attr(x, "distribution") == "t" && + !attr(point_estimate, "stat") %in% c("mean", "diff in means")) || (attr(x, "distribution") == "norm" && - !attr(point_estimate, "stat") %in% c("prop", "diff in props"))) { - cli_abort( + !attr(point_estimate, "stat") %in% c("prop", "diff in props")) + ) { + cli_abort( 'Confidence intervals using a `{attr(x, "dist_")}` distribution for \\ `stat = {attr(point_estimate, "stat")}` are not implemented.', call = call - ) + ) } } } diff --git a/R/get_p_value.R b/R/get_p_value.R index 0a9a257a..750a8dbd 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -131,7 +131,7 @@ get_p_value <- function(x, obs_stat, direction) { get_p_value.default <- function(x, obs_stat, direction) { check_type(x, is.data.frame) if (!is_generated(x) & is_hypothesized(x)) { - cli_abort(c( + cli_abort(c( "Theoretical p-values are not yet supported. ", i = "`x` should be the result of calling {.fun generate}." )) @@ -203,8 +203,10 @@ get_p_value.infer_dist <- function(x, obs_stat, direction) { # supply everything to the base R distribution function res <- do.call( dist_fn, - c(list(q = as.numeric(obs_stat), lower.tail = lower_tail), - process_df(attr(x, "df"))) + c( + list(q = as.numeric(obs_stat), lower.tail = lower_tail), + process_df(attr(x, "df")) + ) ) if (dir == "both") { @@ -214,7 +216,12 @@ get_p_value.infer_dist <- function(x, obs_stat, direction) { tibble::tibble(p_value = res) } -simulation_based_p_value <- function(x, obs_stat, direction, call = caller_env()) { +simulation_based_p_value <- function( + x, + obs_stat, + direction, + call = caller_env() +) { check_x_vs_obs_stat(x, obs_stat, call = call) obs_stat <- check_obs_stat(obs_stat) @@ -228,7 +235,7 @@ simulation_based_p_value <- function(x, obs_stat, direction, call = caller_env() } if (abs(pval) < 1e-16) { - cli_warn(c( + cli_warn(c( "Please be cautious in reporting a p-value of 0. This result is an \\ approximation based on the number of `reps` chosen in the {.fun generate} step.", i = "See {.help [{.fun get_p_value}](infer::get_p_value)} for more information." @@ -255,28 +262,35 @@ two_sided_p_value <- function(vec, obs_stat) { } check_hypotheses_align <- function(x, obs_stat) { - if (is_hypothesized(x) && + if ( + is_hypothesized(x) && is_hypothesized(obs_stat) && - any(attr(x, "params") != attr(obs_stat, "params"))) { - cli_warn( - "`x` and `obs_stat` were generated using different null hypotheses. \\ + any(attr(x, "params") != attr(obs_stat, "params")) + ) { + cli_warn( + "`x` and `obs_stat` were generated using different null hypotheses. \\ This workflow is untested and results may not mean what you think \\ they mean." - ) + ) } } check_x_vs_obs_stat <- function(x, obs_stat, call = caller_env()) { # check if x and obs_stat might have been mistakenly supplied # in the reverse order - if (is_generated(obs_stat) && - !is_generated(x)) { - cli_abort(c( - "It seems like the `obs_stat` argument has been passed to `get_p_value()` \\ + if ( + is_generated(obs_stat) && + !is_generated(x) + ) { + cli_abort( + c( + "It seems like the `obs_stat` argument has been passed to `get_p_value()` \\ as the first argument when `get_p_value()` expects `x`, a distribution \\ of statistics or coefficient estimates, as the first argument. ", - i = "Have you mistakenly switched the order of `obs_stat` and `x`?" - ), call = call) + i = "Have you mistakenly switched the order of `obs_stat` and `x`?" + ), + call = call + ) } invisible(TRUE) diff --git a/R/hypothesize.R b/R/hypothesize.R index 3130e8bf..846d18a0 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -53,11 +53,17 @@ #' @importFrom purrr compact #' @family core functions #' @export -hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) { - +hypothesize <- function( + x, + null, + p = NULL, + mu = NULL, + med = NULL, + sigma = NULL +) { # Check arguments if (missing(null)) { - null <- NA + null <- NA } null <- match_null_hypothesis(null) hypothesize_checks(x, null) @@ -70,7 +76,7 @@ hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) # Set parameters and determine appropriate generation type switch( null, - independence = { + independence = { params <- sanitize_hypothesis_params_independence(dots) attr(x, "type") <- "permute" }, @@ -92,8 +98,8 @@ hypothesize <- function(x, null, p = NULL, mu = NULL, med = NULL, sigma = NULL) } }, `paired independence` = { - params <- sanitize_hypothesis_params_paired_independence(dots) - attr(x, "type") <- "permute" + params <- sanitize_hypothesis_params_paired_independence(dots) + attr(x, "type") <- "permute" } ) @@ -108,45 +114,47 @@ hypothesise <- hypothesize hypothesize_checks <- function(x, null, call = caller_env()) { if (!inherits(x, "data.frame")) { - cli_abort("x must be a data.frame or tibble", call = call) + cli_abort("x must be a data.frame or tibble", call = call) } if ((null == "independence") && !has_explanatory(x)) { - cli_abort( + cli_abort( 'Please {.fun specify} an explanatory and a response variable when \\ testing a null hypothesis of `"independence"`.', call = call - ) + ) } if (null == "paired independence" && has_explanatory(x)) { - cli_abort( - c('Please {.fun specify} only a response variable when \\ + cli_abort( + c( + 'Please {.fun specify} only a response variable when \\ testing a null hypothesis of `"paired independence"`.', - "i" = 'The supplied response variable should be the \\ - pre-computed difference between paired observations.'), - call = call - ) + "i" = 'The supplied response variable should be the \\ + pre-computed difference between paired observations.' + ), + call = call + ) } } match_null_hypothesis <- function(null, call = caller_env()) { null_hypothesis_types <- c("point", "independence", "paired independence") - if(length(null) != 1) { - cli_abort( - 'You should specify exactly one type of null hypothesis.', - call = call - ) + if (length(null) != 1) { + cli_abort( + 'You should specify exactly one type of null hypothesis.', + call = call + ) } i <- pmatch(null, null_hypothesis_types) - if(is.na(i)) { - cli_abort( - '`null` should be either "point", "independence", or "paired independence".', - call = call - ) + if (is.na(i)) { + cli_abort( + '`null` should be either "point", "independence", or "paired independence".', + call = call + ) } null_hypothesis_types[i] @@ -154,7 +162,7 @@ match_null_hypothesis <- function(null, call = caller_env()) { sanitize_hypothesis_params_independence <- function(dots) { if (length(dots) > 0) { - cli_warn( + cli_warn( "Parameter values should not be specified when testing that two \\ variables are independent." ) @@ -164,11 +172,11 @@ sanitize_hypothesis_params_independence <- function(dots) { } sanitize_hypothesis_params_point <- function(dots, x, call = caller_env()) { - if(length(dots) != 1) { - cli_abort( - "You must specify exactly one of `p`, `mu`, `med`, or `sigma`.", - call = call - ) + if (length(dots) != 1) { + cli_abort( + "You must specify exactly one of `p`, `mu`, `med`, or `sigma`.", + call = call + ) } if (!is.null(dots$p)) { @@ -179,40 +187,44 @@ sanitize_hypothesis_params_point <- function(dots, x, call = caller_env()) { } sanitize_hypothesis_params_proportion <- function(p, x, call = caller_env()) { - eps <- if (capabilities("long.double")) {sqrt(.Machine$double.eps)} else {0.01} + eps <- if (capabilities("long.double")) { + sqrt(.Machine$double.eps) + } else { + 0.01 + } - if(anyNA(p)) { - cli_abort( - '`p` should not contain missing values.', - call = call - ) + if (anyNA(p)) { + cli_abort( + '`p` should not contain missing values.', + call = call + ) } - if(any(p < 0 | p > 1)) { - cli_abort( - '`p` should only contain values between zero and one.', - call = call - ) + if (any(p < 0 | p > 1)) { + cli_abort( + '`p` should only contain values between zero and one.', + call = call + ) } - if(length(p) == 1) { - if(!has_attr(x, "success")) { - cli_abort( - "A point null regarding a proportion requires that `success` \\ + if (length(p) == 1) { + if (!has_attr(x, "success")) { + cli_abort( + "A point null regarding a proportion requires that `success` \\ be indicated in `specify()`.", - call = call - ) + call = call + ) } p <- c(p, 1 - p) names(p) <- get_success_then_response_levels(x) } else { if (sum(p) < 1 - eps | sum(p) > 1 + eps) { - cli_abort( - "Make sure the hypothesized values for the `p` parameters sum to 1. \\ + cli_abort( + "Make sure the hypothesized values for the `p` parameters sum to 1. \\ Please try again.", - call = call - ) + call = call + ) } } @@ -220,11 +232,11 @@ sanitize_hypothesis_params_proportion <- function(p, x, call = caller_env()) { } sanitize_hypothesis_params_paired_independence <- function(dots) { - if (length(dots) > 0) { - cli_warn( - "Parameter values should not be specified when testing paired independence." - ) - } + if (length(dots) > 0) { + cli_warn( + "Parameter values should not be specified when testing paired independence." + ) + } - NULL + NULL } diff --git a/R/infer.R b/R/infer.R index 992c5318..d5a5be86 100755 --- a/R/infer.R +++ b/R/infer.R @@ -18,12 +18,46 @@ if (getRversion() >= "2.15.1") { utils::globalVariables( c( - "prop", "stat", "value", "x", "y", "..density..", "statistic", ".", - "parameter", "p.value", "xmin", "x_min", "xmax", "x_max", "density", - "denom", "diff_prop", "group_num", "n1", "n2", "num_suc", "p_hat", - "total_suc", "explan", "probs", "conf.low", "conf.high", "prop_1", - "prop_2", "data", "setNames", "resp", "capture.output", "stats", - "estimate", "any_of", "model", "term", "where", "hypothesis" + "prop", + "stat", + "value", + "x", + "y", + "..density..", + "statistic", + ".", + "parameter", + "p.value", + "xmin", + "x_min", + "xmax", + "x_max", + "density", + "denom", + "diff_prop", + "group_num", + "n1", + "n2", + "num_suc", + "p_hat", + "total_suc", + "explan", + "probs", + "conf.low", + "conf.high", + "prop_1", + "prop_2", + "data", + "setNames", + "resp", + "capture.output", + "stats", + "estimate", + "any_of", + "model", + "term", + "where", + "hypothesis" ) ) } diff --git a/R/observe.R b/R/observe.R index cc6e7ddc..6f88d6b9 100644 --- a/R/observe.R +++ b/R/observe.R @@ -58,44 +58,73 @@ #' @family functions for calculating observed statistics #' @export observe <- function( - x, - # specify arguments - formula, response = NULL, explanatory = NULL, success = NULL, - # hypothesize arguments - null = NULL, p = NULL, mu = NULL, med = NULL, sigma = NULL, - # calculate arguments - stat = c("mean", "median", "sum", "sd", "prop", "count", "diff in means", - "diff in medians", "diff in props", "Chisq", "F", "slope", - "correlation", "t", "z", "ratio of props", "odds ratio"), - order = NULL, - ...) { - + x, + # specify arguments + formula, + response = NULL, + explanatory = NULL, + success = NULL, + # hypothesize arguments + null = NULL, + p = NULL, + mu = NULL, + med = NULL, + sigma = NULL, + # calculate arguments + stat = c( + "mean", + "median", + "sum", + "sd", + "prop", + "count", + "diff in means", + "diff in medians", + "diff in props", + "Chisq", + "F", + "slope", + "correlation", + "t", + "z", + "ratio of props", + "odds ratio" + ), + order = NULL, + ... +) { # use hypothesize() if appropriate (or needed to pass an informative # message/warning). otherwise, pipe directly to calculate(). if (!all(sapply(list(p, mu, med, sigma), is.null))) { hypothesize_fn <- hypothesize } else { - hypothesize_fn <- function(x, ...) {x} + hypothesize_fn <- function(x, ...) { + x + } } # pass arguments on to core verbs specify( x = x, formula = formula, - response = {{response}}, - explanatory = {{explanatory}}, + response = {{ response }}, + explanatory = {{ explanatory }}, success = success ) %>% - hypothesize_fn( - null = if (has_explanatory(.)) {"independence"} else {"point"}, - p = p, - mu = mu, - med = med, - sigma = sigma - ) %>% - calculate( - stat = stat, - order = order, - ... - ) + hypothesize_fn( + null = if (has_explanatory(.)) { + "independence" + } else { + "point" + }, + p = p, + mu = mu, + med = med, + sigma = sigma + ) %>% + calculate( + stat = stat, + order = order, + ... + ) } diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index b2c89d35..37e4c958 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -107,8 +107,14 @@ rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1, prob = NULL) { #' @rdname rep_sample_n #' @export -rep_slice_sample <- function(.data, n = NULL, prop = NULL, replace = FALSE, - weight_by = NULL, reps = 1) { +rep_slice_sample <- function( + .data, + n = NULL, + prop = NULL, + replace = FALSE, + weight_by = NULL, + reps = 1 +) { check_type(.data, is.data.frame) check_type( n, @@ -127,16 +133,18 @@ rep_slice_sample <- function(.data, n = NULL, prop = NULL, replace = FALSE, check_type(replace, is_truefalse, "TRUE or FALSE") eval_weight_by <- try(rlang::eval_tidy(weight_by), silent = TRUE) if (inherits(eval_weight_by, "try-error")) { - weight_by <- rlang::enquo(weight_by) - check_cols(.data, weight_by, "permute", FALSE, "weight_by") - weight_by <- .data[[rlang::as_name(weight_by)]] + weight_by <- rlang::enquo(weight_by) + check_cols(.data, weight_by, "permute", FALSE, "weight_by") + weight_by <- .data[[rlang::as_name(weight_by)]] } check_type( - weight_by, - ~ is.numeric(.) && (length(.) == nrow(.data)), - glue::glue("a numeric vector with length `nrow(.data)` = {nrow(.data)} \\ - or an unquoted column name"), - allow_null = TRUE + weight_by, + ~ is.numeric(.) && (length(.) == nrow(.data)), + glue::glue( + "a numeric vector with length `nrow(.data)` = {nrow(.data)} \\ + or an unquoted column name" + ), + allow_null = TRUE ) check_type( reps, @@ -168,14 +176,14 @@ make_replicate_tbl <- function(tbl, size, replace, prob, reps) { n <- nrow(tbl) if (!replace) { - idx_list <- replicate( - reps, - sample_int(n, size, replace = FALSE, prob = prob), - simplify = FALSE - ) + idx_list <- replicate( + reps, + sample_int(n, size, replace = FALSE, prob = prob), + simplify = FALSE + ) } else { - idx_list <- sample_int(n, size * reps, replace = TRUE, prob = prob) - idx_list <- vctrs::vec_chop(idx_list, sizes = rep(size, reps)) + idx_list <- sample_int(n, size * reps, replace = TRUE, prob = prob) + idx_list <- vctrs::vec_chop(idx_list, sizes = rep(size, reps)) } # Get actual sample size which can differ from `size` (currently if it is @@ -185,15 +193,23 @@ make_replicate_tbl <- function(tbl, size, replace, prob, reps) { res <- vctrs::vec_slice(tbl, i) res <- - dplyr::bind_cols( - tibble::new_tibble(list(replicate = rep(seq_len(reps), each = sample_size))), - res - ) + dplyr::bind_cols( + tibble::new_tibble(list( + replicate = rep(seq_len(reps), each = sample_size) + )), + res + ) res <- group_by_replicate(res, reps = reps, n = sample_size) copy_attrs(res, tbl) } -notify_extra_size <- function(size, tbl, replace, notify_type, call = caller_env()) { +notify_extra_size <- function( + size, + tbl, + replace, + notify_type, + call = caller_env() +) { if (!replace && (size > nrow(tbl))) { msg <- glue::glue( "Asked sample size ({size}) is bigger than ", @@ -237,10 +253,10 @@ make_slice_size <- function(n, prop, n_total, call = caller_env()) { if (is.null(prop)) { n } else { - cli_abort( - "Please supply exactly one of the `n` or `prop` arguments.", - call = call - ) + cli_abort( + "Please supply exactly one of the `n` or `prop` arguments.", + call = call + ) } } } diff --git a/R/set_params.R b/R/set_params.R index 1e938f13..8f9d6bc4 100755 --- a/R/set_params.R +++ b/R/set_params.R @@ -32,10 +32,11 @@ set_params <- function(x) { # One variable if ( - has_response(x) && !has_explanatory(x) && - has_attr(x, "response_type") && !has_attr(x, "explanatory_type") + has_response(x) && + !has_explanatory(x) && + has_attr(x, "response_type") && + !has_attr(x, "explanatory_type") ) { - # One mean if (attr(x, "response_type") %in% c("integer", "numeric")) { attr(x, "theory_type") <- "One sample t" @@ -44,7 +45,6 @@ set_params <- function(x) { )[["parameter"]] attr(x, "type") <- "bootstrap" } else if ( - # One prop (attr(x, "response_type") == "factor") && (num_response_levels == 2) ) { @@ -61,17 +61,18 @@ set_params <- function(x) { # Two variables if ( - has_response(x) && has_explanatory(x) & - has_attr(x, "response_type") && has_attr(x, "explanatory_type") + has_response(x) && + has_explanatory(x) & + has_attr(x, "response_type") && + has_attr(x, "explanatory_type") ) { attr(x, "type") <- "bootstrap" # Response is numeric, explanatory is categorical if ( (attr(x, "response_type") %in% c("integer", "numeric")) & - (attr(x, "explanatory_type") == "factor") + (attr(x, "explanatory_type") == "factor") ) { - # Two sample means (t distribution) if (num_explanatory_levels == 2) { attr(x, "theory_type") <- "Two sample t" @@ -81,7 +82,6 @@ set_params <- function(x) { response_variable(x) ~ explanatory_variable(x) )[["parameter"]] } else { - # >2 sample means (F distribution) attr(x, "theory_type") <- "ANOVA" # Get numerator and denominator degrees of freedom @@ -96,7 +96,7 @@ set_params <- function(x) { # Response is categorical, explanatory is categorical if ( (attr(x, "response_type") == "factor") & - (attr(x, "explanatory_type") == "factor") + (attr(x, "explanatory_type") == "factor") ) { attr(x, "type") <- "bootstrap" @@ -104,11 +104,10 @@ set_params <- function(x) { # Parameter(s) not needed since standard normal if ( (num_response_levels == 2) & - (num_explanatory_levels == 2) + (num_explanatory_levels == 2) ) { attr(x, "theory_type") <- "Two sample props z" } else { - # >2 sample proportions (chi-square test of indep) attr(x, "theory_type") <- "Chi-square test of indep" attr(x, "distr_param") <- suppressWarnings( @@ -122,7 +121,7 @@ set_params <- function(x) { # Response is numeric, explanatory is numeric if ( (attr(x, "response_type") %in% c("integer", "numeric")) & - (attr(x, "explanatory_type") %in% c("integer", "numeric")) + (attr(x, "explanatory_type") %in% c("integer", "numeric")) ) { response_string <- response_name(x) explanatory_string <- explanatory_name(x) diff --git a/R/shade_confidence_interval.R b/R/shade_confidence_interval.R index 07e603ff..2236dad0 100644 --- a/R/shade_confidence_interval.R +++ b/R/shade_confidence_interval.R @@ -120,8 +120,12 @@ NULL #' @rdname shade_confidence_interval #' @family visualization functions #' @export -shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", - fill = "turquoise", ...) { +shade_confidence_interval <- function( + endpoints, + color = "mediumaquamarine", + fill = "turquoise", + ... +) { # since most of the logic for shading is in shade_confidence_interval_term, which # is only called by `+.gg`, we need to check for mistakenly piped inputs here check_for_piped_visualize(endpoints, color, fill) @@ -131,17 +135,25 @@ shade_confidence_interval <- function(endpoints, color = "mediumaquamarine", "A confidence interval shading layer.", class = "infer_layer", fn = "shade_confidence_interval", - endpoints = if (is.null(endpoints)) {NA} else {endpoints}, + endpoints = if (is.null(endpoints)) { + NA + } else { + endpoints + }, color = color, fill = list(fill), dots = list(...) ) } -shade_confidence_interval_term <- function(plot, endpoints, - color = "mediumaquamarine", - fill = "turquoise", dots, - call = rlang::call2("shade_confidence_interval")) { +shade_confidence_interval_term <- function( + plot, + endpoints, + color = "mediumaquamarine", + fill = "turquoise", + dots, + call = rlang::call2("shade_confidence_interval") +) { if (all(is.na(endpoints))) { endpoints <- NULL } @@ -163,7 +175,10 @@ shade_confidence_interval_term <- function(plot, endpoints, list( data = data.frame(endpoints[1]), mapping = aes( - xmin = endpoints[1], xmax = endpoints[2], ymin = 0, ymax = Inf + xmin = endpoints[1], + xmax = endpoints[2], + ymin = 0, + ymax = Inf ), fill = fill, inherit.aes = FALSE diff --git a/R/shade_p_value.R b/R/shade_p_value.R index e2ff24a2..14f0c6e3 100644 --- a/R/shade_p_value.R +++ b/R/shade_p_value.R @@ -109,8 +109,13 @@ NULL #' @rdname shade_p_value #' @family visualization functions #' @export -shade_p_value <- function(obs_stat, direction, - color = "red2", fill = "pink", ...) { +shade_p_value <- function( + obs_stat, + direction, + color = "red2", + fill = "pink", + ... +) { # since most of the logic for p-value shading is in shade_p_value_term, which # is only called by `+.gg`, we need to check for mistakenly piped inputs here check_for_piped_visualize(obs_stat, direction, color, fill) @@ -120,8 +125,16 @@ shade_p_value <- function(obs_stat, direction, "A p-value shading layer.", class = "infer_layer", fn = "shade_p_value", - obs_stat = if (is.null(obs_stat)) {NA} else {obs_stat}, - direction = if (is.null(direction)) {NA} else {direction}, + obs_stat = if (is.null(obs_stat)) { + NA + } else { + obs_stat + }, + direction = if (is.null(direction)) { + NA + } else { + direction + }, color = color, fill = list(fill), dots = list(...) @@ -132,9 +145,15 @@ shade_p_value <- function(obs_stat, direction, #' @export shade_pvalue <- shade_p_value -shade_p_value_term <- function(plot, obs_stat, direction, - color = "red2", fill = "pink", dots, - call = rlang::call2("shade_p_value")) { +shade_p_value_term <- function( + plot, + obs_stat, + direction, + color = "red2", + fill = "pink", + dots, + call = rlang::call2("shade_p_value") +) { if (all(is.na(obs_stat))) { obs_stat <- NULL } @@ -160,17 +179,19 @@ shade_p_value_term <- function(plot, obs_stat, direction, tail_area <- one_tail_area(obs_stat, direction) res <- c(res, do.call(geom_tail_area, c(list(tail_area, fill), dots))) - } else if (direction %in% c("two_sided", "both", - "two-sided", "two sided", "two.sided")) { + } else if ( + direction %in% + c("two_sided", "both", "two-sided", "two sided", "two.sided") + ) { tail_area <- two_tail_area(obs_stat, direction) res <- c(res, do.call(geom_tail_area, c(list(tail_area, fill), dots))) } else { - cli_warn( + cli_warn( '`direction` should be one of `"less"`, `"left"`, `"greater"`, \\ `"right"`, `"two-sided"`, `"both"`, `"two_sided"`, `"two sided"`, \\ or `"two.sided"`.' - ) + ) } } @@ -200,7 +221,13 @@ shade_p_value_term <- function(plot, obs_stat, direction, } -check_shade_p_value_args <- function(obs_stat, direction, color, fill, call = caller_env()) { +check_shade_p_value_args <- function( + obs_stat, + direction, + color, + fill, + call = caller_env() +) { if (!is.null(obs_stat)) { check_type(obs_stat, is.numeric, call = call) } @@ -246,10 +273,14 @@ two_tail_area <- function(obs_stat, direction) { } left_area <- one_tail_area( - min(obs_stat, second_border), "left", do_warn = FALSE + min(obs_stat, second_border), + "left", + do_warn = FALSE )(data) right_area <- one_tail_area( - max(obs_stat, second_border), "right", do_warn = FALSE + max(obs_stat, second_border), + "right", + do_warn = FALSE )(data) ret <- dplyr::bind_rows(left_area, right_area) @@ -258,7 +289,7 @@ two_tail_area <- function(obs_stat, direction) { # so that their heights aren't summed common_x <- which.max(ret$x[ret$dir == "left"]) - ret$x[common_x] <- ret$x[common_x] - 1e-5*ret$x[common_x] + ret$x[common_x] <- ret$x[common_x] - 1e-5 * ret$x[common_x] ret } @@ -276,8 +307,8 @@ one_tail_area <- function(obs_stat, direction, do_warn = TRUE) { switch( viz_method, theoretical = theor_area(data, obs_stat, norm_dir), - simulation = hist_area(data, obs_stat, norm_dir, yval = "ymax"), - both = hist_area(data, obs_stat, norm_dir, yval = "density") + simulation = hist_area(data, obs_stat, norm_dir, yval = "ymax"), + both = hist_area(data, obs_stat, norm_dir, yval = "density") ) } } @@ -289,14 +320,17 @@ theor_area <- function(data, obs_stat, direction, n_grid = 1001) { g_data <- ggplot2::ggplot_build(g)[["data"]][[1]] curve_fun <- stats::approxfun( - x = g_data[["x"]], y = g_data[["y"]], yleft = 0, yright = 0 + x = g_data[["x"]], + y = g_data[["y"]], + yleft = 0, + yright = 0 ) # Compute "x" grid of curve, area under which will be shaded. x_grid <- switch( # `direction` can be one of "left" or "right" at this point of execution direction, - left = seq(from = min(g_data[["x"]]), to = obs_stat, length.out = n_grid), + left = seq(from = min(g_data[["x"]]), to = obs_stat, length.out = n_grid), right = seq(from = obs_stat, to = max(g_data[["x"]]), length.out = n_grid) ) @@ -311,8 +345,8 @@ hist_area <- function(data, obs_stat, direction, yval) { # between them. # "x" coordinates are computed from `x_left` and `x_right`: "x" coordinates # of "shrinked" (to avoid duplicte points later) histogram bars. - x_left <- (1-1e-5)*g_data[["xmin"]] + 1e-5*g_data[["xmax"]] - x_right <- 1e-5*g_data[["xmin"]] + (1 - 1e-5)*g_data[["xmax"]] + x_left <- (1 - 1e-5) * g_data[["xmin"]] + 1e-5 * g_data[["xmax"]] + x_right <- 1e-5 * g_data[["xmin"]] + (1 - 1e-5) * g_data[["xmax"]] # `x` is created as `c(x_left[1], x_right[1], x_left[2], ...)` x <- c(t(cbind(x_left, x_right))) @@ -339,12 +373,16 @@ hist_area <- function(data, obs_stat, direction, yval) { # "almost vertical" lines with `geom_area()` usage. If don't do this, then # area might be shaded under line segments connecting edges of consequtive # histogram bars. - x_extra <- switch(direction, left = g_data[["xmax"]], right = g_data[["xmin"]]) + x_extra <- switch( + direction, + left = g_data[["xmax"]], + right = g_data[["xmin"]] + ) x_extra <- sort(c(x, x_extra)) x_grid <- switch( # `direction` can be one of "left" or "right" at this point of execution direction, - left = c(x_extra[x_extra < obs_stat], obs_stat), + left = c(x_extra[x_extra < obs_stat], obs_stat), right = c(obs_stat, x_extra[x_extra > obs_stat]) ) @@ -360,20 +398,29 @@ hist_area <- function(data, obs_stat, direction, yval) { norm_direction <- function(direction) { switch( direction, - less = , left = "left", - greater = , right = "right", - two_sided = , `two-sided` = , `two sided` = , `two.sided` = , both = "both" + less = , + left = "left", + greater = , + right = "right", + two_sided = , + `two-sided` = , + `two sided` = , + `two.sided` = , + both = "both" ) } warn_right_tail_test <- function(direction, stat_name, do_warn = TRUE) { - if (do_warn && !is.null(direction) && + if ( + do_warn && + !is.null(direction) && !(direction %in% c("greater", "right")) && - (stat_name %in% c("F", "Chi-Square"))) { - cli_warn( + (stat_name %in% c("F", "Chi-Square")) + ) { + cli_warn( "{stat_name} usually corresponds to right-tailed tests. \\ Proceed with caution." - ) + ) } TRUE @@ -384,4 +431,3 @@ mirror_obs_stat <- function(vector, observation) { stats::quantile(vector, probs = 1 - obs_percentile) } - diff --git a/R/specify.R b/R/specify.R index f37f5c3a..910509ed 100755 --- a/R/specify.R +++ b/R/specify.R @@ -46,8 +46,13 @@ #' @importFrom methods hasArg #' @family core functions #' @export -specify <- function(x, formula, response = NULL, - explanatory = NULL, success = NULL) { +specify <- function( + x, + formula, + response = NULL, + explanatory = NULL, + success = NULL +) { check_type(x, is.data.frame) # Standardize variable types @@ -82,47 +87,55 @@ specify <- function(x, formula, response = NULL, append_infer_class(x) } -parse_variables <- function(x, formula, response, explanatory, call = caller_env()) { +parse_variables <- function( + x, + formula, + response, + explanatory, + call = caller_env() +) { if (methods::hasArg(formula)) { tryCatch( rlang::is_formula(formula), error = function(e) { - cli_abort( - c("The argument you passed in for the formula does not exist.", - i = "Were you trying to pass in an unquoted column name?", - i = "Did you forget to name one or more arguments?"), - call = call - ) + cli_abort( + c( + "The argument you passed in for the formula does not exist.", + i = "Were you trying to pass in an unquoted column name?", + i = "Did you forget to name one or more arguments?" + ), + call = call + ) } ) if (!rlang::is_formula(formula)) { cli_abort( - c( - "The first unnamed argument must be a formula.", - i = "You passed in '{get_type(formula)}'.", - x = "Did you forget to name one or more arguments?" - ), - call = call + c( + "The first unnamed argument must be a formula.", + i = "You passed in '{get_type(formula)}'.", + x = "Did you forget to name one or more arguments?" + ), + call = call ) } } - attr(x, "response") <- get_expr(response) + attr(x, "response") <- get_expr(response) attr(x, "explanatory") <- get_expr(explanatory) attr(x, "formula") <- NULL if (methods::hasArg(formula)) { - attr(x, "response") <- f_lhs(formula) + attr(x, "response") <- f_lhs(formula) attr(x, "explanatory") <- f_rhs(formula) attr(x, "formula") <- formula } # Check response and explanatory variables to be appropriate for later use if (!has_response(x)) { - cli_abort( - "Please supply a response variable that is not `NULL`.", - call = call - ) + cli_abort( + "Please supply a response variable that is not `NULL`.", + call = call + ) } check_var_correct(x, "response", call = call) @@ -158,42 +171,43 @@ check_success_arg <- function(x, success, call = caller_env()) { if (!is.null(success)) { if (!is.character(success)) { - cli_abort("`success` must be a string.", call = call) + cli_abort("`success` must be a string.", call = call) } if (!is.factor(response_col)) { - cli_abort( + cli_abort( "`success` should only be specified if the response is a categorical \\ variable.", call = call - ) + ) } if (!(success %in% levels(response_col))) { - cli_abort( - '{success} is not a valid level of {response_name(x)}.', - call = call - ) + cli_abort( + '{success} is not a valid level of {response_name(x)}.', + call = call + ) } if (sum(table(response_col) > 0) > 2) { - cli_abort( + cli_abort( "`success` can only be used if the response has two levels. \\ `filter()` can reduce a variable to two levels.", call = call - ) + ) } } - if ((attr(x, "response_type") == "factor" && + if ( + (attr(x, "response_type") == "factor" && is.null(success) && length(levels(response_variable(x))) == 2) && - ((!has_attr(x, "explanatory_type") || - length(levels(explanatory_variable(x))) == 2))) { - cli_abort( - 'A level of the response variable `{response_name(x)}` needs to be \\ + ((!has_attr(x, "explanatory_type") || + length(levels(explanatory_variable(x))) == 2)) + ) { + cli_abort( + 'A level of the response variable `{response_name(x)}` needs to be \\ specified for the `success` argument in `specify()`.', - call = call - ) - } - + call = call + ) + } } check_var_correct <- function(x, var_name, call = caller_env()) { @@ -202,18 +216,18 @@ check_var_correct <- function(x, var_name, call = caller_env()) { # Variable (if present) should be a symbolic column name if (!is.null(var)) { if (!rlang::is_symbolic(var)) { - cli_abort( + cli_abort( "The {var_name} should be a bare variable name (not a string in \\ quotation marks).", call = call - ) + ) } if (any(!(all.vars(var) %in% names(x)))) { - cli_abort( + cli_abort( 'The {var_name} variable `{var}` cannot be found in this dataframe.', call = call - ) + ) } } @@ -223,11 +237,11 @@ check_var_correct <- function(x, var_name, call = caller_env()) { check_vars_different <- function(x, call = caller_env()) { if (has_response(x) && has_explanatory(x)) { if (identical(response_name(x), explanatory_name(x))) { - cli_abort( + cli_abort( "The response and explanatory variables must be different from one \\ another.", call = call - ) + ) } } diff --git a/R/utils.R b/R/utils.R index f2cf3123..6fa75661 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,7 +21,7 @@ print_params <- function(x) { as.character(length(params)), "1" = glue(": `{names(params)} = {unname(params)}`", .null = "NULL"), "2" = glue(": `p = .5`", .null = "NULL"), - glue("s: `p = c({put_params(x, params)})`", .null = "NULL") + glue("s: `p = c({put_params(x, params)})`", .null = "NULL") ) } @@ -34,14 +34,29 @@ get_par_levels <- function(x) { gsub("^.\\.", "", par_names) } -copy_attrs <- function(to, from, - attrs = c( - "response", "success", "explanatory", "response_type", - "explanatory_type", "distr_param", "distr_param2", - "null", "params", "theory_type", "generated", "type", - "hypothesized", "formula", "fitted", - "type_desc_response", "type_desc_explanatory" - )) { +copy_attrs <- function( + to, + from, + attrs = c( + "response", + "success", + "explanatory", + "response_type", + "explanatory_type", + "distr_param", + "distr_param2", + "null", + "params", + "theory_type", + "generated", + "type", + "hypothesized", + "formula", + "fitted", + "type_desc_response", + "type_desc_explanatory" + ) +) { for (at in attrs) { attr(to, at) <- attr(from, at) } @@ -71,53 +86,52 @@ reorder_explanatory <- function(x, order) { } standardize_variable_types <- function(x) { - tibble::as_tibble(x) %>% - # character and ordered to factor - dplyr::mutate( - dplyr::across( - where(~ is.character(.x) || is.ordered(.x)), - ~ factor(.x, ordered = FALSE) - ) - ) %>% - # logical to factor, with TRUE as the first level - dplyr::mutate( - dplyr::across( - where(~ is.logical(.x)), - ~ factor(.x, levels = c("TRUE", "FALSE")) - ) - ) %>% - # integer to numeric - dplyr::mutate( - dplyr::across( - where(is.integer), - as.numeric - ) + tibble::as_tibble(x) %>% + # character and ordered to factor + dplyr::mutate( + dplyr::across( + where(~ is.character(.x) || is.ordered(.x)), + ~ factor(.x, ordered = FALSE) + ) + ) %>% + # logical to factor, with TRUE as the first level + dplyr::mutate( + dplyr::across( + where(~ is.logical(.x)), + ~ factor(.x, levels = c("TRUE", "FALSE")) ) + ) %>% + # integer to numeric + dplyr::mutate( + dplyr::across( + where(is.integer), + as.numeric + ) + ) } # Performant grouping ---------------------------------------------------------- group_by_replicate <- function(tbl, reps, n) { - dplyr::new_grouped_df( - tbl, - groups = make_replicate_groups(tbl, reps = reps, n = n) - ) + dplyr::new_grouped_df( + tbl, + groups = make_replicate_groups(tbl, reps = reps, n = n) + ) } make_replicate_groups <- function(tbl, reps, n) { - res <- - tibble::new_tibble(list( - replicate = 1:reps, - .rows = - vctrs::as_list_of( - vctrs::vec_chop(seq_len(n*reps), sizes = rep(n, reps)), - .ptype = integer() - ) - )) + res <- + tibble::new_tibble(list( + replicate = 1:reps, + .rows = vctrs::as_list_of( + vctrs::vec_chop(seq_len(n * reps), sizes = rep(n, reps)), + .ptype = integer() + ) + )) - attr(res, ".drop") <- TRUE + attr(res, ".drop") <- TRUE - res + res } # Getters, setters, and indicators ------------------------------------------ @@ -176,11 +190,11 @@ is_generated <- function(x) { isTRUE(attr(x, "generated")) } -is_hypothesized <- function(x){ +is_hypothesized <- function(x) { isTRUE(attr(x, "hypothesized")) } -is_fitted <- function(x){ +is_fitted <- function(x) { isTRUE(attr(x, "fitted")) } @@ -202,16 +216,34 @@ has_response <- function(x) { is_color_string <- function(x) { rlang::is_string(x) && - tryCatch(is.matrix(grDevices::col2rgb(x)), error = function(e) {FALSE}) -} - -is_single_number <- function(x, min_val = -Inf, max_val = Inf, - include_min_val = TRUE, include_max_val = TRUE) { - left_compare <- if (include_min_val) {`>=`} else {`>`} - right_compare <- if (include_max_val) {`<=`} else {`<`} + tryCatch(is.matrix(grDevices::col2rgb(x)), error = function(e) { + FALSE + }) +} + +is_single_number <- function( + x, + min_val = -Inf, + max_val = Inf, + include_min_val = TRUE, + include_max_val = TRUE +) { + left_compare <- if (include_min_val) { + `>=` + } else { + `>` + } + right_compare <- if (include_max_val) { + `<=` + } else { + `<` + } - is.numeric(x) && (length(x) == 1) && is.finite(x) && - left_compare(x, min_val) && right_compare(x, max_val) + is.numeric(x) && + (length(x) == 1) && + is.finite(x) && + left_compare(x, min_val) && + right_compare(x, max_val) } is_truefalse <- function(x) { @@ -223,24 +255,50 @@ is_truefalse <- function(x) { # Simplify and standardize checks by grouping statistics based on variable types # num = numeric, bin = binary (dichotomous), mult = multinomial stat_types <- tibble::tribble( - ~resp, ~exp, ~stats, - "num", "", c("mean", "median", "sum", "sd", "t"), - "num", "num", c("slope", "correlation"), - "num", "bin", c("diff in means", "diff in medians", "t", "ratio of means"), - "num", "mult", c("F"), - "bin", "", c("prop", "count", "z"), - "bin", "bin", c("diff in props", "z", "ratio of props", "odds ratio", "Chisq"), - "bin", "mult", c("Chisq"), - "mult", "bin", c("Chisq"), - "mult", "", c("Chisq"), - "mult", "mult", c("Chisq"), + ~resp, + ~exp, + ~stats, + "num", + "", + c("mean", "median", "sum", "sd", "t"), + "num", + "num", + c("slope", "correlation"), + "num", + "bin", + c("diff in means", "diff in medians", "t", "ratio of means"), + "num", + "mult", + c("F"), + "bin", + "", + c("prop", "count", "z"), + "bin", + "bin", + c("diff in props", "z", "ratio of props", "odds ratio", "Chisq"), + "bin", + "mult", + c("Chisq"), + "mult", + "bin", + c("Chisq"), + "mult", + "", + c("Chisq"), + "mult", + "mult", + c("Chisq"), ) stat_type_desc <- tibble::tribble( - ~type, ~description, - "num", "numeric", - "bin", "dichotomous categorical", - "mult", "multinomial categorical" + ~type, + ~description, + "num", + "numeric", + "bin", + "dichotomous categorical", + "mult", + "multinomial categorical" ) get_stat_type_desc <- function(stat_type) { @@ -248,54 +306,99 @@ get_stat_type_desc <- function(stat_type) { } stat_desc <- tibble::tribble( - ~stat, ~description, - "mean", "A mean", - "median", "A median", - "sum", "A sum", - "sd", "A standard deviation", - "prop", "A proportion", - "count", "A count", - "diff in means", "A difference in means", - "diff in medians", "A difference in medians", - "diff in props", "A difference in proportions", - "Chisq", "A chi-square statistic", - "F", "An F statistic", - "slope", "A slope", - "correlation", "A correlation", - "t", "A t statistic", - "z", "A z statistic", - "ratio of props", "A ratio of proportions", - "ratio of means", "A ratio of means", - "odds ratio", "An odds ratio" + ~stat, + ~description, + "mean", + "A mean", + "median", + "A median", + "sum", + "A sum", + "sd", + "A standard deviation", + "prop", + "A proportion", + "count", + "A count", + "diff in means", + "A difference in means", + "diff in medians", + "A difference in medians", + "diff in props", + "A difference in proportions", + "Chisq", + "A chi-square statistic", + "F", + "An F statistic", + "slope", + "A slope", + "correlation", + "A correlation", + "t", + "A t statistic", + "z", + "A z statistic", + "ratio of props", + "A ratio of proportions", + "ratio of means", + "A ratio of means", + "odds ratio", + "An odds ratio" ) stat_hypotheses <- tibble::tribble( - ~stat, ~hypothesis, - "mean", "point", - "median", "point", - "sum", "point", - "sd", "point", - "prop", "point", - "count", "point", - "mean", "paired independence", - "median", "paired independence", - "sum", "paired independence", - "sd", "paired independence", - "diff in means", "independence", - "diff in medians", "independence", - "diff in props", "independence", - "Chisq", "independence", - "Chisq", "point", - "F", "independence", - "slope", "independence", - "correlation", "independence", - "t", "independence", - "t", "point", - "z", "independence", - "z", "point", - "ratio of props", "independence", - "ratio of means", "independence", - "odds ratio", "independence" + ~stat, + ~hypothesis, + "mean", + "point", + "median", + "point", + "sum", + "point", + "sd", + "point", + "prop", + "point", + "count", + "point", + "mean", + "paired independence", + "median", + "paired independence", + "sum", + "paired independence", + "sd", + "paired independence", + "diff in means", + "independence", + "diff in medians", + "independence", + "diff in props", + "independence", + "Chisq", + "independence", + "Chisq", + "point", + "F", + "independence", + "slope", + "independence", + "correlation", + "independence", + "t", + "independence", + "t", + "point", + "z", + "independence", + "z", + "point", + "ratio of props", + "independence", + "ratio of means", + "independence", + "odds ratio", + "independence" ) get_stat_desc <- function(stat) { @@ -303,23 +406,46 @@ get_stat_desc <- function(stat) { } # Values of `stat` argument of `calculate()` -implemented_stats <- c( - "mean", "median", "sum", "sd", "prop", "count", - "diff in means", "diff in medians", "diff in props", - "Chisq", "F", "slope", "correlation", "t", "z", - "ratio of props", "ratio of means", "odds ratio" +implemented_stats <- c( + "mean", + "median", + "sum", + "sd", + "prop", + "count", + "diff in means", + "diff in medians", + "diff in props", + "Chisq", + "F", + "slope", + "correlation", + "t", + "z", + "ratio of props", + "ratio of means", + "odds ratio" ) implemented_stats_aliases <- tibble::tribble( - ~ alias, ~ target, + ~alias, + ~target, # Allow case insensitive stat names - "f", "F", - "chisq", "Chisq" + "f", + "F", + "chisq", + "Chisq" ) -untheorized_stats <- implemented_stats[!implemented_stats %in% c( - "Chisq", "F", "t", "z" -)] +untheorized_stats <- implemented_stats[ + !implemented_stats %in% + c( + "Chisq", + "F", + "t", + "z" + ) +] # Given a statistic and theory type, assume a reasonable null p_null <- function(x) { @@ -332,10 +458,16 @@ p_null <- function(x) { # The "null_fn" column is a function(x) whose output gives attr(x, "params") theorized_nulls <- tibble::tribble( - ~stat, ~null_fn, - "Chisq", p_null, - "t", function(x) {setNames(0, "mu")}, - "z", p_null + ~stat, + ~null_fn, + "Chisq", + p_null, + "t", + function(x) { + setNames(0, "mu") + }, + "z", + p_null ) determine_variable_type <- function(x, variable) { @@ -358,19 +490,34 @@ determine_variable_type <- function(x, variable) { # Argument checking -------------------------------------------------------- -check_order <- function(x, order, in_calculate = TRUE, stat, call = caller_env()) { +check_order <- function( + x, + order, + in_calculate = TRUE, + stat, + call = caller_env() +) { # If there doesn't need to be an order argument, warn if there is one, # and otherwise, skip checks - if (!(theory_type(x) %in% c("Two sample props z", "Two sample t") || - is.null(stat) || - stat %in% c("diff in means", "diff in medians", - "diff in props", "ratio of props", "odds ratio"))) { + if ( + !(theory_type(x) %in% + c("Two sample props z", "Two sample t") || + is.null(stat) || + stat %in% + c( + "diff in means", + "diff in medians", + "diff in props", + "ratio of props", + "odds ratio" + )) + ) { if (!is.null(order)) { - cli_warn( - "Statistic is not based on a difference or ratio; the `order` argument \\ + cli_warn( + "Statistic is not based on a difference or ratio; the `order` argument \\ will be ignored. Check {.help [{.fun calculate}](infer::calculate)} \\ for details." - ) + ) } else { return(order) } @@ -401,31 +548,31 @@ check_order <- function(x, order, in_calculate = TRUE, stat, call = caller_env() the order \"{unique_ex[1]}\" / \"{unique_ex[2]}\" for ratio-based \\ statistics. To specify this order yourself, supply `order = \\ c(\"{unique_ex[1]}\", \"{unique_ex[2]}\")`." - ) + ) } else { if (xor(is.na(order[1]), is.na(order[2]))) { - cli_abort( + cli_abort( "Only one level specified in `order`. Both levels need to be specified.", call = call - ) + ) } if (length(order) > 2) { - cli_abort( - "`order` is expecting only two entries.", - call = call - ) + cli_abort( + "`order` is expecting only two entries.", + call = call + ) } if (order[1] %in% unique_ex == FALSE) { - cli_abort( - "{order[1]} is not a level of the explanatory variable.", - call = call - ) + cli_abort( + "{order[1]} is not a level of the explanatory variable.", + call = call + ) } if (order[2] %in% unique_ex == FALSE) { - cli_abort( - "{order[2]} is not a level of the explanatory variable.", - call = call - ) + cli_abort( + "{order[2]} is not a level of the explanatory variable.", + call = call + ) } } # return the order as given (unless the argument was invalid or NULL) @@ -435,19 +582,27 @@ check_order <- function(x, order, in_calculate = TRUE, stat, call = caller_env() check_point_params <- function(x, stat, call = caller_env()) { param_names <- attr(attr(x, "params"), "names") hyp_text <- 'to be set in `hypothesize()`.' - if (is_hypothesized(x) && !identical(attr(x, "null"), "paired independence")) { + if ( + is_hypothesized(x) && !identical(attr(x, "null"), "paired independence") + ) { if (stat %in% c("mean", "median", "sd", "prop")) { if ((stat == "mean") && !("mu" %in% param_names)) { - cli_abort('`stat == "mean"` requires `"mu"` {hyp_text}', call = call) + cli_abort('`stat == "mean"` requires `"mu"` {hyp_text}', call = call) } if (!(stat == "mean") && ("mu" %in% param_names)) { - cli_abort('`"mu"` does not correspond to `stat = "{stat}"`.', call = call) + cli_abort( + '`"mu"` does not correspond to `stat = "{stat}"`.', + call = call + ) } if ((stat == "median") && !("med" %in% param_names)) { - cli_abort('`stat == "median"` requires `"med"` {hyp_text}', call = call) + cli_abort('`stat == "median"` requires `"med"` {hyp_text}', call = call) } if (!(stat == "median") && ("med" %in% param_names)) { - cli_abort('`"med"` does not correspond to `stat = "{stat}"`.', call = call) + cli_abort( + '`"med"` does not correspond to `stat = "{stat}"`.', + call = call + ) } } } @@ -467,56 +622,87 @@ check_for_nan <- function(x, context) { return(x) } - calc_ref <- c(i = "See {.help [{.fun calculate}](infer::calculate)} for more details.") + calc_ref <- c( + i = "See {.help [{.fun calculate}](infer::calculate)} for more details." + ) # If all of the data is NaN, raise an error if (num_nans == nrow(x)) { - cli_abort( - c("All calculated statistics were `NaN`.", calc_ref), - call = NULL - ) + cli_abort( + c("All calculated statistics were `NaN`.", calc_ref), + call = NULL + ) } - stats_were <- if (num_nans == 1) {"statistic was"} else {"statistics were"} + stats_were <- if (num_nans == 1) { + "statistic was" + } else { + "statistics were" + } num_nans_msg <- glue::glue("{num_nans} calculated {stats_were} `NaN`") if (context == "visualize") { # Raise a warning and plot the data with NaNs removed cli_warn( - c("{num_nans_msg}. `NaN`s have been omitted from visualization.", calc_ref) + c( + "{num_nans_msg}. `NaN`s have been omitted from visualization.", + calc_ref + ) ) return(x[!stat_is_nan, ]) } else if (context == "get_p_value") { # Raise an error cli_abort( - c("{num_nans_msg}. Simulation-based p-values are not well-defined for \\ - null distributions with non-finite values.", calc_ref), + c( + "{num_nans_msg}. Simulation-based p-values are not well-defined for \\ + null distributions with non-finite values.", + calc_ref + ), call = NULL ) } } -check_direction <- function(direction = c("less", "greater", "two_sided", - "left", "right", "both", - "two-sided", "two sided", - "two.sided"), call = caller_env()) { +check_direction <- function( + direction = c( + "less", + "greater", + "two_sided", + "left", + "right", + "both", + "two-sided", + "two sided", + "two.sided" + ), + call = caller_env() +) { check_type(direction, is.character, call = call) if ( - !(direction %in% c("less", "greater", "two_sided", "left", "right", - "both", "two-sided", "two sided", "two.sided")) + !(direction %in% + c( + "less", + "greater", + "two_sided", + "left", + "right", + "both", + "two-sided", + "two sided", + "two.sided" + )) ) { cli_abort( - 'The provided value for `direction` is not appropriate. Possible values \\ + 'The provided value for `direction` is not appropriate. Possible values \\ are "less", "greater", "two-sided", "left", "right", "both", \ "two_sided", "two sided", or "two.sided".', - call = call + call = call ) } } check_obs_stat <- function(obs_stat, plot = NULL, call = caller_env()) { if (!is.null(obs_stat)) { - if ("data.frame" %in% class(obs_stat)) { if (is_fitted(obs_stat)) { x_lab <- x_axis_label(plot) @@ -531,10 +717,10 @@ check_obs_stat <- function(obs_stat, plot = NULL, call = caller_env()) { check_type(obs_stat, is.data.frame, call = call) if ((nrow(obs_stat) != 1) || (ncol(obs_stat) != 1)) { - cli_warn( + cli_warn( "The first row and first column value of the given `obs_stat` will \\ be used." - ) + ) } # [[1]] is used in case `stat` is not specified as name of 1x1 @@ -548,37 +734,47 @@ check_obs_stat <- function(obs_stat, plot = NULL, call = caller_env()) { obs_stat } -check_mlr_x_and_obs_stat <- function(x, obs_stat, fn, arg, call = caller_env()) { +check_mlr_x_and_obs_stat <- function( + x, + obs_stat, + fn, + arg, + call = caller_env() +) { if (!is_fitted(obs_stat)) { - cli_abort( - c("The `{arg}` argument should be the output of `fit()`.", - i = "See the documentation with `?{fn}`."), - call = call - ) + cli_abort( + c( + "The `{arg}` argument should be the output of `fit()`.", + i = "See the documentation with `?{fn}`." + ), + call = call + ) } if (!is_generated(x)) { - cli_abort( + cli_abort( "The `x` argument needs to be passed to `generate()` before `fit()`.", call = call - ) + ) } - if (any(!unique(x$term) %in% unique(obs_stat$term)) || - any(!unique(obs_stat$term) %in% unique(x$term))) { - cli_abort( + if ( + any(!unique(x$term) %in% unique(obs_stat$term)) || + any(!unique(obs_stat$term) %in% unique(x$term)) + ) { + cli_abort( "The explanatory variables used to generate the distribution of \\ null fits are not the same used to fit the observed data.", call = call - ) + ) } if (response_name(x) != response_name(obs_stat)) { - cli_abort( + cli_abort( "The response variable of the null fits ({response_name(x)}) is not \\ the same as that of the observed fit ({response_name(obs_stat)}).", - call = call - ) + call = call + ) } invisible(TRUE) @@ -617,8 +813,15 @@ check_mlr_x_and_obs_stat <- function(x, obs_stat, fn, arg, call = caller_env()) #' #' @keywords internal #' @noRd -check_type <- function(x, predicate, type_name = NULL, x_name = NULL, - allow_null = FALSE, ..., call = caller_env()) { +check_type <- function( + x, + predicate, + type_name = NULL, + x_name = NULL, + allow_null = FALSE, + ..., + call = caller_env() +) { if (is.null(x_name)) { x_name <- deparse(substitute(x)) } @@ -634,10 +837,10 @@ check_type <- function(x, predicate, type_name = NULL, x_name = NULL, if (!is_pred_true) { # Not using "must be of type" because of 'tibble' and 'string' cases - cli_abort( - "`{x_name}` must be '{type_name}', not '{get_type(x)}'.", - call = call - ) + cli_abort( + "`{x_name}` must be '{type_name}', not '{get_type(x)}'.", + call = call + ) } x @@ -667,10 +870,10 @@ parse_type <- function(f_name) { check_is_distribution <- function(x, fn, call = caller_env()) { if (!any(inherits(x, "infer_dist") || is.data.frame(x))) { - cli_abort( + cli_abort( "The `x` argument to `{fn}()` must be an infer distribution, \\ outputted by `assume()` or `calculate()`.", call = call - ) + ) } } diff --git a/R/visualize.R b/R/visualize.R index 224ef108..3dcc4c6b 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -174,15 +174,19 @@ ggplot2::ggplot_add #' @importFrom ggplot2 xlab ylab geom_vline geom_rect geom_bar #' @importFrom stats dt qt df qf dnorm qnorm dchisq qchisq #' @export -visualize <- function(data, bins = 15, method = "simulation", - dens_color = "black", - ...) { +visualize <- function( + data, + bins = 15, + method = "simulation", + dens_color = "black", + ... +) { if (inherits(data, "infer_dist")) { if (!missing(method) && method != "theoretical") { - cli_warn(c( + cli_warn(c( 'Simulation-based visualization methods are not well-defined for \\ `assume()` output; the `method` argument will be ignored.', - i = 'Set `method = "theoretical"` to silence this message.' + i = 'Set `method = "theoretical"` to silence this message.' )) } @@ -252,9 +256,16 @@ visualize <- function(data, bins = 15, method = "simulation", visualise <- visualize -visualize_term <- function(data, term, bins = 15, method = "simulation", - dens_color = "black", dots, do_warn = TRUE, - call = rlang::call2("visualize")) { +visualize_term <- function( + data, + term, + bins = 15, + method = "simulation", + dens_color = "black", + dots, + do_warn = TRUE, + call = rlang::call2("visualize") +) { data <- check_for_nan(data, "visualize") check_visualize_args(data, bins, method, dens_color, call = call) plot_data <- create_plot_data(data) @@ -268,8 +279,15 @@ visualize_term <- function(data, term, bins = 15, method = "simulation", } check_dots_for_deprecated <- function(dots) { - dep_args <- c("obs_stat", "obs_stat_color", "pvalue_fill", "direction", - "endpoints", "endpoints_color", "ci_fill") + dep_args <- c( + "obs_stat", + "obs_stat_color", + "pvalue_fill", + "direction", + "endpoints", + "endpoints_color", + "ci_fill" + ) if (any(dep_args %in% names(dots))) { bad_args <- dep_args[dep_args %in% names(dots)] @@ -286,38 +304,44 @@ check_dots_for_deprecated <- function(dots) { list(NULL) } -check_visualize_args <- function(data, bins, method, dens_color, call = caller_env()) { +check_visualize_args <- function( + data, + bins, + method, + dens_color, + call = caller_env() +) { check_is_distribution(data, "visualize") check_type(bins, is.numeric, call = call) check_type(method, is.character, call = call) check_type(dens_color, is.character, call = call) if (!(method %in% c("simulation", "theoretical", "both"))) { - cli_abort( - 'Provide `method` with one of three options: `"theoretical"`, `"both"`, \\ + cli_abort( + 'Provide `method` with one of three options: `"theoretical"`, `"both"`, \\ or `"simulation"`. `"simulation"` is the default for simulation-based \\ null distributions, while `"theoretical"` is the only option for \\ null distributions outputted by `assume()`.', - call = call - ) + call = call + ) } if (method == "both") { if (!("stat" %in% names(data))) { - cli_abort( - '`generate()` and `calculate()` are both required to be done prior \\ + cli_abort( + '`generate()` and `calculate()` are both required to be done prior \\ to `visualize(method = "both")`', - call = call - ) + call = call + ) } if ( ("replicate" %in% names(data)) && (length(unique(data$replicate)) < 100) ) { - cli_warn( + cli_warn( "With only {length(unique(data$replicate))} replicates, it may be \\ difficult to see the relationship between simulation and theory." - ) + ) } } @@ -327,18 +351,18 @@ check_visualize_args <- function(data, bins, method, dens_color, call = caller_e # a function for checking arguments to functions that are added as layers # to visualize()d objects to make sure they weren't mistakenly piped check_for_piped_visualize <- function(..., call = caller_env()) { - is_ggplot_output <- vapply(list(...), ggplot2::is_ggplot, logical(1)) if (any(is_ggplot_output)) { - called_function <- sys.call(-1)[[1]] - cli_abort(c( - "It looks like you piped the result of `visualize()` into \\ + cli_abort( + c( + "It looks like you piped the result of `visualize()` into \\ `{called_function}()` (using `%>%`) rather than adding the result of \\ `{called_function}()` as a layer with `+`.", - i = "Consider changing `%>%` to `+`."), + i = "Consider changing `%>%` to `+`." + ), call = call ) } @@ -361,19 +385,19 @@ impute_endpoints <- function(endpoints, plot = NULL, call = caller_env()) { } if (is.vector(endpoints) && (length(endpoints) != 2)) { - cli_warn( + cli_warn( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. \\ Using the first two entries as the `endpoints`." - ) + ) res <- endpoints[1:2] } if (is.data.frame(endpoints)) { if ((nrow(endpoints) != 1) || (ncol(endpoints) != 2)) { - cli_abort( + cli_abort( "Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector.", call = call - ) + ) } res <- unlist(endpoints) @@ -382,18 +406,23 @@ impute_endpoints <- function(endpoints, plot = NULL, call = caller_env()) { res %>% copy_attrs(endpoints, attrs = c("se", "point_estimate")) } -impute_obs_stat <- function(obs_stat, direction, endpoints, call = caller_env()) { +impute_obs_stat <- function( + obs_stat, + direction, + endpoints, + call = caller_env() +) { obs_stat <- check_obs_stat(obs_stat) if ( !is.null(direction) && - (is.null(obs_stat) + is.null(endpoints) != 1) + (is.null(obs_stat) + is.null(endpoints) != 1) ) { - cli_abort( - "Shading requires either `endpoints` values for a confidence interval \\ + cli_abort( + "Shading requires either `endpoints` values for a confidence interval \\ or the observed statistic `obs_stat` to be provided.", - call = call - ) + call = call + ) } obs_stat @@ -420,11 +449,15 @@ simulation_layer <- function(data, dots = list(NULL)) { res <- list( do.call( ggplot2::stat_bin, - c(list(mapping = aes(x = stat), - bins = bins, - color = "white", - breaks = bin_breaks), - dots) + c( + list( + mapping = aes(x = stat), + bins = bins, + color = "white", + breaks = bin_breaks + ), + dots + ) ) ) } else { @@ -440,12 +473,15 @@ simulation_layer <- function(data, dots = list(NULL)) { res <- list( do.call( ggplot2::stat_bin, - c(list(mapping = aes(x = stat, y = ggplot2::after_stat(density)), - bins = bins, - color = "white", - breaks = bin_breaks), - dots) - + c( + list( + mapping = aes(x = stat, y = ggplot2::after_stat(density)), + bins = bins, + color = "white", + breaks = bin_breaks + ), + dots + ) ) ) } @@ -460,8 +496,14 @@ compute_bin_breaks <- function(data, bins) { c(g_tbl[["xmin"]][1], g_tbl[["xmax"]]) } -theoretical_layer <- function(data, dens_color, dots = list(NULL), do_warn = TRUE, - mean_shift = 0, sd_shift = 1) { +theoretical_layer <- function( + data, + dens_color, + dots = list(NULL), + do_warn = TRUE, + mean_shift = 0, + sd_shift = 1 +) { method <- get_viz_method(data) if (method == "simulation") { @@ -475,20 +517,39 @@ theoretical_layer <- function(data, dens_color, dots = list(NULL), do_warn = TRU switch( theory_type, t = theory_curve( - method, dt, qt, list(df = attr(data, "distr_param")), dens_color, - mean_shift = mean_shift, sd_shift = sd_shift + method, + dt, + qt, + list(df = attr(data, "distr_param")), + dens_color, + mean_shift = mean_shift, + sd_shift = sd_shift ), `F` = theory_curve( - method, df, qf, + method, + df, + qf, list( - df1 = attr(data, "distr_param"), df2 = attr(data, "distr_param2") + df1 = attr(data, "distr_param"), + df2 = attr(data, "distr_param2") ), dens_color = dens_color ), - z = theory_curve(method, dnorm, qnorm, list(), dens_color, - mean_shift = mean_shift, sd_shift = sd_shift), + z = theory_curve( + method, + dnorm, + qnorm, + list(), + dens_color, + mean_shift = mean_shift, + sd_shift = sd_shift + ), `Chi-Square` = theory_curve( - method, dchisq, qchisq, list(df = attr(data, "distr_param")), dens_color + method, + dchisq, + qchisq, + list(df = attr(data, "distr_param")), + dens_color ) ) } @@ -507,37 +568,46 @@ warn_theoretical_layer <- function(data, do_warn = TRUE, call = caller_env()) { if ( has_attr(data, "stat") && - !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) + !(attr(data, "stat") %in% c("t", "z", "Chisq", "F")) ) { if (method == "theoretical") { - cli_warn( + cli_warn( "Your `calculate`d statistic and the theoretical distribution are on \\ different scales. Displaying only the theoretical distribution." - ) + ) } else if (method == "both") { - cli_abort( + cli_abort( "Your `calculate`d statistic and the theoretical distribution are on \\ different scales. Use a standardized `stat` instead.", call = call - ) + ) } } } -theory_curve <- function(method, d_fun, q_fun, args_list, dens_color, - mean_shift = 0, sd_shift = 1) { - +theory_curve <- function( + method, + d_fun, + q_fun, + args_list, + dens_color, + mean_shift = 0, + sd_shift = 1 +) { if (method == "theoretical") { d_fun_ <- shift_d_fun(d_fun, mean_shift, sd_shift) x_range <- (do.call(q_fun, c(p = list(c(0.001, 0.999)), args_list)) * - sd_shift) + + sd_shift) + mean_shift res <- list( ggplot2::geom_path( - data = data.frame(x = x_range), mapping = aes(x = x), - stat = "function", fun = d_fun_, args = args_list, + data = data.frame(x = x_range), + mapping = aes(x = x), + stat = "function", + fun = d_fun_, + args = args_list, color = dens_color ) ) @@ -545,7 +615,9 @@ theory_curve <- function(method, d_fun, q_fun, args_list, dens_color, res <- list( ggplot2::geom_path( mapping = aes(x = stat), - stat = "function", fun = d_fun, args = args_list, + stat = "function", + fun = d_fun, + args = args_list, color = dens_color ) ) @@ -631,7 +703,11 @@ facet_layer <- function() { ) } -check_shade_confidence_interval_args <- function(color, fill, call = caller_env()) { +check_shade_confidence_interval_args <- function( + color, + fill, + call = caller_env() +) { check_type(color, is_color_string, "color string", call = call) if (!is.null(fill)) { check_type(fill, is_color_string, "color string", call = call) @@ -652,7 +728,13 @@ short_theory_type <- function(x) { `Chi-Square` = c("Chi-square test of indep", "Chi-square Goodness of Fit") ) - is_type <- vapply(theory_types, function(x) {theory_attr %in% x}, logical(1)) + is_type <- vapply( + theory_types, + function(x) { + theory_attr %in% x + }, + logical(1) + ) names(theory_types)[which(is_type)[1]] } @@ -687,8 +769,10 @@ ggplot_add.infer_layer <- function(object, plot, object_name) { # process object_name (shade_* call) ---------------------------------- shade_fn <- attr(object, "fn") - shade_args <- attributes(object)[!names(attributes(object)) %in% - c("class", "fn")] + shade_args <- attributes(object)[ + !names(attributes(object)) %in% + c("class", "fn") + ] shade_args["fill"] <- shade_args[["fill"]] # if a patchwork object, use a custom `infer_layer` `+.gg` method. @@ -732,8 +816,10 @@ x_axis_label <- function(x) { create_plot_data <- function(data) { if (inherits(data, "infer_dist")) { res <- tibble::tibble() %>% - copy_attrs(data, - c("theory_type", "distr_param", "distr_param2", "viz_method")) + copy_attrs( + data, + c("theory_type", "distr_param", "distr_param2", "viz_method") + ) } else { res <- data } diff --git a/R/wrappers.R b/R/wrappers.R index 7666b1dc..6e6663a9 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -44,26 +44,32 @@ #' @importFrom stats as.formula #' @family wrapper functions #' @export -t_test <- function(x, formula, - response = NULL, - explanatory = NULL, - order = NULL, - alternative = "two-sided", - mu = 0, - conf_int = TRUE, - conf_level = 0.95, - ...) { - +t_test <- function( + x, + formula, + response = NULL, + explanatory = NULL, + order = NULL, + alternative = "two-sided", + mu = 0, + conf_int = TRUE, + conf_level = 0.95, + ... +) { check_conf_level(conf_level) # convert all character and logical variables to be factor variables x <- standardize_variable_types(x) # parse response and explanatory variables - response <- enquo(response) + response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, - response = response, explanatory = explanatory) + x <- parse_variables( + x = x, + formula = formula, + response = response, + explanatory = explanatory + ) # match with old "dot" syntax in t.test if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) { @@ -74,31 +80,45 @@ t_test <- function(x, formula, if (has_explanatory(x)) { order <- check_order(x, order, in_calculate = FALSE, stat = NULL) x <- reorder_explanatory(x, order) - prelim <- stats::t.test(formula = new_formula(response_expr(x), explanatory_expr(x)), - data = x, - alternative = alternative, - mu = mu, - conf.level = conf_level, - ...) %>% + prelim <- stats::t.test( + formula = new_formula(response_expr(x), explanatory_expr(x)), + data = x, + alternative = alternative, + mu = mu, + conf.level = conf_level, + ... + ) %>% broom::glance() - } else { # one sample - prelim <- stats::t.test(response_variable(x), - alternative = alternative, - mu = mu, - conf.level = conf_level) %>% + } else { + # one sample + prelim <- stats::t.test( + response_variable(x), + alternative = alternative, + mu = mu, + conf.level = conf_level + ) %>% broom::glance() } if (conf_int) { results <- prelim %>% dplyr::select( - statistic, t_df = parameter, p_value = p.value, alternative, - estimate, lower_ci = conf.low, upper_ci = conf.high + statistic, + t_df = parameter, + p_value = p.value, + alternative, + estimate, + lower_ci = conf.low, + upper_ci = conf.high ) } else { results <- prelim %>% dplyr::select( - statistic, t_df = parameter, p_value = p.value, alternative, estimate + statistic, + t_df = parameter, + p_value = p.value, + alternative, + estimate ) } @@ -141,15 +161,18 @@ t_test <- function(x, formula, #' @family wrapper functions #' @family functions for calculating observed statistics #' @export -t_stat <- function(x, formula, - response = NULL, - explanatory = NULL, - order = NULL, - alternative = "two-sided", - mu = 0, - conf_int = FALSE, - conf_level = 0.95, - ...) { +t_stat <- function( + x, + formula, + response = NULL, + explanatory = NULL, + order = NULL, + alternative = "two-sided", + mu = 0, + conf_int = FALSE, + conf_level = 0.95, + ... +) { lifecycle::deprecate_warn( when = "1.0.0", what = "t_stat()", @@ -162,10 +185,14 @@ t_stat <- function(x, formula, x <- standardize_variable_types(x) # parse response and explanatory variables - response <- enquo(response) + response <- enquo(response) explanatory <- enquo(explanatory) - x <- parse_variables(x = x, formula = formula, - response = response, explanatory = explanatory) + x <- parse_variables( + x = x, + formula = formula, + response = response, + explanatory = explanatory + ) # match with old "dot" syntax in t.test if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) { @@ -176,18 +203,23 @@ t_stat <- function(x, formula, if (has_explanatory(x)) { order <- check_order(x, order, in_calculate = FALSE, stat = NULL) x <- reorder_explanatory(x, order) - prelim <- stats::t.test(formula = new_formula(response_expr(x), explanatory_expr(x)), - data = x, - alternative = alternative, - mu = mu, - conf.level = conf_level, - ...) %>% + prelim <- stats::t.test( + formula = new_formula(response_expr(x), explanatory_expr(x)), + data = x, + alternative = alternative, + mu = mu, + conf.level = conf_level, + ... + ) %>% broom::glance() - } else { # one sample - prelim <- stats::t.test(response_variable(x), - alternative = alternative, - mu = mu, - conf.level = conf_level) %>% + } else { + # one sample + prelim <- stats::t.test( + response_variable(x), + alternative = alternative, + mu = mu, + conf.level = conf_level + ) %>% broom::glance() } @@ -229,29 +261,34 @@ t_stat <- function(x, formula, #' #' @family wrapper functions #' @export -chisq_test <- function(x, formula, response = NULL, - explanatory = NULL, ...) { +chisq_test <- function(x, formula, response = NULL, explanatory = NULL, ...) { # Parse response and explanatory variables - response <- enquo(response) + response <- enquo(response) explanatory <- enquo(explanatory) x <- standardize_variable_types(x) - x <- parse_variables(x = x, formula = formula, - response = response, explanatory = explanatory) + x <- parse_variables( + x = x, + formula = formula, + response = response, + explanatory = explanatory + ) if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + cli_abort( 'The response variable of `{response_name(x)}` is not appropriate \\ since the response variable is expected to be categorical.' - ) + ) } - if (has_explanatory(x) && - !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + if ( + has_explanatory(x) && + !(class(explanatory_variable(x)) %in% c("logical", "character", "factor")) + ) { + cli_abort( 'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\ since the explanatory variable is expected to be categorical.' - ) + ) } x <- x %>% @@ -296,34 +333,39 @@ chisq_test <- function(x, formula, response = NULL, #' @family wrapper functions #' @family functions for calculating observed statistics #' @export -chisq_stat <- function(x, formula, response = NULL, - explanatory = NULL, ...) { - lifecycle::deprecate_warn( - when = "1.0.0", - what = "chisq_stat()", - with = "observe()" - ) +chisq_stat <- function(x, formula, response = NULL, explanatory = NULL, ...) { + lifecycle::deprecate_warn( + when = "1.0.0", + what = "chisq_stat()", + with = "observe()" + ) # Parse response and explanatory variables - response <- enquo(response) + response <- enquo(response) explanatory <- enquo(explanatory) x <- standardize_variable_types(x) - x <- parse_variables(x = x, formula = formula, - response = response, explanatory = explanatory) + x <- parse_variables( + x = x, + formula = formula, + response = response, + explanatory = explanatory + ) if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + cli_abort( 'The response variable of `{response_name(x)}` is not appropriate \\ since the response variable is expected to be categorical.' - ) + ) } - if (has_explanatory(x) && - !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + if ( + has_explanatory(x) && + !(class(explanatory_variable(x)) %in% c("logical", "character", "factor")) + ) { + cli_abort( 'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\ since the response variable is expected to be categorical.' - ) + ) } x <- x %>% @@ -339,10 +381,10 @@ check_conf_level <- function(conf_level, call = caller_env()) { if ( (!inherits(conf_level, "numeric")) | (conf_level < 0) | (conf_level > 1) ) { - cli_abort( - "The `conf_level` argument must be a number between 0 and 1.", - call = call - ) + cli_abort( + "The `conf_level` argument must be a number between 0 and 1.", + call = call + ) } } @@ -418,40 +460,55 @@ check_conf_level <- function(conf_level, call = caller_env()) { #' #' @family wrapper functions #' @export -prop_test <- function(x, formula, - response = NULL, - explanatory = NULL, - p = NULL, - order = NULL, - alternative = "two-sided", - conf_int = TRUE, - conf_level = 0.95, - success = NULL, - correct = NULL, - z = FALSE, - ...) { +prop_test <- function( + x, + formula, + response = NULL, + explanatory = NULL, + p = NULL, + order = NULL, + alternative = "two-sided", + conf_int = TRUE, + conf_level = 0.95, + success = NULL, + correct = NULL, + z = FALSE, + ... +) { # Parse response and explanatory variables - response <- enquo(response) + response <- enquo(response) explanatory <- enquo(explanatory) x <- standardize_variable_types(x) - x <- parse_variables(x = x, formula = formula, - response = response, explanatory = explanatory) + x <- parse_variables( + x = x, + formula = formula, + response = response, + explanatory = explanatory + ) - correct <- if (z) {FALSE} else if (is.null(correct)) {TRUE} else {correct} + correct <- if (z) { + FALSE + } else if (is.null(correct)) { + TRUE + } else { + correct + } if (!(class(response_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + cli_abort( 'The response variable of `{response_name(x)}` is not appropriate \\ since the response variable is expected to be categorical.' - ) + ) } - if (has_explanatory(x) && - !(class(explanatory_variable(x)) %in% c("logical", "character", "factor"))) { - cli_abort( + if ( + has_explanatory(x) && + !(class(explanatory_variable(x)) %in% c("logical", "character", "factor")) + ) { + cli_abort( 'The explanatory variable of `{explanatory_name(x)}` is not appropriate \\ since the explanatory variable is expected to be categorical.' - ) + ) } # match with old "dot" syntax in t.test if (alternative %in% c("two-sided", "two_sided", "two sided", "two.sided")) { @@ -483,31 +540,34 @@ prop_test <- function(x, formula, if (has_explanatory(x)) { # make a summary table to supply to prop.test sum_table <- x %>% - select(explanatory_name(x), response_name(x)) %>% - table() + select(explanatory_name(x), response_name(x)) %>% + table() length_exp_levels <- length(levels(explanatory_variable(x))) if (length_exp_levels == 2) { - order <- check_order(x, order, in_calculate = FALSE, stat = NULL) - # reorder according to the order and success arguments - sum_table <- sum_table[order, lvls] + order <- check_order(x, order, in_calculate = FALSE, stat = NULL) + # reorder according to the order and success arguments + sum_table <- sum_table[order, lvls] } else if (length_exp_levels >= 3 && !is.null(order)) { - cli_warn(c( - "The `order` argument will be ignored as it is not well-defined \\ + cli_warn(c( + "The `order` argument will be ignored as it is not well-defined \\ for explanatory variables with more than 2 levels. ", - i = "To silence this message, avoid passing the `order` argument." - )) - # reorder according to the success argument - sum_table <- sum_table[, lvls] + i = "To silence this message, avoid passing the `order` argument." + )) + # reorder according to the success argument + sum_table <- sum_table[, lvls] } - prelim <- stats::prop.test(x = sum_table, - alternative = alternative, - conf.level = conf_level, - p = p, - correct = correct, - ...) - } else { # one sample + prelim <- stats::prop.test( + x = sum_table, + alternative = alternative, + conf.level = conf_level, + p = p, + correct = correct, + ... + ) + } else { + # one sample response_tbl <- response_variable(x) %>% factor() %>% stats::relevel(success) %>% @@ -520,39 +580,42 @@ prop_test <- function(x, formula, ) } - prelim <- stats::prop.test(x = response_tbl, - alternative = alternative, - conf.level = conf_level, - p = p, - correct = correct, - ...) - + prelim <- stats::prop.test( + x = response_tbl, + alternative = alternative, + conf.level = conf_level, + p = p, + correct = correct, + ... + ) } if (length(prelim$estimate) <= 2) { if (conf_int & is.null(p)) { - results <- prelim %>% - broom::glance() %>% - dplyr::select(statistic, - chisq_df = parameter, - p_value = p.value, - alternative, - lower_ci = conf.low, - upper_ci = conf.high) + results <- prelim %>% + broom::glance() %>% + dplyr::select( + statistic, + chisq_df = parameter, + p_value = p.value, + alternative, + lower_ci = conf.low, + upper_ci = conf.high + ) } else { - results <- prelim %>% - broom::glance() %>% - dplyr::select(statistic, - chisq_df = parameter, - p_value = p.value, - alternative) + results <- prelim %>% + broom::glance() %>% + dplyr::select( + statistic, + chisq_df = parameter, + p_value = p.value, + alternative + ) } } else { results <- prelim %>% broom::glance() %>% - dplyr::select(statistic, - chisq_df = parameter, - p_value = p.value) + dplyr::select(statistic, chisq_df = parameter, p_value = p.value) } if (z) { @@ -563,19 +626,35 @@ prop_test <- function(x, formula, } calculate_z <- function(x, results, success, p, order) { - exp <- if (has_explanatory(x)) {explanatory_expr(x)} else {NULL} + exp <- if (has_explanatory(x)) { + explanatory_expr(x) + } else { + NULL + } form <- new_formula(response_expr(x), exp) stat <- x %>% specify(formula = form, success = success) %>% hypothesize( - null = if (has_explanatory(x)) {"independence"} else {"point"}, - p = if (is.null(p) && !has_explanatory(x)) {.5} else {p} + null = if (has_explanatory(x)) { + "independence" + } else { + "point" + }, + p = if (is.null(p) && !has_explanatory(x)) { + .5 + } else { + p + } ) %>% calculate( stat = "z", - order = if (has_explanatory(x)) {order} else {NULL} + order = if (has_explanatory(x)) { + order + } else { + NULL + } ) %>% dplyr::pull() diff --git a/data-raw/save_gss.R b/data-raw/save_gss.R index 5675f2a3..928bd8ed 100644 --- a/data-raw/save_gss.R +++ b/data-raw/save_gss.R @@ -5,7 +5,7 @@ library(ggplot2) # pull gss data temp <- tempfile() -download.file("https://gss.norc.org/documents/stata/GSS_stata.zip",temp) +download.file("https://gss.norc.org/documents/stata/GSS_stata.zip", temp) # if this next line errors with "No such file or directory", try # incrementing the number after "_R" @@ -16,39 +16,54 @@ unlink(temp) # select relevant columns gss_small <- gss_orig %>% filter(!stringr::str_detect(sample, "blk oversamp")) %>% # this is for weighting - select(year, age, sex, college = degree, partyid, hompop, - hours = hrs1, income, class, finrela, weight = wtssall) %>% - mutate_if(is.factor, ~fct_collapse(., NULL = c("IAP", "NA", "iap", "na"))) %>% - mutate(age = age %>% - fct_recode("89" = "89 or older", - NULL = "DK") %>% # truncated at 89 - as.character() %>% - as.numeric(), - hompop = hompop %>% - fct_collapse(NULL = c("DK")) %>% - as.character() %>% - as.numeric(), - hours = hours %>% - fct_recode("89" = "89+ hrs", - NULL = "DK") %>% # truncated at 89 - as.character() %>% - as.numeric(), - weight = weight %>% - as.character() %>% - as.numeric(), - partyid = fct_collapse(partyid, - dem = c("strong democrat", "not str democrat"), - rep = c("strong republican", "not str republican"), - ind = c("ind,near dem", "independent", "ind,near rep"), - other = "other party" - ), - income = factor(income, ordered = TRUE), - college = fct_collapse(college, - degree = c("junior college", "bachelor", "graduate"), - "no degree" = c("lt high school", "high school"), - NULL = "dk" # no dks show up in the data, so drop this level - ) - ) + select( + year, + age, + sex, + college = degree, + partyid, + hompop, + hours = hrs1, + income, + class, + finrela, + weight = wtssall + ) %>% + mutate_if( + is.factor, + ~ fct_collapse(., NULL = c("IAP", "NA", "iap", "na")) + ) %>% + mutate( + age = age %>% + fct_recode("89" = "89 or older", NULL = "DK") %>% # truncated at 89 + as.character() %>% + as.numeric(), + hompop = hompop %>% + fct_collapse(NULL = c("DK")) %>% + as.character() %>% + as.numeric(), + hours = hours %>% + fct_recode("89" = "89+ hrs", NULL = "DK") %>% # truncated at 89 + as.character() %>% + as.numeric(), + weight = weight %>% + as.character() %>% + as.numeric(), + partyid = fct_collapse( + partyid, + dem = c("strong democrat", "not str democrat"), + rep = c("strong republican", "not str republican"), + ind = c("ind,near dem", "independent", "ind,near rep"), + other = "other party" + ), + income = factor(income, ordered = TRUE), + college = fct_collapse( + college, + degree = c("junior college", "bachelor", "graduate"), + "no degree" = c("lt high school", "high school"), + NULL = "dk" # no dks show up in the data, so drop this level + ) + ) # sample 3k rows, first dropping NAs set.seed(20200201) diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index 6f0bcc41..bc4e5a37 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -5,7 +5,11 @@ expect_doppelganger <- function(title, fig, ...) { vdiffr::expect_doppelganger(title, fig, ...) } -eps <- if (capabilities("long.double")) {sqrt(.Machine$double.eps)} else {0.01} +eps <- if (capabilities("long.double")) { + sqrt(.Machine$double.eps) +} else { + 0.01 +} gss_tbl <- tibble::as_tibble(gss) %>% dplyr::filter(!(is.na(sex) | is.na(college))) %>% @@ -20,7 +24,10 @@ gss_calc <- gss_tbl %>% mtcars_df <- mtcars %>% dplyr::mutate( - cyl = factor(cyl), vs = factor(vs), am = factor(am), gear = factor(gear), + cyl = factor(cyl), + vs = factor(vs), + am = factor(am), + gear = factor(gear), carb = factor(carb) ) diff --git a/tests/testthat/test-aliases.R b/tests/testthat/test-aliases.R index 516aa5a6..c7af9ed5 100644 --- a/tests/testthat/test-aliases.R +++ b/tests/testthat/test-aliases.R @@ -18,7 +18,7 @@ test_that("old aliases produce informative error", { ) expect_snapshot( - error = TRUE, - res_ <- gss_permute %>% conf_int() - ) + error = TRUE, + res_ <- gss_permute %>% conf_int() + ) }) diff --git a/tests/testthat/test-assume.R b/tests/testthat/test-assume.R index 240bcb1d..cee56fa6 100644 --- a/tests/testthat/test-assume.R +++ b/tests/testthat/test-assume.R @@ -9,8 +9,8 @@ test_that("distribution description works as expected", { specify(age ~ partyid) %>% hypothesize(null = "independence") %>% assume_( - distribution = "F", - df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4) + distribution = "F", + df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4) ), "An F distribution with 3 and 496 degrees of freedom." ) @@ -32,13 +32,17 @@ test_that("distribution description works as expected", { expect_equal( gss %>% specify(response = finrela) %>% - hypothesize(null = "point", - p = c("far below average" = 1/6, - "below average" = 1/6, - "average" = 1/6, - "above average" = 1/6, - "far above average" = 1/6, - "DK" = 1/6)) %>% + hypothesize( + null = "point", + p = c( + "far below average" = 1 / 6, + "below average" = 1 / 6, + "average" = 1 / 6, + "above average" = 1 / 6, + "far above average" = 1 / 6, + "DK" = 1 / 6 + ) + ) %>% assume_("Chisq", length(unique(gss$finrela)) - 1), "A Chi-squared distribution with 5 degrees of freedom." ) @@ -46,13 +50,17 @@ test_that("distribution description works as expected", { expect_equal( gss %>% specify(response = finrela) %>% - hypothesize(null = "point", - p = c("far below average" = 1/6, - "below average" = 1/6, - "average" = 1/6, - "above average" = 1/6, - "far above average" = 1/6, - "DK" = 1/6)) %>% + hypothesize( + null = "point", + p = c( + "far below average" = 1 / 6, + "below average" = 1 / 6, + "average" = 1 / 6, + "above average" = 1 / 6, + "far above average" = 1 / 6, + "DK" = 1 / 6 + ) + ) %>% assume_("Chisq"), "A Chi-squared distribution with 5 degrees of freedom." ) @@ -96,7 +104,8 @@ test_that("distribution description works as expected", { test_that("assume errors with bad arguments", { # supply a bad distribution - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ college) %>% hypothesize(null = "independence") %>% @@ -104,14 +113,16 @@ test_that("assume errors with bad arguments", { ) # bad number of df arguments - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ college) %>% hypothesize(null = "independence") %>% assume("t", c(nrow(gss) - 1, 2)) - ) + ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% @@ -119,7 +130,8 @@ test_that("assume errors with bad arguments", { ) # bad df argument type - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% @@ -127,14 +139,16 @@ test_that("assume errors with bad arguments", { ) # df argument possibly passed to dots - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% assume("F", nrow(gss) - 1, 1) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% @@ -142,21 +156,24 @@ test_that("assume errors with bad arguments", { ) # supply `distribution`s that don't align with the supplied variables - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ finrela) %>% hypothesize(null = "independence") %>% assume("t", nrow(gss) - 1) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ finrela) %>% hypothesize(null = "independence") %>% assume("z", nrow(gss) - 1) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(age ~ NULL) %>% hypothesize(null = "point", mu = 40) %>% @@ -164,12 +181,14 @@ test_that("assume errors with bad arguments", { ) # supply bad `x` arguments - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% assume("z", nrow(gss) - 1) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, "boop" %>% assume("z", nrow(gss) - 1) ) @@ -234,7 +253,6 @@ test_that("assume() handles automatic df gracefully", { 173 ) - # n1 + n2 - 2 expect_equal( expect_silent( @@ -249,7 +267,7 @@ test_that("assume() handles automatic df gracefully", { }) test_that("assume() brings along supplied arguments", { - t_dist <- gss %>% + t_dist <- gss %>% specify(age ~ college) %>% hypothesize(null = "independence") %>% assume("t") diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index 1e00c091..ec485085 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -14,18 +14,10 @@ test_that("calculate checks `stat` argument", { hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_snapshot(error = TRUE, - calculate(gen_gss_slope, stat = "slopee") - ) - expect_snapshot(error = TRUE, - calculate(gen_gss_slope, stat = "stdev") - ) - expect_snapshot(error = TRUE, - calculate(gen_gss_slope, stat = "stat") - ) - expect_snapshot(error = TRUE, - calculate(gen_gss_slope, stat = "chi sq") - ) + expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "slopee")) + expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "stdev")) + expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "stat")) + expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "chi sq")) # stat can be one of the allowed aliases chisq_df <- gss %>% specify(formula = finrela ~ sex) @@ -42,14 +34,16 @@ test_that("calculate checks `stat` argument", { }) test_that("errors informatively with incompatible stat vs hypothesis", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(college ~ sex, success = "degree") %>% hypothesise(null = "point", p = .40) %>% calculate(stat = "diff in props", order = c("female", "male")) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(college ~ sex, success = "degree") %>% hypothesise(null = "point", p = .40) %>% @@ -67,19 +61,24 @@ test_that("errors informatively with incompatible stat vs hypothesis", { expect_silent( gss %>% specify(response = finrela) %>% - hypothesize(null = "point", - p = c("far below average" = 1/6, - "below average" = 1/6, - "average" = 1/6, - "above average" = 1/6, - "far above average" = 1/6, - "DK" = 1/6)) %>% + hypothesize( + null = "point", + p = c( + "far below average" = 1 / 6, + "below average" = 1 / 6, + "average" = 1 / 6, + "above average" = 1 / 6, + "far above average" = 1 / 6, + "DK" = 1 / 6 + ) + ) %>% calculate(stat = "Chisq") ) }) test_that("response attribute has been set", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, tibble::as_tibble(gss) %>% calculate(stat = "median") ) }) @@ -200,13 +199,15 @@ test_that("two sample mean-type problems are working", { generate(reps = 10, type = "permute") expect_snapshot(res_ <- calculate(gen_gss5a, stat = "diff in means")) expect_silent( - calculate(gen_gss5a, + calculate( + gen_gss5a, stat = "diff in means", order = c("no degree", "degree") ) ) expect_snapshot(res_ <- calculate(gen_gss5a, stat = "t")) - expect_silent(calculate(gen_gss5a, + expect_silent(calculate( + gen_gss5a, stat = "t", order = c("no degree", "degree") )) @@ -229,14 +230,16 @@ test_that("order is working for diff in means", { hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") expect_equal( - nrow(calculate(gen_gss7, + nrow(calculate( + gen_gss7, stat = "diff in means", order = c("no degree", "degree") )), 10 ) expect_equal( - ncol(calculate(gen_gss7, + ncol(calculate( + gen_gss7, stat = "diff in means", order = c("no degree", "degree") )), @@ -299,28 +302,30 @@ test_that("chi-square matches chisq.test value", { # check that dots are passed correctly dat <- data.frame( - action = c(rep(x = "promote", times = 32), - rep(x = "hold file", times = 12), - rep(x = "promote", times = 19), - rep(x = "hold file", times = 30)), - sex = c(rep(x = "male", times = 44), - rep(x = "female", times = 49))) - + action = c( + rep(x = "promote", times = 32), + rep(x = "hold file", times = 12), + rep(x = "promote", times = 19), + rep(x = "hold file", times = 30) + ), + sex = c(rep(x = "male", times = 44), rep(x = "female", times = 49)) + ) promote_f <- dat %>% - specify(action ~ sex, success = "promote") %>% - calculate(stat = "Chisq", order = c("male", "female"), correct = FALSE) + specify(action ~ sex, success = "promote") %>% + calculate(stat = "Chisq", order = c("male", "female"), correct = FALSE) promote_t <- dat %>% - specify(action ~ sex, success = "promote") %>% - calculate(stat = "Chisq", order = c("male", "female"), correct = TRUE) + specify(action ~ sex, success = "promote") %>% + calculate(stat = "Chisq", order = c("male", "female"), correct = TRUE) expect_false(promote_f$stat == promote_t$stat) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, dat %>% - specify(action ~ sex, success = "promote") %>% - calculate(stat = "Chisq", order = c("male", "female"), correct = "boop") + specify(action ~ sex, success = "promote") %>% + calculate(stat = "Chisq", order = c("male", "female"), correct = "boop") ) }) @@ -356,46 +361,52 @@ test_that("`order` is working", { specify(hours ~ college) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, calculate(gen_gss_tbl10, stat = "diff in means", order = c(TRUE, FALSE)) ) gen_gss_tbl11 <- gss_tbl %>% specify(hours ~ college) %>% generate(reps = 10, type = "bootstrap") - expect_snapshot(error = TRUE, - calculate(gen_gss_tbl11, - stat = "diff in medians", - order = "no degree" - ) + expect_snapshot( + error = TRUE, + calculate(gen_gss_tbl11, stat = "diff in medians", order = "no degree") ) - expect_snapshot(error = TRUE, - calculate(gen_gss_tbl11, + expect_snapshot( + error = TRUE, + calculate( + gen_gss_tbl11, stat = "diff in medians", order = c(NA, "no degree") ) ) - expect_snapshot(error = TRUE, - calculate(gen_gss_tbl11, + expect_snapshot( + error = TRUE, + calculate( + gen_gss_tbl11, stat = "diff in medians", order = c("no degree", "other") ) ) expect_silent( - calculate(gen_gss_tbl11, + calculate( + gen_gss_tbl11, stat = "diff in medians", order = c("no degree", "degree") ) ) - expect_snapshot(error = TRUE, - calculate(gen_gss_tbl11, + expect_snapshot( + error = TRUE, + calculate( + gen_gss_tbl11, stat = "diff in means", order = c("no degree", "degree", "the last one") ) ) # order not given expect_snapshot( - res_ <- calculate(gen_gss_tbl11, stat = "diff in means") + res_ <- calculate(gen_gss_tbl11, stat = "diff in means") ) }) @@ -439,7 +450,7 @@ test_that("order being given when not needed gives warning", { hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") expect_snapshot( - res_ <- calculate(gen_gss_tbl15, stat = "Chisq", order = c("dem", "ind")) + res_ <- calculate(gen_gss_tbl15, stat = "Chisq", order = c("dem", "ind")) ) }) @@ -469,7 +480,7 @@ test_that("specify() %>% calculate() works", { ) expect_snapshot( - res_ <- gss_tbl %>% specify(partyid ~ NULL) %>% calculate(stat = "Chisq") + res_ <- gss_tbl %>% specify(partyid ~ NULL) %>% calculate(stat = "Chisq") ) }) @@ -483,7 +494,7 @@ test_that("One sample t hypothesis test is working", { ) expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl %>% specify(response = hours) %>% calculate(stat = "t") ) @@ -519,7 +530,7 @@ test_that("chisq GoF has params specified for observed stat", { test_that("One sample t bootstrap is working", { expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl %>% specify(hours ~ NULL) %>% generate(reps = 10, type = "bootstrap") %>% calculate(stat = "t") @@ -610,8 +621,8 @@ gss_tbl <- table(gss_biased$sex, gss_biased$college) test_that("calc_impl.odds_ratio works", { base_odds_ratio <- { - (gss_tbl [1, 1] * gss_tbl [2, 2]) / - (gss_tbl [1, 2] * gss_tbl [2, 1]) + (gss_tbl[1, 1] * gss_tbl[2, 2]) / + (gss_tbl[1, 2] * gss_tbl[2, 1]) } expect_equal( @@ -626,8 +637,8 @@ test_that("calc_impl.odds_ratio works", { test_that("calc_impl.ratio_of_props works", { base_ratio_of_props <- { - (gss_tbl [1, 2] / sum(gss_tbl [1, ])) / - (gss_tbl [2, 2] / sum(gss_tbl [2, ])) + (gss_tbl[1, 2] / sum(gss_tbl[1, ])) / + (gss_tbl[2, 2] / sum(gss_tbl[2, ])) } expect_equal( @@ -641,19 +652,19 @@ test_that("calc_impl.ratio_of_props works", { }) test_that("calc_impl.ratio_of_means works", { - base_ratio_of_means <- { - mean(gss$age[gss$college == "degree"]) / + base_ratio_of_means <- { + mean(gss$age[gss$college == "degree"]) / mean(gss$age[gss$college == "no degree"]) - } + } - expect_equal( - gss %>% - specify(age ~ college) %>% - calculate("ratio of means", order = c("degree", "no degree")) %>% - dplyr::pull(), - expected = base_ratio_of_means, - tolerance = eps - ) + expect_equal( + gss %>% + specify(age ~ college) %>% + calculate("ratio of means", order = c("degree", "no degree")) %>% + dplyr::pull(), + expected = base_ratio_of_means, + tolerance = eps + ) }) test_that("calc_impl.z works for one sample proportions", { @@ -671,23 +682,23 @@ test_that("calc_impl.z works for one sample proportions", { }) test_that("calculate warns informatively with insufficient null", { - expect_snapshot( - res_ <- gss %>% - specify(response = sex, success = "female") %>% - calculate(stat = "z") - ) + expect_snapshot( + res_ <- gss %>% + specify(response = sex, success = "female") %>% + calculate(stat = "z") + ) - expect_snapshot( - res_ <- gss %>% - specify(hours ~ NULL) %>% - calculate(stat = "t") - ) + expect_snapshot( + res_ <- gss %>% + specify(hours ~ NULL) %>% + calculate(stat = "t") + ) - expect_snapshot( - res_ <- gss %>% - specify(response = partyid) %>% - calculate(stat = "Chisq") - ) + expect_snapshot( + res_ <- gss %>% + specify(response = partyid) %>% + calculate(stat = "Chisq") + ) }) test_that("calculate messages informatively with excessive null", { @@ -736,14 +747,16 @@ test_that("calculate can handle variables named x", { }) test_that("calculate errors out with multiple explanatory variables", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% calculate(stat = "t") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% @@ -760,7 +773,7 @@ test_that("reported standard errors are correct", { expect_equal( attr(x_bar, "se"), - stats::sd(gss$hours)/sqrt(nrow(gss)), + stats::sd(gss$hours) / sqrt(nrow(gss)), tolerance = 1e-6 ) @@ -786,9 +799,9 @@ test_that("reported standard errors are correct", { attr(diff_bar, "se"), sqrt( (stats::sd(gss$hours[gss$college == "degree"]) / - sqrt(nrow(gss[gss$college == "degree",])))^2 + - (stats::sd(gss$hours[gss$college == "no degree"]) / - sqrt(nrow(gss[gss$college == "no degree",])))^2 + sqrt(nrow(gss[gss$college == "degree", ])))^2 + + (stats::sd(gss$hours[gss$college == "no degree"]) / + sqrt(nrow(gss[gss$college == "no degree", ])))^2 ), tolerance = 1e-6 ) @@ -801,12 +814,16 @@ test_that("reported standard errors are correct", { expect_equal( attr(diff_hat, "se"), sqrt( - abs((mean(gss[gss$college == "degree",]$sex == "female") * - (1 - mean(gss[gss$college == "degree",]$sex == "female"))) / - nrow(gss[gss$college == "degree",])) + - abs((mean(gss[gss$college == "no degree",]$sex == "female") * - (1 - mean(gss[gss$college == "no degree",]$sex == "female"))) / - nrow(gss[gss$college == "no degree",])) + abs( + (mean(gss[gss$college == "degree", ]$sex == "female") * + (1 - mean(gss[gss$college == "degree", ]$sex == "female"))) / + nrow(gss[gss$college == "degree", ]) + ) + + abs( + (mean(gss[gss$college == "no degree", ]$sex == "female") * + (1 - mean(gss[gss$college == "no degree", ]$sex == "female"))) / + nrow(gss[gss$college == "no degree", ]) + ) ), tolerance = 1e-6 ) @@ -815,19 +832,8 @@ test_that("reported standard errors are correct", { # this stat shares machinery with others that report se, so make # sure that we don't rat_hat <- gss %>% - specify(hours ~ college) %>% - calculate(stat = "ratio of means", order = c("no degree", "degree")) + specify(hours ~ college) %>% + calculate(stat = "ratio of means", order = c("no degree", "degree")) expect_null(attr(rat_hat, "se")) }) - - - - - - - - - - - diff --git a/tests/testthat/test-fit.R b/tests/testthat/test-fit.R index 74122823..eb96e28b 100644 --- a/tests/testthat/test-fit.R +++ b/tests/testthat/test-fit.R @@ -1,13 +1,13 @@ -x1 <- gss[1:100,] %>% specify(response = hours) -x2 <- gss[1:100,] %>% specify(hours ~ NULL) -x3 <- gss[1:100,] %>% specify(response = hours, explanatory = c(age, college)) -x4 <- gss[1:100,] %>% specify(hours ~ age + college) +x1 <- gss[1:100, ] %>% specify(response = hours) +x2 <- gss[1:100, ] %>% specify(hours ~ NULL) +x3 <- gss[1:100, ] %>% specify(response = hours, explanatory = c(age, college)) +x4 <- gss[1:100, ] %>% specify(hours ~ age + college) test_that("get_formula helper works", { expect_false(has_attr(x1, "formula")) - expect_true(has_attr( x2, "formula")) + expect_true(has_attr(x2, "formula")) expect_false(has_attr(x3, "formula")) - expect_true(has_attr( x4, "formula")) + expect_true(has_attr(x4, "formula")) expect_equal(get_formula(x1), get_formula(x2), ignore_attr = TRUE) expect_equal(get_formula(x3), get_formula(x4), ignore_attr = TRUE) @@ -51,7 +51,7 @@ test_that("fit.infer can handle generated objects", { x3_gen_fit <- x3 %>% hypothesize(null = 'independence') %>% - generate(reps = 2, type = "permute")%>% + generate(reps = 2, type = "permute") %>% fit() expect_equal(unique(x3_fit$term), unique(x3_gen_fit$term)) @@ -101,7 +101,8 @@ test_that("fit.infer logistic regression works", { ) # errors informatively with multinomial response variable - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss %>% specify(finrela ~ age + college) %>% fit() diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index 1fef48ef..a51afb5f 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -8,7 +8,7 @@ hyp_diff_in_props <- mtcars_df %>% hyp_chisq_gof <- mtcars_df %>% specify(response = cyl) %>% - hypothesize(null = "point", p = c("4" = 1/3, "6" = 1/3, "8" = 1/3)) + hypothesize(null = "point", p = c("4" = 1 / 3, "6" = 1 / 3, "8" = 1 / 3)) hyp_chisq_ind <- mtcars_df %>% specify(cyl ~ vs) %>% @@ -49,23 +49,18 @@ test_that("cohesion with type argument", { expect_snapshot(res_ <- generate(hyp_diff_in_props, type = "draw")) expect_silent(generate(hyp_chisq_gof, type = "draw")) expect_snapshot(res_ <- generate(hyp_chisq_ind, type = "draw")) - expect_snapshot(error = TRUE, - res_ <- generate(hyp_mean, type = "draw") - ) + expect_snapshot(error = TRUE, res_ <- generate(hyp_mean, type = "draw")) expect_snapshot(res_ <- generate(hyp_diff_in_means, type = "draw")) expect_snapshot(res_ <- generate(hyp_anova, type = "draw")) - expect_snapshot(error = TRUE, - res_ <- generate(hyp_prop, type = "permute") - ) + expect_snapshot(error = TRUE, res_ <- generate(hyp_prop, type = "permute")) expect_silent(generate(hyp_diff_in_props, type = "permute")) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- generate(hyp_chisq_gof, type = "permute") ) expect_silent(generate(hyp_chisq_ind, type = "permute")) - expect_snapshot(error = TRUE, - res_ <- generate(hyp_mean, type = "permute") - ) + expect_snapshot(error = TRUE, res_ <- generate(hyp_mean, type = "permute")) expect_silent(generate(hyp_diff_in_means, type = "permute")) expect_silent(generate(hyp_anova, type = "permute")) }) @@ -161,7 +156,8 @@ test_that("auto `type` works (generate)", { expect_equal(attr(two_props_boot, "type"), "bootstrap") expect_equal(attr(slope_boot, "type"), "bootstrap") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) %>% @@ -169,12 +165,13 @@ test_that("auto `type` works (generate)", { ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(response = mpg) %>% generate(reps = 100, type = "draw") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = mpg) %>% # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) %>% @@ -188,30 +185,33 @@ test_that("auto `type` works (generate)", { generate(reps = 100, type = "bootstrap") ) - expect_silent(mtcars_df %>% + expect_silent( + mtcars_df %>% specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am hypothesize(null = "independence") %>% generate(reps = 100, type = "draw") ) - expect_silent(mtcars_df %>% + expect_silent( + mtcars_df %>% specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am hypothesize(null = "independence") %>% - generate(reps = 100, type = "bootstrap")) + generate(reps = 100, type = "bootstrap") + ) expect_silent( mtcars_df %>% @@ -220,62 +220,71 @@ test_that("auto `type` works (generate)", { ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "draw") ) - expect_silent(mtcars_df %>% + expect_silent( + mtcars_df %>% specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") %>% generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(response = am, success = "1") %>% generate(reps = 100, type = "draw") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(mpg ~ am) %>% generate(reps = 100, type = "permute") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(am ~ vs, success = "1") %>% generate(reps = 100, type = "draw") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "draw") ) }) test_that("mismatches lead to error", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% generate(reps = 10, type = "permute") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(am ~ NULL, success = "1") %>% hypothesize(null = "independence", p = c("1" = 0.5)) %>% generate(reps = 100, type = "draw") ) expect_snapshot( - res_ <- mtcars_df %>% + res_ <- mtcars_df %>% specify(cyl ~ NULL) %>% # alt: response = cyl hypothesize( - null = "point", p = c("4" = .5, "6" = .25, "8" = .25) + null = "point", + p = c("4" = .5, "6" = .25, "8" = .25) ) %>% - generate(reps = 100, type = "bootstrap")) - expect_snapshot(error = TRUE, - res_ <- mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "other") + generate(reps = 100, type = "bootstrap") + ) + expect_snapshot( + error = TRUE, + res_ <- mtcars_df %>% + specify(mpg ~ hp) %>% + generate(reps = 100, type = "other") ) }) @@ -340,25 +349,28 @@ test_that("generate() can permute with multiple explanatory variables", { test_that("generate is sensitive to the variables argument", { # default argument works appropriately - expect_equal({ + expect_equal( + { set.seed(1) - gss[1:10,] %>% + gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute") - }, { + }, + { set.seed(1) - gss[1:10,] %>% + gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = hours) - }) + } + ) # permuting changes output expect_silent( - perm_age <- gss[1:10,] %>% + perm_age <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = age) @@ -369,7 +381,7 @@ test_that("generate is sensitive to the variables argument", { expect_true(all(perm_age$college[1:10] == perm_age$college[11:20])) expect_silent( - perm_college <- gss[1:10,] %>% + perm_college <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = college) @@ -380,97 +392,111 @@ test_that("generate is sensitive to the variables argument", { expect_false(all(perm_college$college[1:10] == perm_college$college[11:20])) expect_silent( - perm_college_age <- gss[1:10,] %>% + perm_college_age <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = c(college, age)) ) expect_false(all(perm_college_age$age[1:10] == perm_college_age$age[11:20])) - expect_true(all(perm_college_age$hours[1:10] == perm_college_age$hours[11:20])) - expect_false(all(perm_college_age$college[1:10] == perm_college_age$college[11:20])) + expect_true(all( + perm_college_age$hours[1:10] == perm_college_age$hours[11:20] + )) + expect_false(all( + perm_college_age$college[1:10] == perm_college_age$college[11:20] + )) # interaction effects are ignored - expect_equal({ - set.seed(1) + expect_equal( + { + set.seed(1) - expect_message( - res_1 <- gss[1:10,] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = c(hours, age*college)) - ) + expect_message( + res_1 <- gss[1:10, ] %>% + specify(hours ~ age + college) %>% + hypothesize(null = "independence") %>% + generate( + reps = 2, + type = "permute", + variables = c(hours, age * college) + ) + ) - res_1 - }, { - set.seed(1) + res_1 + }, + { + set.seed(1) - gss[1:10,] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = hours) - }) + gss[1:10, ] %>% + specify(hours ~ age + college) %>% + hypothesize(null = "independence") %>% + generate(reps = 2, type = "permute", variables = hours) + } + ) }) test_that("variables argument prompts when it ought to", { - expect_snapshot(error = TRUE, - res_ <- gss[1:10,] %>% + expect_snapshot( + error = TRUE, + res_ <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = c(howdy)) ) - expect_snapshot(error = TRUE, - res <- gss[1:10,] %>% + expect_snapshot( + error = TRUE, + res <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = c(howdy, doo)) ) expect_snapshot( - res_ <- gss[1:10,] %>% + res_ <- gss[1:10, ] %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 40) %>% generate(reps = 2, type = "bootstrap", variables = c(hours)) ) - expect_snapshot(error = TRUE, - res_ <- gss[1:10,] %>% + expect_snapshot( + error = TRUE, + res_ <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = "hours") ) expect_snapshot( - res_ <- gss[1:10,] %>% - specify(hours ~ age + college + age*college) %>% + res_ <- gss[1:10, ] %>% + specify(hours ~ age + college + age * college) %>% hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = age*college) + generate(reps = 2, type = "permute", variables = age * college) ) expect_snapshot( - res_ <- gss[1:10,] %>% - specify(hours ~ age + college + age*college) %>% + res_ <- gss[1:10, ] %>% + specify(hours ~ age + college + age * college) %>% hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = c(hours, age*college)) + generate(reps = 2, type = "permute", variables = c(hours, age * college)) ) expect_silent( - gss[1:10,] %>% - specify(hours ~ age + college + age*college) %>% + gss[1:10, ] %>% + specify(hours ~ age + college + age * college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", variables = c(hours)) ) expect_silent( - gss[1:10,] %>% - specify(hours ~ age + college + age*college) %>% + gss[1:10, ] %>% + specify(hours ~ age + college + age * college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute") ) expect_silent( - gss[1:10,] %>% + gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 2, type = "permute") @@ -479,14 +505,13 @@ test_that("variables argument prompts when it ought to", { # warn on type != permute but don't raise message re: interaction # effects unless otherwise used appropriately expect_snapshot( - res_ <- gss[1:10,] %>% - specify(hours ~ age*college) %>% + res_ <- gss[1:10, ] %>% + specify(hours ~ age * college) %>% generate( reps = 2, type = "bootstrap", - variables = c(hours, age*college) + variables = c(hours, age * college) ) - ) }) @@ -508,7 +533,8 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { ) # mention new generation types when supplied a bad one - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = am, success = "1") %>% hypothesize(null = "point", p = .5) %>% @@ -516,14 +542,16 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { ) # warns with either alias when given unexpected generate type - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 20) %>% generate(type = "draw") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 20) %>% @@ -542,7 +570,8 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { ) res_1 - }, { + }, + { set.seed(1) res_2 <- mtcars_df %>% @@ -557,19 +586,19 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { }) test_that("has_p_param handles edge cases", { - x <- NA - - set_p_names <- function(x, to) { - attr(x, "params") <- rep(NA, length(to)) - names(attr(x, "params")) <- to - x - } - - expect_true (has_p_param(set_p_names(x, c("p.boop")))) - expect_true (has_p_param(set_p_names(x, c("p.boop", "p.bop")))) - expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop")))) - expect_false(has_p_param(set_p_names(x, c("p.boop", "bo.p")))) - expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop")))) - expect_false(has_p_param(set_p_names(x, c(".p.boop")))) - expect_false(has_p_param(set_p_names(x, c("beep.boop")))) + x <- NA + + set_p_names <- function(x, to) { + attr(x, "params") <- rep(NA, length(to)) + names(attr(x, "params")) <- to + x + } + + expect_true(has_p_param(set_p_names(x, c("p.boop")))) + expect_true(has_p_param(set_p_names(x, c("p.boop", "p.bop")))) + expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop")))) + expect_false(has_p_param(set_p_names(x, c("p.boop", "bo.p")))) + expect_false(has_p_param(set_p_names(x, c("p.boop", "pbop")))) + expect_false(has_p_param(set_p_names(x, c(".p.boop")))) + expect_false(has_p_param(set_p_names(x, c("beep.boop")))) }) diff --git a/tests/testthat/test-get_confidence_interval.R b/tests/testthat/test-get_confidence_interval.R index f00e1148..ff3708cd 100644 --- a/tests/testthat/test-get_confidence_interval.R +++ b/tests/testthat/test-get_confidence_interval.R @@ -1,7 +1,21 @@ set.seed(2018) -test_df <- gss_calc[1:20,] +test_df <- gss_calc[1:20, ] test_df$stat <- c( - -5, -4, -4, -4, -1, -0.5, rep(0, 6), 1, 1, 3.999, 4, 4, 4.001, 5, 5 + -5, + -4, + -4, + -4, + -1, + -0.5, + rep(0, 6), + 1, + 1, + 3.999, + 4, + 4, + 4.001, + 5, + 5 ) point <- mean(test_df[["stat"]]) @@ -20,7 +34,8 @@ test_that("get_confidence_interval works with defaults", { test_that("get_confidence_interval works with `type = 'percentile'`", { expect_message( expect_equal( - test_df %>% get_confidence_interval(type = "percentile"), perc_def_out + test_df %>% get_confidence_interval(type = "percentile"), + perc_def_out ), "Using `level = 0.95`" ) @@ -62,7 +77,8 @@ test_that("get_confidence_interval works with `type = 'bias-corrected'`", { expect_equal( test_df %>% get_confidence_interval( - type = "bias-corrected", point_estimate = point + type = "bias-corrected", + point_estimate = point ), tibble::tibble(lower_ci = -4.00, upper_ci = 5), tolerance = 1e-3 @@ -73,7 +89,9 @@ test_that("get_confidence_interval works with `type = 'bias-corrected'`", { expect_equal( test_df %>% get_confidence_interval( - level = 0.5, type = "bias-corrected", point_estimate = point + level = 0.5, + type = "bias-corrected", + point_estimate = point ), tibble::tibble(lower_ci = 0, upper_ci = 4.0007), tolerance = 1e-3 @@ -93,7 +111,8 @@ test_that("get_confidence_interval supports data frame `point_estimate`", { get_confidence_interval(type = "bias-corrected", point_estimate = point), test_df %>% get_confidence_interval( - type = "bias-corrected", point_estimate = point_df + type = "bias-corrected", + point_estimate = point_df ), tolerance = eps ) @@ -106,26 +125,38 @@ test_that("get_confidence_interval messages with no explicit `level`", { }) test_that("get_confidence_interval checks input", { - expect_snapshot(error = TRUE, test_df %>% get_confidence_interval(type = "other")) - expect_snapshot(error = TRUE, test_df %>% get_confidence_interval(level = 1.2)) + expect_snapshot( + error = TRUE, + test_df %>% get_confidence_interval(type = "other") + ) + expect_snapshot( + error = TRUE, + test_df %>% get_confidence_interval(level = 1.2) + ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, test_df %>% get_confidence_interval(point_estimate = "a") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, test_df %>% get_confidence_interval(type = "se", point_estimate = "a") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, test_df %>% get_confidence_interval( - type = "se", point_estimate = data.frame(p = "a") + type = "se", + point_estimate = data.frame(p = "a") ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, test_df %>% get_confidence_interval(type = "se") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, test_df %>% get_confidence_interval(type = "bias-corrected") ) }) @@ -135,13 +166,13 @@ test_that("get_confidence_interval can handle fitted objects", { # generate example objects set.seed(1) - null_fits <- gss[1:50,] %>% + null_fits <- gss[1:50, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% fit() - obs_fit <- gss[1:50,] %>% + obs_fit <- gss[1:50, ] %>% specify(hours ~ age + college) %>% fit() @@ -152,7 +183,8 @@ test_that("get_confidence_interval can handle fitted objects", { list( term = c("age", "collegedegree", "intercept"), lower_ci = c(-0.2139, -6.6020, 36.4537), - upper_ci = c(0.1064, 8.7479, 50.8005)), + upper_ci = c(0.1064, 8.7479, 50.8005) + ), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame") ), @@ -161,13 +193,18 @@ test_that("get_confidence_interval can handle fitted objects", { ) expect_equal( - get_confidence_interval(null_fits, point_estimate = obs_fit, - level = .95, type = "se"), + get_confidence_interval( + null_fits, + point_estimate = obs_fit, + level = .95, + type = "se" + ), structure( list( term = c("age", "collegedegree", "intercept"), lower_ci = c(-0.3809, -13.6182, 36.8694), - upper_ci = c(0.1124, 6.1680, 59.1752)), + upper_ci = c(0.1124, 6.1680, 59.1752) + ), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame") ), @@ -176,13 +213,18 @@ test_that("get_confidence_interval can handle fitted objects", { ) expect_equal( - get_confidence_interval(null_fits, point_estimate = obs_fit, - level = .95, type = "bias-corrected"), + get_confidence_interval( + null_fits, + point_estimate = obs_fit, + level = .95, + type = "bias-corrected" + ), structure( list( term = c("age", "collegedegree", "intercept"), lower_ci = c(-0.2177, -7.1506, 37.2941), - upper_ci = c(0.0806, 1.9707, 51.0512)), + upper_ci = c(0.0806, 1.9707, 51.0512) + ), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame") ), @@ -191,20 +233,22 @@ test_that("get_confidence_interval can handle fitted objects", { ) # errors out when it ought to - obs_fit_2 <- gss[1:50,] %>% + obs_fit_2 <- gss[1:50, ] %>% specify(hours ~ age) %>% fit() - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval(null_fits, point_estimate = obs_fit_2, level = .95) ) obs_fit_3 <- - obs_fit_2 <- gss[1:50,] %>% - specify(year ~ age + college) %>% - fit() + obs_fit_2 <- gss[1:50, ] %>% + specify(year ~ age + college) %>% + fit() - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval(null_fits, point_estimate = obs_fit_3, level = .95) ) }) @@ -212,26 +256,32 @@ test_that("get_confidence_interval can handle fitted objects", { test_that("get_confidence_interval can handle bad args with fitted objects", { set.seed(1) - null_fits <- gss[1:50,] %>% + null_fits <- gss[1:50, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% fit() - obs_fit <- gss[1:50,] %>% + obs_fit <- gss[1:50, ] %>% specify(hours ~ age + college) %>% fit() - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval(null_fits, point_estimate = "boop", level = .95) ) - expect_snapshot(error = TRUE, - get_confidence_interval(null_fits, point_estimate = obs_fit$estimate, - level = .95) + expect_snapshot( + error = TRUE, + get_confidence_interval( + null_fits, + point_estimate = obs_fit$estimate, + level = .95 + ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval(obs_fit, point_estimate = null_fits, level = .95) ) }) @@ -397,7 +447,8 @@ test_that("theoretical CIs check arguments properly", { ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -406,7 +457,8 @@ test_that("theoretical CIs check arguments properly", { ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -416,7 +468,8 @@ test_that("theoretical CIs check arguments properly", { ) # check that point estimate hasn't been post-processed - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -424,7 +477,8 @@ test_that("theoretical CIs check arguments properly", { ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -438,7 +492,8 @@ test_that("theoretical CIs check arguments properly", { hypothesize(null = "point", mu = 40) %>% calculate(stat = "t") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -455,7 +510,8 @@ test_that("theoretical CIs check arguments properly", { specify(response = sex, success = "female") %>% assume(distribution = "z") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_theory, level = .95, @@ -463,7 +519,8 @@ test_that("theoretical CIs check arguments properly", { ) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_confidence_interval( null_dist_z, level = .95, @@ -473,20 +530,20 @@ test_that("theoretical CIs check arguments properly", { }) test_that("handles missing values gracefully (#520)", { - data <- data.frame( - prop = seq(0, 1, length.out = 10), - group = rep(c("a", "b"), each = 5L) - ) - - set.seed(1) - boot_dist <- - data %>% - specify(prop ~ group) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "bootstrap") %>% - calculate(stat = "diff in medians", order = c("b", "a")) - - expect_snapshot(res <- get_confidence_interval(boot_dist, .95)) - - expect_s3_class(res, "data.frame") + data <- data.frame( + prop = seq(0, 1, length.out = 10), + group = rep(c("a", "b"), each = 5L) + ) + + set.seed(1) + boot_dist <- + data %>% + specify(prop ~ group) %>% + hypothesize(null = "independence") %>% + generate(reps = 1000, type = "bootstrap") %>% + calculate(stat = "diff in medians", order = c("b", "a")) + + expect_snapshot(res <- get_confidence_interval(boot_dist, .95)) + + expect_s3_class(res, "data.frame") }) diff --git a/tests/testthat/test-get_p_value.R b/tests/testthat/test-get_p_value.R index 9b5287a0..7e5e65c7 100644 --- a/tests/testthat/test-get_p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -1,26 +1,75 @@ set.seed(2018) -test_df <- gss_calc[1:20,] +test_df <- gss_calc[1:20, ] test_df$stat <- sample(c( - -5, -4, -4, -4, -1, -0.5, rep(0, 6), 1, 1, 3.999, 4, 4, 4.001, 5, 5 + -5, + -4, + -4, + -4, + -1, + -0.5, + rep(0, 6), + 1, + 1, + 3.999, + 4, + 4, + 4.001, + 5, + 5 )) test_that("direction is appropriate", { - expect_snapshot(error = TRUE, test_df %>% get_p_value(obs_stat = 0.5, direction = "righ")) + expect_snapshot( + error = TRUE, + test_df %>% get_p_value(obs_stat = 0.5, direction = "righ") + ) }) test_that("get_p_value works", { - expect_equal(get_p_value(test_df, 4, "right")[[1]][1], 5/20, tolerance = eps) - expect_equal(get_p_value(test_df, 4, "left")[[1]][1], 17/20, tolerance = eps) - expect_equal(get_p_value(test_df, 4, "both")[[1]][1], 10/20, tolerance = eps) + expect_equal( + get_p_value(test_df, 4, "right")[[1]][1], + 5 / 20, + tolerance = eps + ) + expect_equal( + get_p_value(test_df, 4, "left")[[1]][1], + 17 / 20, + tolerance = eps + ) + expect_equal( + get_p_value(test_df, 4, "both")[[1]][1], + 10 / 20, + tolerance = eps + ) - expect_equal(get_p_value(test_df, 0, "right")[[1]][1], 14/20, tolerance = eps) - expect_equal(get_p_value(test_df, 0, "left")[[1]][1], 12/20, tolerance = eps) + expect_equal( + get_p_value(test_df, 0, "right")[[1]][1], + 14 / 20, + tolerance = eps + ) + expect_equal( + get_p_value(test_df, 0, "left")[[1]][1], + 12 / 20, + tolerance = eps + ) # This is also a check for not returning value more than 1 expect_equal(get_p_value(test_df, 0, "both")[[1]][1], 1, tolerance = eps) - expect_equal(get_p_value(test_df, -3.999, "right")[[1]][1], 16/20, tolerance = eps) - expect_equal(get_p_value(test_df, -3.999, "left")[[1]][1], 4/20, tolerance = eps) - expect_equal(get_p_value(test_df, -3.999, "both")[[1]][1], 8/20, tolerance = eps) + expect_equal( + get_p_value(test_df, -3.999, "right")[[1]][1], + 16 / 20, + tolerance = eps + ) + expect_equal( + get_p_value(test_df, -3.999, "left")[[1]][1], + 4 / 20, + tolerance = eps + ) + expect_equal( + get_p_value(test_df, -3.999, "both")[[1]][1], + 8 / 20, + tolerance = eps + ) expect_equal( get_p_value(test_df, 4, "greater"), @@ -58,7 +107,8 @@ test_that("theoretical p-value not supported error", { obs_F <- gss_tbl %>% specify(hours ~ partyid) %>% calculate(stat = "F") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% specify(hours ~ partyid) %>% hypothesize(null = "independence") %>% @@ -75,14 +125,10 @@ test_that("get_p_value warns in case of zero p-value", { test_that("get_p_value throws error in case of `NaN` stat", { gss_calc$stat[1] <- NaN - expect_snapshot(error = TRUE, - res_ <- get_p_value(gss_calc, 0, "both") - ) + expect_snapshot(error = TRUE, res_ <- get_p_value(gss_calc, 0, "both")) gss_calc$stat[2] <- NaN - expect_snapshot(error = TRUE, - res_ <- get_p_value(gss_calc, 0, "both") - ) + expect_snapshot(error = TRUE, res_ <- get_p_value(gss_calc, 0, "both")) # In the case that _all_ values are NaN, error should have different text gss_calc$stat <- NaN @@ -92,21 +138,23 @@ test_that("get_p_value throws error in case of `NaN` stat", { test_that("get_p_value can handle fitted objects", { set.seed(1) - null_fits <- gss[1:50,] %>% + null_fits <- gss[1:50, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% fit() - obs_fit <- gss[1:50,] %>% + obs_fit <- gss[1:50, ] %>% specify(hours ~ age + college) %>% fit() expect_equal( get_p_value(null_fits, obs_fit, "both"), structure( - list(term = c("age", "collegedegree", "intercept"), - p_value = c(0.6, 0.4, 0.6)), + list( + term = c("age", "collegedegree", "intercept"), + p_value = c(0.6, 0.4, 0.6) + ), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame") ), @@ -114,31 +162,27 @@ test_that("get_p_value can handle fitted objects", { ) # errors out when it ought to - obs_fit_2 <- gss[1:50,] %>% + obs_fit_2 <- gss[1:50, ] %>% specify(hours ~ age) %>% fit() - expect_snapshot(error = TRUE, - get_p_value(null_fits, obs_fit_2, "both") - ) + expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_2, "both")) - obs_fit_3 <- gss[1:50,] %>% + obs_fit_3 <- gss[1:50, ] %>% specify(year ~ age + college) %>% fit() - expect_snapshot(error = TRUE, - get_p_value(null_fits, obs_fit_3, "both") - ) + expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_3, "both")) set.seed(1) - null_fits_4 <- gss[1:50,] %>% + null_fits_4 <- gss[1:50, ] %>% specify(hours ~ age) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% fit() - obs_fit_4 <- gss[1:50,] %>% + obs_fit_4 <- gss[1:50, ] %>% specify(hours ~ age) %>% fit() @@ -149,7 +193,8 @@ test_that("get_p_value can handle fitted objects", { structure( list( term = c("age", "intercept"), - p_value = c(0.6, 0.6)), + p_value = c(0.6, 0.6) + ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame") ), @@ -174,27 +219,24 @@ test_that("get_p_value can handle fitted objects", { test_that("get_p_value can handle bad args with fitted objects", { set.seed(1) - null_fits <- gss[1:50,] %>% + null_fits <- gss[1:50, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% generate(reps = 10, type = "permute") %>% fit() - obs_fit <- gss[1:50,] %>% + obs_fit <- gss[1:50, ] %>% specify(hours ~ age + college) %>% fit() - expect_snapshot(error = TRUE, - get_p_value(null_fits, "boop", "both") - ) + expect_snapshot(error = TRUE, get_p_value(null_fits, "boop", "both")) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, get_p_value(null_fits, obs_fit$estimate, "both") ) - expect_snapshot(error = TRUE, - get_p_value(obs_fit, null_fits, "both") - ) + expect_snapshot(error = TRUE, get_p_value(obs_fit, null_fits, "both")) }) test_that("get_p_value errors informatively when args are switched", { @@ -211,9 +253,7 @@ test_that("get_p_value errors informatively when args are switched", { generate(reps = 20, type = "bootstrap") %>% calculate(stat = "mean") - expect_snapshot(error = TRUE, - get_p_value(obs_stat, null_dist, "both") - ) + expect_snapshot(error = TRUE, get_p_value(obs_stat, null_dist, "both")) expect_silent( get_p_value(null_dist, obs_stat, "both") @@ -379,12 +419,30 @@ test_that("get_p_value can handle theoretical distributions", { tolerance = 1e-3 ) - old_way_z_both <- prop_test(gss, sex ~ NULL, success = "female", p = .5, - alternative = "two.sided", z = TRUE) - old_way_z_left <- prop_test(gss, sex ~ NULL, success = "female", p = .5, - alternative = "less", z = TRUE) - old_way_z_right <- prop_test(gss, sex ~ NULL, success = "female", p = .5, - alternative = "greater", z = TRUE) + old_way_z_both <- prop_test( + gss, + sex ~ NULL, + success = "female", + p = .5, + alternative = "two.sided", + z = TRUE + ) + old_way_z_left <- prop_test( + gss, + sex ~ NULL, + success = "female", + p = .5, + alternative = "less", + z = TRUE + ) + old_way_z_right <- prop_test( + gss, + sex ~ NULL, + success = "female", + p = .5, + alternative = "greater", + z = TRUE + ) expect_equal( get_p_value_(z_dist, z_obs, direction = "both"), @@ -441,4 +499,3 @@ test_that("get_p_value warns with bad theoretical distributions", { ) ) }) - diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index b1d19d5a..bee93bd6 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -57,51 +57,48 @@ test_that("auto `type` works (hypothesize)", { expect_equal(attr(slopes, "type"), "permute") }) -test_that( - "hypothesize() throws an error when null is not point or independence", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when null is not point or independence", { + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "dependence") ) }) -test_that( - "hypothesize() allows partial matching of null arg for point", { +test_that("hypothesize() allows partial matching of null arg for point", { hyp_p <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "po", mu = 0) expect_equal(attr(hyp_p, "null"), "point") }) -test_that( - "hypothesize() allows partial matching of null arg for independence", { +test_that("hypothesize() allows partial matching of null arg for independence", { hyp_i <- mtcars_df %>% specify(mpg ~ vs) %>% hypothesize(null = "i") expect_equal(attr(hyp_i, "null"), "independence") }) -test_that( - "hypothesize() throws an error when multiple null values are provided", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when multiple null values are provided", { + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% hypothesize(null = c("point", "independence")) ) }) -test_that( - "hypothesize() throws an error when multiple params are set", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when multiple params are set", { + expect_snapshot( + error = TRUE, mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 25, med = 20) ) }) -test_that( - "hypothesize() throws a warning when params are set with independence", { +test_that("hypothesize() throws a warning when params are set with independence", { expect_snapshot( res_ <- mtcars_df %>% specify(mpg ~ vs) %>% @@ -109,48 +106,47 @@ test_that( ) }) -test_that( - "hypothesize() throws a warning when params are set with paired independence", { - expect_snapshot( - res_ <- mtcars_df %>% - specify(response = mpg) %>% - hypothesize(null = "paired independence", mu = 25) - ) +test_that("hypothesize() throws a warning when params are set with paired independence", { + expect_snapshot( + res_ <- mtcars_df %>% + specify(response = mpg) %>% + hypothesize(null = "paired independence", mu = 25) + ) }) -test_that( - "hypothesize() throws an error when p is greater than 1", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when p is greater than 1", { + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = 1 + .Machine$double.eps) ) }) -test_that( - "hypothesize() throws an error when p is less than 0", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when p is less than 0", { + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% - hypothesize(null = "point", p = - .Machine$double.neg.eps) + hypothesize(null = "point", p = -.Machine$double.neg.eps) ) }) -test_that( - "hypothesize() throws an error when p contains missing values", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when p contains missing values", { + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize(null = "point", p = c("0" = 0.5, "1" = NA_real_)) ) }) -test_that( - "hypothesize() throws an error when vector p does not sum to 1", { - expect_snapshot(error = TRUE, +test_that("hypothesize() throws an error when vector p does not sum to 1", { + expect_snapshot( + error = TRUE, res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% - hypothesize(null = "point", p = c("0" = 0.5, "1" = 0.5 + (eps *2))) + hypothesize(null = "point", p = c("0" = 0.5, "1" = 0.5 + (eps * 2))) ) }) @@ -163,39 +159,65 @@ test_that("hypothesize arguments function", { expect_snapshot(error = TRUE, res_ <- hypothesize(mtcars_s, null = NA)) expect_snapshot(error = TRUE, res_ <- hypothesize(mtcars_s)) - expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize(null = "point", mean = 3)) + expect_snapshot( + error = TRUE, + res_ <- mtcars_s %>% hypothesize(null = "point", mean = 3) + ) - expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize(null = "independence")) - expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize(null = "point")) + expect_snapshot( + error = TRUE, + res_ <- mtcars_s %>% hypothesize(null = "independence") + ) + expect_snapshot( + error = TRUE, + res_ <- mtcars_s %>% hypothesize(null = "point") + ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- - mtcars_f %>% - specify(mpg ~ am) %>% - hypothesize(null = "paired independence") + mtcars_f %>% + specify(mpg ~ am) %>% + hypothesize(null = "paired independence") ) # Produces error on win-build - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res <- mtcars_s %>% hypothesize(null = c("point", "independence"), mu = 3) ) - expect_snapshot(error = TRUE, - res_ <- mtcars_df %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1) + expect_snapshot( + error = TRUE, + res_ <- mtcars_df %>% + dplyr::select(vs) %>% + hypothesize(null = "point", mu = 1) ) - expect_snapshot(error = TRUE, - res_ <- mtcars_df %>% specify(response = vs) %>% hypothesize(null = "point", mu = 1) + expect_snapshot( + error = TRUE, + res_ <- mtcars_df %>% + specify(response = vs) %>% + hypothesize(null = "point", mu = 1) ) - expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize(null = "point", p = 0.2)) + expect_snapshot( + error = TRUE, + res_ <- mtcars_s %>% hypothesize(null = "point", p = 0.2) + ) expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize()) }) test_that("params correct", { - expect_snapshot(error = TRUE, res_ <- hypothesize(one_prop_specify, null = "point", mu = 2)) - expect_snapshot(error = TRUE, res_ <- hypothesize(one_mean_specify, null = "point", mean = 0.5)) + expect_snapshot( + error = TRUE, + res_ <- hypothesize(one_prop_specify, null = "point", mu = 2) + ) + expect_snapshot( + error = TRUE, + res_ <- hypothesize(one_mean_specify, null = "point", mean = 0.5) + ) }) test_that("sensible output", { @@ -228,4 +250,3 @@ test_that("is_hypothesized works", { expect_true(is_hypothesized(one_mean)) expect_false(is_hypothesized(one_mean_specify)) }) - diff --git a/tests/testthat/test-observe.R b/tests/testthat/test-observe.R index d3a4bd6f..d44a67a0 100644 --- a/tests/testthat/test-observe.R +++ b/tests/testthat/test-observe.R @@ -35,25 +35,29 @@ test_that("observe messages/warns/errors informatively", { expect_message( gss %>% observe(hours ~ NULL, stat = "mean", mu = 40) - ) %>% conditionMessage(), + ) %>% + conditionMessage(), expect_message( gss %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 40) %>% calculate(stat = "mean") - ) %>% conditionMessage() + ) %>% + conditionMessage() ) expect_equal( - expect_warning( - gss %>% - observe(hours ~ NULL, stat = "t") - ) %>% conditionMessage(), - expect_warning( - gss %>% - specify(hours ~ NULL) %>% - calculate(stat = "t") - ) %>% conditionMessage() + expect_warning( + gss %>% + observe(hours ~ NULL, stat = "t") + ) %>% + conditionMessage(), + expect_warning( + gss %>% + specify(hours ~ NULL) %>% + calculate(stat = "t") + ) %>% + conditionMessage() ) expect_error( @@ -136,12 +140,12 @@ test_that("observe() works with either specify() interface", { }) test_that("observe() output is the same as the old wrappers", { - expect_snapshot( - res_wrap <- gss_tbl %>% - chisq_stat(college ~ partyid) - ) + expect_snapshot( + res_wrap <- gss_tbl %>% + chisq_stat(college ~ partyid) + ) - expect_equal( + expect_equal( gss_tbl %>% observe(college ~ partyid, stat = "Chisq") %>% dplyr::pull(), @@ -150,7 +154,7 @@ test_that("observe() output is the same as the old wrappers", { expect_snapshot( res_wrap_2 <- gss_tbl %>% - t_stat(hours ~ sex, order = c("male", "female")) + t_stat(hours ~ sex, order = c("male", "female")) ) expect_equal( @@ -160,4 +164,3 @@ test_that("observe() output is the same as the old wrappers", { res_wrap_2 ) }) - diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index d36459dc..63abc8f2 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -23,32 +23,27 @@ test_that("`rep_sample_n` checks input", { expect_snapshot(error = TRUE, rep_sample_n(population, size = -1)) # `replace` - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, rep_sample_n(population, size = 1, replace = "a") ) # `reps` - expect_snapshot(error = TRUE, - rep_sample_n(population, size = 1, reps = "a") - ) - expect_snapshot(error = TRUE, - rep_sample_n(population, size = 1, reps = 1:2) - ) - expect_snapshot(error = TRUE, - rep_sample_n(population, size = 1, reps = 0.5) - ) + expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = "a")) + expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = 1:2)) + expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, reps = 0.5)) # `prob` - expect_snapshot(error = TRUE, - rep_sample_n(population, size = 1, prob = "a") - ) - expect_snapshot(error = TRUE, + expect_snapshot(error = TRUE, rep_sample_n(population, size = 1, prob = "a")) + expect_snapshot( + error = TRUE, rep_sample_n(population, size = 1, prob = c(0.1, 0.9)) ) }) test_that("`rep_sample_n` gives error on big sample size if `replace=FALSE`", { - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, rep_sample_n(population, size = n_population * 2) ) }) @@ -155,36 +150,32 @@ test_that("`rep_slice_sample` checks input", { expect_snapshot(error = TRUE, rep_slice_sample(population, prop = -1)) # Only one `n` or `prop` should be supplied - expect_snapshot(error = TRUE, - rep_slice_sample(population, n = 1, prop = 0.5) - ) + expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, prop = 0.5)) # `replace` - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, rep_slice_sample(population, n = 1, replace = "a") ) # `weight_by` - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, rep_slice_sample(population, n = 1, weight_by = "a") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, rep_slice_sample(population, n = 1, weight_by = c(0.1, 0.9)) ) - expect_snapshot(error = TRUE, - rep_slice_sample(population, n = 1, weight_by = wts) + expect_snapshot( + error = TRUE, + rep_slice_sample(population, n = 1, weight_by = wts) ) # `reps` - expect_snapshot(error = TRUE, - rep_slice_sample(population, n = 1, reps = "a") - ) - expect_snapshot(error = TRUE, - rep_slice_sample(population, n = 1, reps = 1:2) - ) - expect_snapshot(error = TRUE, - rep_slice_sample(population, n = 1, reps = 0.5) - ) + expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = "a")) + expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = 1:2)) + expect_snapshot(error = TRUE, rep_slice_sample(population, n = 1, reps = 0.5)) }) test_that("`rep_slice_sample` warns on big sample size if `replace = FALSE`", { @@ -271,13 +262,13 @@ test_that("`rep_slice_sample` uses `weight_by`", { ) population_wt <- - population %>% - dplyr::mutate(wts = rep(1, n_population) / n_population) + population %>% + dplyr::mutate(wts = rep(1, n_population) / n_population) set.seed(1) res3 <- rep_slice_sample( - population_wt, - n = n_population, - weight_by = wts + population_wt, + n = n_population, + weight_by = wts ) expect_equal(res1[["ball_id"]], res2[["ball_id"]]) diff --git a/tests/testthat/test-shade_confidence_interval.R b/tests/testthat/test-shade_confidence_interval.R index d7f33274..aa1ecd06 100644 --- a/tests/testthat/test-shade_confidence_interval.R +++ b/tests/testthat/test-shade_confidence_interval.R @@ -33,7 +33,7 @@ test_that("shade_confidence_interval works", { ) }) -test_that("shade_confidence_interval accepts `NULL` as `endpoints`", { +test_that("shade_confidence_interval accepts `NULL` as `endpoints`", { skip_if(getRversion() < "4.1.0") expect_doppelganger( @@ -59,29 +59,32 @@ test_that("shade_confidence_interval throws errors and warnings", { skip_if(getRversion() < "4.1.0") expect_snapshot(res_ <- gss_viz_sim + shade_confidence_interval(c(1, 2, 3))) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim + shade_confidence_interval(data.frame(x = 1)) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), color = "x") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim + shade_confidence_interval(c(-1, 1), fill = "x") ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim %>% shade_confidence_interval(c(-1, 1)) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim %>% shade_confidence_interval(endpoints = c(-1, 1)) ) - expect_snapshot(error = TRUE, - res_ <- gss_viz_sim %>% shade_ci(c(-1, 1)) - ) - expect_snapshot(error = TRUE, + expect_snapshot(error = TRUE, res_ <- gss_viz_sim %>% shade_ci(c(-1, 1))) + expect_snapshot( + error = TRUE, res_ <- gss_viz_sim %>% shade_ci(endpoints = c(-1, 1)) ) }) - # shade_ci ---------------------------------------------------------------- # Tested in `shade_confidence_interval()` diff --git a/tests/testthat/test-shade_p_value.R b/tests/testthat/test-shade_p_value.R index 0eb25451..18fb531e 100644 --- a/tests/testthat/test-shade_p_value.R +++ b/tests/testthat/test-shade_p_value.R @@ -4,7 +4,8 @@ test_that("shade_p_value works", { # Adding `shade_p_value()` to simulation plot expect_doppelganger( - "pval-sim-right", gss_viz_sim + shade_p_value(1, "right") + "pval-sim-right", + gss_viz_sim + shade_p_value(1, "right") ) expect_doppelganger("pval-sim-left", gss_viz_sim + shade_p_value(1, "left")) expect_doppelganger("pval-sim-both", gss_viz_sim + shade_p_value(1, "both")) @@ -20,16 +21,20 @@ test_that("shade_p_value works", { # Adding `shade_p_value()` to theoretical plot expect_doppelganger( - "pval-theor-right", gss_viz_theor + shade_p_value(1, "right") + "pval-theor-right", + gss_viz_theor + shade_p_value(1, "right") ) expect_doppelganger( - "pval-theor-left", gss_viz_theor + shade_p_value(1, "left") + "pval-theor-left", + gss_viz_theor + shade_p_value(1, "left") ) expect_doppelganger( - "pval-theor-both", gss_viz_theor + shade_p_value(1, "both") + "pval-theor-both", + gss_viz_theor + shade_p_value(1, "both") ) expect_doppelganger( - "pval-theor-null", gss_viz_theor + shade_p_value(1, NULL) + "pval-theor-null", + gss_viz_theor + shade_p_value(1, NULL) ) expect_warning( pval_theor_corrupt <- gss_viz_theor + shade_p_value(1, "aaa"), @@ -42,19 +47,23 @@ test_that("shade_p_value works", { # Adding `shade_p_value()` to "both" plot expect_doppelganger( - "pval-both-right", gss_viz_both + shade_p_value(1, "right") + "pval-both-right", + gss_viz_both + shade_p_value(1, "right") ) expect_doppelganger( - "pval-both-left", gss_viz_both + shade_p_value(1, "left") + "pval-both-left", + gss_viz_both + shade_p_value(1, "left") ) expect_doppelganger( - "pval-both-both", gss_viz_both + shade_p_value(1, "both") + "pval-both-both", + gss_viz_both + shade_p_value(1, "both") ) expect_doppelganger( - "pval-both-null", gss_viz_both + shade_p_value(1, NULL) + "pval-both-null", + gss_viz_both + shade_p_value(1, NULL) ) expect_warning( - pval_both_corrupt <- gss_viz_both + shade_p_value(1, "aaa"), + pval_both_corrupt <- gss_viz_both + shade_p_value(1, "aaa"), "direction" ) expect_doppelganger( @@ -65,8 +74,12 @@ test_that("shade_p_value works", { # -roper p-value shading when the calculated statistic falls exactly on the # boundaries of a histogram bin (#424) r_hat <- gss %>% - observe(college ~ sex, success = "no degree", - stat = "ratio of props", order = c("female", "male")) + observe( + college ~ sex, + success = "no degree", + stat = "ratio of props", + order = c("female", "male") + ) set.seed(33) @@ -76,7 +89,6 @@ test_that("shade_p_value works", { generate(reps = 1000) %>% calculate(stat = "ratio of props", order = c("female", "male")) - expect_doppelganger( "pval-stat-match", visualize(null_dist) + @@ -88,25 +100,31 @@ test_that("shade_p_value accepts synonyms for 'direction'", { skip_if(getRversion() < "4.1.0") expect_doppelganger( - "pval-direction-right", gss_viz_sim + shade_p_value(1, "greater") + "pval-direction-right", + gss_viz_sim + shade_p_value(1, "greater") ) expect_doppelganger( - "pval-direction-left", gss_viz_sim + shade_p_value(1, "less") + "pval-direction-left", + gss_viz_sim + shade_p_value(1, "less") ) # This currently results into the following {vdiffr} warning: # "Duplicated expectations: pval-direction-both, pval-direction-both" # However, having same figure here as expectation is exactly the goal of tests expect_doppelganger( - "pval-direction-both", gss_viz_sim + shade_p_value(1, "two_sided") + "pval-direction-both", + gss_viz_sim + shade_p_value(1, "two_sided") ) expect_doppelganger( - "pval-direction-both", gss_viz_sim + shade_p_value(1, "two-sided") + "pval-direction-both", + gss_viz_sim + shade_p_value(1, "two-sided") ) expect_doppelganger( - "pval-direction-both", gss_viz_sim + shade_p_value(1, "two sided") + "pval-direction-both", + gss_viz_sim + shade_p_value(1, "two sided") ) expect_doppelganger( - "pval-direction-both", gss_viz_sim + shade_p_value(1, "two.sided") + "pval-direction-both", + gss_viz_sim + shade_p_value(1, "two.sided") ) }) @@ -127,11 +145,12 @@ test_that("shade_p_value uses extra aesthetic", { ) }) -test_that("shade_p_value accepts `NULL` as `obs_stat`", { +test_that("shade_p_value accepts `NULL` as `obs_stat`", { skip_if(getRversion() < "4.1.0") expect_doppelganger( - "pval-null-obs_stat", gss_viz_sim + shade_p_value(NULL, "left") + "pval-null-obs_stat", + gss_viz_sim + shade_p_value(NULL, "left") ) }) @@ -140,28 +159,37 @@ test_that("shade_p_value throws errors", { expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value("a", "right")) expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value(1, 1)) - expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value(1, "right", color = "x")) - expect_snapshot(error = TRUE, gss_viz_sim + shade_p_value(1, "right", fill = "x")) + expect_snapshot( + error = TRUE, + gss_viz_sim + shade_p_value(1, "right", color = "x") + ) + expect_snapshot( + error = TRUE, + gss_viz_sim + shade_p_value(1, "right", fill = "x") + ) expect_snapshot(error = TRUE, gss_viz_sim %>% shade_p_value(1, "right")) expect_snapshot(error = TRUE, gss_viz_sim %>% shade_p_value(obs_stat = 1)) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_p_value(obs_stat = 1, - direction = "right")) + expect_snapshot( + error = TRUE, + gss_viz_sim %>% shade_p_value(obs_stat = 1, direction = "right") + ) expect_snapshot(error = TRUE, gss_viz_sim %>% shade_pvalue(1, "right")) expect_snapshot(error = TRUE, gss_viz_sim %>% shade_pvalue(obs_stat = 1)) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_pvalue(obs_stat = 1, - direction = "right")) - + expect_snapshot( + error = TRUE, + gss_viz_sim %>% shade_pvalue(obs_stat = 1, direction = "right") + ) }) test_that("`shade_p_value()` handles 0-area shading without issue (#528)", { - expect_no_condition( - zero_area_shade <- visualize(gss_permute) + shade_p_value(100, "right") - ) - - expect_doppelganger( - "zero_area_shade", - expect_no_condition(print(zero_area_shade)), - ) + expect_no_condition( + zero_area_shade <- visualize(gss_permute) + shade_p_value(100, "right") + ) + + expect_doppelganger( + "zero_area_shade", + expect_no_condition(print(zero_area_shade)), + ) }) # norm_direction ---------------------------------------------------------- diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 7444a791..2eae808a 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -25,21 +25,13 @@ test_that("data argument", { test_that("response and explanatory arguments", { expect_snapshot(error = TRUE, specify(mtcars_df, response = blah)) - expect_snapshot(error = TRUE, - specify(mtcars_df, response = "blah") - ) - expect_snapshot(error = TRUE, - specify(mtcars_df, formula = mpg ~ blah) - ) + expect_snapshot(error = TRUE, specify(mtcars_df, response = "blah")) + expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ blah)) expect_snapshot(error = TRUE, specify(mtcars_df, blah2 ~ cyl)) expect_snapshot(error = TRUE, specify(mtcars_df)) expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ mpg)) - expect_snapshot(error = TRUE, - specify(mtcars_df, formula = "mpg" ~ cyl) - ) - expect_snapshot(error = TRUE, - specify(mtcars_df, formula = mpg ~ "cyl") - ) + expect_snapshot(error = TRUE, specify(mtcars_df, formula = "mpg" ~ cyl)) + expect_snapshot(error = TRUE, specify(mtcars_df, formula = mpg ~ "cyl")) expect_silent(specify(mtcars_df, formula = mpg ~ cyl)) expect_snapshot(error = TRUE, specify(mtcars_df, formula = NULL ~ cyl)) @@ -47,12 +39,20 @@ test_that("response and explanatory arguments", { test_that("success argument", { expect_snapshot(error = TRUE, specify(mtcars_df, response = vs, success = 1)) - expect_snapshot(error = TRUE, specify(mtcars_df, response = vs, success = "bogus")) - expect_snapshot(error = TRUE, specify(mtcars_df, response = mpg, success = "1")) - expect_snapshot(error = TRUE, specify(mtcars_df, response = cyl, success = "4")) + expect_snapshot( + error = TRUE, + specify(mtcars_df, response = vs, success = "bogus") + ) + expect_snapshot( + error = TRUE, + specify(mtcars_df, response = mpg, success = "1") + ) + expect_snapshot( + error = TRUE, + specify(mtcars_df, response = cyl, success = "4") + ) # success not given expect_snapshot(error = TRUE, specify(mtcars_df, response = am)) - }) test_that("sensible output", { @@ -84,7 +84,7 @@ test_that("specify doesn't have NSE issues (#256)", { }) test_that("specify messages when dropping unused levels", { - expect_snapshot( + expect_snapshot( res_ <- gss %>% dplyr::filter(partyid %in% c("rep", "dem")) %>% specify(age ~ partyid) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 8bb33e20..5e7d4d99 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -57,15 +57,11 @@ test_that("check_type works", { expect_silent(check_type(x_var, is.integer)) expect_snapshot(error = TRUE, check_type(x_var, is.character)) - expect_snapshot(error = TRUE, - check_type(x_var, is.character, "symbolic") - ) + expect_snapshot(error = TRUE, check_type(x_var, is.character, "symbolic")) x_df <- data.frame(x = TRUE) expect_silent(check_type(x_df, is.data.frame)) - expect_snapshot(error = TRUE, - check_type(x_df, is.logical) - ) + expect_snapshot(error = TRUE, check_type(x_df, is.logical)) }) test_that("check_type allows `NULL`", { @@ -105,7 +101,7 @@ test_that("c_dedupl returns input when unnamed", { }) test_that("hypothesize errors out when x isn't a dataframe", { - expect_snapshot(error = TRUE, hypothesize(c(1, 2, 3), null = "point")) + expect_snapshot(error = TRUE, hypothesize(c(1, 2, 3), null = "point")) }) test_that("p_null supplies appropriate params", { @@ -116,46 +112,46 @@ test_that("p_null supplies appropriate params", { }) test_that("variables are standardized as expected", { - gss_types <- - gss %>% - dplyr::mutate( - age = as.integer(age), - is_dem = dplyr::if_else(partyid == "dem", TRUE, FALSE), - finrela = as.character(finrela) - ) - - gss_std <- standardize_variable_types(gss_types) - - expect_true(inherits(gss_types$age, "integer")) - expect_true(inherits(gss_types$finrela, "character")) - expect_true(inherits(gss_types$income, "ordered")) - expect_true(inherits(gss_types$college, "factor")) - expect_true(inherits(gss_types$is_dem, "logical")) - - expect_null(levels(gss_types$is_dem)) - - expect_true(inherits(gss_std$age, "numeric")) - expect_true(inherits(gss_std$finrela, "factor")) - expect_true(inherits(gss_std$income, "factor")) - expect_true(inherits(gss_std$college, "factor")) - expect_true(inherits(gss_std$is_dem, "factor")) - - expect_equal(levels(gss_std$is_dem), c("TRUE", "FALSE")) + gss_types <- + gss %>% + dplyr::mutate( + age = as.integer(age), + is_dem = dplyr::if_else(partyid == "dem", TRUE, FALSE), + finrela = as.character(finrela) + ) + + gss_std <- standardize_variable_types(gss_types) + + expect_true(inherits(gss_types$age, "integer")) + expect_true(inherits(gss_types$finrela, "character")) + expect_true(inherits(gss_types$income, "ordered")) + expect_true(inherits(gss_types$college, "factor")) + expect_true(inherits(gss_types$is_dem, "logical")) + + expect_null(levels(gss_types$is_dem)) + + expect_true(inherits(gss_std$age, "numeric")) + expect_true(inherits(gss_std$finrela, "factor")) + expect_true(inherits(gss_std$income, "factor")) + expect_true(inherits(gss_std$college, "factor")) + expect_true(inherits(gss_std$is_dem, "factor")) + + expect_equal(levels(gss_std$is_dem), c("TRUE", "FALSE")) }) test_that("group_by_replicate() helper returns correct results", { - reps <- 500 - nrow_gss <- nrow(gss) - - gss_gen <- - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = reps, type = "permute") %>% - dplyr::ungroup() - - expect_equal( - dplyr::group_by(gss_gen, replicate), - group_by_replicate(gss_gen, reps, nrow_gss) - ) + reps <- 500 + nrow_gss <- nrow(gss) + + gss_gen <- + gss %>% + specify(age ~ college) %>% + hypothesize(null = "independence") %>% + generate(reps = reps, type = "permute") %>% + dplyr::ungroup() + + expect_equal( + dplyr::group_by(gss_gen, replicate), + group_by_replicate(gss_gen, reps, nrow_gss) + ) }) diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index 6cac3fb8..ec5e2ead 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -39,8 +39,8 @@ obs_t <- gss_tbl %>% observe(hours ~ college, order = c("no degree", "degree"), stat = "t") obs_F <- anova( - aov(formula = hours ~ partyid, data = gss_tbl) - )$`F value`[1] + aov(formula = hours ~ partyid, data = gss_tbl) +)$`F value`[1] test_that("visualize warns with bad arguments", { skip_if(getRversion() < "4.1.0") @@ -48,7 +48,7 @@ test_that("visualize warns with bad arguments", { # warns when supplied deprecated args in what used to be # a valid way expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% @@ -58,7 +58,7 @@ test_that("visualize warns with bad arguments", { # warning is the same when deprecated args are inappropriate expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% @@ -68,7 +68,7 @@ test_that("visualize warns with bad arguments", { # same goes for CI args expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% @@ -84,8 +84,8 @@ test_that("visualize warns with bad arguments", { calculate(stat = "slope") expect_snapshot( - res <- age_hours_df %>% - visualize(endpoints = c(.01, .02)) + res <- age_hours_df %>% + visualize(endpoints = c(.01, .02)) ) expect_equal( @@ -138,17 +138,18 @@ test_that("visualize basic tests", { ) expect_snapshot( - res_vis_theor_none_1 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% - visualize(method = "theoretical") + res_vis_theor_none_1 <- gss_tbl %>% + specify(sex ~ college, success = "female") %>% + hypothesize(null = "independence") %>% + calculate(stat = "z", order = c("no degree", "degree")) %>% + visualize(method = "theoretical") ) expect_doppelganger("vis-theor-none-1", res_vis_theor_none_1) # diff in props and z on different scales - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% specify(sex ~ college, success = "female") %>% hypothesize(null = "independence") %>% @@ -171,13 +172,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_both_1 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% - visualize(method = "both") + - shade_p_value(direction = "both", obs_stat = obs_z) + vis_both_both_1 <- gss_tbl %>% + specify(sex ~ college, success = "female") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c("no degree", "degree")) %>% + visualize(method = "both") + + shade_p_value(direction = "both", obs_stat = obs_z) ) expect_doppelganger( "vis-both-both-1", @@ -185,13 +186,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_both_2 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("degree", "no degree")) %>% - visualize(method = "both") + - shade_p_value(direction = "both", obs_stat = -obs_z) + vis_both_both_2 <- gss_tbl %>% + specify(sex ~ college, success = "female") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c("degree", "no degree")) %>% + visualize(method = "both") + + shade_p_value(direction = "both", obs_stat = -obs_z) ) expect_doppelganger( "vis-both-both-2", @@ -199,13 +200,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_left_1 <- gss_tbl %>% - specify(age ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("female", "male")) %>% - visualize(method = "both") + - shade_p_value(direction = "left", obs_stat = obs_t) + vis_both_left_1 <- gss_tbl %>% + specify(age ~ sex) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("female", "male")) %>% + visualize(method = "both") + + shade_p_value(direction = "left", obs_stat = obs_t) ) expect_doppelganger( "vis-both-left-1", @@ -213,13 +214,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_left_1 <- gss_tbl %>% - specify(age ~ sex) %>% - hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("female", "male")) %>% - visualize(method = "theoretical") + - shade_p_value(direction = "left", obs_stat = obs_t) + vis_theor_left_1 <- gss_tbl %>% + specify(age ~ sex) %>% + hypothesize(null = "independence") %>% + # generate(reps = 100, type = "permute") %>% + calculate(stat = "t", order = c("female", "male")) %>% + visualize(method = "theoretical") + + shade_p_value(direction = "left", obs_stat = obs_t) ) expect_doppelganger( "vis-theor-left-1", @@ -240,10 +241,10 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_none_2 <- gss_tbl %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - visualize(method = "theoretical") + vis_theor_none_2 <- gss_tbl %>% + specify(age ~ college) %>% + hypothesize(null = "independence") %>% + visualize(method = "theoretical") ) expect_doppelganger( "vis-theor-none-2", @@ -276,13 +277,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_left_2 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% - visualize(method = "both") + - shade_p_value(direction = "left", obs_stat = obs_z) + vis_both_left_2 <- gss_tbl %>% + specify(sex ~ college, success = "female") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "z", order = c("no degree", "degree")) %>% + visualize(method = "both") + + shade_p_value(direction = "left", obs_stat = obs_z) ) expect_doppelganger( "vis-both-left-2", @@ -290,13 +291,13 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_right_2 <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% - visualize(method = "both") + - shade_p_value(obs_stat = obs_F, direction = "right") + vis_both_right_2 <- gss_tbl %>% + specify(sex ~ partyid, success = "female") %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "Chisq") %>% + visualize(method = "both") + + shade_p_value(obs_stat = obs_F, direction = "right") ) expect_doppelganger( "vis-both-right-2", @@ -304,7 +305,7 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_right_1 <- gss_tbl %>% + vis_theor_right_1 <- gss_tbl %>% specify(sex ~ partyid, success = "female") %>% hypothesize(null = "independence") %>% # alculate(stat = "Chisq") %>% @@ -320,8 +321,8 @@ test_that("visualize basic tests", { vis_both_none_2 <- gss_tbl %>% specify(partyid ~ NULL) %>% hypothesize( - null = "point", - p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) + null = "point", + p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) ) %>% generate(reps = 100, type = "draw") %>% calculate(stat = "Chisq") %>% @@ -333,24 +334,25 @@ test_that("visualize basic tests", { ) # traditional instead of theoretical - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% specify(partyid ~ NULL) %>% hypothesize( null = "point", p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) ) %>% -# generate(reps = 100, type = "draw") %>% -# calculate(stat = "Chisq") %>% + # generate(reps = 100, type = "draw") %>% + # calculate(stat = "Chisq") %>% visualize(method = "traditional") ) expect_warning( - vis_theor_none_4 <- gss_tbl %>% + vis_theor_none_4 <- gss_tbl %>% specify(partyid ~ NULL) %>% hypothesize( - null = "point", - p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) + null = "point", + p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) ) %>% # generate(reps = 100, type = "draw") %>% # calculate(stat = "Chisq") %>% @@ -373,25 +375,25 @@ test_that("visualize basic tests", { ) # Produces warning first for not checking conditions but would also error - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% specify(hours ~ sex) %>% hypothesize(null = "independence") %>% generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", - order = c("female", "male")) %>% + calculate(stat = "diff in means", order = c("female", "male")) %>% visualize(method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff_mean) ) expect_snapshot( - res_vis_theor_both_1 <- gss_tbl %>% - specify(hours ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", order = c("female", "male")) %>% - visualize(method = "theoretical") + - shade_p_value(direction = "both", obs_stat = obs_diff_mean) + res_vis_theor_both_1 <- gss_tbl %>% + specify(hours ~ sex) %>% + hypothesize(null = "independence") %>% + generate(reps = 100, type = "permute") %>% + calculate(stat = "diff in means", order = c("female", "male")) %>% + visualize(method = "theoretical") + + shade_p_value(direction = "both", obs_stat = obs_diff_mean) ) expect_doppelganger("vis-theor-both-1", res_vis_theor_both_1) @@ -464,22 +466,23 @@ test_that("obs_stat as a data.frame works", { test_that('method = "both" behaves nicely', { skip_if(getRversion() < "4.1.0") - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% generate(reps = 100, type = "bootstrap") %>% -# calculate(stat = "mean") %>% + # calculate(stat = "mean") %>% visualize(method = "both") ) expect_snapshot( - res_method_both <- gss_tbl %>% - specify(hours ~ college) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "t", order = c("no degree", "degree")) %>% - visualize(method = "both") + res_method_both <- gss_tbl %>% + specify(hours ~ college) %>% + hypothesize(null = "point", mu = 4) %>% + generate(reps = 10, type = "bootstrap") %>% + calculate(stat = "t", order = c("no degree", "degree")) %>% + visualize(method = "both") ) expect_doppelganger("method-both", res_method_both) @@ -512,7 +515,7 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { res_ <- gss_tbl %>% specify(sex ~ partyid, success = "female") %>% hypothesize(null = "independence") %>% -# generate(reps = 100, type = "permute") %>% + # generate(reps = 100, type = "permute") %>% calculate(stat = "Chisq") %>% visualize(method = "theoretical") + shade_p_value(obs_stat = 2, direction = "left") @@ -522,7 +525,7 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { res_ <- gss_tbl %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% -# generate(reps = 100, type = "permute") %>% + # generate(reps = 100, type = "permute") %>% calculate(stat = "F") %>% visualize(method = "theoretical") + shade_p_value(obs_stat = 2, direction = "two_sided") @@ -542,7 +545,8 @@ test_that("confidence interval plots are working", { perc_ci <- gss_tbl_boot %>% get_ci() - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_tbl_boot %>% visualize() + shade_confidence_interval(endpoints = df_error) @@ -555,9 +559,9 @@ test_that("confidence interval plots are working", { ) expect_snapshot( - res_ci_vis <- gss_tbl_boot %>% - visualize() + - shade_confidence_interval(endpoints = perc_ci, direction = "between") + res_ci_vis <- gss_tbl_boot %>% + visualize() + + shade_confidence_interval(endpoints = perc_ci, direction = "between") ) expect_doppelganger("ci-vis", res_ci_vis) @@ -578,9 +582,9 @@ test_that("title adapts to not hypothesis testing workflow", { visualize() ) expect_snapshot( - res_vis_no_hypothesize_both <- gss_tbl_boot_tbl %>% - calculate(stat = "t") %>% - visualize(method = "both") + res_vis_no_hypothesize_both <- gss_tbl_boot_tbl %>% + calculate(stat = "t") %>% + visualize(method = "both") ) expect_doppelganger("vis-no-hypothesize-both", res_vis_no_hypothesize_both) @@ -666,10 +670,10 @@ test_that("visualize can handle multiple explanatory variables", { ) expect_snapshot( - res_viz_fit_p_val_right <- - null_fits %>% - visualize() + - shade_p_value(obs_stat = obs_fit, direction = "right") + res_viz_fit_p_val_right <- + null_fits %>% + visualize() + + shade_p_value(obs_stat = obs_fit, direction = "right") ) expect_doppelganger( @@ -924,5 +928,4 @@ test_that("visualize can handle `assume()` output", { "viz-assume-2z-ci", visualize(null_dist) + shade_confidence_interval(ci) ) - }) diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index befd67e4..45f2f46b 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -2,30 +2,35 @@ test_that("t_test works", { # Two Sample expect_snapshot(res_ <- gss_tbl %>% t_test(hours ~ sex)) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, gss_tbl %>% t_test(response = "hours", explanatory = "sex") ) - new_way <- t_test(gss_tbl, - hours ~ sex, - order = c("male", "female")) - new_way_alt <- t_test(gss_tbl, - response = hours, - explanatory = sex, - order = c("male", "female")) + new_way <- t_test(gss_tbl, hours ~ sex, order = c("male", "female")) + new_way_alt <- t_test( + gss_tbl, + response = hours, + explanatory = sex, + order = c("male", "female") + ) old_way <- t.test(hours ~ sex, data = gss_tbl) %>% broom::glance() %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, - alternative, estimate, - lower_ci = conf.low, upper_ci = conf.high) + dplyr::select( + statistic, + t_df = parameter, + p_value = p.value, + alternative, + estimate, + lower_ci = conf.low, + upper_ci = conf.high + ) expect_equal(new_way, new_way_alt, tolerance = 1e-5) expect_equal(new_way, old_way, tolerance = 1e-5) # check that the order argument changes output - new_way2 <- t_test(gss_tbl, - hours ~ sex, - order = c("female", "male")) + new_way2 <- t_test(gss_tbl, hours ~ sex, order = c("female", "male")) expect_equal(new_way[["lower_ci"]], -new_way2[["upper_ci"]]) expect_equal(new_way[["statistic"]], -new_way2[["statistic"]]) @@ -36,9 +41,15 @@ test_that("t_test works", { t_test(response = hours, mu = 0) old_way <- t.test(x = gss_tbl$hours, mu = 0) %>% broom::glance() %>% - dplyr::select(statistic, t_df = parameter, p_value = p.value, - alternative, estimate, - lower_ci = conf.low, upper_ci = conf.high) + dplyr::select( + statistic, + t_df = parameter, + p_value = p.value, + alternative, + estimate, + lower_ci = conf.low, + upper_ci = conf.high + ) expect_equal(new_way, new_way_alt, tolerance = 1e-5) expect_equal(new_way, old_way, tolerance = 1e-5) @@ -46,8 +57,10 @@ test_that("t_test works", { test_that("chisq_test works", { # maleependence - expect_silent(gss_tbl %>% - chisq_test(college ~ partyid)) + expect_silent( + gss_tbl %>% + chisq_test(college ~ partyid) + ) new_way <- gss_tbl %>% chisq_test(college ~ partyid) new_way_alt <- gss_tbl %>% @@ -60,8 +73,10 @@ test_that("chisq_test works", { expect_equal(new_way, old_way, tolerance = eps) # Goodness of Fit - expect_silent(gss_tbl %>% - chisq_test(response = partyid, p = c(.3, .4, .3))) + expect_silent( + gss_tbl %>% + chisq_test(response = partyid, p = c(.3, .4, .3)) + ) new_way <- gss_tbl %>% chisq_test(partyid ~ NULL, p = c(.3, .4, .3)) new_way_alt <- gss_tbl %>% @@ -74,11 +89,16 @@ test_that("chisq_test works", { expect_equal(new_way, old_way, tolerance = 1e-5) # check that function errors out when response is numeric - expect_snapshot(error = TRUE, chisq_test(x = gss_tbl, response = age, explanatory = partyid)) + expect_snapshot( + error = TRUE, + chisq_test(x = gss_tbl, response = age, explanatory = partyid) + ) # check that function errors out when explanatory is numeric - expect_snapshot(error = TRUE, chisq_test(x = gss_tbl, response = partyid, explanatory = age)) - + expect_snapshot( + error = TRUE, + chisq_test(x = gss_tbl, response = partyid, explanatory = age) + ) }) test_that("_stat functions work", { @@ -111,26 +131,28 @@ test_that("_stat functions work", { chisq_stat(partyid ~ NULL) ) expect_snapshot( - obs_stat_way_alt <- gss_tbl %>% - chisq_stat(response = partyid) + obs_stat_way_alt <- gss_tbl %>% + chisq_stat(response = partyid) ) - expect_equal(dplyr::pull(new_way), obs_stat_way, ignore_attr = TRUE) - expect_equal(dplyr::pull(new_way), obs_stat_way_alt, ignore_attr = TRUE) + expect_equal(dplyr::pull(new_way), obs_stat_way, ignore_attr = TRUE) + expect_equal(dplyr::pull(new_way), obs_stat_way_alt, ignore_attr = TRUE) - # robust to the named vector - unordered_p <- gss_tbl %>% - chisq_test(response = partyid, p = c(.2, .3, .5)) - ordered_p <- gss_tbl %>% - chisq_test(response = partyid, p = c(ind = .2, rep = .3, dem = .5)) + # robust to the named vector + unordered_p <- gss_tbl %>% + chisq_test(response = partyid, p = c(.2, .3, .5)) + ordered_p <- gss_tbl %>% + chisq_test(response = partyid, p = c(ind = .2, rep = .3, dem = .5)) - expect_equal(unordered_p, ordered_p, ignore_attr = TRUE) + expect_equal(unordered_p, ordered_p, ignore_attr = TRUE) # Two sample t expect_snapshot( - res_ <- gss_tbl %>% t_stat( - hours ~ sex, order = c("male", "female") - ) + res_ <- gss_tbl %>% + t_stat( + hours ~ sex, + order = c("male", "female") + ) ) another_way <- gss_tbl %>% t_test(hours ~ sex, order = c("male", "female")) %>% @@ -144,9 +166,7 @@ test_that("_stat functions work", { expect_snapshot( obs_stat_way_alt <- gss_tbl %>% - t_stat(response = hours, - explanatory = sex, - order = c("male", "female")) + t_stat(response = hours, explanatory = sex, order = c("male", "female")) ) expect_equal(another_way, obs_stat_way, ignore_attr = TRUE) @@ -174,11 +194,13 @@ test_that("_stat functions work", { expect_equal(another_way, obs_stat_way, ignore_attr = TRUE) expect_equal(another_way, obs_stat_way_alt, ignore_attr = TRUE) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- chisq_stat(x = gss_tbl, response = age, explanatory = sex) ) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- chisq_stat(x = gss_tbl, response = sex, explanatory = age) ) }) @@ -187,8 +209,7 @@ test_that("conf_int argument works", { expect_equal( names( gss_tbl %>% - t_test(hours ~ sex, - order = c("male", "female"), conf_int = FALSE) + t_test(hours ~ sex, order = c("male", "female"), conf_int = FALSE) ), c("statistic", "t_df", "p_value", "alternative", "estimate"), tolerance = 1e-5 @@ -197,31 +218,46 @@ test_that("conf_int argument works", { names( gss_tbl %>% t_test( - hours ~ sex, order = c("male", "female"), + hours ~ sex, + order = c("male", "female"), conf_int = TRUE ) ), - c("statistic", "t_df", "p_value", "alternative", - "estimate", "lower_ci", "upper_ci"), + c( + "statistic", + "t_df", + "p_value", + "alternative", + "estimate", + "lower_ci", + "upper_ci" + ), tolerance = 1e-5 ) ci_test <- gss_tbl %>% t_test( - hours ~ sex, order = c("male", "female"), - conf_int = TRUE, conf_level = 0.9 + hours ~ sex, + order = c("male", "female"), + conf_int = TRUE, + conf_level = 0.9 ) old_way <- t.test( - formula = hours ~ sex, data = gss_tbl, conf.level = 0.9 + formula = hours ~ sex, + data = gss_tbl, + conf.level = 0.9 )[["conf.int"]] expect_equal(ci_test$lower_ci[1], old_way[1], tolerance = 1e-5) expect_equal(ci_test$upper_ci[1], old_way[2], tolerance = 1e-5) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- gss_tbl %>% t_test( - hours ~ sex, order = c("female", "male"), - conf_int = TRUE, conf_level = 1.1 + hours ~ sex, + order = c("female", "male"), + conf_int = TRUE, + conf_level = 1.1 ) ) @@ -237,7 +273,8 @@ test_that("conf_int argument works", { expect_snapshot( var_equal <- gss_tbl_small %>% t_stat( - hours ~ sex, order = c("female", "male"), + hours ~ sex, + order = c("female", "male"), var.equal = TRUE ) ) @@ -251,46 +288,38 @@ test_that("conf_int argument works", { shortcut_var_equal <- gss_tbl_small %>% specify(hours ~ sex) %>% calculate( - stat = "t", order = c("female", "male"), + stat = "t", + order = c("female", "male"), var.equal = TRUE ) expect_false(shortcut_no_var_equal == shortcut_var_equal) }) # generate some data to test the prop.test wrapper -df <- data.frame(exp = rep(c("a", "b"), each = 500), - resp = c(rep("c", 450), - rep("d", 50), - rep("c", 400), - rep("d", 100)), - stringsAsFactors = FALSE) +df <- data.frame( + exp = rep(c("a", "b"), each = 500), + resp = c(rep("c", 450), rep("d", 50), rep("c", 400), rep("d", 100)), + stringsAsFactors = FALSE +) sum_df <- table(df) -bad_df <- data.frame(resp = 1:5, - exp = letters[1:5]) +bad_df <- data.frame(resp = 1:5, exp = letters[1:5]) -bad_df2 <- data.frame(resp = letters[1:5], - exp = 1:5) +bad_df2 <- data.frame(resp = letters[1:5], exp = 1:5) df_l <- df %>% - dplyr::mutate(resp = dplyr::if_else(resp == "c", TRUE, FALSE)) + dplyr::mutate(resp = dplyr::if_else(resp == "c", TRUE, FALSE)) test_that("two sample prop_test works", { - # run the tests with default args base <- prop.test(sum_df) infer <- prop_test(df, resp ~ exp, order = c("a", "b")) # check that results are same - expect_equal(base[["statistic"]], - infer[["statistic"]], - tolerance = .001) - expect_equal(base[["parameter"]], - infer[["chisq_df"]]) - expect_equal(base[["p.value"]], - infer[["p_value"]], - tolerance = .001) + expect_equal(base[["statistic"]], infer[["statistic"]], tolerance = .001) + expect_equal(base[["parameter"]], infer[["chisq_df"]]) + expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001) # expect warning for unspecified order expect_snapshot(res_ <- prop_test(df, resp ~ exp)) @@ -298,14 +327,9 @@ test_that("two sample prop_test works", { # check that the functions respond to "p" in the same way base2 <- prop.test(sum_df, p = c(.1, .1)) infer2 <- prop_test(df, resp ~ exp, order = c("a", "b"), p = c(.1, .1)) - expect_equal(base2[["statistic"]], - infer2[["statistic"]], - tolerance = .001) - expect_equal(base2[["parameter"]], - infer2[["chisq_df"]]) - expect_equal(base2[["p.value"]], - infer2[["p_value"]], - tolerance = .001) + expect_equal(base2[["statistic"]], infer2[["statistic"]], tolerance = .001) + expect_equal(base2[["parameter"]], infer2[["chisq_df"]]) + expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001) # check confidence interval argument infer3 <- prop_test(df, resp ~ exp, order = c("a", "b"), conf_int = TRUE) @@ -320,13 +344,24 @@ test_that("two sample prop_test works", { expect_snapshot(error = TRUE, res_ <- prop_test(bad_df2, resp ~ exp)) # check that the success argument changes output - infer5 <- prop_test(df, resp ~ exp, order = c("a", "b"), success = "d", conf_int = TRUE) + infer5 <- prop_test( + df, + resp ~ exp, + order = c("a", "b"), + success = "d", + conf_int = TRUE + ) expect_equal(infer3[["upper_ci"]], -infer5[["lower_ci"]], tolerance = .001) # check that logical variables are leveled intuitively infer1_l <- prop_test(df_l, resp ~ exp, order = c("b", "a")) infer2_l <- prop_test(df_l, resp ~ exp, order = c("b", "a"), success = "TRUE") - infer3_l <- prop_test(df_l, resp ~ exp, order = c("b", "a"), success = "FALSE") + infer3_l <- prop_test( + df_l, + resp ~ exp, + order = c("b", "a"), + success = "FALSE" + ) expect_equal(infer1_l$lower_ci, infer2_l$lower_ci) expect_equal(infer1_l$lower_ci, -infer3_l$upper_ci) @@ -339,30 +374,19 @@ df_1 <- df %>% sum_df_1 <- table(df_1) test_that("one sample prop_test works", { - # check that results with default args are the same base <- prop.test(sum_df_1) infer <- prop_test(df_1, resp ~ NULL, p = .5) - expect_equal(base[["statistic"]], - infer[["statistic"]], - tolerance = .001) - expect_equal(base[["parameter"]], - infer[["chisq_df"]]) - expect_equal(base[["p.value"]], - infer[["p_value"]], - tolerance = .001) + expect_equal(base[["statistic"]], infer[["statistic"]], tolerance = .001) + expect_equal(base[["parameter"]], infer[["chisq_df"]]) + expect_equal(base[["p.value"]], infer[["p_value"]], tolerance = .001) # check that the functions respond to "p" in the same way base2 <- prop.test(sum_df_1, p = .86) infer2 <- prop_test(df_1, resp ~ NULL, p = .86) - expect_equal(base2[["statistic"]], - infer2[["statistic"]], - tolerance = .001) - expect_equal(base2[["parameter"]], - infer2[["chisq_df"]]) - expect_equal(base2[["p.value"]], - infer2[["p_value"]], - tolerance = .001) + expect_equal(base2[["statistic"]], infer2[["statistic"]], tolerance = .001) + expect_equal(base2[["parameter"]], infer2[["chisq_df"]]) + expect_equal(base2[["p.value"]], infer2[["p_value"]], tolerance = .001) # expect message for unspecified p expect_snapshot(res_ <- prop_test(df_1, resp ~ NULL)) @@ -371,7 +395,8 @@ test_that("one sample prop_test works", { infer3 <- prop_test(df_1, resp ~ NULL, p = .2, success = "c") infer4 <- prop_test(df_1, resp ~ NULL, p = .8, success = "d") expect_equal(infer3[["chisq_df"]], infer4[["chisq_df"]], tolerance = .001) - expect_snapshot(error = TRUE, + expect_snapshot( + error = TRUE, res_ <- prop_test(df_1, resp ~ NULL, p = .2, success = "b") ) }) @@ -380,8 +405,12 @@ test_that("prop_test output dimensionality is correct", { infer_1_sample <- prop_test(df, resp ~ NULL, p = .5) infer_1_sample_z <- prop_test(df, resp ~ NULL, p = .5, z = TRUE) infer_2_sample <- prop_test(df, resp ~ exp, order = c("a", "b")) - infer_2_sample_no_int <- prop_test(df, resp ~ exp, order = c("a", "b"), - conf_int = FALSE) + infer_2_sample_no_int <- prop_test( + df, + resp ~ exp, + order = c("a", "b"), + conf_int = FALSE + ) infer_2_sample_z <- prop_test(df, resp ~ exp, order = c("a", "b"), z = TRUE) expect_length(infer_1_sample, 4) @@ -392,51 +421,51 @@ test_that("prop_test output dimensionality is correct", { }) test_that("prop_test handles >2 explanatory levels gracefully", { - set.seed(1) - dfr <- - tibble::tibble( - exp = sample(c("a", "b", "c"), 100, replace = TRUE), - resp = sample(c("d", "e"), 100, replace = TRUE) - ) + set.seed(1) + dfr <- + tibble::tibble( + exp = sample(c("a", "b", "c"), 100, replace = TRUE), + resp = sample(c("d", "e"), 100, replace = TRUE) + ) - res_old <- prop.test(table(dfr)) + res_old <- prop.test(table(dfr)) - # don't pass order - expect_silent( - res_1 <- prop_test(dfr, resp ~ exp) - ) + # don't pass order + expect_silent( + res_1 <- prop_test(dfr, resp ~ exp) + ) - # pass 2-length order - expect_snapshot( - res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b")) - ) + # pass 2-length order + expect_snapshot( + res_2 <- prop_test(dfr, resp ~ exp, order = c("a", "b")) + ) - # pass 3-length order - expect_snapshot( - res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c")) - ) + # pass 3-length order + expect_snapshot( + res_3 <- prop_test(dfr, resp ~ exp, order = c("a", "b", "c")) + ) - expect_equal(res_1, res_2) - expect_equal(res_2, res_3) + expect_equal(res_1, res_2) + expect_equal(res_2, res_3) - expect_named(res_1, c("statistic", "chisq_df", "p_value")) - expect_equal(res_1$statistic, res_old$statistic) - expect_equal(res_1$chisq_df, res_old$parameter) - expect_equal(res_1$p_value, res_old$p.value) + expect_named(res_1, c("statistic", "chisq_df", "p_value")) + expect_equal(res_1$statistic, res_old$statistic) + expect_equal(res_1$chisq_df, res_old$parameter) + expect_equal(res_1$p_value, res_old$p.value) }) test_that("prop_test errors with >2 response levels", { - set.seed(1) - dfr <- - tibble::tibble( - exp = sample(c("a", "b"), 100, replace = TRUE), - resp = sample(c("c", "d", "e"), 100, replace = TRUE) - ) + set.seed(1) + dfr <- + tibble::tibble( + exp = sample(c("a", "b"), 100, replace = TRUE), + resp = sample(c("c", "d", "e"), 100, replace = TRUE) + ) - expect_snapshot( - error = TRUE, - res_1 <- prop_test(dfr, resp ~ exp) - ) + expect_snapshot( + error = TRUE, + res_1 <- prop_test(dfr, resp ~ exp) + ) }) test_that("prop_test z argument works as expected", { @@ -458,29 +487,29 @@ test_that("wrappers can handled ordered factors", { ) expect_snapshot( - ordered_t_1 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = TRUE)) %>% - chisq_test(income ~ partyid) + ordered_t_1 <- gss_tbl %>% + dplyr::mutate(income = factor(income, ordered = TRUE)) %>% + chisq_test(income ~ partyid) ) expect_snapshot( - ordered_f_1 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = FALSE)) %>% - chisq_test(income ~ partyid) + ordered_f_1 <- gss_tbl %>% + dplyr::mutate(income = factor(income, ordered = FALSE)) %>% + chisq_test(income ~ partyid) ) expect_equal(ordered_t_1, ordered_f_1) expect_snapshot( - ordered_t_2 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = TRUE)) %>% - chisq_test(partyid ~ income) + ordered_t_2 <- gss_tbl %>% + dplyr::mutate(income = factor(income, ordered = TRUE)) %>% + chisq_test(partyid ~ income) ) expect_snapshot( - ordered_f_2 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = FALSE)) %>% - chisq_test(partyid ~ income) + ordered_f_2 <- gss_tbl %>% + dplyr::mutate(income = factor(income, ordered = FALSE)) %>% + chisq_test(partyid ~ income) ) expect_equal(ordered_t_2, ordered_f_2) @@ -496,45 +525,53 @@ test_that("wrappers can handled ordered factors", { }) test_that("handles spaces in variable names (t_test)", { - gss_ <- gss %>% - tidyr::drop_na(college) %>% - dplyr::mutate(`h o u r s` = hours) - - expect_equal( - t_test(gss_, - formula = hours ~ college, - order = c("degree", "no degree"), - alternative = "two-sided"), - t_test(gss_, - formula = `h o u r s` ~ college, - order = c("degree", "no degree"), - alternative = "two-sided") - ) - - expect_equal( - t_test(gss_, - response = hours, - explanatory = college, - order = c("degree", "no degree"), - alternative = "two-sided"), - t_test(gss_, - response = `h o u r s`, - explanatory = college, - order = c("degree", "no degree"), - alternative = "two-sided") - ) + gss_ <- gss %>% + tidyr::drop_na(college) %>% + dplyr::mutate(`h o u r s` = hours) + + expect_equal( + t_test( + gss_, + formula = hours ~ college, + order = c("degree", "no degree"), + alternative = "two-sided" + ), + t_test( + gss_, + formula = `h o u r s` ~ college, + order = c("degree", "no degree"), + alternative = "two-sided" + ) + ) + + expect_equal( + t_test( + gss_, + response = hours, + explanatory = college, + order = c("degree", "no degree"), + alternative = "two-sided" + ), + t_test( + gss_, + response = `h o u r s`, + explanatory = college, + order = c("degree", "no degree"), + alternative = "two-sided" + ) + ) }) test_that("handles spaces in variable names (prop_test)", { - df$`r e s p` <- df$resp + df$`r e s p` <- df$resp - expect_equal( - prop_test(df, `r e s p` ~ exp, order = c("a", "b")), - prop_test(df, resp ~ exp, order = c("a", "b")) - ) + expect_equal( + prop_test(df, `r e s p` ~ exp, order = c("a", "b")), + prop_test(df, resp ~ exp, order = c("a", "b")) + ) - expect_equal( - prop_test(df, response = `r e s p`, explanatory = exp, order = c("a", "b")), - prop_test(df, response = resp, explanatory = exp, order = c("a", "b")) - ) + expect_equal( + prop_test(df, response = `r e s p`, explanatory = exp, order = c("a", "b")), + prop_test(df, response = resp, explanatory = exp, order = c("a", "b")) + ) }) From 7762c4d1b9895daf079c4151c5bbc3cd960f45eb Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 12:47:20 -0500 Subject: [PATCH 04/11] `usethis::use_package("R", "Depends", "4.1")`, re`document()` --- DESCRIPTION | 4 ++-- NEWS.md | 2 ++ man/calculate.Rd | 4 ++-- man/fit.infer.Rd | 4 ++-- man/generate.Rd | 4 ++-- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5edb820f..9c1fbabf 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,7 +33,7 @@ License: MIT + file LICENSE URL: https://github.com/tidymodels/infer, https://infer.tidymodels.org/ BugReports: https://github.com/tidymodels/infer/issues Depends: - R (>= 3.5.0) + R (>= 4.1) Imports: broom, cli, @@ -68,6 +68,6 @@ Config/Needs/website: tidyverse/tidytemplate Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Config/testthat/edition: 3 Config/usethis/last-upkeep: 2025-04-25 diff --git a/NEWS.md b/NEWS.md index 5495f80c..7fbb1916 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # infer (development version) +* Increased the minimum required R version to R 4.1 + # infer 1.0.8 * The infer print method now truncates output when descriptions of explanatory or responses variables exceed the console width (#543). diff --git a/man/calculate.Rd b/man/calculate.Rd index f2739ca9..dcc25387 100755 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -78,7 +78,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## @@ -104,7 +104,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## diff --git a/man/fit.infer.Rd b/man/fit.infer.Rd index 8d3cad99..50763eba 100644 --- a/man/fit.infer.Rd +++ b/man/fit.infer.Rd @@ -92,7 +92,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## @@ -118,7 +118,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## diff --git a/man/generate.Rd b/man/generate.Rd index 65ced39b..be33e2fe 100755 --- a/man/generate.Rd +++ b/man/generate.Rd @@ -76,7 +76,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## @@ -102,7 +102,7 @@ gss \%>\% \if{html}{\out{
}}\preformatted{## Response: age (numeric) ## Explanatory: college (factor) -## Null Hypothesis: independence +## Null Hypothesis: indepe... ## # A tibble: 5 x 2 ## replicate stat ## From a5c0f776bf8122d3f24723a4de6f54ea5adb1f2d Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:19:30 -0500 Subject: [PATCH 05/11] transition to the base pipe --- R/assume.R | 67 ++- R/calculate.R | 131 ++--- R/fit.R | 33 +- R/generate.R | 26 +- R/get_confidence_interval.R | 48 +- R/get_p_value.R | 52 +- R/hypothesize.R | 8 +- R/observe.R | 55 +- R/pipe.R | 6 +- R/rep_sample_n.R | 8 +- R/shade_confidence_interval.R | 32 +- R/shade_p_value.R | 34 +- R/specify.R | 8 +- R/utils.R | 10 +- R/visualize.R | 84 +-- R/wrappers.R | 70 +-- README.Rmd | 14 +- README.md | 14 +- data-raw/save_gss.R | 50 +- man-roxygen/seeds.Rmd | 16 +- tests/testthat/_snaps/aliases.md | 4 +- tests/testthat/_snaps/assume.md | 44 +- tests/testthat/_snaps/calculate.md | 66 +-- tests/testthat/_snaps/fit.md | 5 +- tests/testthat/_snaps/generate.md | 97 ++-- .../_snaps/get_confidence_interval.md | 14 +- tests/testthat/_snaps/get_p_value.md | 6 +- tests/testthat/_snaps/hypothesize.md | 53 +- tests/testthat/_snaps/observe.md | 4 +- .../_snaps/shade_confidence_interval.md | 24 +- tests/testthat/_snaps/shade_p_value.md | 36 +- tests/testthat/_snaps/specify.md | 10 +- tests/testthat/_snaps/visualize.md | 97 ++-- tests/testthat/_snaps/wrappers.md | 49 +- tests/testthat/helper-data.R | 34 +- tests/testthat/test-aliases.R | 10 +- tests/testthat/test-assume.R | 174 +++--- tests/testthat/test-calculate.R | 412 +++++++------- tests/testthat/test-fit.R | 68 +-- tests/testthat/test-generate.R | 385 ++++++------- tests/testthat/test-get_confidence_interval.R | 164 +++--- tests/testthat/test-get_p_value.R | 132 ++--- tests/testthat/test-hypothesize.R | 122 ++-- tests/testthat/test-observe.R | 88 +-- tests/testthat/test-print.R | 6 +- tests/testthat/test-rep_sample_n.R | 2 +- .../testthat/test-shade_confidence_interval.R | 8 +- tests/testthat/test-shade_p_value.R | 22 +- tests/testthat/test-specify.R | 32 +- tests/testthat/test-utils.R | 12 +- tests/testthat/test-visualize.R | 532 +++++++++--------- tests/testthat/test-wrappers.R | 142 ++--- vignettes/anova.Rmd | 26 +- vignettes/chi_squared.Rmd | 54 +- vignettes/infer.Rmd | 98 ++-- vignettes/observed_stat_examples.Rmd | 530 ++++++++--------- vignettes/paired.Rmd | 34 +- vignettes/t_test.Rmd | 52 +- 58 files changed, 2206 insertions(+), 2208 deletions(-) diff --git a/R/assume.R b/R/assume.R index f9bcd62a..c400be40 100644 --- a/R/assume.R +++ b/R/assume.R @@ -67,37 +67,37 @@ #' #' # F distribution #' # with the `partyid` explanatory variable -#' gss %>% -#' specify(age ~ partyid) %>% +#' gss |> +#' specify(age ~ partyid) |> #' assume(distribution = "F") #' #' # Chi-squared goodness of fit distribution #' # on the `finrela` variable -#' gss %>% -#' specify(response = finrela) %>% +#' gss |> +#' specify(response = finrela) |> #' hypothesize(null = "point", #' p = c("far below average" = 1/6, #' "below average" = 1/6, #' "average" = 1/6, #' "above average" = 1/6, #' "far above average" = 1/6, -#' "DK" = 1/6)) %>% +#' "DK" = 1/6)) |> #' assume("Chisq") #' #' # Chi-squared test of independence #' # on the `finrela` and `sex` variables -#' gss %>% -#' specify(formula = finrela ~ sex) %>% +#' gss |> +#' specify(formula = finrela ~ sex) |> #' assume(distribution = "Chisq") #' #' # T distribution -#' gss %>% -#' specify(age ~ college) %>% +#' gss |> +#' specify(age ~ college) |> #' assume("t") #' #' # Z distribution -#' gss %>% -#' specify(response = sex, success = "female") %>% +#' gss |> +#' specify(response = sex, success = "female") |> #' assume("z") #' #' \dontrun{ @@ -107,14 +107,14 @@ #' # for example, a 1-sample t-test ------------------------------------- #' #' # calculate the observed statistic -#' obs_stat <- gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' obs_stat <- gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' # construct a null distribution -#' null_dist <- gss %>% -#' specify(response = hours) %>% +#' null_dist <- gss |> +#' specify(response = hours) |> #' assume("t") #' #' # juxtapose them visually @@ -127,14 +127,14 @@ #' # or, an F test ------------------------------------------------------ #' #' # calculate the observed statistic -#' obs_stat <- gss %>% -#' specify(age ~ partyid) %>% -#' hypothesize(null = "independence") %>% +#' obs_stat <- gss |> +#' specify(age ~ partyid) |> +#' hypothesize(null = "independence") |> #' calculate(stat = "F") #' #' # construct a null distribution -#' null_dist <- gss %>% -#' specify(age ~ partyid) %>% +#' null_dist <- gss |> +#' specify(age ~ partyid) |> #' assume(distribution = "F") #' #' # juxtapose them visually @@ -361,6 +361,11 @@ determine_df <- function(x, dist, df) { # return a vector of dfs recognized by `assume` acceptable_dfs <- function(x) { + # base R pipe doesn't support operators or anonymous functions + # in piped expressions (#553) + minus_one <- function(x) {x - 1} + minus_two <- function(x) {x - 2} + if (attr(x, "theory_type") == "Two sample t") { c( # t.test param with var.equal = FALSE @@ -383,17 +388,17 @@ acceptable_dfs <- function(x) { ) ), # min(n1 - 1, n2 - 1) - x %>% - dplyr::count(!!explanatory_expr(x)) %>% - dplyr::pull(n) %>% - min() %>% - `-`(1), + x |> + dplyr::count(!!explanatory_expr(x)) |> + dplyr::pull(n) |> + min() |> + minus_one(), # n1 + n2 - 2 - x %>% - dplyr::count(!!explanatory_expr(x)) %>% - dplyr::pull(n) %>% - sum() %>% - `-`(2) + x |> + dplyr::count(!!explanatory_expr(x)) |> + dplyr::pull(n) |> + sum() |> + minus_two() ) } else { c( diff --git a/R/calculate.R b/R/calculate.R index 846a8f83..101b5cea 100755 --- a/R/calculate.R +++ b/R/calculate.R @@ -44,34 +44,34 @@ #' #' # calculate a null distribution of hours worked per week under #' # the null hypothesis that the mean is 40 -#' gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% -#' generate(reps = 200, type = "bootstrap") %>% +#' gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> +#' generate(reps = 200, type = "bootstrap") |> #' calculate(stat = "mean") #' #' # calculate the corresponding observed statistic -#' gss %>% -#' specify(response = hours) %>% +#' gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' #' # calculate a null distribution assuming independence between age #' # of respondent and whether they have a college degree -#' gss %>% -#' specify(age ~ college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 200, type = "permute") %>% +#' gss |> +#' specify(age ~ college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 200, type = "permute") |> #' calculate("diff in means", order = c("degree", "no degree")) #' #' # calculate the corresponding observed statistic -#' gss %>% -#' specify(age ~ college) %>% +#' gss |> +#' specify(age ~ college) |> #' calculate("diff in means", order = c("degree", "no degree")) #' #' # some statistics require a null hypothesis -#' gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' # more in-depth explanation of how to use the infer package @@ -185,9 +185,9 @@ check_input_vs_stat <- function(x, stat, call = caller_env()) { response_type <- attr(x, "type_desc_response") explanatory_type <- attr(x, "type_desc_explanatory") - possible_stats <- stat_types %>% - dplyr::filter(resp == response_type & exp == explanatory_type) %>% - dplyr::pull(stats) %>% + possible_stats <- stat_types |> + dplyr::filter(resp == response_type & exp == explanatory_type) |> + dplyr::pull(stats) |> unlist() if (is.null(possible_stats)) { @@ -218,7 +218,7 @@ check_input_vs_stat <- function(x, stat, call = caller_env()) { } if (is_hypothesized(x)) { - stat_nulls <- stat_hypotheses %>% + stat_nulls <- stat_hypotheses |> dplyr::filter( stat == !!stat & hypothesis == attr(x, "null") @@ -237,11 +237,12 @@ check_input_vs_stat <- function(x, stat, call = caller_env()) { } # When given no hypothesis for a theorized statistic, supply a reasonable value +.subset_1 <- function(x) {x[[1]]} assume_null <- function(x, stat_) { - null_fn <- theorized_nulls %>% - dplyr::filter(stat == stat_) %>% - dplyr::pull(null_fn) %>% - `[[`(1) + null_fn <- theorized_nulls |> + dplyr::filter(stat == stat_) |> + dplyr::pull(null_fn) |> + .subset_1() null_fn(x) } @@ -303,13 +304,13 @@ calc_impl_one_f <- function(f) { x <- dplyr::group_by(x, replicate) } - res <- x %>% + res <- x |> dplyr::summarize(stat = f(!!(sym(col)), ...)) # calculate SE for confidence intervals if (!is_generated(x)) { - sample_sd <- x %>% - dplyr::summarize(stats::sd(!!(sym(col)))) %>% + sample_sd <- x |> + dplyr::summarize(stats::sd(!!(sym(col)))) |> dplyr::pull() attr(res, "se") <- sample_sd / sqrt(nrow(x)) @@ -341,7 +342,7 @@ calc_impl_success_f <- function(f, output_name) { x <- dplyr::group_by(x, replicate) } - res <- x %>% + res <- x |> dplyr::summarize(stat = f(!!sym(col), success)) # calculate SE for confidence intervals @@ -373,7 +374,7 @@ calc_impl.count <- calc_impl_success_f( #' @export calc_impl.F <- function(type, x, order, ...) { - x %>% + x |> dplyr::summarize( stat = stats::anova( stats::lm(!!(response_expr(x)) ~ !!(explanatory_expr(x))) @@ -383,7 +384,7 @@ calc_impl.F <- function(type, x, order, ...) { #' @export calc_impl.slope <- function(type, x, order, ...) { - x %>% + x |> dplyr::summarize( stat = stats::coef( stats::lm(!!(response_expr(x)) ~ !!(explanatory_expr(x))) @@ -393,7 +394,7 @@ calc_impl.slope <- function(type, x, order, ...) { #' @export calc_impl.correlation <- function(type, x, order, ...) { - x %>% + x |> dplyr::summarize( stat = stats::cor(!!explanatory_expr(x), !!response_expr(x)) ) @@ -401,10 +402,10 @@ calc_impl.correlation <- function(type, x, order, ...) { calc_impl_diff_f <- function(f, operator) { function(type, x, order, ...) { - res <- x %>% - dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) %>% - dplyr::summarize(value = f(!!response_expr(x), ...)) %>% - dplyr::group_by(replicate) %>% + res <- x |> + dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |> + dplyr::summarize(value = f(!!response_expr(x), ...)) |> + dplyr::group_by(replicate) |> dplyr::summarize( stat = operator( value[!!(explanatory_expr(x)) == order[1]], @@ -414,13 +415,13 @@ calc_impl_diff_f <- function(f, operator) { # calculate SE for confidence intervals if (!is_generated(x) && identical(operator, `-`)) { - sample_sds <- x %>% - dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) %>% - dplyr::summarize(stats::sd(!!response_expr(x))) %>% + sample_sds <- x |> + dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |> + dplyr::summarize(stats::sd(!!response_expr(x))) |> dplyr::pull() - sample_counts <- x %>% - dplyr::count(!!explanatory_expr(x), .drop = FALSE) %>% + sample_counts <- x |> + dplyr::count(!!explanatory_expr(x), .drop = FALSE) |> dplyr::pull() attr(res, "se") <- @@ -463,8 +464,8 @@ calc_impl.Chisq <- function(type, x, order, ...) { unname(chisq[["statistic"]]) } - result <- x %>% - dplyr::nest_by(.key = "data") %>% + result <- x |> + dplyr::nest_by(.key = "data") |> dplyr::summarise(stat = chisq_gof(data), .groups = "drop") } else { # Chi-Square Test of Independence @@ -480,15 +481,15 @@ calc_impl.Chisq <- function(type, x, order, ...) { } # Compute result - result <- x %>% - dplyr::nest_by(.key = "data") %>% + result <- x |> + dplyr::nest_by(.key = "data") |> dplyr::summarise(stat = chisq_indep(data), .groups = "drop") } if (is_generated(x)) { - result <- result %>% dplyr::select(replicate, stat) + result <- result |> dplyr::select(replicate, stat) } else { - result <- result %>% dplyr::select(stat) + result <- result |> dplyr::select(stat) } copy_attrs( @@ -514,9 +515,9 @@ calc_impl.function_of_props <- function(type, x, order, operator, ...) { col <- response_expr(x) success <- attr(x, "success") - res <- x %>% - dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) %>% - dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% + res <- x |> + dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |> + dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |> dplyr::summarize( stat = operator( prop[!!explanatory_expr(x) == order[1]], @@ -526,13 +527,13 @@ calc_impl.function_of_props <- function(type, x, order, operator, ...) { # calculate SE for confidence intervals if (!is_generated(x)) { - props <- x %>% - dplyr::group_by(!!explanatory_expr(x), .drop = FALSE) %>% - dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% + props <- x |> + dplyr::group_by(!!explanatory_expr(x), .drop = FALSE) |> + dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |> dplyr::pull() - counts <- x %>% - dplyr::count(!!explanatory_expr(x), .drop = FALSE) %>% + counts <- x |> + dplyr::count(!!explanatory_expr(x), .drop = FALSE) |> dplyr::pull() attr(res, "se") <- @@ -562,14 +563,14 @@ calc_impl.odds_ratio <- function(type, x, order, ...) { col <- response_expr(x) success <- attr(x, "success") - x %>% - dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) %>% - dplyr::summarize(prop = mean(!!sym(col) == success, ...)) %>% + x |> + dplyr::group_by(replicate, !!explanatory_expr(x), .drop = FALSE) |> + dplyr::summarize(prop = mean(!!sym(col) == success, ...)) |> dplyr::summarize( prop_1 = prop[!!explanatory_expr(x) == order[1]], prop_2 = prop[!!explanatory_expr(x) == order[2]], stat = (prop_1 / prop_2) / ((1 - prop_1) / (1 - prop_2)) - ) %>% + ) |> dplyr::select(stat) } @@ -578,7 +579,7 @@ calc_impl.t <- function(type, x, order, ...) { if (theory_type(x) == "Two sample t") { x <- reorder_explanatory(x, order) - df_out <- x %>% + df_out <- x |> dplyr::summarize( stat = stats::t.test( !!response_expr(x) ~ !!explanatory_expr(x), @@ -588,13 +589,13 @@ calc_impl.t <- function(type, x, order, ...) { } else if (theory_type(x) == "One sample t") { if (!is_hypothesized(x)) { # For bootstrap - df_out <- x %>% + df_out <- x |> dplyr::summarize( stat = stats::t.test(!!response_expr(x), ...)[["statistic"]] ) } else { # For hypothesis testing - df_out <- x %>% + df_out <- x |> dplyr::summarize( stat = stats::t.test( !!response_expr(x), @@ -619,15 +620,15 @@ calc_impl.z <- function(type, x, order, ...) { levels = c(order[1], order[2]) ) - aggregated <- x %>% - dplyr::group_by(replicate, explan) %>% + aggregated <- x |> + dplyr::group_by(replicate, explan) |> dplyr::summarize( group_num = dplyr::n(), prop = mean(rlang::eval_tidy(col) == rlang::eval_tidy(success)), num_suc = sum(rlang::eval_tidy(col) == rlang::eval_tidy(success)) ) - df_out <- aggregated %>% + df_out <- aggregated |> dplyr::summarize( diff_prop = prop[explan == order[1]] - prop[explan == order[2]], total_suc = sum(num_suc), @@ -636,7 +637,7 @@ calc_impl.z <- function(type, x, order, ...) { p_hat = total_suc / (n1 + n2), denom = sqrt(p_hat * (1 - p_hat) / n1 + p_hat * (1 - p_hat) / n2), stat = diff_prop / denom - ) %>% + ) |> dplyr::select(stat) df_out @@ -649,7 +650,7 @@ calc_impl.z <- function(type, x, order, ...) { p0 <- unname(attr(x, "params")[1]) num_rows <- nrow(x) / length(unique(x$replicate)) - df_out <- x %>% + df_out <- x |> dplyr::summarize( stat = (mean(rlang::eval_tidy(col) == rlang::eval_tidy(success), ...) - p0) / diff --git a/R/fit.R b/R/fit.R index 8384e653..1b7a1829 100644 --- a/R/fit.R +++ b/R/fit.R @@ -77,8 +77,8 @@ generics::fit #' @examples #' # fit a linear model predicting number of hours worked per #' # week using respondent age and degree status. -#' observed_fit <- gss %>% -#' specify(hours ~ age + college) %>% +#' observed_fit <- gss |> +#' specify(hours ~ age + college) |> #' fit() #' #' observed_fit @@ -86,18 +86,18 @@ generics::fit #' # fit 100 models to resamples of the gss dataset, where the response #' # `hours` is permuted in each. note that this code is the same as #' # the above except for the addition of the `generate` step. -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 100, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 100, type = "permute") |> #' fit() #' #' null_fits #' #' # for logistic regression, just supply a binary response variable! #' # (this can also be made explicit via the `family` argument in ...) -#' gss %>% -#' specify(college ~ age + hours) %>% +#' gss |> +#' specify(college ~ age + hours) |> #' fit() #' #' # more in-depth explanation of how to use the infer package @@ -126,9 +126,9 @@ fit.infer <- function(object, ...) { formula <- get_formula(object) if (is_generated(object)) { - x <- object %>% - tidyr::nest(data = -replicate) %>% - dplyr::rowwise() %>% + x <- object |> + tidyr::nest(data = -replicate) |> + dplyr::rowwise() |> dplyr::mutate( model = list( do.call( @@ -139,8 +139,8 @@ fit.infer <- function(object, ...) { ) ) ) - ) %>% - dplyr::select(replicate, model) %>% + ) |> + dplyr::select(replicate, model) |> tidyr::unnest(model) } else { x <- do.call( @@ -220,13 +220,12 @@ fit_linear_model <- function(object, formula, ...) { formula = formula, data = object, ... - ) %>% - broom::tidy() %>% + ) |> + broom::tidy() |> dplyr::select( - ., term, estimate - ) %>% + ) |> dplyr::mutate( term = dplyr::case_when( term == "(Intercept)" ~ "intercept", diff --git a/R/generate.R b/R/generate.R index 1f95891d..bba2df10 100755 --- a/R/generate.R +++ b/R/generate.R @@ -46,24 +46,24 @@ #' #' @examples #' # generate a null distribution by taking 200 bootstrap samples -#' gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' generate(reps = 200, type = "bootstrap") #' #' # generate a null distribution for the independence of #' # two variables by permuting their values 200 times -#' gss %>% -#' specify(partyid ~ age) %>% -#' hypothesize(null = "independence") %>% +#' gss |> +#' specify(partyid ~ age) |> +#' hypothesize(null = "independence") |> #' generate(reps = 200, type = "permute") #' #' # generate a null distribution via sampling from a #' # binomial distribution 200 times -#' gss %>% -#' specify(response = sex, success = "female") %>% -#' hypothesize(null = "point", p = .5) %>% -#' generate(reps = 200, type = "draw") %>% +#' gss |> +#' specify(response = sex, success = "female") |> +#' hypothesize(null = "point", p = .5) |> +#' generate(reps = 200, type = "draw") |> #' calculate(stat = "z") #' #' # more in-depth explanation of how to use the infer package @@ -257,9 +257,9 @@ permute <- function(x, reps = 1, variables, ..., call = caller_env()) { reps, permute_once(x, variables, call = call), simplify = FALSE - ) %>% - dplyr::bind_rows() %>% - dplyr::mutate(replicate = rep(1:reps, each = !!nrow_x)) %>% + ) |> + dplyr::bind_rows() |> + dplyr::mutate(replicate = rep(1:reps, each = !!nrow_x)) |> group_by_replicate(reps, nrow_x) df_out <- copy_attrs(to = df_out, from = x) diff --git a/R/get_confidence_interval.R b/R/get_confidence_interval.R index 9848603a..3652bc89 100644 --- a/R/get_confidence_interval.R +++ b/R/get_confidence_interval.R @@ -64,15 +64,15 @@ #' #' @examples #' -#' boot_dist <- gss %>% +#' boot_dist <- gss |> #' # We're interested in the number of hours worked per week -#' specify(response = hours) %>% +#' specify(response = hours) |> #' # Generate bootstrap samples -#' generate(reps = 1000, type = "bootstrap") %>% +#' generate(reps = 1000, type = "bootstrap") |> #' # Calculate mean of each bootstrap sample #' calculate(stat = "mean") #' -#' boot_dist %>% +#' boot_dist |> #' # Calculate the confidence interval around the point estimate #' get_confidence_interval( #' # At the 95% confidence level; percentile method @@ -80,11 +80,11 @@ #' ) #' #' # for type = "se" or type = "bias-corrected" we need a point estimate -#' sample_mean <- gss %>% -#' specify(response = hours) %>% +#' sample_mean <- gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' -#' boot_dist %>% +#' boot_dist |> #' get_confidence_interval( #' point_estimate = sample_mean, #' # At the 95% confidence level @@ -96,8 +96,8 @@ #' # using a theoretical distribution ----------------------------------- #' #' # define a sampling distribution -#' sampling_dist <- gss %>% -#' specify(response = hours) %>% +#' sampling_dist <- gss |> +#' specify(response = hours) |> #' assume("t") #' #' # get the confidence interval---note that the @@ -112,8 +112,8 @@ #' #' # fit a linear model predicting number of hours worked per #' # week using respondent age and degree status. -#' observed_fit <- gss %>% -#' specify(hours ~ age + college) %>% +#' observed_fit <- gss |> +#' specify(hours ~ age + college) |> #' fit() #' #' observed_fit @@ -121,10 +121,10 @@ #' # fit 100 models to resamples of the gss dataset, where the response #' # `hours` is permuted in each. note that this code is the same as #' # the above except for the addition of the `generate` step. -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 100, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 100, type = "permute") |> #' fit() #' #' null_fits @@ -172,15 +172,15 @@ get_confidence_interval <- function( ) # split up x and point estimate by term - term_data <- x %>% - dplyr::ungroup() %>% - dplyr::group_by(term) %>% - dplyr::group_split() %>% + term_data <- x |> + dplyr::ungroup() |> + dplyr::group_by(term) |> + dplyr::group_split() |> purrr::map(copy_attrs, x) - term_estimates <- point_estimate %>% - dplyr::ungroup() %>% - dplyr::group_by(term) %>% + term_estimates <- point_estimate |> + dplyr::ungroup() |> + dplyr::group_by(term) |> dplyr::group_split() # check arguments for each term @@ -199,11 +199,11 @@ get_confidence_interval <- function( switch_ci, level = level, type = type - ) %>% + ) |> dplyr::mutate( term = purrr::map_chr(term_estimates, purrr::pluck, "term"), .before = dplyr::everything() - ) %>% + ) |> copy_attrs(x) } else { check_ci_args(x, level, type, point_estimate) diff --git a/R/get_p_value.R b/R/get_p_value.R index 750a8dbd..b95b4505 100644 --- a/R/get_p_value.R +++ b/R/get_p_value.R @@ -57,34 +57,34 @@ #' # using a simulation-based null distribution ------------------------------ #' #' # find the point estimate---mean number of hours worked per week -#' point_estimate <- gss %>% -#' specify(response = hours) %>% +#' point_estimate <- gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' #' # starting with the gss dataset -#' gss %>% +#' gss |> #' # ...we're interested in the number of hours worked per week -#' specify(response = hours) %>% +#' specify(response = hours) |> #' # hypothesizing that the mean is 40 -#' hypothesize(null = "point", mu = 40) %>% +#' hypothesize(null = "point", mu = 40) |> #' # generating data points for a null distribution -#' generate(reps = 1000, type = "bootstrap") %>% +#' generate(reps = 1000, type = "bootstrap") |> #' # finding the null distribution -#' calculate(stat = "mean") %>% +#' calculate(stat = "mean") |> # # calculate the p-value for the point estimate #' get_p_value(obs_stat = point_estimate, direction = "two-sided") #' #' # using a theoretical null distribution ----------------------------------- #' #' # calculate the observed statistic -#' obs_stat <- gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' obs_stat <- gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' # define a null distribution -#' null_dist <- gss %>% -#' specify(response = hours) %>% +#' null_dist <- gss |> +#' specify(response = hours) |> #' assume("t") #' #' # calculate a p-value @@ -94,8 +94,8 @@ #' #' # fit a linear model predicting number of hours worked per #' # week using respondent age and degree status. -#' observed_fit <- gss %>% -#' specify(hours ~ age + college) %>% +#' observed_fit <- gss |> +#' specify(hours ~ age + college) |> #' fit() #' #' observed_fit @@ -103,10 +103,10 @@ #' # fit 100 models to resamples of the gss dataset, where the response #' # `hours` is permuted in each. note that this code is the same as #' # the above except for the addition of the `generate` step. -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 100, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 100, type = "permute") |> #' fit() #' #' null_fits @@ -149,15 +149,15 @@ get_p_value.default <- function(x, obs_stat, direction) { ) # split up x and obs_stat by term - term_data <- x %>% - dplyr::ungroup() %>% - dplyr::group_by(term) %>% - dplyr::group_split() %>% + term_data <- x |> + dplyr::ungroup() |> + dplyr::group_by(term) |> + dplyr::group_split() |> purrr::map(copy_attrs, x) - term_obs_stats <- obs_stat %>% - dplyr::ungroup() %>% - dplyr::group_by(term) %>% + term_obs_stats <- obs_stat |> + dplyr::ungroup() |> + dplyr::group_by(term) |> dplyr::group_split() # calculate the p value for each term and then add the term column back in @@ -166,7 +166,7 @@ get_p_value.default <- function(x, obs_stat, direction) { purrr::map(term_obs_stats, purrr::pluck, "estimate"), simulation_based_p_value, direction = direction - ) %>% + ) |> dplyr::mutate( term = purrr::map_chr(term_obs_stats, purrr::pluck, "term"), .before = dplyr::everything() diff --git a/R/hypothesize.R b/R/hypothesize.R index 846d18a0..5aa02da2 100755 --- a/R/hypothesize.R +++ b/R/hypothesize.R @@ -36,13 +36,13 @@ #' #' @examples #' # hypothesize independence of two variables -#' gss %>% -#' specify(college ~ partyid, success = "degree") %>% +#' gss |> +#' specify(college ~ partyid, success = "degree") |> #' hypothesize(null = "independence") #' #' # hypothesize a mean number of hours worked per week of 40 -#' gss %>% -#' specify(response = hours) %>% +#' gss |> +#' specify(response = hours) |> #' hypothesize(null = "point", mu = 40) #' #' # more in-depth explanation of how to use the infer package diff --git a/R/observe.R b/R/observe.R index 6f88d6b9..1e20207a 100644 --- a/R/observe.R +++ b/R/observe.R @@ -17,22 +17,22 @@ #' #' @examples #' # calculating the observed mean number of hours worked per week -#' gss %>% +#' gss |> #' observe(hours ~ NULL, stat = "mean") #' #' # equivalently, calculating the same statistic with the core verbs -#' gss %>% -#' specify(response = hours) %>% +#' gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' #' # calculating a t statistic for hypothesized mu = 40 hours worked/week -#' gss %>% +#' gss |> #' observe(hours ~ NULL, stat = "t", null = "point", mu = 40) #' #' # equivalently, calculating the same statistic with the core verbs -#' gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' # similarly for a difference in means in age based on whether @@ -45,8 +45,8 @@ #' ) #' #' # equivalently, calculating the same statistic with the core verbs -#' gss %>% -#' specify(age ~ college) %>% +#' gss |> +#' specify(age ~ college) |> #' calculate("diff in means", order = c("degree", "no degree")) #' #' # for a more in-depth explanation of how to use the infer package @@ -104,27 +104,30 @@ observe <- function( } # pass arguments on to core verbs - specify( + res <- + specify( x = x, formula = formula, response = {{ response }}, explanatory = {{ explanatory }}, success = success - ) %>% - hypothesize_fn( - null = if (has_explanatory(.)) { - "independence" - } else { - "point" - }, - p = p, - mu = mu, - med = med, - sigma = sigma - ) %>% - calculate( - stat = stat, - order = order, - ... ) + + hypothesize_fn( + res, + null = if (has_explanatory(res)) { + "independence" + } else { + "point" + }, + p = p, + mu = mu, + med = med, + sigma = sigma + ) |> + calculate( + stat = stat, + order = order, + ... + ) } diff --git a/R/pipe.R b/R/pipe.R index b2efe06a..f2461d26 100755 --- a/R/pipe.R +++ b/R/pipe.R @@ -1,13 +1,13 @@ #' Pipe #' -#' Like \{dplyr\}, \{infer\} also uses the pipe (\code{%>%}) function +#' Like \{dplyr\}, \{infer\} also uses the pipe (\code{|>}) function #' from \code{magrittr} to turn function composition into a series of #' iterative statements. #' #' @param lhs,rhs Inference functions and the initial data frame. #' -#' @importFrom magrittr %>% -#' @name %>% +#' @importFrom magrittr |> +#' @name |> #' @rdname pipe #' @export NULL diff --git a/R/rep_sample_n.R b/R/rep_sample_n.R index 37e4c958..4a7e8874 100644 --- a/R/rep_sample_n.R +++ b/R/rep_sample_n.R @@ -43,15 +43,15 @@ #' library(tibble) #' #' # take 1000 samples of size n = 50, without replacement -#' slices <- gss %>% +#' slices <- gss |> #' rep_slice_sample(n = 50, reps = 1000) #' #' slices #' #' # compute the proportion of respondents with a college #' # degree in each replicate -#' p_hats <- slices %>% -#' group_by(replicate) %>% +#' p_hats <- slices |> +#' group_by(replicate) |> #' summarize(prop_college = mean(college == "degree")) #' #' # plot sampling distribution @@ -72,7 +72,7 @@ #' rep_slice_sample(df, n = 2, reps = 5, weight_by = c(.5, .4, .3, .2, .1)) #' #' # alternatively, pass an unquoted column name in `.data` as `weight_by` -#' df <- df %>% mutate(wts = c(.5, .4, .3, .2, .1)) +#' df <- df |> mutate(wts = c(.5, .4, .3, .2, .1)) #' #' rep_slice_sample(df, n = 2, reps = 5, weight_by = wts) #' @export diff --git a/R/shade_confidence_interval.R b/R/shade_confidence_interval.R index 2236dad0..981b4b9c 100644 --- a/R/shade_confidence_interval.R +++ b/R/shade_confidence_interval.R @@ -28,21 +28,21 @@ #' #' @examples #' # find the point estimate---mean number of hours worked per week -#' point_estimate <- gss %>% -#' specify(response = hours) %>% +#' point_estimate <- gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' #' # ...and a bootstrap distribution -#' boot_dist <- gss %>% +#' boot_dist <- gss |> #' # ...we're interested in the number of hours worked per week -#' specify(response = hours) %>% +#' specify(response = hours) |> #' # generating data points -#' generate(reps = 1000, type = "bootstrap") %>% +#' generate(reps = 1000, type = "bootstrap") |> #' # finding the distribution from the generated data #' calculate(stat = "mean") #' #' # find a confidence interval around the point estimate -#' ci <- boot_dist %>% +#' ci <- boot_dist |> #' get_confidence_interval(point_estimate = point_estimate, #' # at the 95% confidence level #' level = .95, @@ -51,12 +51,12 @@ #' #' #' # and plot it! -#' boot_dist %>% +#' boot_dist |> #' visualize() + #' shade_confidence_interval(ci) #' #' # or just plot the bounds -#' boot_dist %>% +#' boot_dist |> #' visualize() + #' shade_confidence_interval(ci, fill = NULL) #' @@ -64,8 +64,8 @@ #' # theoretical distributions, too---the theoretical #' # distribution will be recentered and rescaled to #' # align with the confidence interval -#' sampling_dist <- gss %>% -#' specify(response = hours) %>% +#' sampling_dist <- gss |> +#' specify(response = hours) |> #' assume(distribution = "t") #' #' visualize(sampling_dist) + @@ -76,17 +76,17 @@ #' # explanatory variables, use a `fit()`-based workflow #' #' # fit 1000 linear models with the `hours` variable permuted -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 1000, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 1000, type = "permute") |> #' fit() #' #' null_fits #' #' # fit a linear model to the observed data -#' obs_fit <- gss %>% -#' specify(hours ~ age + college) %>% +#' obs_fit <- gss |> +#' specify(hours ~ age + college) |> #' fit() #' #' obs_fit diff --git a/R/shade_p_value.R b/R/shade_p_value.R index 14f0c6e3..3c5c3df7 100644 --- a/R/shade_p_value.R +++ b/R/shade_p_value.R @@ -32,34 +32,34 @@ #' #' @examples #' # find the point estimate---mean number of hours worked per week -#' point_estimate <- gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' point_estimate <- gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' # ...and a null distribution -#' null_dist <- gss %>% +#' null_dist <- gss |> #' # ...we're interested in the number of hours worked per week -#' specify(response = hours) %>% +#' specify(response = hours) |> #' # hypothesizing that the mean is 40 -#' hypothesize(null = "point", mu = 40) %>% +#' hypothesize(null = "point", mu = 40) |> #' # generating data points for a null distribution -#' generate(reps = 1000, type = "bootstrap") %>% +#' generate(reps = 1000, type = "bootstrap") |> #' # estimating the null distribution #' calculate(stat = "t") #' #' # shade the p-value of the point estimate -#' null_dist %>% +#' null_dist |> #' visualize() + #' shade_p_value(obs_stat = point_estimate, direction = "two-sided") #' #' # you can shade confidence intervals on top of #' # theoretical distributions, too! -#' null_dist_theory <- gss %>% -#' specify(response = hours) %>% +#' null_dist_theory <- gss |> +#' specify(response = hours) |> #' assume(distribution = "t") #' -#' null_dist_theory %>% +#' null_dist_theory |> #' visualize() + #' shade_p_value(obs_stat = point_estimate, direction = "two-sided") #' @@ -68,17 +68,17 @@ #' # explanatory variables, use a `fit()`-based workflow #' #' # fit 1000 linear models with the `hours` variable permuted -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 1000, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 1000, type = "permute") |> #' fit() #' #' null_fits #' #' # fit a linear model to the observed data -#' obs_fit <- gss %>% -#' specify(hours ~ age + college) %>% +#' obs_fit <- gss |> +#' specify(hours ~ age + college) |> #' fit() #' #' obs_fit diff --git a/R/specify.R b/R/specify.R index 910509ed..841f4f17 100755 --- a/R/specify.R +++ b/R/specify.R @@ -25,15 +25,15 @@ #' #' @examples #' # specifying for a point estimate on one variable -#' gss %>% +#' gss |> #' specify(response = age) #' #' # specify a relationship between variables as a formula... -#' gss %>% +#' gss |> #' specify(age ~ partyid) #' #' # ...or with named arguments! -#' gss %>% +#' gss |> #' specify(response = age, explanatory = partyid) #' #' # more in-depth explanation of how to use the infer package @@ -74,7 +74,7 @@ specify <- function( check_success_arg(x, success) # Select variables - x <- x %>% + x <- x |> select(any_of(c(response_name(x), explanatory_name(x)))) is_complete <- stats::complete.cases(x) diff --git a/R/utils.R b/R/utils.R index 6fa75661..cbfc81a4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -86,21 +86,21 @@ reorder_explanatory <- function(x, order) { } standardize_variable_types <- function(x) { - tibble::as_tibble(x) %>% + tibble::as_tibble(x) |> # character and ordered to factor dplyr::mutate( dplyr::across( where(~ is.character(.x) || is.ordered(.x)), ~ factor(.x, ordered = FALSE) ) - ) %>% + ) |> # logical to factor, with TRUE as the first level dplyr::mutate( dplyr::across( where(~ is.logical(.x)), ~ factor(.x, levels = c("TRUE", "FALSE")) ) - ) %>% + ) |> # integer to numeric dplyr::mutate( dplyr::across( @@ -708,8 +708,8 @@ check_obs_stat <- function(obs_stat, plot = NULL, call = caller_env()) { x_lab <- x_axis_label(plot) obs_stat <- - obs_stat %>% - dplyr::filter(term == x_lab) %>% + obs_stat |> + dplyr::filter(term == x_lab) |> dplyr::pull(estimate) return(obs_stat) diff --git a/R/visualize.R b/R/visualize.R index 3dcc4c6b..871797ab 100755 --- a/R/visualize.R +++ b/R/visualize.R @@ -56,35 +56,35 @@ ggplot2::ggplot_add #' @examples #' #' # generate a null distribution -#' null_dist <- gss %>% +#' null_dist <- gss |> #' # we're interested in the number of hours worked per week -#' specify(response = hours) %>% +#' specify(response = hours) |> #' # hypothesizing that the mean is 40 -#' hypothesize(null = "point", mu = 40) %>% +#' hypothesize(null = "point", mu = 40) |> #' # generating data points for a null distribution -#' generate(reps = 1000, type = "bootstrap") %>% +#' generate(reps = 1000, type = "bootstrap") |> #' # calculating a distribution of means #' calculate(stat = "mean") #' #' # or a bootstrap distribution, omitting the hypothesize() step, #' # for use in confidence intervals -#' boot_dist <- gss %>% -#' specify(response = hours) %>% -#' generate(reps = 1000, type = "bootstrap") %>% +#' boot_dist <- gss |> +#' specify(response = hours) |> +#' generate(reps = 1000, type = "bootstrap") |> #' calculate(stat = "mean") #' #' # we can easily plot the null distribution by piping into visualize -#' null_dist %>% +#' null_dist |> #' visualize() #' #' # we can add layers to the plot as in ggplot, as well... #' # find the point estimate---mean number of hours worked per week -#' point_estimate <- gss %>% -#' specify(response = hours) %>% +#' point_estimate <- gss |> +#' specify(response = hours) |> #' calculate(stat = "mean") #' #' # find a confidence interval around the point estimate -#' ci <- boot_dist %>% +#' ci <- boot_dist |> #' get_confidence_interval(point_estimate = point_estimate, #' # at the 95% confidence level #' level = .95, @@ -92,19 +92,19 @@ ggplot2::ggplot_add #' type = "se") #' #' # display a shading of the area beyond the p-value on the plot -#' null_dist %>% +#' null_dist |> #' visualize() + #' shade_p_value(obs_stat = point_estimate, direction = "two-sided") #' #' # ...or within the bounds of the confidence interval -#' null_dist %>% +#' null_dist |> #' visualize() + #' shade_confidence_interval(ci) #' #' # plot a theoretical sampling distribution by creating #' # a theory-based distribution with `assume()` -#' sampling_dist <- gss %>% -#' specify(response = hours) %>% +#' sampling_dist <- gss |> +#' specify(response = hours) |> #' assume(distribution = "t") #' #' visualize(sampling_dist) @@ -120,15 +120,15 @@ ggplot2::ggplot_add #' # to plot both a theory-based and simulation-based null distribution, #' # use a theorized statistic (i.e. one of t, z, F, or Chisq) #' # and supply the simulation-based null distribution -#' null_dist_t <- gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% -#' generate(reps = 1000, type = "bootstrap") %>% +#' null_dist_t <- gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> +#' generate(reps = 1000, type = "bootstrap") |> #' calculate(stat = "t") #' -#' obs_stat <- gss %>% -#' specify(response = hours) %>% -#' hypothesize(null = "point", mu = 40) %>% +#' obs_stat <- gss |> +#' specify(response = hours) |> +#' hypothesize(null = "point", mu = 40) |> #' calculate(stat = "t") #' #' visualize(null_dist_t, method = "both") @@ -141,10 +141,10 @@ ggplot2::ggplot_add #' # explanatory variables, use a `fit()`-based workflow #' #' # fit 1000 models with the `hours` variable permuted -#' null_fits <- gss %>% -#' specify(hours ~ age + college) %>% -#' hypothesize(null = "independence") %>% -#' generate(reps = 1000, type = "permute") %>% +#' null_fits <- gss |> +#' specify(hours ~ age + college) |> +#' hypothesize(null = "independence") |> +#' generate(reps = 1000, type = "permute") |> #' fit() #' #' null_fits @@ -159,10 +159,10 @@ ggplot2::ggplot_add #' library(ggplot2) #' #' # to add a ggplot2 theme to a `calculate()`-based visualization, use `+` -#' null_dist %>% visualize() + theme_dark() +#' null_dist |> visualize() + theme_dark() #' #' # to add a ggplot2 theme to a `fit()`-based visualization, use `&` -#' null_fits %>% visualize() & theme_dark() +#' null_fits |> visualize() & theme_dark() #' } #' #' # More in-depth explanation of how to use the infer package @@ -211,12 +211,12 @@ visualize <- function( dots <- check_dots_for_deprecated(list(...)) if (is_fitted(data)) { - term_data <- data %>% - dplyr::rename(stat = estimate) %>% - dplyr::ungroup() %>% - dplyr::group_by(term) %>% - dplyr::group_split() %>% - purrr::map(copy_attrs, data) %>% + term_data <- data |> + dplyr::rename(stat = estimate) |> + dplyr::ungroup() |> + dplyr::group_by(term) |> + dplyr::group_split() |> + purrr::map(copy_attrs, data) |> purrr::map(copy_attrs, data, c("viz_method", "viz_bins")) plots <- purrr::map2( @@ -359,9 +359,9 @@ check_for_piped_visualize <- function(..., call = caller_env()) { cli_abort( c( "It looks like you piped the result of `visualize()` into \\ - `{called_function}()` (using `%>%`) rather than adding the result of \\ + `{called_function}()` rather than adding the result of \\ `{called_function}()` as a layer with `+`.", - i = "Consider changing `%>%` to `+`." + i = "Consider changing `|>` (or `%>%`) to `+`." ), call = call ) @@ -377,8 +377,8 @@ impute_endpoints <- function(endpoints, plot = NULL, call = caller_env()) { x_lab <- x_axis_label(plot) res <- - endpoints %>% - dplyr::filter(term == x_lab) %>% + endpoints |> + dplyr::filter(term == x_lab) |> dplyr::select(-term) return(unlist(res)) @@ -403,7 +403,7 @@ impute_endpoints <- function(endpoints, plot = NULL, call = caller_env()) { res <- unlist(endpoints) } - res %>% copy_attrs(endpoints, attrs = c("se", "point_estimate")) + res |> copy_attrs(endpoints, attrs = c("se", "point_estimate")) } impute_obs_stat <- function( @@ -756,7 +756,7 @@ ggplot_add.infer_layer <- function(object, plot, object_name) { # - object_name is the unevaluated call on the RHS of the `+` # # output is the actual output of the addition - this allows for - # a more %>%-esque programming style + # a more |>-esque programming style # # the biggest advantage this offers us is that we can # overwrite existing elements, i.e. subsetting into the patchwork, @@ -810,12 +810,12 @@ ggplot_add.infer_layer <- function(object, plot, object_name) { # extract the x axis label from a ggplot -- these are unique # ids for terms in visualize() workflows x_axis_label <- function(x) { - x %>% purrr::pluck("labels", "x") + x |> purrr::pluck("labels", "x") } create_plot_data <- function(data) { if (inherits(data, "infer_dist")) { - res <- tibble::tibble() %>% + res <- tibble::tibble() |> copy_attrs( data, c("theory_type", "distr_param", "distr_param2", "viz_method") diff --git a/R/wrappers.R b/R/wrappers.R index 6e6663a9..db4ef799 100755 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -28,8 +28,8 @@ #' #' # t test for number of hours worked per week #' # by college degree status -#' gss %>% -#' tidyr::drop_na(college) %>% +#' gss |> +#' tidyr::drop_na(college) |> #' t_test(formula = hours ~ college, #' order = c("degree", "no degree"), #' alternative = "two-sided") @@ -87,7 +87,7 @@ t_test <- function( mu = mu, conf.level = conf_level, ... - ) %>% + ) |> broom::glance() } else { # one sample @@ -96,12 +96,12 @@ t_test <- function( alternative = alternative, mu = mu, conf.level = conf_level - ) %>% + ) |> broom::glance() } if (conf_int) { - results <- prelim %>% + results <- prelim |> dplyr::select( statistic, t_df = parameter, @@ -112,7 +112,7 @@ t_test <- function( upper_ci = conf.high ) } else { - results <- prelim %>% + results <- prelim |> dplyr::select( statistic, t_df = parameter, @@ -147,13 +147,13 @@ t_test <- function( #' #' # t test statistic for true mean number of hours worked #' # per week of 40 -#' gss %>% +#' gss |> #' t_stat(response = hours, mu = 40) #' #' # t test statistic for number of hours worked per week #' # by college degree status -#' gss %>% -#' tidyr::drop_na(college) %>% +#' gss |> +#' tidyr::drop_na(college) |> #' t_stat(formula = hours ~ college, #' order = c("degree", "no degree"), #' alternative = "two-sided") @@ -210,7 +210,7 @@ t_stat <- function( mu = mu, conf.level = conf_level, ... - ) %>% + ) |> broom::glance() } else { # one sample @@ -219,14 +219,14 @@ t_stat <- function( alternative = alternative, mu = mu, conf.level = conf_level - ) %>% + ) |> broom::glance() } # removed unnecessary if(conf_int) clause; only the statistic itself # was returned regardless - results <- prelim %>% - dplyr::select(statistic) %>% + results <- prelim |> + dplyr::select(statistic) |> pull() results @@ -291,11 +291,11 @@ chisq_test <- function(x, formula, response = NULL, explanatory = NULL, ...) { ) } - x <- x %>% + x <- x |> select(any_of(c(response_name(x), explanatory_name(x)))) - stats::chisq.test(table(x), ...) %>% - broom::glance() %>% + stats::chisq.test(table(x), ...) |> + broom::glance() |> dplyr::select(statistic, chisq_df = parameter, p_value = p.value) } @@ -368,12 +368,12 @@ chisq_stat <- function(x, formula, response = NULL, explanatory = NULL, ...) { ) } - x <- x %>% + x <- x |> select(any_of(c(response_name(x), explanatory_name(x)))) - suppressWarnings(stats::chisq.test(table(x), ...)) %>% - broom::glance() %>% - dplyr::select(statistic) %>% + suppressWarnings(stats::chisq.test(table(x), ...)) |> + broom::glance() |> + dplyr::select(statistic) |> pull() } @@ -539,8 +539,8 @@ prop_test <- function( # two sample if (has_explanatory(x)) { # make a summary table to supply to prop.test - sum_table <- x %>% - select(explanatory_name(x), response_name(x)) %>% + sum_table <- x |> + select(explanatory_name(x), response_name(x)) |> table() length_exp_levels <- length(levels(explanatory_variable(x))) @@ -568,9 +568,9 @@ prop_test <- function( ) } else { # one sample - response_tbl <- response_variable(x) %>% - factor() %>% - stats::relevel(success) %>% + response_tbl <- response_variable(x) |> + factor() |> + stats::relevel(success) |> table() if (is.null(p)) { @@ -592,8 +592,8 @@ prop_test <- function( if (length(prelim$estimate) <= 2) { if (conf_int & is.null(p)) { - results <- prelim %>% - broom::glance() %>% + results <- prelim |> + broom::glance() |> dplyr::select( statistic, chisq_df = parameter, @@ -603,8 +603,8 @@ prop_test <- function( upper_ci = conf.high ) } else { - results <- prelim %>% - broom::glance() %>% + results <- prelim |> + broom::glance() |> dplyr::select( statistic, chisq_df = parameter, @@ -613,8 +613,8 @@ prop_test <- function( ) } } else { - results <- prelim %>% - broom::glance() %>% + results <- prelim |> + broom::glance() |> dplyr::select(statistic, chisq_df = parameter, p_value = p.value) } @@ -634,8 +634,8 @@ calculate_z <- function(x, results, success, p, order) { form <- new_formula(response_expr(x), exp) - stat <- x %>% - specify(formula = form, success = success) %>% + stat <- x |> + specify(formula = form, success = success) |> hypothesize( null = if (has_explanatory(x)) { "independence" @@ -647,7 +647,7 @@ calculate_z <- function(x, results, success, p, order) { } else { p } - ) %>% + ) |> calculate( stat = "z", order = if (has_explanatory(x)) { @@ -655,7 +655,7 @@ calculate_z <- function(x, results, success, p, order) { } else { NULL } - ) %>% + ) |> dplyr::pull() results$statistic <- stat diff --git a/README.Rmd b/README.Rmd index 2331c452..1ff680df 100755 --- a/README.Rmd +++ b/README.Rmd @@ -79,18 +79,18 @@ As an example, we'll run an analysis of variance on `age` and `partyid`, testing Calculating the observed statistic, ```{r, message = FALSE, warning = FALSE} -F_hat <- gss %>% - specify(age ~ partyid) %>% +F_hat <- gss |> + specify(age ~ partyid) |> calculate(stat = "F") ``` Then, generating the null distribution, ```{r, message = FALSE, warning = FALSE} -null_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "F") ``` @@ -108,7 +108,7 @@ knitr::include_graphics("https://raw.githubusercontent.com/tidymodels/infer/main Calculating the p-value from the null distribution and observed statistic, ```{r, message = FALSE, warning = FALSE} -null_dist %>% +null_dist |> get_p_value(obs_stat = F_hat, direction = "greater") ``` diff --git a/README.md b/README.md index 0f3493b6..a3038148 100755 --- a/README.md +++ b/README.md @@ -124,18 +124,18 @@ political party affiliation. Calculating the observed statistic, ``` r -F_hat <- gss %>% - specify(age ~ partyid) %>% +F_hat <- gss |> + specify(age ~ partyid) |> calculate(stat = "F") ``` Then, generating the null distribution, ``` r -null_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "F") ``` @@ -158,7 +158,7 @@ Calculating the p-value from the null distribution and observed statistic, ``` r -null_dist %>% +null_dist |> get_p_value(obs_stat = F_hat, direction = "greater") ``` diff --git a/data-raw/save_gss.R b/data-raw/save_gss.R index 928bd8ed..01ed4914 100644 --- a/data-raw/save_gss.R +++ b/data-raw/save_gss.R @@ -9,13 +9,13 @@ download.file("https://gss.norc.org/documents/stata/GSS_stata.zip", temp) # if this next line errors with "No such file or directory", try # incrementing the number after "_R" -gss_orig <- haven::read_dta(unz(temp, filename = "GSS7218_R2.DTA")) %>% +gss_orig <- haven::read_dta(unz(temp, filename = "GSS7218_R2.DTA")) |> haven::as_factor() unlink(temp) # select relevant columns -gss_small <- gss_orig %>% - filter(!stringr::str_detect(sample, "blk oversamp")) %>% # this is for weighting +gss_small <- gss_orig |> + filter(!stringr::str_detect(sample, "blk oversamp")) |> # this is for weighting select( year, age, @@ -28,26 +28,26 @@ gss_small <- gss_orig %>% class, finrela, weight = wtssall - ) %>% + ) |> mutate_if( is.factor, ~ fct_collapse(., NULL = c("IAP", "NA", "iap", "na")) - ) %>% + ) |> mutate( - age = age %>% - fct_recode("89" = "89 or older", NULL = "DK") %>% # truncated at 89 - as.character() %>% + age = age |> + fct_recode("89" = "89 or older", NULL = "DK") |> # truncated at 89 + as.character() |> as.numeric(), - hompop = hompop %>% - fct_collapse(NULL = c("DK")) %>% - as.character() %>% + hompop = hompop |> + fct_collapse(NULL = c("DK")) |> + as.character() |> as.numeric(), - hours = hours %>% - fct_recode("89" = "89+ hrs", NULL = "DK") %>% # truncated at 89 - as.character() %>% + hours = hours |> + fct_recode("89" = "89+ hrs", NULL = "DK") |> # truncated at 89 + as.character() |> as.numeric(), - weight = weight %>% - as.character() %>% + weight = weight |> + as.character() |> as.numeric(), partyid = fct_collapse( partyid, @@ -67,22 +67,22 @@ gss_small <- gss_orig %>% # sample 3k rows, first dropping NAs set.seed(20200201) -gss <- gss_small %>% - drop_na() %>% +gss <- gss_small |> + drop_na() |> sample_n(500) # check that the sample is similar unweighted to weighted gss_wt <- srvyr::as_survey_design(gss, weights = weight) -unweighted <- gss %>% - group_by(year, sex, partyid) %>% - summarize(n = n()) %>% - ungroup() %>% - group_by(year, sex) %>% +unweighted <- gss |> + group_by(year, sex, partyid) |> + summarize(n = n()) |> + ungroup() |> + group_by(year, sex) |> mutate(prop = n / sum(n)) -weighted <- gss_wt %>% - group_by(year, sex, partyid) %>% +weighted <- gss_wt |> + group_by(year, sex, partyid) |> summarize(prop = srvyr::survey_mean()) # save data into package diff --git a/man-roxygen/seeds.Rmd b/man-roxygen/seeds.Rmd index 159bdd47..dec8e95d 100644 --- a/man-roxygen/seeds.Rmd +++ b/man-roxygen/seeds.Rmd @@ -9,10 +9,10 @@ library(infer) ```{r} set.seed(1) -gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 5, type = "permute") %>% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) ``` @@ -22,10 +22,10 @@ Setting the seed to the same value again and rerunning the same code will produc # set the seed set.seed(1) -gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 5, type = "permute") %>% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) ``` diff --git a/tests/testthat/_snaps/aliases.md b/tests/testthat/_snaps/aliases.md index 91eab8bc..e642697f 100644 --- a/tests/testthat/_snaps/aliases.md +++ b/tests/testthat/_snaps/aliases.md @@ -1,7 +1,7 @@ # old aliases produce informative error Code - res <- gss_calc %>% p_value(obs_stat = -0.2, direction = "right") + res <- p_value(gss_calc, obs_stat = -0.2, direction = "right") Condition Error: ! `conf_int()` was deprecated in infer 0.4.0 and is now defunct. @@ -10,7 +10,7 @@ --- Code - res_ <- gss_permute %>% conf_int() + res_ <- conf_int(gss_permute) Condition Error: ! `conf_int()` was deprecated in infer 0.4.0 and is now defunct. diff --git a/tests/testthat/_snaps/assume.md b/tests/testthat/_snaps/assume.md index 7329dbcc..ab923841 100644 --- a/tests/testthat/_snaps/assume.md +++ b/tests/testthat/_snaps/assume.md @@ -1,8 +1,8 @@ # assume errors with bad arguments Code - gss %>% specify(age ~ college) %>% hypothesize(null = "independence") %>% - assume("boop", nrow(gss) - 1) + assume(hypothesize(specify(gss, age ~ college), null = "independence"), "boop", + nrow(gss) - 1) Condition Error in `assume()`: ! The distribution argument must be one of "Chisq", "F", "t", or "z". @@ -10,8 +10,8 @@ --- Code - gss %>% specify(age ~ college) %>% hypothesize(null = "independence") %>% - assume("t", c(nrow(gss) - 1, 2)) + assume(hypothesize(specify(gss, age ~ college), null = "independence"), "t", c( + nrow(gss) - 1, 2)) Condition Error in `assume()`: ! A T distribution requires 1 degrees of freedom argument, but 2 were supplied. @@ -19,8 +19,8 @@ --- Code - gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - assume("F", nrow(gss) - 1) + assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F", + nrow(gss) - 1) Message Dropping unused factor levels DK from the supplied explanatory variable 'partyid'. Condition @@ -30,8 +30,8 @@ --- Code - gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - assume("F", "boop") + assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F", + "boop") Message Dropping unused factor levels DK from the supplied explanatory variable 'partyid'. Condition @@ -41,8 +41,8 @@ --- Code - gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - assume("F", nrow(gss) - 1, 1) + assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F", + nrow(gss) - 1, 1) Message Dropping unused factor levels DK from the supplied explanatory variable 'partyid'. Condition @@ -53,8 +53,8 @@ --- Code - gss %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - assume("F", nrow(gss) - 1, 1, 2) + assume(hypothesize(specify(gss, age ~ partyid), null = "independence"), "F", + nrow(gss) - 1, 1, 2) Message Dropping unused factor levels DK from the supplied explanatory variable 'partyid'. Condition @@ -65,8 +65,8 @@ --- Code - gss %>% specify(age ~ finrela) %>% hypothesize(null = "independence") %>% - assume("t", nrow(gss) - 1) + assume(hypothesize(specify(gss, age ~ finrela), null = "independence"), "t", + nrow(gss) - 1) Condition Error in `assume()`: ! The supplied distribution "t" is not well-defined for a numeric response variable (age) and a multinomial categorical explanatory variable (finrela). @@ -74,8 +74,8 @@ --- Code - gss %>% specify(age ~ finrela) %>% hypothesize(null = "independence") %>% - assume("z", nrow(gss) - 1) + assume(hypothesize(specify(gss, age ~ finrela), null = "independence"), "z", + nrow(gss) - 1) Condition Error in `assume()`: ! The supplied distribution "z" is not well-defined for a numeric response variable (age) and a multinomial categorical explanatory variable (finrela). @@ -83,8 +83,8 @@ --- Code - gss %>% specify(age ~ NULL) %>% hypothesize(null = "point", mu = 40) %>% assume( - "z", nrow(gss) - 1) + assume(hypothesize(specify(gss, age ~ NULL), null = "point", mu = 40), "z", + nrow(gss) - 1) Condition Error in `assume()`: ! The supplied distribution "z" is not well-defined for a numeric response variable (age) and no explanatory variable. @@ -92,7 +92,7 @@ --- Code - gss %>% assume("z", nrow(gss) - 1) + assume(gss, "z", nrow(gss) - 1) Condition Error in `assume()`: ! The `x` argument must be the output of a core infer function, likely `specify()` or `hypothesize()`. @@ -100,7 +100,7 @@ --- Code - "boop" %>% assume("z", nrow(gss) - 1) + assume("boop", "z", nrow(gss) - 1) Condition Error in `assume()`: ! The `x` argument must be the output of a core infer function, likely `specify()` or `hypothesize()`. @@ -108,8 +108,8 @@ # assume() handles automatic df gracefully Code - res_ <- gss %>% specify(response = hours) %>% hypothesize(null = "point", mu = 40) %>% - assume("t", nrow(gss) - 2) + res_ <- assume(hypothesize(specify(gss, response = hours), null = "point", mu = 40), + "t", nrow(gss) - 2) Message Message: The supplied `df` argument does not match its expected value. If this is unexpected, ensure that your calculation for `df` is correct (see `assume()` (`?infer::assume()`) for recognized values) or supply `df = NULL` to `assume()`. diff --git a/tests/testthat/_snaps/calculate.md b/tests/testthat/_snaps/calculate.md index 48311d34..dbcbf8f4 100644 --- a/tests/testthat/_snaps/calculate.md +++ b/tests/testthat/_snaps/calculate.md @@ -51,8 +51,8 @@ # errors informatively with incompatible stat vs hypothesis Code - gss %>% specify(college ~ sex, success = "degree") %>% hypothesise(null = "point", - p = 0.4) %>% calculate(stat = "diff in props", order = c("female", "male")) + calculate(hypothesise(specify(gss, college ~ sex, success = "degree"), null = "point", + p = 0.4), stat = "diff in props", order = c("female", "male")) Condition Error in `calculate()`: ! The supplied statistic `stat = "diff in props"` is incompatible with the supplied hypothesis `null = "point"`. @@ -60,9 +60,9 @@ --- Code - gss %>% specify(college ~ sex, success = "degree") %>% hypothesise(null = "point", - p = 0.4) %>% generate(reps = 10, type = "draw") %>% calculate(stat = "diff in props", - order = c("female", "male")) + calculate(generate(hypothesise(specify(gss, college ~ sex, success = "degree"), + null = "point", p = 0.4), reps = 10, type = "draw"), stat = "diff in props", + order = c("female", "male")) Condition Error in `calculate()`: ! The supplied statistic `stat = "diff in props"` is incompatible with the supplied hypothesis `null = "point"`. @@ -70,7 +70,7 @@ # response attribute has been set Code - tibble::as_tibble(gss) %>% calculate(stat = "median") + calculate(tibble::as_tibble(gss), stat = "median") Condition Error in `dplyr::filter()`: i In argument: `resp == response_type & exp == explanatory_type`. @@ -280,8 +280,8 @@ # chi-square matches chisq.test value Code - dat %>% specify(action ~ sex, success = "promote") %>% calculate(stat = "Chisq", - order = c("male", "female"), correct = "boop") + calculate(specify(dat, action ~ sex, success = "promote"), stat = "Chisq", + order = c("male", "female"), correct = "boop") Condition Error in `dplyr::summarise()`: i In argument: `stat = chisq_indep(data)`. @@ -292,14 +292,14 @@ # chi-square works with factors with unused levels Code - out <- test_tbl %>% specify(y ~ x) %>% calculate(stat = "Chisq") %>% pull() + out <- pull(calculate(specify(test_tbl, y ~ x), stat = "Chisq")) Message Dropping unused factor levels d from the supplied explanatory variable 'x'. --- Code - out <- test_tbl %>% specify(y ~ x) %>% calculate(stat = "Chisq") %>% pull() + out <- pull(calculate(specify(test_tbl, y ~ x), stat = "Chisq")) Message Dropping unused factor levels g from the supplied response variable 'y'. @@ -356,7 +356,7 @@ # NULL response gives error Code - gss_tbl_improp %>% calculate(stat = "mean") + calculate(gss_tbl_improp, stat = "mean") Condition Error in `dplyr::filter()`: i In argument: `resp == response_type & exp == explanatory_type`. @@ -371,18 +371,18 @@ Warning: Statistic is not based on a difference or ratio; the `order` argument will be ignored. Check `calculate()` (`?infer::calculate()`) for details. -# specify() %>% calculate() works +# specify() |> calculate() works Code - res_ <- gss_tbl %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% - calculate(stat = "mean") + res_ <- calculate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point", + mu = 4), stat = "mean") Message Message: The point null hypothesis `mu = 4` does not inform calculation of the observed statistic (a mean) and will be ignored. --- Code - res_ <- gss_tbl %>% specify(partyid ~ NULL) %>% calculate(stat = "Chisq") + res_ <- calculate(specify(gss_tbl, partyid ~ NULL), stat = "Chisq") Condition Warning: A chi-square statistic requires a null hypothesis to calculate the observed statistic. @@ -391,15 +391,15 @@ # One sample t hypothesis test is working Code - res_ <- gss_tbl %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 1) %>% - generate(reps = 10) %>% calculate(stat = "t") + res_ <- calculate(generate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point", + mu = 1), reps = 10), stat = "t") Message Setting `type = "bootstrap"` in `generate()`. --- Code - res_ <- gss_tbl %>% specify(response = hours) %>% calculate(stat = "t") + res_ <- calculate(specify(gss_tbl, response = hours), stat = "t") Condition Warning: A t statistic requires a null hypothesis to calculate the observed statistic. @@ -447,8 +447,8 @@ # One sample t bootstrap is working Code - res_ <- gss_tbl %>% specify(hours ~ NULL) %>% generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "t") + res_ <- calculate(generate(specify(gss_tbl, hours ~ NULL), reps = 10, type = "bootstrap"), + stat = "t") Condition Warning: A t statistic requires a null hypothesis to calculate the observed statistic. @@ -457,7 +457,7 @@ # calculate warns informatively with insufficient null Code - res_ <- gss %>% specify(response = sex, success = "female") %>% calculate(stat = "z") + res_ <- calculate(specify(gss, response = sex, success = "female"), stat = "z") Condition Warning: A z statistic requires a null hypothesis to calculate the observed statistic. @@ -466,7 +466,7 @@ --- Code - res_ <- gss %>% specify(hours ~ NULL) %>% calculate(stat = "t") + res_ <- calculate(specify(gss, hours ~ NULL), stat = "t") Condition Warning: A t statistic requires a null hypothesis to calculate the observed statistic. @@ -475,7 +475,7 @@ --- Code - res_ <- gss %>% specify(response = partyid) %>% calculate(stat = "Chisq") + res_ <- calculate(specify(gss, response = partyid), stat = "Chisq") Message Dropping unused factor levels DK from the supplied response variable 'partyid'. Condition @@ -486,32 +486,32 @@ # calculate messages informatively with excessive null Code - res_ <- gss %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 40) %>% - calculate(stat = "mean") + res_ <- calculate(hypothesize(specify(gss, hours ~ NULL), null = "point", mu = 40), + stat = "mean") Message Message: The point null hypothesis `mu = 40` does not inform calculation of the observed statistic (a mean) and will be ignored. --- Code - res_ <- gss %>% specify(hours ~ NULL) %>% hypothesize(null = "point", sigma = 10) %>% - calculate(stat = "sd") + res_ <- calculate(hypothesize(specify(gss, hours ~ NULL), null = "point", + sigma = 10), stat = "sd") Message Message: The point null hypothesis `sigma = 10` does not inform calculation of the observed statistic (a standard deviation) and will be ignored. --- Code - res_ <- gss %>% specify(hours ~ college) %>% hypothesize(null = "independence") %>% - calculate("diff in means", order = c("no degree", "degree")) + res_ <- calculate(hypothesize(specify(gss, hours ~ college), null = "independence"), + "diff in means", order = c("no degree", "degree")) Message Message: The independence null hypothesis does not inform calculation of the observed statistic (a difference in means) and will be ignored. # calculate errors out with multiple explanatory variables Code - gss %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - calculate(stat = "t") + calculate(hypothesize(specify(gss, hours ~ age + college), null = "independence"), + stat = "t") Condition Error in `calculate()`: ! Multiple explanatory variables are not supported in `calculate()`. @@ -520,8 +520,8 @@ --- Code - gss %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - generate(reps = 3, type = "permute") %>% calculate(stat = "t") + calculate(generate(hypothesize(specify(gss, hours ~ age + college), null = "independence"), + reps = 3, type = "permute"), stat = "t") Condition Error in `calculate()`: ! Multiple explanatory variables are not supported in `calculate()`. diff --git a/tests/testthat/_snaps/fit.md b/tests/testthat/_snaps/fit.md index bac2a36c..66da6344 100644 --- a/tests/testthat/_snaps/fit.md +++ b/tests/testthat/_snaps/fit.md @@ -1,15 +1,14 @@ # fit.infer messages informatively on excessive null Code - res_ <- gss %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - fit() + res_ <- fit(hypothesize(specify(gss, hours ~ age + college), null = "independence")) Message Message: The independence null hypothesis does not inform calculation of the observed fit and will be ignored. # fit.infer logistic regression works Code - gss %>% specify(finrela ~ age + college) %>% fit() + fit(specify(gss, finrela ~ age + college)) Condition Error in `fit()`: ! infer does not support fitting models for categorical response variables with more than two levels. diff --git a/tests/testthat/_snaps/generate.md b/tests/testthat/_snaps/generate.md index 601ab7a6..4869e394 100644 --- a/tests/testthat/_snaps/generate.md +++ b/tests/testthat/_snaps/generate.md @@ -97,8 +97,8 @@ # auto `type` works (generate) Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 25) %>% - generate(reps = 100, type = "permute") + generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 25), + reps = 100, type = "permute") Condition Warning: You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -108,7 +108,7 @@ --- Code - res_ <- mtcars_df %>% specify(response = mpg) %>% generate(reps = 100, type = "draw") + res_ <- generate(specify(mtcars_df, response = mpg), reps = 100, type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -116,8 +116,8 @@ --- Code - res_ <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", - med = 26) %>% generate(reps = 100, type = "permute") + res_ <- generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", + med = 26), reps = 100, type = "permute") Condition Warning: You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -127,8 +127,8 @@ --- Code - res_ <- mtcars_df %>% specify(response = am, success = "1") %>% hypothesize( - null = "point", p = 0.25) %>% generate(reps = 100, type = "bootstrap") + res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"), + null = "point", p = 0.25), reps = 100, type = "bootstrap") Condition Warning: You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean. @@ -136,8 +136,8 @@ --- Code - res_ <- mtcars_df %>% specify(cyl ~ NULL) %>% hypothesize(null = "point", p = c( - `4` = 0.5, `6` = 0.25, `8` = 0.25)) %>% generate(reps = 100, type = "bootstrap") + res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ NULL), null = "point", p = c( + `4` = 0.5, `6` = 0.25, `8` = 0.25)), reps = 100, type = "bootstrap") Condition Warning: You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean. @@ -145,8 +145,8 @@ --- Code - res_ <- mtcars_df %>% specify(cyl ~ am) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "draw") + res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ am), null = "independence"), + reps = 100, type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean. @@ -154,8 +154,8 @@ --- Code - res_ <- mtcars_df %>% specify(mpg ~ cyl) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "draw") + res_ <- generate(hypothesize(specify(mtcars_df, mpg ~ cyl), null = "independence"), + reps = 100, type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"permute"`. This workflow is untested and the results may not mean what you think they mean. @@ -163,8 +163,8 @@ --- Code - res_ <- mtcars_df %>% specify(response = am, success = "1") %>% generate(reps = 100, - type = "draw") + res_ <- generate(specify(mtcars_df, response = am, success = "1"), reps = 100, + type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -172,7 +172,7 @@ --- Code - res_ <- mtcars_df %>% specify(mpg ~ am) %>% generate(reps = 100, type = "permute") + res_ <- generate(specify(mtcars_df, mpg ~ am), reps = 100, type = "permute") Condition Warning: You have given `type = "permute"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -182,8 +182,7 @@ --- Code - res_ <- mtcars_df %>% specify(am ~ vs, success = "1") %>% generate(reps = 100, - type = "draw") + res_ <- generate(specify(mtcars_df, am ~ vs, success = "1"), reps = 100, type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -191,7 +190,7 @@ --- Code - res_ <- mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "draw") + res_ <- generate(specify(mtcars_df, mpg ~ hp), reps = 100, type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -199,7 +198,7 @@ # mismatches lead to error Code - res_ <- mtcars_df %>% generate(reps = 10, type = "permute") + res_ <- generate(mtcars_df, reps = 10, type = "permute") Condition Error in `generate()`: ! The `variables` argument should be one or more unquoted variable names (not strings in quotation marks). @@ -207,8 +206,8 @@ --- Code - res_ <- mtcars_df %>% specify(am ~ NULL, success = "1") %>% hypothesize(null = "independence", - p = c(`1` = 0.5)) %>% generate(reps = 100, type = "draw") + res_ <- generate(hypothesize(specify(mtcars_df, am ~ NULL, success = "1"), + null = "independence", p = c(`1` = 0.5)), reps = 100, type = "draw") Condition Error in `hypothesize()`: ! Please `specify()` an explanatory and a response variable when testing a null hypothesis of `"independence"`. @@ -216,8 +215,8 @@ --- Code - res_ <- mtcars_df %>% specify(cyl ~ NULL) %>% hypothesize(null = "point", p = c( - `4` = 0.5, `6` = 0.25, `8` = 0.25)) %>% generate(reps = 100, type = "bootstrap") + res_ <- generate(hypothesize(specify(mtcars_df, cyl ~ NULL), null = "point", p = c( + `4` = 0.5, `6` = 0.25, `8` = 0.25)), reps = 100, type = "bootstrap") Condition Warning: You have given `type = "bootstrap"`, but `type` is expected to be `"draw"`. This workflow is untested and the results may not mean what you think they mean. @@ -225,7 +224,7 @@ --- Code - res_ <- mtcars_df %>% specify(mpg ~ hp) %>% generate(reps = 100, type = "other") + res_ <- generate(specify(mtcars_df, mpg ~ hp), reps = 100, type = "other") Condition Error in `generate()`: ! The `type` argument should be one of "bootstrap", "permute", or "draw". See `generate()` (`?infer::generate()`) for more details. @@ -240,8 +239,8 @@ # variables argument prompts when it ought to Code - res_ <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = c(howdy)) + res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"), + reps = 2, type = "permute", variables = c(howdy)) Condition Error in `generate()`: ! The column howdy provided to the `variables` argument is not in the supplied data. @@ -249,8 +248,8 @@ --- Code - res <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = c(howdy, doo)) + res <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"), + reps = 2, type = "permute", variables = c(howdy, doo)) Condition Error in `generate()`: ! The columns howdy and doo provided to the `variables` argument are not in the supplied data. @@ -258,8 +257,8 @@ --- Code - res_ <- gss[1:10, ] %>% specify(hours ~ NULL) %>% hypothesize(null = "point", - mu = 40) %>% generate(reps = 2, type = "bootstrap", variables = c(hours)) + res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ NULL), null = "point", + mu = 40), reps = 2, type = "bootstrap", variables = c(hours)) Condition Warning: The `variables` argument is only relevant for the "permute" generation type and will be ignored. @@ -267,8 +266,8 @@ --- Code - res_ <- gss[1:10, ] %>% specify(hours ~ age + college) %>% hypothesize(null = "independence") %>% - generate(reps = 2, type = "permute", variables = "hours") + res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college), null = "independence"), + reps = 2, type = "permute", variables = "hours") Condition Error in `generate()`: ! The `variables` argument should be one or more unquoted variable names (not strings in quotation marks). @@ -276,26 +275,26 @@ --- Code - res_ <- gss[1:10, ] %>% specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", - variables = age * college) + res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college + age * + college), null = "independence"), reps = 2, type = "permute", variables = age * + college) Message Message: Please supply only data columns to the `variables` argument. Note that any derived effects that depend on these columns will also be affected. --- Code - res_ <- gss[1:10, ] %>% specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% generate(reps = 2, type = "permute", - variables = c(hours, age * college)) + res_ <- generate(hypothesize(specify(gss[1:10, ], hours ~ age + college + age * + college), null = "independence"), reps = 2, type = "permute", variables = c( + hours, age * college)) Message Message: Please supply only data columns to the `variables` argument. Note that any derived effects that depend on these columns will also be affected. --- Code - res_ <- gss[1:10, ] %>% specify(hours ~ age * college) %>% generate(reps = 2, - type = "bootstrap", variables = c(hours, age * college)) + res_ <- generate(specify(gss[1:10, ], hours ~ age * college), reps = 2, type = "bootstrap", + variables = c(hours, age * college)) Condition Warning: The `variables` argument is only relevant for the "permute" generation type and will be ignored. @@ -303,16 +302,16 @@ # type = 'draw'/'simulate' superseding handled gracefully Code - res_ <- mtcars_df %>% specify(response = am, success = "1") %>% hypothesize( - null = "point", p = 0.5) %>% generate(type = "simulate") + res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"), + null = "point", p = 0.5), type = "simulate") Message The `"simulate"` generation type has been renamed to `"draw"`. Use `type = "draw"` instead to quiet this message. --- Code - res_ <- mtcars_df %>% specify(response = am, success = "1") %>% hypothesize( - null = "point", p = 0.5) %>% generate(type = "boop") + res_ <- generate(hypothesize(specify(mtcars_df, response = am, success = "1"), + null = "point", p = 0.5), type = "boop") Condition Error in `generate()`: ! The `type` argument should be one of "bootstrap", "permute", or "draw". See `generate()` (`?infer::generate()`) for more details. @@ -320,8 +319,8 @@ --- Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 20) %>% - generate(type = "draw") + generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 20), + type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. @@ -331,8 +330,8 @@ --- Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 20) %>% - generate(type = "draw") + generate(hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 20), + type = "draw") Condition Warning: You have given `type = "draw"`, but `type` is expected to be `"bootstrap"`. This workflow is untested and the results may not mean what you think they mean. diff --git a/tests/testthat/_snaps/get_confidence_interval.md b/tests/testthat/_snaps/get_confidence_interval.md index b8f41e1a..8ff63c0d 100644 --- a/tests/testthat/_snaps/get_confidence_interval.md +++ b/tests/testthat/_snaps/get_confidence_interval.md @@ -8,7 +8,7 @@ # get_confidence_interval checks input Code - test_df %>% get_confidence_interval(type = "other") + get_confidence_interval(test_df, type = "other") Message Using `level = 0.95` to compute confidence interval. Condition @@ -18,7 +18,7 @@ --- Code - test_df %>% get_confidence_interval(level = 1.2) + get_confidence_interval(test_df, level = 1.2) Condition Error in `get_confidence_interval()`: ! The value of `level` must be between 0 and 1, non-inclusive. @@ -26,7 +26,7 @@ --- Code - test_df %>% get_confidence_interval(point_estimate = "a") + get_confidence_interval(test_df, point_estimate = "a") Message Using `level = 0.95` to compute confidence interval. Condition @@ -36,7 +36,7 @@ --- Code - test_df %>% get_confidence_interval(type = "se", point_estimate = "a") + get_confidence_interval(test_df, type = "se", point_estimate = "a") Message Using `level = 0.95` to compute confidence interval. Condition @@ -46,7 +46,7 @@ --- Code - test_df %>% get_confidence_interval(type = "se", point_estimate = data.frame(p = "a")) + get_confidence_interval(test_df, type = "se", point_estimate = data.frame(p = "a")) Message Using `level = 0.95` to compute confidence interval. Condition @@ -56,7 +56,7 @@ --- Code - test_df %>% get_confidence_interval(type = "se") + get_confidence_interval(test_df, type = "se") Message Using `level = 0.95` to compute confidence interval. Condition @@ -66,7 +66,7 @@ --- Code - test_df %>% get_confidence_interval(type = "bias-corrected") + get_confidence_interval(test_df, type = "bias-corrected") Message Using `level = 0.95` to compute confidence interval. Condition diff --git a/tests/testthat/_snaps/get_p_value.md b/tests/testthat/_snaps/get_p_value.md index 9aea44c4..a21a1552 100644 --- a/tests/testthat/_snaps/get_p_value.md +++ b/tests/testthat/_snaps/get_p_value.md @@ -1,7 +1,7 @@ # direction is appropriate Code - test_df %>% get_p_value(obs_stat = 0.5, direction = "righ") + get_p_value(test_df, obs_stat = 0.5, direction = "righ") Condition Error in `get_p_value()`: ! The provided value for `direction` is not appropriate. Possible values are "less", "greater", "two-sided", "left", "right", "both", "two_sided", "two sided", or "two.sided". @@ -9,8 +9,8 @@ # theoretical p-value not supported error Code - gss_tbl %>% specify(hours ~ partyid) %>% hypothesize(null = "independence") %>% - calculate(stat = "F") %>% get_p_value(obs_stat = obs_F, direction = "right") + get_p_value(calculate(hypothesize(specify(gss_tbl, hours ~ partyid), null = "independence"), + stat = "F"), obs_stat = obs_F, direction = "right") Condition Error in `get_p_value()`: ! Theoretical p-values are not yet supported. diff --git a/tests/testthat/_snaps/hypothesize.md b/tests/testthat/_snaps/hypothesize.md index 0979bd30..092f4fd2 100644 --- a/tests/testthat/_snaps/hypothesize.md +++ b/tests/testthat/_snaps/hypothesize.md @@ -1,7 +1,7 @@ # hypothesize() throws an error when null is not point or independence Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "dependence") + hypothesize(specify(mtcars_df, response = mpg), null = "dependence") Condition Error in `hypothesize()`: ! `null` should be either "point", "independence", or "paired independence". @@ -9,7 +9,7 @@ # hypothesize() throws an error when multiple null values are provided Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = c("point", + hypothesize(specify(mtcars_df, response = mpg), null = c("point", "independence")) Condition Error in `hypothesize()`: @@ -18,8 +18,7 @@ # hypothesize() throws an error when multiple params are set Code - mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "point", mu = 25, - med = 20) + hypothesize(specify(mtcars_df, response = mpg), null = "point", mu = 25, med = 20) Condition Error in `hypothesize()`: ! You must specify exactly one of `p`, `mu`, `med`, or `sigma`. @@ -27,8 +26,7 @@ # hypothesize() throws a warning when params are set with independence Code - res_ <- mtcars_df %>% specify(mpg ~ vs) %>% hypothesize(null = "independence", - mu = 25) + res_ <- hypothesize(specify(mtcars_df, mpg ~ vs), null = "independence", mu = 25) Condition Warning: Parameter values should not be specified when testing that two variables are independent. @@ -36,8 +34,8 @@ # hypothesize() throws a warning when params are set with paired independence Code - res_ <- mtcars_df %>% specify(response = mpg) %>% hypothesize(null = "paired independence", - mu = 25) + res_ <- hypothesize(specify(mtcars_df, response = mpg), null = "paired independence", + mu = 25) Condition Warning: Parameter values should not be specified when testing paired independence. @@ -45,8 +43,8 @@ # hypothesize() throws an error when p is greater than 1 Code - res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize( - null = "point", p = 1 + .Machine$double.eps) + res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point", + p = 1 + .Machine$double.eps) Condition Error in `hypothesize()`: ! `p` should only contain values between zero and one. @@ -54,8 +52,8 @@ # hypothesize() throws an error when p is less than 0 Code - res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize( - null = "point", p = -.Machine$double.neg.eps) + res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point", + p = -.Machine$double.neg.eps) Condition Error in `hypothesize()`: ! `p` should only contain values between zero and one. @@ -63,8 +61,8 @@ # hypothesize() throws an error when p contains missing values Code - res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize( - null = "point", p = c(`0` = 0.5, `1` = NA_real_)) + res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point", + p = c(`0` = 0.5, `1` = NA_real_)) Condition Error in `hypothesize()`: ! `p` should not contain missing values. @@ -72,8 +70,8 @@ # hypothesize() throws an error when vector p does not sum to 1 Code - res_ <- mtcars_df %>% specify(response = vs, success = "1") %>% hypothesize( - null = "point", p = c(`0` = 0.5, `1` = 0.5 + (eps * 2))) + res_ <- hypothesize(specify(mtcars_df, response = vs, success = "1"), null = "point", + p = c(`0` = 0.5, `1` = 0.5 + (eps * 2))) Condition Error in `hypothesize()`: ! Make sure the hypothesized values for the `p` parameters sum to 1. Please try again. @@ -105,7 +103,7 @@ --- Code - res_ <- mtcars_s %>% hypothesize(null = "point", mean = 3) + res_ <- hypothesize(mtcars_s, null = "point", mean = 3) Condition Error in `hypothesize()`: ! unused argument (mean = 3) @@ -113,7 +111,7 @@ --- Code - res_ <- mtcars_s %>% hypothesize(null = "independence") + res_ <- hypothesize(mtcars_s, null = "independence") Condition Error in `hypothesize()`: ! Please `specify()` an explanatory and a response variable when testing a null hypothesis of `"independence"`. @@ -121,7 +119,7 @@ --- Code - res_ <- mtcars_s %>% hypothesize(null = "point") + res_ <- hypothesize(mtcars_s, null = "point") Condition Error in `hypothesize()`: ! You must specify exactly one of `p`, `mu`, `med`, or `sigma`. @@ -129,7 +127,7 @@ --- Code - res_ <- mtcars_f %>% specify(mpg ~ am) %>% hypothesize(null = "paired independence") + res_ <- hypothesize(specify(mtcars_f, mpg ~ am), null = "paired independence") Condition Error in `hypothesize()`: ! Please `specify()` only a response variable when testing a null hypothesis of `"paired independence"`. @@ -138,7 +136,7 @@ --- Code - res <- mtcars_s %>% hypothesize(null = c("point", "independence"), mu = 3) + res <- hypothesize(mtcars_s, null = c("point", "independence"), mu = 3) Condition Error in `hypothesize()`: ! You should specify exactly one type of null hypothesis. @@ -146,7 +144,7 @@ --- Code - res_ <- mtcars_df %>% dplyr::select(vs) %>% hypothesize(null = "point", mu = 1) + res_ <- hypothesize(dplyr::select(mtcars_df, vs), null = "point", mu = 1) Condition Error in `.subset2()`: ! attempt to select less than one element in get1index @@ -154,8 +152,7 @@ --- Code - res_ <- mtcars_df %>% specify(response = vs) %>% hypothesize(null = "point", - mu = 1) + res_ <- hypothesize(specify(mtcars_df, response = vs), null = "point", mu = 1) Condition Error in `specify()`: ! A level of the response variable `vs` needs to be specified for the `success` argument in `specify()`. @@ -163,7 +160,7 @@ --- Code - res_ <- mtcars_s %>% hypothesize(null = "point", p = 0.2) + res_ <- hypothesize(mtcars_s, null = "point", p = 0.2) Condition Error in `hypothesize()`: ! A point null regarding a proportion requires that `success` be indicated in `specify()`. @@ -171,7 +168,7 @@ --- Code - res_ <- mtcars_s %>% hypothesize() + res_ <- hypothesize(mtcars_s) Condition Error in `hypothesize()`: ! `null` should be either "point", "independence", or "paired independence". @@ -195,8 +192,8 @@ # user can specify multiple explanatory variables Code - res_ <- gss %>% specify(hours ~ sex + college) %>% hypothesize(null = "independence", - mu = 40) + res_ <- hypothesize(specify(gss, hours ~ sex + college), null = "independence", + mu = 40) Condition Warning: Parameter values should not be specified when testing that two variables are independent. diff --git a/tests/testthat/_snaps/observe.md b/tests/testthat/_snaps/observe.md index 7e3b1789..165b90fc 100644 --- a/tests/testthat/_snaps/observe.md +++ b/tests/testthat/_snaps/observe.md @@ -1,7 +1,7 @@ # observe() output is the same as the old wrappers Code - res_wrap <- gss_tbl %>% chisq_stat(college ~ partyid) + res_wrap <- chisq_stat(gss_tbl, college ~ partyid) Condition Warning: `chisq_stat()` was deprecated in infer 1.0.0. @@ -10,7 +10,7 @@ --- Code - res_wrap_2 <- gss_tbl %>% t_stat(hours ~ sex, order = c("male", "female")) + res_wrap_2 <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female")) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. diff --git a/tests/testthat/_snaps/shade_confidence_interval.md b/tests/testthat/_snaps/shade_confidence_interval.md index 6544f232..c7dc0459 100644 --- a/tests/testthat/_snaps/shade_confidence_interval.md +++ b/tests/testthat/_snaps/shade_confidence_interval.md @@ -33,36 +33,36 @@ --- Code - res_ <- gss_viz_sim %>% shade_confidence_interval(c(-1, 1)) + res_ <- shade_confidence_interval(gss_viz_sim, c(-1, 1)) Condition Error in `shade_confidence_interval()`: - ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` (using `%>%`) rather than adding the result of `shade_confidence_interval()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` rather than adding the result of `shade_confidence_interval()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - res_ <- gss_viz_sim %>% shade_confidence_interval(endpoints = c(-1, 1)) + res_ <- shade_confidence_interval(gss_viz_sim, endpoints = c(-1, 1)) Condition Error in `shade_confidence_interval()`: - ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` (using `%>%`) rather than adding the result of `shade_confidence_interval()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_confidence_interval()` rather than adding the result of `shade_confidence_interval()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - res_ <- gss_viz_sim %>% shade_ci(c(-1, 1)) + res_ <- shade_ci(gss_viz_sim, c(-1, 1)) Condition Error in `shade_ci()`: - ! It looks like you piped the result of `visualize()` into `shade_ci()` (using `%>%`) rather than adding the result of `shade_ci()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_ci()` rather than adding the result of `shade_ci()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - res_ <- gss_viz_sim %>% shade_ci(endpoints = c(-1, 1)) + res_ <- shade_ci(gss_viz_sim, endpoints = c(-1, 1)) Condition Error in `shade_ci()`: - ! It looks like you piped the result of `visualize()` into `shade_ci()` (using `%>%`) rather than adding the result of `shade_ci()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_ci()` rather than adding the result of `shade_ci()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. diff --git a/tests/testthat/_snaps/shade_p_value.md b/tests/testthat/_snaps/shade_p_value.md index 2b11386d..42371348 100644 --- a/tests/testthat/_snaps/shade_p_value.md +++ b/tests/testthat/_snaps/shade_p_value.md @@ -33,54 +33,54 @@ --- Code - gss_viz_sim %>% shade_p_value(1, "right") + shade_p_value(gss_viz_sim, 1, "right") Condition Error in `shade_p_value()`: - ! It looks like you piped the result of `visualize()` into `shade_p_value()` (using `%>%`) rather than adding the result of `shade_p_value()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - gss_viz_sim %>% shade_p_value(obs_stat = 1) + shade_p_value(gss_viz_sim, obs_stat = 1) Condition Error in `shade_p_value()`: - ! It looks like you piped the result of `visualize()` into `shade_p_value()` (using `%>%`) rather than adding the result of `shade_p_value()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - gss_viz_sim %>% shade_p_value(obs_stat = 1, direction = "right") + shade_p_value(gss_viz_sim, obs_stat = 1, direction = "right") Condition Error in `shade_p_value()`: - ! It looks like you piped the result of `visualize()` into `shade_p_value()` (using `%>%`) rather than adding the result of `shade_p_value()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_p_value()` rather than adding the result of `shade_p_value()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - gss_viz_sim %>% shade_pvalue(1, "right") + shade_pvalue(gss_viz_sim, 1, "right") Condition Error in `shade_pvalue()`: - ! It looks like you piped the result of `visualize()` into `shade_pvalue()` (using `%>%`) rather than adding the result of `shade_pvalue()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - gss_viz_sim %>% shade_pvalue(obs_stat = 1) + shade_pvalue(gss_viz_sim, obs_stat = 1) Condition Error in `shade_pvalue()`: - ! It looks like you piped the result of `visualize()` into `shade_pvalue()` (using `%>%`) rather than adding the result of `shade_pvalue()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. --- Code - gss_viz_sim %>% shade_pvalue(obs_stat = 1, direction = "right") + shade_pvalue(gss_viz_sim, obs_stat = 1, direction = "right") Condition Error in `shade_pvalue()`: - ! It looks like you piped the result of `visualize()` into `shade_pvalue()` (using `%>%`) rather than adding the result of `shade_pvalue()` as a layer with `+`. - i Consider changing `%>%` to `+`. + ! It looks like you piped the result of `visualize()` into `shade_pvalue()` rather than adding the result of `shade_pvalue()` as a layer with `+`. + i Consider changing `|>` (or `%>%`) to `+`. diff --git a/tests/testthat/_snaps/specify.md b/tests/testthat/_snaps/specify.md index 91bc6aad..4dab0929 100644 --- a/tests/testthat/_snaps/specify.md +++ b/tests/testthat/_snaps/specify.md @@ -177,24 +177,22 @@ # specify messages when dropping unused levels Code - res_ <- gss %>% dplyr::filter(partyid %in% c("rep", "dem")) %>% specify(age ~ - partyid) + res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), age ~ partyid) Message Dropping unused factor levels c("ind", "other", "DK") from the supplied explanatory variable 'partyid'. --- Code - res_ <- gss %>% dplyr::filter(partyid %in% c("rep", "dem")) %>% specify( - partyid ~ age) + res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), partyid ~ age) Message Dropping unused factor levels c("ind", "other", "DK") from the supplied response variable 'partyid'. --- Code - res_ <- gss %>% dplyr::filter(partyid %in% c("rep", "dem")) %>% specify( - partyid ~ NULL) + res_ <- specify(dplyr::filter(gss, partyid %in% c("rep", "dem")), partyid ~ + NULL) Message Dropping unused factor levels c("ind", "other", "DK") from the supplied response variable 'partyid'. diff --git a/tests/testthat/_snaps/visualize.md b/tests/testthat/_snaps/visualize.md index 6a78b680..4b33fdd4 100644 --- a/tests/testthat/_snaps/visualize.md +++ b/tests/testthat/_snaps/visualize.md @@ -1,9 +1,9 @@ # visualize warns with bad arguments Code - res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "slope") %>% - visualize(obs_stat = obs_slope, direction = "right") + res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours), + null = "independence"), reps = 100, type = "permute"), stat = "slope"), + obs_stat = obs_slope, direction = "right") Condition Warning: The arguments `c("obs_stat", "direction")` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`. @@ -11,9 +11,9 @@ --- Code - res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "slope") %>% - visualize(obs_stat = obs_slope) + res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours), + null = "independence"), reps = 100, type = "permute"), stat = "slope"), + obs_stat = obs_slope) Condition Warning: The arguments `obs_stat` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`. @@ -21,9 +21,9 @@ --- Code - res_ <- gss_tbl %>% specify(age ~ hours) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "slope") %>% - visualize(endpoints = c(0.01, 0.02)) + res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ hours), + null = "independence"), reps = 100, type = "permute"), stat = "slope"), + endpoints = c(0.01, 0.02)) Condition Warning: The arguments `endpoints` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`. @@ -31,7 +31,7 @@ --- Code - res <- age_hours_df %>% visualize(endpoints = c(0.01, 0.02)) + res <- visualize(age_hours_df, endpoints = c(0.01, 0.02)) Condition Warning: The arguments `endpoints` are deprecated in `visualize()` and will be ignored. They should now be passed to one of `shade_p_value()` or `shade_confidence_interval()`. @@ -39,7 +39,7 @@ # visualize basic tests Code - hours_resamp %>% visualize(bins = "yep") + visualize(hours_resamp, bins = "yep") Condition Error in `visualize()`: ! `bins` must be 'numeric', not 'character'. @@ -51,9 +51,9 @@ --- Code - res_vis_theor_none_1 <- gss_tbl %>% specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% calculate(stat = "z", order = c( - "no degree", "degree")) %>% visualize(method = "theoretical") + res_vis_theor_none_1 <- visualize(calculate(hypothesize(specify(gss_tbl, sex ~ + college, success = "female"), null = "independence"), stat = "z", order = c( + "no degree", "degree")), method = "theoretical") Message Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`. Condition @@ -63,9 +63,9 @@ --- Code - gss_tbl %>% specify(sex ~ college, success = "female") %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "diff in props", - order = c("no degree", "degree")) %>% visualize(method = "both") + + visualize(calculate(generate(hypothesize(specify(gss_tbl, sex ~ college, + success = "female"), null = "independence"), reps = 100, type = "permute"), + stat = "diff in props", order = c("no degree", "degree")), method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff) Condition Warning: @@ -76,8 +76,8 @@ --- Code - gss_tbl %>% specify(partyid ~ NULL) %>% hypothesize(null = "point", p = c(dem = 0.4, - rep = 0.4, ind = 0.2)) %>% visualize(method = "traditional") + visualize(hypothesize(specify(gss_tbl, partyid ~ NULL), null = "point", p = c( + dem = 0.4, rep = 0.4, ind = 0.2)), method = "traditional") Condition Error in `visualize()`: ! Provide `method` with one of three options: `"theoretical"`, `"both"`, or `"simulation"`. `"simulation"` is the default for simulation-based null distributions, while `"theoretical"` is the only option for null distributions outputted by `assume()`. @@ -85,10 +85,9 @@ --- Code - gss_tbl %>% specify(hours ~ sex) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "diff in means", - order = c("female", "male")) %>% visualize(method = "both") + shade_p_value( - direction = "both", obs_stat = obs_diff_mean) + visualize(calculate(generate(hypothesize(specify(gss_tbl, hours ~ sex), null = "independence"), + reps = 100, type = "permute"), stat = "diff in means", order = c("female", + "male")), method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff_mean) Condition Warning: Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you. @@ -98,9 +97,9 @@ --- Code - res_vis_theor_both_1 <- gss_tbl %>% specify(hours ~ sex) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "diff in means", - order = c("female", "male")) %>% visualize(method = "theoretical") + + res_vis_theor_both_1 <- visualize(calculate(generate(hypothesize(specify( + gss_tbl, hours ~ sex), null = "independence"), reps = 100, type = "permute"), + stat = "diff in means", order = c("female", "male")), method = "theoretical") + shade_p_value(direction = "both", obs_stat = obs_diff_mean) Message Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`. @@ -113,8 +112,8 @@ # method = "both" behaves nicely Code - gss_tbl %>% specify(hours ~ NULL) %>% hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% visualize(method = "both") + visualize(generate(hypothesize(specify(gss_tbl, hours ~ NULL), null = "point", + mu = 4), reps = 100, type = "bootstrap"), method = "both") Condition Error in `visualize()`: ! `generate()` and `calculate()` are both required to be done prior to `visualize(method = "both")` @@ -122,9 +121,9 @@ --- Code - res_method_both <- gss_tbl %>% specify(hours ~ college) %>% hypothesize(null = "point", - mu = 4) %>% generate(reps = 10, type = "bootstrap") %>% calculate(stat = "t", - order = c("no degree", "degree")) %>% visualize(method = "both") + res_method_both <- visualize(calculate(generate(hypothesize(specify(gss_tbl, + hours ~ college), null = "point", mu = 4), reps = 10, type = "bootstrap"), + stat = "t", order = c("no degree", "degree")), method = "both") Condition Warning: With only 10 replicates, it may be difficult to see the relationship between simulation and theory. @@ -134,10 +133,9 @@ # Traditional right-tailed tests have warning if not right-tailed Code - res_ <- gss_tbl %>% specify(sex ~ partyid, success = "female") %>% hypothesize( - null = "independence") %>% generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% visualize(method = "both") + shade_p_value( - obs_stat = 2, direction = "left") + res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, sex ~ partyid, + success = "female"), null = "independence"), reps = 100, type = "permute"), + stat = "Chisq"), method = "both") + shade_p_value(obs_stat = 2, direction = "left") Condition Warning: Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you. @@ -145,9 +143,9 @@ --- Code - res_ <- gss_tbl %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% calculate(stat = "F") %>% - visualize(method = "both") + shade_p_value(obs_stat = 2, direction = "two_sided") + res_ <- visualize(calculate(generate(hypothesize(specify(gss_tbl, age ~ partyid), + null = "independence"), reps = 100, type = "permute"), stat = "F"), method = "both") + + shade_p_value(obs_stat = 2, direction = "two_sided") Condition Warning: Check to make sure the conditions have been met for the theoretical method. infer currently does not check these for you. @@ -155,8 +153,8 @@ --- Code - res_ <- gss_tbl %>% specify(sex ~ partyid, success = "female") %>% hypothesize( - null = "independence") %>% calculate(stat = "Chisq") %>% visualize(method = "theoretical") + + res_ <- visualize(calculate(hypothesize(specify(gss_tbl, sex ~ partyid, + success = "female"), null = "independence"), stat = "Chisq"), method = "theoretical") + shade_p_value(obs_stat = 2, direction = "left") Message Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`. @@ -167,9 +165,8 @@ --- Code - res_ <- gss_tbl %>% specify(age ~ partyid) %>% hypothesize(null = "independence") %>% - calculate(stat = "F") %>% visualize(method = "theoretical") + shade_p_value( - obs_stat = 2, direction = "two_sided") + res_ <- visualize(calculate(hypothesize(specify(gss_tbl, age ~ partyid), null = "independence"), + stat = "F"), method = "theoretical") + shade_p_value(obs_stat = 2, direction = "two_sided") Message Rather than setting `method = "theoretical"` with a simulation-based null distribution, the preferred method for visualizing theory-based distributions with infer is now to pass the output of `assume()` as the first argument to `visualize()`. Condition @@ -179,7 +176,7 @@ # confidence interval plots are working Code - res_ <- gss_tbl_boot %>% visualize() + shade_confidence_interval(endpoints = df_error) + res_ <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = df_error) Condition Error in `shade_confidence_interval()`: ! Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. @@ -187,7 +184,7 @@ --- Code - res_ <- gss_tbl_boot %>% visualize() + shade_confidence_interval(endpoints = vec_error) + res_ <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = vec_error) Condition Warning: Expecting `endpoints` to be a 1 x 2 data frame or 2 element vector. Using the first two entries as the `endpoints`. @@ -195,8 +192,8 @@ --- Code - res_ci_vis <- gss_tbl_boot %>% visualize() + shade_confidence_interval( - endpoints = perc_ci, direction = "between") + res_ci_vis <- visualize(gss_tbl_boot) + shade_confidence_interval(endpoints = perc_ci, + direction = "between") Condition Warning: Ignoring unknown parameters: `direction` @@ -206,8 +203,8 @@ # title adapts to not hypothesis testing workflow Code - res_vis_no_hypothesize_both <- gss_tbl_boot_tbl %>% calculate(stat = "t") %>% - visualize(method = "both") + res_vis_no_hypothesize_both <- visualize(calculate(gss_tbl_boot_tbl, stat = "t"), + method = "both") Condition Warning: A t statistic requires a null hypothesis to calculate the observed statistic. @@ -285,7 +282,7 @@ # visualize can handle multiple explanatory variables Code - res_viz_fit_p_val_right <- null_fits %>% visualize() + shade_p_value(obs_stat = obs_fit, + res_viz_fit_p_val_right <- visualize(null_fits) + shade_p_value(obs_stat = obs_fit, direction = "right") # visualize can handle `assume()` output diff --git a/tests/testthat/_snaps/wrappers.md b/tests/testthat/_snaps/wrappers.md index bccf6b77..bcdf03c2 100644 --- a/tests/testthat/_snaps/wrappers.md +++ b/tests/testthat/_snaps/wrappers.md @@ -1,7 +1,7 @@ # t_test works Code - res_ <- gss_tbl %>% t_test(hours ~ sex) + res_ <- t_test(gss_tbl, hours ~ sex) Condition Warning: The statistic is based on a difference or ratio; by default, for difference-based statistics, the explanatory variable is subtracted in the order "male" - "female", or divided in the order "male" / "female" for ratio-based statistics. To specify this order yourself, supply `order = c("male", "female")`. @@ -9,7 +9,7 @@ --- Code - gss_tbl %>% t_test(response = "hours", explanatory = "sex") + t_test(gss_tbl, response = "hours", explanatory = "sex") Condition Error in `t_test()`: ! The response should be a bare variable name (not a string in quotation marks). @@ -33,7 +33,7 @@ # _stat functions work Code - res_ <- gss_tbl %>% chisq_stat(college ~ partyid) + res_ <- chisq_stat(gss_tbl, college ~ partyid) Condition Warning: `chisq_stat()` was deprecated in infer 1.0.0. @@ -42,7 +42,7 @@ --- Code - obs_stat_way <- gss_tbl %>% chisq_stat(college ~ partyid) + obs_stat_way <- chisq_stat(gss_tbl, college ~ partyid) Condition Warning: `chisq_stat()` was deprecated in infer 1.0.0. @@ -51,7 +51,7 @@ --- Code - obs_stat_way <- gss_tbl %>% chisq_stat(partyid ~ NULL) + obs_stat_way <- chisq_stat(gss_tbl, partyid ~ NULL) Condition Warning: `chisq_stat()` was deprecated in infer 1.0.0. @@ -60,7 +60,7 @@ --- Code - obs_stat_way_alt <- gss_tbl %>% chisq_stat(response = partyid) + obs_stat_way_alt <- chisq_stat(gss_tbl, response = partyid) Condition Warning: `chisq_stat()` was deprecated in infer 1.0.0. @@ -69,7 +69,7 @@ --- Code - res_ <- gss_tbl %>% t_stat(hours ~ sex, order = c("male", "female")) + res_ <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female")) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -78,7 +78,7 @@ --- Code - obs_stat_way <- gss_tbl %>% t_stat(hours ~ sex, order = c("male", "female")) + obs_stat_way <- t_stat(gss_tbl, hours ~ sex, order = c("male", "female")) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -87,8 +87,8 @@ --- Code - obs_stat_way_alt <- gss_tbl %>% t_stat(response = hours, explanatory = sex, - order = c("male", "female")) + obs_stat_way_alt <- t_stat(gss_tbl, response = hours, explanatory = sex, order = c( + "male", "female")) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -97,7 +97,7 @@ --- Code - res_ <- gss_tbl %>% t_stat(hours ~ NULL) + res_ <- t_stat(gss_tbl, hours ~ NULL) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -106,7 +106,7 @@ --- Code - obs_stat_way <- gss_tbl %>% t_stat(hours ~ NULL) + obs_stat_way <- t_stat(gss_tbl, hours ~ NULL) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -115,7 +115,7 @@ --- Code - obs_stat_way_alt <- gss_tbl %>% t_stat(response = hours) + obs_stat_way_alt <- t_stat(gss_tbl, response = hours) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -146,7 +146,7 @@ # conf_int argument works Code - res_ <- gss_tbl %>% t_test(hours ~ sex, order = c("female", "male"), conf_int = TRUE, + res_ <- t_test(gss_tbl, hours ~ sex, order = c("female", "male"), conf_int = TRUE, conf_level = 1.1) Condition Error in `t_test()`: @@ -155,8 +155,7 @@ --- Code - no_var_equal <- gss_tbl_small %>% t_stat(hours ~ sex, order = c("female", - "male")) + no_var_equal <- t_stat(gss_tbl_small, hours ~ sex, order = c("female", "male")) Condition Warning: `t_stat()` was deprecated in infer 1.0.0. @@ -165,7 +164,7 @@ --- Code - var_equal <- gss_tbl_small %>% t_stat(hours ~ sex, order = c("female", "male"), + var_equal <- t_stat(gss_tbl_small, hours ~ sex, order = c("female", "male"), var.equal = TRUE) Condition Warning: @@ -242,8 +241,8 @@ # wrappers can handled ordered factors Code - ordered_t_1 <- gss_tbl %>% dplyr::mutate(income = factor(income, ordered = TRUE)) %>% - chisq_test(income ~ partyid) + ordered_t_1 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income, + ordered = TRUE)), income ~ partyid) Condition Warning in `stats::chisq.test()`: Chi-squared approximation may be incorrect @@ -251,8 +250,8 @@ --- Code - ordered_f_1 <- gss_tbl %>% dplyr::mutate(income = factor(income, ordered = FALSE)) %>% - chisq_test(income ~ partyid) + ordered_f_1 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income, + ordered = FALSE)), income ~ partyid) Condition Warning in `stats::chisq.test()`: Chi-squared approximation may be incorrect @@ -260,8 +259,8 @@ --- Code - ordered_t_2 <- gss_tbl %>% dplyr::mutate(income = factor(income, ordered = TRUE)) %>% - chisq_test(partyid ~ income) + ordered_t_2 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income, + ordered = TRUE)), partyid ~ income) Condition Warning in `stats::chisq.test()`: Chi-squared approximation may be incorrect @@ -269,8 +268,8 @@ --- Code - ordered_f_2 <- gss_tbl %>% dplyr::mutate(income = factor(income, ordered = FALSE)) %>% - chisq_test(partyid ~ income) + ordered_f_2 <- chisq_test(dplyr::mutate(gss_tbl, income = factor(income, + ordered = FALSE)), partyid ~ income) Condition Warning in `stats::chisq.test()`: Chi-squared approximation may be incorrect diff --git a/tests/testthat/helper-data.R b/tests/testthat/helper-data.R index bc4e5a37..f34f4878 100644 --- a/tests/testthat/helper-data.R +++ b/tests/testthat/helper-data.R @@ -11,18 +11,18 @@ eps <- if (capabilities("long.double")) { 0.01 } -gss_tbl <- tibble::as_tibble(gss) %>% - dplyr::filter(!(is.na(sex) | is.na(college))) %>% - dplyr::mutate(partyid = as.character(partyid)) %>% +gss_tbl <- tibble::as_tibble(gss) |> + dplyr::filter(!(is.na(sex) | is.na(college))) |> + dplyr::mutate(partyid = as.character(partyid)) |> dplyr::filter(partyid %in% c("ind", "rep", "dem")) -gss_calc <- gss_tbl %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +gss_calc <- gss_tbl |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "diff in props", order = c("female", "male")) -mtcars_df <- mtcars %>% +mtcars_df <- mtcars |> dplyr::mutate( cyl = factor(cyl), vs = factor(vs), @@ -31,8 +31,8 @@ mtcars_df <- mtcars %>% carb = factor(carb) ) -obs_diff <- gss_tbl %>% - specify(college ~ sex, success = "no degree") %>% +obs_diff <- gss_tbl |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "diff in props", order = c("female", "male")) set.seed(2018) @@ -40,18 +40,18 @@ test_df <- tibble::tibble(stat = rnorm(100)) # Data for visualization tests -gss_permute <- gss_tbl %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% +gss_permute <- gss_tbl |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> calculate(stat = "z", order = c("female", "male")) -gss_viz_sim <- gss_permute %>% visualize(method = "simulation") +gss_viz_sim <- gss_permute |> visualize(method = "simulation") # Warnings are about checking conditions for the theoretical method. gss_viz_theor <- suppressWarnings(suppressMessages( - gss_permute %>% visualize(method = "theoretical") + gss_permute |> visualize(method = "theoretical") )) gss_viz_both <- suppressWarnings( - gss_permute %>% visualize(method = "both") + gss_permute |> visualize(method = "both") ) diff --git a/tests/testthat/test-aliases.R b/tests/testthat/test-aliases.R index c7af9ed5..04b32e1c 100644 --- a/tests/testthat/test-aliases.R +++ b/tests/testthat/test-aliases.R @@ -1,24 +1,24 @@ test_that("aliases work", { expect_equal( - gss_calc %>% - get_pvalue(obs_stat = -0.2, direction = "right") %>% + gss_calc |> + get_pvalue(obs_stat = -0.2, direction = "right") |> dplyr::pull(), expected = 1, tolerance = eps ) - expect_silent(gss_permute %>% get_ci()) + expect_silent(gss_permute |> get_ci()) }) test_that("old aliases produce informative error", { expect_snapshot( error = TRUE, - res <- gss_calc %>% + res <- gss_calc |> p_value(obs_stat = -0.2, direction = "right") ) expect_snapshot( error = TRUE, - res_ <- gss_permute %>% conf_int() + res_ <- gss_permute |> conf_int() ) }) diff --git a/tests/testthat/test-assume.R b/tests/testthat/test-assume.R index cee56fa6..ac9b1a36 100644 --- a/tests/testthat/test-assume.R +++ b/tests/testthat/test-assume.R @@ -5,9 +5,9 @@ test_that("distribution description works as expected", { } expect_equal( - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume_( distribution = "F", df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4) @@ -16,22 +16,22 @@ test_that("distribution description works as expected", { ) expect_equal( - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume_( distribution = "F", df = c(length(unique(gss$partyid)) - 1, nrow(gss) - 4) ), - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume_(distribution = "F") ) expect_equal( - gss %>% - specify(response = finrela) %>% + gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -42,14 +42,14 @@ test_that("distribution description works as expected", { "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% + ) |> assume_("Chisq", length(unique(gss$finrela)) - 1), "A Chi-squared distribution with 5 degrees of freedom." ) expect_equal( - gss %>% - specify(response = finrela) %>% + gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -60,15 +60,15 @@ test_that("distribution description works as expected", { "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% + ) |> assume_("Chisq"), "A Chi-squared distribution with 5 degrees of freedom." ) expect_equal( - gss %>% - specify(formula = finrela ~ sex) %>% - hypothesize(null = "independence") %>% + gss |> + specify(formula = finrela ~ sex) |> + hypothesize(null = "independence") |> assume_( distribution = "Chisq", df = (length(unique(gss$finrela)) - 1) * @@ -78,25 +78,25 @@ test_that("distribution description works as expected", { ) expect_equal( - gss %>% - specify(formula = finrela ~ sex) %>% - hypothesize(null = "independence") %>% + gss |> + specify(formula = finrela ~ sex) |> + hypothesize(null = "independence") |> assume_(distribution = "Chisq"), "A Chi-squared distribution with 5 degrees of freedom." ) expect_equal( - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> assume_("t"), "A T distribution with 423 degrees of freedom." ) expect_equal( - gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% + gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> assume_("z"), "A Z distribution." ) @@ -106,90 +106,90 @@ test_that("assume errors with bad arguments", { # supply a bad distribution expect_snapshot( error = TRUE, - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> assume("boop", nrow(gss) - 1) ) # bad number of df arguments expect_snapshot( error = TRUE, - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> assume("t", c(nrow(gss) - 1, 2)) ) expect_snapshot( error = TRUE, - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume("F", nrow(gss) - 1) ) # bad df argument type expect_snapshot( error = TRUE, - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume("F", "boop") ) # df argument possibly passed to dots expect_snapshot( error = TRUE, - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume("F", nrow(gss) - 1, 1) ) expect_snapshot( error = TRUE, - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume("F", nrow(gss) - 1, 1, 2) ) # supply `distribution`s that don't align with the supplied variables expect_snapshot( error = TRUE, - gss %>% - specify(age ~ finrela) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ finrela) |> + hypothesize(null = "independence") |> assume("t", nrow(gss) - 1) ) expect_snapshot( error = TRUE, - gss %>% - specify(age ~ finrela) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ finrela) |> + hypothesize(null = "independence") |> assume("z", nrow(gss) - 1) ) expect_snapshot( error = TRUE, - gss %>% - specify(age ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(age ~ NULL) |> + hypothesize(null = "point", mu = 40) |> assume("z", nrow(gss) - 1) ) # supply bad `x` arguments expect_snapshot( error = TRUE, - gss %>% + gss |> assume("z", nrow(gss) - 1) ) expect_snapshot( error = TRUE, - "boop" %>% + "boop" |> assume("z", nrow(gss) - 1) ) }) @@ -197,33 +197,33 @@ test_that("assume errors with bad arguments", { test_that("assume() handles automatic df gracefully", { expect_equal( expect_silent( - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t") ), expect_silent( - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t") ) ) expect_snapshot( - res_ <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + res_ <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t", nrow(gss) - 2) ) # t.test param with var.equal = FALSE expect_equal( expect_silent( - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - assume(distribution = "t", 423) %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + assume(distribution = "t", 423) |> attr("df") ), 423 @@ -232,10 +232,10 @@ test_that("assume() handles automatic df gracefully", { # t.test param with var.equal = TRUE expect_equal( expect_silent( - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - assume(distribution = "t", 498) %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + assume(distribution = "t", 498) |> attr("df") ), 498 @@ -244,10 +244,10 @@ test_that("assume() handles automatic df gracefully", { # min(n1 - 1, n2 - 1) expect_equal( expect_silent( - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - assume(distribution = "t", 173) %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + assume(distribution = "t", 173) |> attr("df") ), 173 @@ -256,10 +256,10 @@ test_that("assume() handles automatic df gracefully", { # n1 + n2 - 2 expect_equal( expect_silent( - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - assume(distribution = "t", 498) %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + assume(distribution = "t", 498) |> attr("df") ), 498 @@ -267,9 +267,9 @@ test_that("assume() handles automatic df gracefully", { }) test_that("assume() brings along supplied arguments", { - t_dist <- gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% + t_dist <- gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> assume("t") expect_equal( @@ -292,9 +292,9 @@ test_that("assume() brings along supplied arguments", { attr(t_dist, "distr_param") ) - f_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + f_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume(distribution = "F") expect_equal( diff --git a/tests/testthat/test-calculate.R b/tests/testthat/test-calculate.R index ec485085..52d8a5f9 100644 --- a/tests/testthat/test-calculate.R +++ b/tests/testthat/test-calculate.R @@ -9,9 +9,9 @@ test_that("calculate checks `stat` argument", { expect_snapshot(error = TRUE, calculate(gss_tbl, stat = 3)) # stat is one of the implemented options with informative error - gen_gss_slope <- gss_tbl %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% + gen_gss_slope <- gss_tbl |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "slopee")) @@ -20,13 +20,13 @@ test_that("calculate checks `stat` argument", { expect_snapshot(error = TRUE, calculate(gen_gss_slope, stat = "chi sq")) # stat can be one of the allowed aliases - chisq_df <- gss %>% specify(formula = finrela ~ sex) + chisq_df <- gss |> specify(formula = finrela ~ sex) expect_equal( calculate(chisq_df, stat = "Chisq")[["stat"]], calculate(chisq_df, stat = "chisq")[["stat"]] ) - f_df <- gss %>% specify(age ~ partyid) + f_df <- gss |> specify(age ~ partyid) expect_equal( calculate(f_df, stat = "F")[["stat"]], calculate(f_df, stat = "f")[["stat"]] @@ -36,31 +36,31 @@ test_that("calculate checks `stat` argument", { test_that("errors informatively with incompatible stat vs hypothesis", { expect_snapshot( error = TRUE, - gss %>% - specify(college ~ sex, success = "degree") %>% - hypothesise(null = "point", p = .40) %>% + gss |> + specify(college ~ sex, success = "degree") |> + hypothesise(null = "point", p = .40) |> calculate(stat = "diff in props", order = c("female", "male")) ) expect_snapshot( error = TRUE, - gss %>% - specify(college ~ sex, success = "degree") %>% - hypothesise(null = "point", p = .40) %>% - generate(reps = 10, type = "draw") %>% + gss |> + specify(college ~ sex, success = "degree") |> + hypothesise(null = "point", p = .40) |> + generate(reps = 10, type = "draw") |> calculate(stat = "diff in props", order = c("female", "male")) ) expect_silent( - gss %>% - specify(hours ~ college) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(hours ~ college) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t", order = c("degree", "no degree")) ) expect_silent( - gss %>% - specify(response = finrela) %>% + gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -71,7 +71,7 @@ test_that("errors informatively with incompatible stat vs hypothesis", { "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% + ) |> calculate(stat = "Chisq") ) }) @@ -79,43 +79,43 @@ test_that("errors informatively with incompatible stat vs hypothesis", { test_that("response attribute has been set", { expect_snapshot( error = TRUE, - tibble::as_tibble(gss) %>% calculate(stat = "median") + tibble::as_tibble(gss) |> calculate(stat = "median") ) }) test_that("variable chosen is of appropriate class (one var problems)", { # One sample chisq example - gen_gss1 <- gss_tbl %>% - specify(partyid ~ NULL) %>% + gen_gss1 <- gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = .5, "rep" = .25, "ind" = .25) - ) %>% + ) |> generate(reps = 10, type = "draw") expect_snapshot(error = TRUE, calculate(gen_gss1, stat = "mean")) # One mean example - gen_gss_num <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + gen_gss_num <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 40) |> generate(reps = 10, type = "bootstrap") expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "prop")) expect_silent(calculate(gen_gss_num, stat = "mean")) expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "median")) expect_snapshot(error = TRUE, calculate(gen_gss_num, stat = "sd")) - gen_gss_num2 <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", med = 40) %>% + gen_gss_num2 <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", med = 40) |> generate(reps = 10, type = "bootstrap") expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "prop")) expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "mean")) expect_silent(calculate(gen_gss_num2, stat = "median")) expect_snapshot(error = TRUE, calculate(gen_gss_num2, stat = "sd")) - gen_gss_num3 <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", sigma = 0.6) %>% + gen_gss_num3 <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", sigma = 0.6) |> generate(reps = 10, type = "bootstrap") expect_snapshot(error = TRUE, calculate(gen_gss_num3, stat = "prop")) expect_snapshot(error = TRUE, calculate(gen_gss_num3, stat = "mean")) @@ -124,9 +124,9 @@ test_that("variable chosen is of appropriate class (one var problems)", { }) test_that("grouping (explanatory) variable is a factor (two var problems)", { - gen_gss2 <- gss_tbl %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% + gen_gss2 <- gss_tbl |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(error = TRUE, calculate(gen_gss2, stat = "diff in means")) expect_snapshot(error = TRUE, calculate(gen_gss2, stat = "diff in medians")) @@ -136,9 +136,9 @@ test_that("grouping (explanatory) variable is a factor (two var problems)", { }) test_that("grouping (explanatory) variable is numeric (two var problems)", { - gen_gss2a <- gss_tbl %>% - specify(partyid ~ hours) %>% - hypothesize(null = "independence") %>% + gen_gss2a <- gss_tbl |> + specify(partyid ~ hours) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(error = TRUE, calculate(gen_gss2a, stat = "slope")) # Since shifts to "Slope with t" @@ -147,16 +147,16 @@ test_that("grouping (explanatory) variable is numeric (two var problems)", { }) test_that("response variable is a factor (two var problems)", { - gen_gss3 <- gss_tbl %>% - specify(hours ~ partyid) %>% - hypothesize(null = "independence") %>% + gen_gss3 <- gss_tbl |> + specify(hours ~ partyid) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(error = TRUE, calculate(gen_gss3, stat = "Chisq")) # explanatory has more than 2 levels - gen_gss4 <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% + gen_gss4 <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "diff in props")) expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "ratio of props")) @@ -165,9 +165,9 @@ test_that("response variable is a factor (two var problems)", { expect_snapshot(error = TRUE, calculate(gen_gss4, stat = "t")) # Check successful diff in props - gen_gss4a <- gss_tbl %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% + gen_gss4a <- gss_tbl |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_silent( calculate(gen_gss4a, stat = "diff in props", order = c("female", "male")) @@ -184,8 +184,8 @@ test_that("response variable is a factor (two var problems)", { expect_snapshot(res_ <- calculate(gen_gss4a, stat = "z")) }) -gen_gss5 <- gss_tbl %>% - specify(partyid ~ hours) %>% +gen_gss5 <- gss_tbl |> + specify(partyid ~ hours) |> generate(reps = 10, type = "bootstrap") test_that("response variable is numeric (two var problems)", { @@ -193,9 +193,9 @@ test_that("response variable is numeric (two var problems)", { }) test_that("two sample mean-type problems are working", { - gen_gss5a <- gss_tbl %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% + gen_gss5a <- gss_tbl |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot(res_ <- calculate(gen_gss5a, stat = "diff in means")) expect_silent( @@ -217,17 +217,17 @@ test_that("properties of tibble passed-in are correct", { expect_s3_class(gen_gss5, "grouped_df") expect_equal(ncol(gen_gss5), 3) - gen_gss6 <- gss_tbl %>% - specify(hours ~ NULL) %>% + gen_gss6 <- gss_tbl |> + specify(hours ~ NULL) |> generate(reps = 10) expect_equal(ncol(gen_gss6), 2) expect_snapshot(error = TRUE, calculate(gen_gss6)) }) test_that("order is working for diff in means", { - gen_gss7 <- gss_tbl %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% + gen_gss7 <- gss_tbl |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_equal( nrow(calculate( @@ -248,55 +248,55 @@ test_that("order is working for diff in means", { }) test_that("chi-square matches chisq.test value", { - gen_gss8 <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% + gen_gss8 <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") infer_way <- calculate(gen_gss8, stat = "Chisq") # chisq.test way suppressWarnings( - trad_way <- gen_gss8 %>% - dplyr::group_by(replicate) %>% + trad_way <- gen_gss8 |> + dplyr::group_by(replicate) |> dplyr::do(broom::tidy( stats::chisq.test(table(.$sex, .$partyid)) - )) %>% - dplyr::ungroup() %>% + )) |> + dplyr::ungroup() |> dplyr::select(replicate, stat = statistic) ) # Equal not including attributes expect_equal(infer_way, trad_way, ignore_attr = TRUE) - gen_gss9 <- gss_tbl %>% - specify(partyid ~ NULL) %>% + gen_gss9 <- gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = 1 / 3, "rep" = 1 / 3, "ind" = 1 / 3) - ) %>% + ) |> generate(reps = 10, type = "draw") infer_way <- calculate(gen_gss9, stat = "Chisq") # chisq.test way - trad_way <- gen_gss9 %>% - dplyr::group_by(replicate) %>% + trad_way <- gen_gss9 |> + dplyr::group_by(replicate) |> dplyr::do(broom::tidy( stats::chisq.test(table(.$partyid)) - )) %>% + )) |> dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way, ignore_attr = TRUE) - gen_gss9a <- gss_tbl %>% - specify(partyid ~ NULL) %>% + gen_gss9a <- gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = 0.8, "rep" = 0.1, "ind" = 0.1) - ) %>% + ) |> generate(reps = 10, type = "draw") infer_way <- calculate(gen_gss9a, stat = "Chisq") # chisq.test way - trad_way <- gen_gss9a %>% - dplyr::group_by(replicate) %>% + trad_way <- gen_gss9a |> + dplyr::group_by(replicate) |> dplyr::do(broom::tidy( stats::chisq.test(table(.$partyid), p = c(0.8, 0.1, 0.1)) - )) %>% + )) |> dplyr::select(replicate, stat = statistic) expect_equal(infer_way, trad_way, ignore_attr = TRUE) @@ -311,20 +311,20 @@ test_that("chi-square matches chisq.test value", { sex = c(rep(x = "male", times = 44), rep(x = "female", times = 49)) ) - promote_f <- dat %>% - specify(action ~ sex, success = "promote") %>% + promote_f <- dat |> + specify(action ~ sex, success = "promote") |> calculate(stat = "Chisq", order = c("male", "female"), correct = FALSE) - promote_t <- dat %>% - specify(action ~ sex, success = "promote") %>% + promote_t <- dat |> + specify(action ~ sex, success = "promote") |> calculate(stat = "Chisq", order = c("male", "female"), correct = TRUE) expect_false(promote_f$stat == promote_t$stat) expect_snapshot( error = TRUE, - dat %>% - specify(action ~ sex, success = "promote") %>% + dat |> + specify(action ~ sex, success = "promote") |> calculate(stat = "Chisq", order = c("male", "female"), correct = "boop") ) }) @@ -337,9 +337,9 @@ test_that("chi-square works with factors with unused levels", { # Unused levels in explanatory variable expect_snapshot( - out <- test_tbl %>% - specify(y ~ x) %>% - calculate(stat = "Chisq") %>% + out <- test_tbl |> + specify(y ~ x) |> + calculate(stat = "Chisq") |> pull() ) expect_true(!is.na(out)) @@ -348,26 +348,26 @@ test_that("chi-square works with factors with unused levels", { test_tbl[["x"]] <- factor(test_tbl[["x"]]) levels(test_tbl[["y"]]) <- c("e", "f", "g") expect_snapshot( - out <- test_tbl %>% - specify(y ~ x) %>% - calculate(stat = "Chisq") %>% + out <- test_tbl |> + specify(y ~ x) |> + calculate(stat = "Chisq") |> pull() ) expect_true(!is.na(out)) }) test_that("`order` is working", { - gen_gss_tbl10 <- gss_tbl %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% + gen_gss_tbl10 <- gss_tbl |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot( error = TRUE, calculate(gen_gss_tbl10, stat = "diff in means", order = c(TRUE, FALSE)) ) - gen_gss_tbl11 <- gss_tbl %>% - specify(hours ~ college) %>% + gen_gss_tbl11 <- gss_tbl |> + specify(hours ~ college) |> generate(reps = 10, type = "bootstrap") expect_snapshot( error = TRUE, @@ -410,44 +410,44 @@ test_that("`order` is working", { ) }) -gen_gss_tbl12 <- gss_tbl %>% - specify(college ~ NULL, success = "no degree") %>% - hypothesize(null = "point", p = 0.3) %>% +gen_gss_tbl12 <- gss_tbl |> + specify(college ~ NULL, success = "no degree") |> + hypothesize(null = "point", p = 0.3) |> generate(reps = 10, type = "draw") test_that('success is working for stat = "prop"', { - expect_silent(gen_gss_tbl12 %>% calculate(stat = "prop")) - expect_silent(gen_gss_tbl12 %>% calculate(stat = "z")) + expect_silent(gen_gss_tbl12 |> calculate(stat = "prop")) + expect_silent(gen_gss_tbl12 |> calculate(stat = "z")) }) test_that("NULL response gives error", { - gss_tbl_improp <- tibble::as_tibble(gss_tbl) %>% + gss_tbl_improp <- tibble::as_tibble(gss_tbl) |> dplyr::select(hours, age) - expect_snapshot(error = TRUE, gss_tbl_improp %>% calculate(stat = "mean")) + expect_snapshot(error = TRUE, gss_tbl_improp |> calculate(stat = "mean")) }) test_that("Permute F test works", { - gen_gss_tbl13 <- gss_tbl %>% - specify(hours ~ partyid) %>% - hypothesize(null = "independence") %>% + gen_gss_tbl13 <- gss_tbl |> + specify(hours ~ partyid) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_silent(calculate(gen_gss_tbl13, stat = "F")) }) test_that("Permute slope/correlation test works", { - gen_gss_tbl14 <- gss_tbl %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% + gen_gss_tbl14 <- gss_tbl |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_silent(calculate(gen_gss_tbl14, stat = "slope")) expect_silent(calculate(gen_gss_tbl14, stat = "correlation")) }) test_that("order being given when not needed gives warning", { - gen_gss_tbl15 <- gss_tbl %>% - specify(college ~ partyid, success = "no degree") %>% - hypothesize(null = "independence") %>% + gen_gss_tbl15 <- gss_tbl |> + specify(college ~ partyid, success = "no degree") |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") expect_snapshot( res_ <- calculate(gen_gss_tbl15, stat = "Chisq", order = c("dem", "ind")) @@ -457,70 +457,70 @@ test_that("order being given when not needed gives warning", { ## Breaks oldrel build. Commented out for now. # test_that("warning given if calculate without generate", { # expect_snapshot( -# gss_tbl %>% -# specify(partyid ~ NULL) %>% +# gss_tbl |> +# specify(partyid ~ NULL) |> # hypothesize( # null = "point", # p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) -# ) %>% -# # generate(reps = 10, type = "draw") %>% +# ) |> +# # generate(reps = 10, type = "draw") |> # calculate(stat = "Chisq") # ) # }) -test_that("specify() %>% calculate() works", { +test_that("specify() |> calculate() works", { expect_silent( - gss_tbl %>% specify(hours ~ NULL) %>% calculate(stat = "mean") + gss_tbl |> specify(hours ~ NULL) |> calculate(stat = "mean") ) expect_snapshot( - res_ <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% + res_ <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 4) |> calculate(stat = "mean") ) expect_snapshot( - res_ <- gss_tbl %>% specify(partyid ~ NULL) %>% calculate(stat = "Chisq") + res_ <- gss_tbl |> specify(partyid ~ NULL) |> calculate(stat = "Chisq") ) }) test_that("One sample t hypothesis test is working", { expect_snapshot( - res_ <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 1) %>% - generate(reps = 10) %>% + res_ <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 1) |> + generate(reps = 10) |> calculate(stat = "t") ) expect_snapshot( - res_ <- gss_tbl %>% - specify(response = hours) %>% + res_ <- gss_tbl |> + specify(response = hours) |> calculate(stat = "t") ) - gss_tbl %>% - specify(response = hours) %>% + gss_tbl |> + specify(response = hours) |> calculate(stat = "t", mu = 1) }) test_that("specify done before calculate", { - gss_tbl_mean <- gss_tbl %>% + gss_tbl_mean <- gss_tbl |> dplyr::select(stat = hours) expect_snapshot(error = TRUE, calculate(gss_tbl_mean, stat = "mean")) - gss_tbl_prop <- gss_tbl %>% dplyr::select(college) + gss_tbl_prop <- gss_tbl |> dplyr::select(college) attr(gss_tbl_prop, "response") <- "college" expect_snapshot(error = TRUE, calculate(gss_tbl_prop, stat = "prop")) expect_snapshot(error = TRUE, calculate(gss_tbl_prop, stat = "count")) }) test_that("chisq GoF has params specified for observed stat", { - no_params <- gss_tbl %>% specify(response = partyid) + no_params <- gss_tbl |> specify(response = partyid) expect_snapshot(res_ <- calculate(no_params, stat = "Chisq")) - params <- gss_tbl %>% - specify(response = partyid) %>% + params <- gss_tbl |> + specify(response = partyid) |> hypothesize( null = "point", p = c("dem" = .5, "rep" = .25, "ind" = .25) @@ -530,9 +530,9 @@ test_that("chisq GoF has params specified for observed stat", { test_that("One sample t bootstrap is working", { expect_snapshot( - res_ <- gss_tbl %>% - specify(hours ~ NULL) %>% - generate(reps = 10, type = "bootstrap") %>% + res_ <- gss_tbl |> + specify(hours ~ NULL) |> + generate(reps = 10, type = "bootstrap") |> calculate(stat = "t") ) }) @@ -541,11 +541,11 @@ test_that("calculate doesn't depend on order of `p` (#122)", { calc_chisq <- function(p) { set.seed(111) - gss_tbl %>% - specify(partyid ~ NULL) %>% - hypothesize(null = "point", p = p) %>% - generate(reps = 500, type = "draw") %>% - calculate("Chisq") %>% + gss_tbl |> + specify(partyid ~ NULL) |> + hypothesize(null = "point", p = p) |> + generate(reps = 500, type = "draw") |> + calculate("Chisq") |> get_p_value(obs_stat = 5, direction = "right") } @@ -565,22 +565,23 @@ test_that("calc_impl_diff_f works", { }) test_that("calc_impl.sum works", { + .subset_1 <- function(x) x[[1]] expect_equal( - gss_tbl %>% - specify(hours ~ NULL) %>% - calculate(stat = "sum") %>% - `[[`(1), + gss_tbl |> + specify(hours ~ NULL) |> + calculate(stat = "sum") |> + .subset_1(), sum(gss_tbl$hours), tolerance = eps ) - gen_gss_tbl16 <- gss_tbl %>% - specify(hours ~ NULL) %>% + gen_gss_tbl16 <- gss_tbl |> + specify(hours ~ NULL) |> generate(10) expect_equal( - gen_gss_tbl16 %>% calculate(stat = "sum"), - gen_gss_tbl16 %>% dplyr::summarise(stat = sum(hours)), + gen_gss_tbl16 |> calculate(stat = "sum"), + gen_gss_tbl16 |> dplyr::summarise(stat = sum(hours)), ignore_attr = TRUE ) }) @@ -597,24 +598,25 @@ test_that("calc_impl_success_f works", { }) test_that("calc_impl.count works", { + .subset_1 <- function(x) x[[1]] expect_equal( - gss_tbl %>% - specify(college ~ NULL, success = "no degree") %>% - calculate(stat = "count") %>% - `[[`(1), + gss_tbl |> + specify(college ~ NULL, success = "no degree") |> + calculate(stat = "count") |> + .subset_1(), sum(gss_tbl$college == "no degree"), tolerance = eps ) expect_equal( - gen_gss_tbl12 %>% calculate(stat = "count"), - gen_gss_tbl12 %>% dplyr::summarise(stat = sum(college == "no degree")), + gen_gss_tbl12 |> calculate(stat = "count"), + gen_gss_tbl12 |> dplyr::summarise(stat = sum(college == "no degree")), ignore_attr = TRUE ) }) -gss_biased <- gss_tbl %>% +gss_biased <- gss_tbl |> dplyr::filter(!(sex == "male" & college == "no degree" & age < 40)) gss_tbl <- table(gss_biased$sex, gss_biased$college) @@ -626,9 +628,9 @@ test_that("calc_impl.odds_ratio works", { } expect_equal( - gss_biased %>% - specify(college ~ sex, success = "degree") %>% - calculate(stat = "odds ratio", order = c("female", "male")) %>% + gss_biased |> + specify(college ~ sex, success = "degree") |> + calculate(stat = "odds ratio", order = c("female", "male")) |> dplyr::pull(), expected = base_odds_ratio, tolerance = eps @@ -642,9 +644,9 @@ test_that("calc_impl.ratio_of_props works", { } expect_equal( - gss_biased %>% - specify(college ~ sex, success = "degree") %>% - calculate(stat = "ratio of props", order = c("male", "female")) %>% + gss_biased |> + specify(college ~ sex, success = "degree") |> + calculate(stat = "ratio of props", order = c("male", "female")) |> dplyr::pull(), expected = base_ratio_of_props, tolerance = eps @@ -658,9 +660,9 @@ test_that("calc_impl.ratio_of_means works", { } expect_equal( - gss %>% - specify(age ~ college) %>% - calculate("ratio of means", order = c("degree", "no degree")) %>% + gss |> + specify(age ~ college) |> + calculate("ratio of means", order = c("degree", "no degree")) |> dplyr::pull(), expected = base_ratio_of_means, tolerance = eps @@ -668,10 +670,10 @@ test_that("calc_impl.ratio_of_means works", { }) test_that("calc_impl.z works for one sample proportions", { - infer_obs_stat <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% - calculate(stat = "z") %>% + infer_obs_stat <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> + calculate(stat = "z") |> dplyr::pull() base_obs_stat <- @@ -683,59 +685,59 @@ test_that("calc_impl.z works for one sample proportions", { test_that("calculate warns informatively with insufficient null", { expect_snapshot( - res_ <- gss %>% - specify(response = sex, success = "female") %>% + res_ <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "z") ) expect_snapshot( - res_ <- gss %>% - specify(hours ~ NULL) %>% + res_ <- gss |> + specify(hours ~ NULL) |> calculate(stat = "t") ) expect_snapshot( - res_ <- gss %>% - specify(response = partyid) %>% + res_ <- gss |> + specify(response = partyid) |> calculate(stat = "Chisq") ) }) test_that("calculate messages informatively with excessive null", { expect_snapshot( - res_ <- gss %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + res_ <- gss |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "mean") ) expect_snapshot( - res_ <- gss %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", sigma = 10) %>% + res_ <- gss |> + specify(hours ~ NULL) |> + hypothesize(null = "point", sigma = 10) |> calculate(stat = "sd") ) expect_snapshot( - res_ <- gss %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% + res_ <- gss |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> calculate("diff in means", order = c("no degree", "degree")) ) }) test_that("calculate can handle variables named x", { expect_silent({ - t_0 <- data.frame(x = 1:10) %>% - specify(response = x) %>% - hypothesise(null = "point", mu = 0) %>% + t_0 <- data.frame(x = 1:10) |> + specify(response = x) |> + hypothesise(null = "point", mu = 0) |> calculate(stat = "t") }) expect_silent({ - t_1 <- data.frame(sample = 1:10) %>% - specify(response = sample) %>% - hypothesise(null = "point", mu = 0) %>% + t_1 <- data.frame(sample = 1:10) |> + specify(response = sample) |> + hypothesise(null = "point", mu = 0) |> calculate(stat = "t") }) @@ -749,26 +751,26 @@ test_that("calculate can handle variables named x", { test_that("calculate errors out with multiple explanatory variables", { expect_snapshot( error = TRUE, - gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> calculate(stat = "t") ) expect_snapshot( error = TRUE, - gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 3, type = "permute") %>% + gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 3, type = "permute") |> calculate(stat = "t") ) }) test_that("reported standard errors are correct", { # mean --------------------------------------------------------------------- - x_bar <- gss %>% - specify(response = hours) %>% + x_bar <- gss |> + specify(response = hours) |> calculate(stat = "mean") expect_equal( @@ -778,8 +780,8 @@ test_that("reported standard errors are correct", { ) # prop --------------------------------------------------------------------- - p_hat <- gss %>% - specify(response = sex, success = "female") %>% + p_hat <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") expect_equal( @@ -791,8 +793,8 @@ test_that("reported standard errors are correct", { ) # diff in means ------------------------------------------------------------ - diff_bar <- gss %>% - specify(hours ~ college) %>% + diff_bar <- gss |> + specify(hours ~ college) |> calculate(stat = "diff in means", order = c("no degree", "degree")) expect_equal( @@ -807,8 +809,8 @@ test_that("reported standard errors are correct", { ) # diff in props ------------------------------------------------------------ - diff_hat <- gss %>% - specify(sex ~ college, success = "female") %>% + diff_hat <- gss |> + specify(sex ~ college, success = "female") |> calculate(stat = "diff in props", order = c("no degree", "degree")) expect_equal( @@ -831,8 +833,8 @@ test_that("reported standard errors are correct", { # ratio of means ------------------------------------------------------------ # this stat shares machinery with others that report se, so make # sure that we don't - rat_hat <- gss %>% - specify(hours ~ college) %>% + rat_hat <- gss |> + specify(hours ~ college) |> calculate(stat = "ratio of means", order = c("no degree", "degree")) expect_null(attr(rat_hat, "se")) diff --git a/tests/testthat/test-fit.R b/tests/testthat/test-fit.R index eb96e28b..4a1487a3 100644 --- a/tests/testthat/test-fit.R +++ b/tests/testthat/test-fit.R @@ -1,7 +1,7 @@ -x1 <- gss[1:100, ] %>% specify(response = hours) -x2 <- gss[1:100, ] %>% specify(hours ~ NULL) -x3 <- gss[1:100, ] %>% specify(response = hours, explanatory = c(age, college)) -x4 <- gss[1:100, ] %>% specify(hours ~ age + college) +x1 <- gss[1:100, ] |> specify(response = hours) +x2 <- gss[1:100, ] |> specify(hours ~ NULL) +x3 <- gss[1:100, ] |> specify(response = hours, explanatory = c(age, college)) +x4 <- gss[1:100, ] |> specify(hours ~ age + college) test_that("get_formula helper works", { expect_false(has_attr(x1, "formula")) @@ -37,7 +37,7 @@ test_that("fit_linear_model helper works", { expect_equal( c("character", "numeric"), - purrr::map_chr(x3_m, class) %>% unname() + purrr::map_chr(x3_m, class) |> unname() ) expect_equal( @@ -47,11 +47,11 @@ test_that("fit_linear_model helper works", { }) test_that("fit.infer can handle generated objects", { - x3_fit <- x3 %>% fit() + x3_fit <- x3 |> fit() - x3_gen_fit <- x3 %>% - hypothesize(null = 'independence') %>% - generate(reps = 2, type = "permute") %>% + x3_gen_fit <- x3 |> + hypothesize(null = 'independence') |> + generate(reps = 2, type = "permute") |> fit() expect_equal(unique(x3_fit$term), unique(x3_gen_fit$term)) @@ -66,15 +66,15 @@ test_that("fit.infer can handle generated objects", { test_that("fit.infer messages informatively on excessive null", { expect_snapshot( - res_ <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + res_ <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> fit() ) expect_silent( - gss %>% - specify(hours ~ age + college) %>% + gss |> + specify(hours ~ age + college) |> fit() ) }) @@ -82,53 +82,53 @@ test_that("fit.infer messages informatively on excessive null", { test_that("fit.infer logistic regression works", { # linear regression default works expect_equal( - gss %>% - specify(hours ~ age + college) %>% + gss |> + specify(hours ~ age + college) |> fit(), - gss %>% - specify(hours ~ age + college) %>% + gss |> + specify(hours ~ age + college) |> fit(family = stats::gaussian) ) # logistic regression default works expect_equal( - gss %>% - specify(college ~ age + hours) %>% + gss |> + specify(college ~ age + hours) |> fit(family = stats::binomial), - gss %>% - specify(college ~ age + hours) %>% + gss |> + specify(college ~ age + hours) |> fit() ) # errors informatively with multinomial response variable expect_snapshot( error = TRUE, - gss %>% - specify(finrela ~ age + college) %>% + gss |> + specify(finrela ~ age + college) |> fit() ) # works as expected for `generate()`d objects - fit_gen <- gss %>% - specify(college ~ age + hours) %>% - hypothesize(null = "independence") %>% - generate(type = "permute", reps = 2) %>% + fit_gen <- gss |> + specify(college ~ age + hours) |> + hypothesize(null = "independence") |> + generate(type = "permute", reps = 2) |> fit() - fit_obs <- gss %>% - specify(college ~ age + hours) %>% + fit_obs <- gss |> + specify(college ~ age + hours) |> fit() expect_equal(nrow(fit_gen), nrow(fit_obs) * 2) expect_equal(ncol(fit_gen), ncol(fit_obs) + 1) # responds to success argument - fit_deg <- gss %>% - specify(college ~ age + hours, success = "degree") %>% + fit_deg <- gss |> + specify(college ~ age + hours, success = "degree") |> fit() - fit_no_deg <- gss %>% - specify(college ~ age + hours, success = "no degree") %>% + fit_no_deg <- gss |> + specify(college ~ age + hours, success = "no degree") |> fit() expect_equal(fit_deg$term, fit_no_deg$term) diff --git a/tests/testthat/test-generate.R b/tests/testthat/test-generate.R index a51afb5f..fbadc291 100644 --- a/tests/testthat/test-generate.R +++ b/tests/testthat/test-generate.R @@ -1,37 +1,37 @@ -hyp_prop <- mtcars_df %>% - specify(response = am, success = "1") %>% +hyp_prop <- mtcars_df |> + specify(response = am, success = "1") |> hypothesize(null = "point", p = .5) -hyp_diff_in_props <- mtcars_df %>% - specify(am ~ vs, success = "1") %>% +hyp_diff_in_props <- mtcars_df |> + specify(am ~ vs, success = "1") |> hypothesize(null = "independence") -hyp_chisq_gof <- mtcars_df %>% - specify(response = cyl) %>% +hyp_chisq_gof <- mtcars_df |> + specify(response = cyl) |> hypothesize(null = "point", p = c("4" = 1 / 3, "6" = 1 / 3, "8" = 1 / 3)) -hyp_chisq_ind <- mtcars_df %>% - specify(cyl ~ vs) %>% +hyp_chisq_ind <- mtcars_df |> + specify(cyl ~ vs) |> hypothesize(null = "independence") -hyp_mean <- mtcars_df %>% - specify(response = mpg) %>% +hyp_mean <- mtcars_df |> + specify(response = mpg) |> hypothesize(null = "point", mu = 3) -hyp_median <- mtcars_df %>% - specify(response = mpg) %>% +hyp_median <- mtcars_df |> + specify(response = mpg) |> hypothesize(null = "point", med = 3) -hyp_sd <- mtcars_df %>% - specify(response = mpg) %>% +hyp_sd <- mtcars_df |> + specify(response = mpg) |> hypothesize(null = "point", sigma = 7) -hyp_diff_in_means <- mtcars_df %>% - specify(mpg ~ vs) %>% +hyp_diff_in_means <- mtcars_df |> + specify(mpg ~ vs) |> hypothesize(null = "independence") -hyp_anova <- mtcars_df %>% - specify(mpg ~ cyl) %>% +hyp_anova <- mtcars_df |> + specify(mpg ~ cyl) |> hypothesize(null = "independence") test_that("cohesion with type argument", { @@ -76,69 +76,69 @@ test_that("sensible output", { }) test_that("auto `type` works (generate)", { - one_mean <- mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% + one_mean <- mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL + hypothesize(null = "point", mu = 25) |> generate(reps = 100) - one_nonshift_mean <- mtcars_df %>% - specify(response = mpg) %>% + one_nonshift_mean <- mtcars_df |> + specify(response = mpg) |> generate(reps = 100) - one_median <- mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% + one_median <- mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL + hypothesize(null = "point", med = 26) |> generate(reps = 100) - one_prop <- mtcars_df %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% + one_prop <- mtcars_df |> + specify(response = am, success = "1") |> # formula alt: am ~ NULL + hypothesize(null = "point", p = .25) |> generate(reps = 100) - two_props <- mtcars_df %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% + two_props <- mtcars_df |> + specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs + hypothesize(null = "independence") |> generate(reps = 100) - gof_chisq <- mtcars_df %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% + gof_chisq <- mtcars_df |> + specify(cyl ~ NULL) |> # alt: response = cyl + hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) |> generate(reps = 100) - indep_chisq <- mtcars_df %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% + indep_chisq <- mtcars_df |> + specify(cyl ~ am) |> # alt: response = cyl, explanatory = am + hypothesize(null = "independence") |> generate(reps = 100) - two_means <- mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% + two_means <- mtcars_df |> + specify(mpg ~ am) |> # alt: response = mpg, explanatory = am + hypothesize(null = "independence") |> generate(reps = 100) - anova_f <- mtcars_df %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% + anova_f <- mtcars_df |> + specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") |> generate(reps = 100) - slopes <- mtcars_df %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% + slopes <- mtcars_df |> + specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") |> generate(reps = 100) - one_nonshift_prop <- mtcars_df %>% - specify(response = am, success = "1") %>% + one_nonshift_prop <- mtcars_df |> + specify(response = am, success = "1") |> generate(reps = 100) - two_means_boot <- mtcars_df %>% - specify(mpg ~ am) %>% + two_means_boot <- mtcars_df |> + specify(mpg ~ am) |> generate(reps = 100) - two_props_boot <- mtcars_df %>% - specify(am ~ vs, success = "1") %>% + two_props_boot <- mtcars_df |> + specify(am ~ vs, success = "1") |> generate(reps = 100) - slope_boot <- mtcars_df %>% - specify(mpg ~ hp) %>% + slope_boot <- mtcars_df |> + specify(mpg ~ hp) |> generate(reps = 100) expect_equal(attr(one_mean, "type"), "bootstrap") @@ -158,103 +158,103 @@ test_that("auto `type` works (generate)", { expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", mu = 25) %>% + mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL + hypothesize(null = "point", mu = 25) |> generate(reps = 100, type = "permute") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(response = mpg) %>% + res_ <- mtcars_df |> + specify(response = mpg) |> generate(reps = 100, type = "draw") ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL - hypothesize(null = "point", med = 26) %>% + res_ <- mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL + hypothesize(null = "point", med = 26) |> generate(reps = 100, type = "permute") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL - hypothesize(null = "point", p = .25) %>% + res_ <- mtcars_df |> + specify(response = am, success = "1") |> # formula alt: am ~ NULL + hypothesize(null = "point", p = .25) |> generate(reps = 100, type = "bootstrap") ) expect_silent( - mtcars_df %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs - hypothesize(null = "independence") %>% + mtcars_df |> + specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs + hypothesize(null = "independence") |> generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(cyl ~ NULL) %>% # alt: response = cyl - hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>% + res_ <- mtcars_df |> + specify(cyl ~ NULL) |> # alt: response = cyl + hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) |> generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am - hypothesize(null = "independence") %>% + res_ <- mtcars_df |> + specify(cyl ~ am) |> # alt: response = cyl, explanatory = am + hypothesize(null = "independence") |> generate(reps = 100, type = "draw") ) expect_silent( - mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am - hypothesize(null = "independence") %>% + mtcars_df |> + specify(mpg ~ am) |> # alt: response = mpg, explanatory = am + hypothesize(null = "independence") |> generate(reps = 100, type = "bootstrap") ) expect_silent( - mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am + mtcars_df |> + specify(mpg ~ am) |> # alt: response = mpg, explanatory = am generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% + res_ <- mtcars_df |> + specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") |> generate(reps = 100, type = "draw") ) expect_silent( - mtcars_df %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl - hypothesize(null = "independence") %>% + mtcars_df |> + specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl + hypothesize(null = "independence") |> generate(reps = 100, type = "bootstrap") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(response = am, success = "1") %>% + res_ <- mtcars_df |> + specify(response = am, success = "1") |> generate(reps = 100, type = "draw") ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(mpg ~ am) %>% + res_ <- mtcars_df |> + specify(mpg ~ am) |> generate(reps = 100, type = "permute") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(am ~ vs, success = "1") %>% + res_ <- mtcars_df |> + specify(am ~ vs, success = "1") |> generate(reps = 100, type = "draw") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(mpg ~ hp) %>% + res_ <- mtcars_df |> + specify(mpg ~ hp) |> generate(reps = 100, type = "draw") ) }) @@ -262,28 +262,28 @@ test_that("auto `type` works (generate)", { test_that("mismatches lead to error", { expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% generate(reps = 10, type = "permute") + res_ <- mtcars_df |> generate(reps = 10, type = "permute") ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(am ~ NULL, success = "1") %>% - hypothesize(null = "independence", p = c("1" = 0.5)) %>% + res_ <- mtcars_df |> + specify(am ~ NULL, success = "1") |> + hypothesize(null = "independence", p = c("1" = 0.5)) |> generate(reps = 100, type = "draw") ) expect_snapshot( - res_ <- mtcars_df %>% - specify(cyl ~ NULL) %>% # alt: response = cyl + res_ <- mtcars_df |> + specify(cyl ~ NULL) |> # alt: response = cyl hypothesize( null = "point", p = c("4" = .5, "6" = .25, "8" = .25) - ) %>% + ) |> generate(reps = 100, type = "bootstrap") ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(mpg ~ hp) %>% + res_ <- mtcars_df |> + specify(mpg ~ hp) |> generate(reps = 100, type = "other") ) }) @@ -296,17 +296,17 @@ test_that("generate() handles `NULL` value of `type`", { test_that("generate() handles `x` response", { expect_named( - data.frame(x = factor(rbinom(100, size = 1, prob = .5))) %>% - specify(response = x, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + data.frame(x = factor(rbinom(100, size = 1, prob = .5))) |> + specify(response = x, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(reps = 100, type = "draw"), c("x", "replicate") ) expect_named( - data.frame(category = c(rep(c("A", "B"), each = 5)), x = 1:10) %>% - specify(explanatory = category, response = x) %>% - hypothesize(null = "independence") %>% + data.frame(category = c(rep(c("A", "B"), each = 5)), x = 1:10) |> + specify(explanatory = category, response = x) |> + hypothesize(null = "independence") |> generate(reps = 5, type = "permute"), c("x", "category", "replicate") ) @@ -316,23 +316,24 @@ test_that("generate() can permute with multiple explanatory variables", { # if the y variable is the one being permuted and the x's # are being left alone, then each age + college combination # should exist in every replicate + equals_3 <- function(x) {x == 3} expect_true( - gss %>% + gss |> # add random noise to make the variable truly continuous - dplyr::mutate(age = age + rnorm(nrow(gss))) %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 3, type = "permute") %>% - dplyr::ungroup() %>% - dplyr::count(age, college) %>% - dplyr::pull(n) %>% - `==`(3) %>% + dplyr::mutate(age = age + rnorm(nrow(gss))) |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 3, type = "permute") |> + dplyr::ungroup() |> + dplyr::count(age, college) |> + dplyr::pull(n) |> + equals_3() |> all() ) - x <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + x <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 3, type = "permute") expect_true(inherits(x, "infer")) @@ -353,26 +354,26 @@ test_that("generate is sensitive to the variables argument", { { set.seed(1) - gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute") }, { set.seed(1) - gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = hours) } ) # permuting changes output expect_silent( - perm_age <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + perm_age <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = age) ) @@ -381,9 +382,9 @@ test_that("generate is sensitive to the variables argument", { expect_true(all(perm_age$college[1:10] == perm_age$college[11:20])) expect_silent( - perm_college <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + perm_college <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = college) ) @@ -392,9 +393,9 @@ test_that("generate is sensitive to the variables argument", { expect_false(all(perm_college$college[1:10] == perm_college$college[11:20])) expect_silent( - perm_college_age <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + perm_college_age <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = c(college, age)) ) @@ -412,9 +413,9 @@ test_that("generate is sensitive to the variables argument", { set.seed(1) expect_message( - res_1 <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + res_1 <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate( reps = 2, type = "permute", @@ -427,9 +428,9 @@ test_that("generate is sensitive to the variables argument", { { set.seed(1) - gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = hours) } ) @@ -438,75 +439,75 @@ test_that("generate is sensitive to the variables argument", { test_that("variables argument prompts when it ought to", { expect_snapshot( error = TRUE, - res_ <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + res_ <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = c(howdy)) ) expect_snapshot( error = TRUE, - res <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + res <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = c(howdy, doo)) ) expect_snapshot( - res_ <- gss[1:10, ] %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + res_ <- gss[1:10, ] |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 40) |> generate(reps = 2, type = "bootstrap", variables = c(hours)) ) expect_snapshot( error = TRUE, - res_ <- gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + res_ <- gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = "hours") ) expect_snapshot( - res_ <- gss[1:10, ] %>% - specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% + res_ <- gss[1:10, ] |> + specify(hours ~ age + college + age * college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = age * college) ) expect_snapshot( - res_ <- gss[1:10, ] %>% - specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% + res_ <- gss[1:10, ] |> + specify(hours ~ age + college + age * college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = c(hours, age * college)) ) expect_silent( - gss[1:10, ] %>% - specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college + age * college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute", variables = c(hours)) ) expect_silent( - gss[1:10, ] %>% - specify(hours ~ age + college + age * college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college + age * college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute") ) expect_silent( - gss[1:10, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% + gss[1:10, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> generate(reps = 2, type = "permute") ) # warn on type != permute but don't raise message re: interaction # effects unless otherwise used appropriately expect_snapshot( - res_ <- gss[1:10, ] %>% - specify(hours ~ age * college) %>% + res_ <- gss[1:10, ] |> + specify(hours ~ age * college) |> generate( reps = 2, type = "bootstrap", @@ -518,43 +519,43 @@ test_that("variables argument prompts when it ought to", { test_that("type = 'draw'/'simulate' superseding handled gracefully", { # message on type = 'simulate' expect_snapshot( - res_ <- mtcars_df %>% - specify(response = am, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + res_ <- mtcars_df |> + specify(response = am, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(type = "simulate") ) # don't message on type = 'draw' expect_silent( - mtcars_df %>% - specify(response = am, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + mtcars_df |> + specify(response = am, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(type = "draw") ) # mention new generation types when supplied a bad one expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = am, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + res_ <- mtcars_df |> + specify(response = am, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(type = "boop") ) # warns with either alias when given unexpected generate type expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% - hypothesize(null = "point", mu = 20) %>% + mtcars_df |> + specify(response = mpg) |> + hypothesize(null = "point", mu = 20) |> generate(type = "draw") ) expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% - hypothesize(null = "point", mu = 20) %>% + mtcars_df |> + specify(response = mpg) |> + hypothesize(null = "point", mu = 20) |> generate(type = "draw") ) @@ -563,9 +564,9 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { set.seed(1) expect_message( - res_1 <- mtcars_df %>% - specify(response = am, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + res_1 <- mtcars_df |> + specify(response = am, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(type = "simulate") ) @@ -574,9 +575,9 @@ test_that("type = 'draw'/'simulate' superseding handled gracefully", { { set.seed(1) - res_2 <- mtcars_df %>% - specify(response = am, success = "1") %>% - hypothesize(null = "point", p = .5) %>% + res_2 <- mtcars_df |> + specify(response = am, success = "1") |> + hypothesize(null = "point", p = .5) |> generate(type = "draw") res_2 diff --git a/tests/testthat/test-get_confidence_interval.R b/tests/testthat/test-get_confidence_interval.R index ff3708cd..fed832c8 100644 --- a/tests/testthat/test-get_confidence_interval.R +++ b/tests/testthat/test-get_confidence_interval.R @@ -26,7 +26,7 @@ perc_def_out <- tibble::tibble( test_that("get_confidence_interval works with defaults", { expect_message( - expect_equal(test_df %>% get_confidence_interval(), perc_def_out), + expect_equal(test_df |> get_confidence_interval(), perc_def_out), "Using `level = 0.95`" ) }) @@ -34,14 +34,14 @@ test_that("get_confidence_interval works with defaults", { test_that("get_confidence_interval works with `type = 'percentile'`", { expect_message( expect_equal( - test_df %>% get_confidence_interval(type = "percentile"), + test_df |> get_confidence_interval(type = "percentile"), perc_def_out ), "Using `level = 0.95`" ) expect_equal( - test_df %>% get_confidence_interval(level = 0.5, type = "percentile"), + test_df |> get_confidence_interval(level = 0.5, type = "percentile"), tibble::tibble( lower_ci = unname(quantile(test_df[["stat"]], 0.25)), upper_ci = unname(quantile(test_df[["stat"]], 0.75)) @@ -53,7 +53,7 @@ test_that("get_confidence_interval works with `type = 'se'`", { expect_message( # use equivalent rather than equal as ci has attributes for se and point est expect_equal( - test_df %>% + test_df |> get_confidence_interval(type = "se", point_estimate = point), tibble::tibble(lower_ci = -5.653, upper_ci = 6.603), tolerance = 1e-3, @@ -64,7 +64,7 @@ test_that("get_confidence_interval works with `type = 'se'`", { # use equivalent rather than equal as ci has attributes for se and point est expect_equal( - test_df %>% + test_df |> get_confidence_interval(level = 0.5, type = "se", point_estimate = point), tibble::tibble(lower_ci = -1.633, upper_ci = 2.583), tolerance = 1e-3, @@ -75,7 +75,7 @@ test_that("get_confidence_interval works with `type = 'se'`", { test_that("get_confidence_interval works with `type = 'bias-corrected'`", { expect_message( expect_equal( - test_df %>% + test_df |> get_confidence_interval( type = "bias-corrected", point_estimate = point @@ -87,7 +87,7 @@ test_that("get_confidence_interval works with `type = 'bias-corrected'`", { ) expect_equal( - test_df %>% + test_df |> get_confidence_interval( level = 0.5, type = "bias-corrected", @@ -102,14 +102,14 @@ test_that("get_confidence_interval supports data frame `point_estimate`", { point_df <- data.frame(p = point) expect_equal( - test_df %>% get_confidence_interval(type = "se", point_estimate = point), - test_df %>% get_confidence_interval(type = "se", point_estimate = point_df), + test_df |> get_confidence_interval(type = "se", point_estimate = point), + test_df |> get_confidence_interval(type = "se", point_estimate = point_df), tolerance = eps ) expect_equal( - test_df %>% + test_df |> get_confidence_interval(type = "bias-corrected", point_estimate = point), - test_df %>% + test_df |> get_confidence_interval( type = "bias-corrected", point_estimate = point_df @@ -127,24 +127,24 @@ test_that("get_confidence_interval messages with no explicit `level`", { test_that("get_confidence_interval checks input", { expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(type = "other") + test_df |> get_confidence_interval(type = "other") ) expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(level = 1.2) + test_df |> get_confidence_interval(level = 1.2) ) expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(point_estimate = "a") + test_df |> get_confidence_interval(point_estimate = "a") ) expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(type = "se", point_estimate = "a") + test_df |> get_confidence_interval(type = "se", point_estimate = "a") ) expect_snapshot( error = TRUE, - test_df %>% + test_df |> get_confidence_interval( type = "se", point_estimate = data.frame(p = "a") @@ -153,11 +153,11 @@ test_that("get_confidence_interval checks input", { expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(type = "se") + test_df |> get_confidence_interval(type = "se") ) expect_snapshot( error = TRUE, - test_df %>% get_confidence_interval(type = "bias-corrected") + test_df |> get_confidence_interval(type = "bias-corrected") ) }) @@ -166,14 +166,14 @@ test_that("get_confidence_interval can handle fitted objects", { # generate example objects set.seed(1) - null_fits <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% + null_fits <- gss[1:50, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> fit() - obs_fit <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% + obs_fit <- gss[1:50, ] |> + specify(hours ~ age + college) |> fit() # check each ci type @@ -233,8 +233,8 @@ test_that("get_confidence_interval can handle fitted objects", { ) # errors out when it ought to - obs_fit_2 <- gss[1:50, ] %>% - specify(hours ~ age) %>% + obs_fit_2 <- gss[1:50, ] |> + specify(hours ~ age) |> fit() expect_snapshot( @@ -243,8 +243,8 @@ test_that("get_confidence_interval can handle fitted objects", { ) obs_fit_3 <- - obs_fit_2 <- gss[1:50, ] %>% - specify(year ~ age + college) %>% + obs_fit_2 <- gss[1:50, ] |> + specify(year ~ age + college) |> fit() expect_snapshot( @@ -256,14 +256,14 @@ test_that("get_confidence_interval can handle fitted objects", { test_that("get_confidence_interval can handle bad args with fitted objects", { set.seed(1) - null_fits <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% + null_fits <- gss[1:50, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> fit() - obs_fit <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% + obs_fit <- gss[1:50, ] |> + specify(hours ~ age + college) |> fit() expect_snapshot( @@ -287,21 +287,21 @@ test_that("get_confidence_interval can handle bad args with fitted objects", { }) test_that("theoretical CIs align with simulation-based (mean)", { - x_bar <- gss %>% - specify(response = hours) %>% + x_bar <- gss |> + specify(response = hours) |> calculate(stat = "mean") set.seed(1) - null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1e3, type = "bootstrap") %>% + null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1e3, type = "bootstrap") |> calculate(stat = "mean") - null_dist_theory <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + null_dist_theory <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume(distribution = "t") expect_equal( @@ -322,20 +322,20 @@ test_that("theoretical CIs align with simulation-based (mean)", { }) test_that("theoretical CIs align with simulation-based (prop)", { - p_hat <- gss %>% - specify(response = sex, success = "female") %>% + p_hat <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") set.seed(1) - null_dist <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1e3, type = "draw") %>% + null_dist <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> + generate(reps = 1e3, type = "draw") |> calculate(stat = "prop") - null_dist_theory <- gss %>% - specify(response = sex, success = "female") %>% + null_dist_theory <- gss |> + specify(response = sex, success = "female") |> assume(distribution = "z") expect_equal( @@ -356,20 +356,20 @@ test_that("theoretical CIs align with simulation-based (prop)", { }) test_that("theoretical CIs align with simulation-based (diff in means)", { - diff_bar <- gss %>% - specify(age ~ college) %>% + diff_bar <- gss |> + specify(age ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) set.seed(1) - null_dist <- gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 3e3, type = "permute") %>% + null_dist <- gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 3e3, type = "permute") |> calculate(stat = "diff in means", order = c("degree", "no degree")) - null_dist_theory <- gss %>% - specify(age ~ college) %>% + null_dist_theory <- gss |> + specify(age ~ college) |> assume(distribution = "t") expect_equal( @@ -390,20 +390,20 @@ test_that("theoretical CIs align with simulation-based (diff in means)", { }) test_that("theoretical CIs align with simulation-based (diff in props)", { - diff_hat <- gss %>% - specify(college ~ sex, success = "no degree") %>% + diff_hat <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "diff in props", order = c("female", "male")) set.seed(1) - null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1e3, type = "permute") %>% + null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1e3, type = "permute") |> calculate(stat = "diff in props", order = c("female", "male")) - null_dist_theory <- gss %>% - specify(college ~ sex, success = "no degree") %>% + null_dist_theory <- gss |> + specify(college ~ sex, success = "no degree") |> assume(distribution = "z") expect_equal( @@ -424,12 +424,12 @@ test_that("theoretical CIs align with simulation-based (diff in props)", { }) test_that("theoretical CIs check arguments properly", { - x_bar <- gss %>% - specify(response = hours) %>% + x_bar <- gss |> + specify(response = hours) |> calculate(stat = "mean") - null_dist_theory <- gss %>% - specify(age ~ college) %>% + null_dist_theory <- gss |> + specify(age ~ college) |> assume(distribution = "t") # check that type is handled correctly @@ -487,9 +487,9 @@ test_that("theoretical CIs check arguments properly", { ) # check that statistics are implemented - obs_t <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + obs_t <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") expect_snapshot( @@ -502,12 +502,12 @@ test_that("theoretical CIs check arguments properly", { ) # check that stat and distribution align - p_hat <- gss %>% - specify(response = sex, success = "female") %>% + p_hat <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") - null_dist_z <- gss %>% - specify(response = sex, success = "female") %>% + null_dist_z <- gss |> + specify(response = sex, success = "female") |> assume(distribution = "z") expect_snapshot( @@ -537,10 +537,10 @@ test_that("handles missing values gracefully (#520)", { set.seed(1) boot_dist <- - data %>% - specify(prop ~ group) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "bootstrap") %>% + data |> + specify(prop ~ group) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "diff in medians", order = c("b", "a")) expect_snapshot(res <- get_confidence_interval(boot_dist, .95)) diff --git a/tests/testthat/test-get_p_value.R b/tests/testthat/test-get_p_value.R index 7e5e65c7..b19109ea 100644 --- a/tests/testthat/test-get_p_value.R +++ b/tests/testthat/test-get_p_value.R @@ -21,7 +21,7 @@ test_df$stat <- sample(c( test_that("direction is appropriate", { expect_snapshot( error = TRUE, - test_df %>% get_p_value(obs_stat = 0.5, direction = "righ") + test_df |> get_p_value(obs_stat = 0.5, direction = "righ") ) }) @@ -104,15 +104,15 @@ test_that("get_p_value works", { }) test_that("theoretical p-value not supported error", { - obs_F <- gss_tbl %>% - specify(hours ~ partyid) %>% + obs_F <- gss_tbl |> + specify(hours ~ partyid) |> calculate(stat = "F") expect_snapshot( error = TRUE, - gss_tbl %>% - specify(hours ~ partyid) %>% - hypothesize(null = "independence") %>% - calculate(stat = "F") %>% + gss_tbl |> + specify(hours ~ partyid) |> + hypothesize(null = "independence") |> + calculate(stat = "F") |> get_p_value(obs_stat = obs_F, direction = "right") ) }) @@ -138,14 +138,14 @@ test_that("get_p_value throws error in case of `NaN` stat", { test_that("get_p_value can handle fitted objects", { set.seed(1) - null_fits <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% + null_fits <- gss[1:50, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> fit() - obs_fit <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% + obs_fit <- gss[1:50, ] |> + specify(hours ~ age + college) |> fit() expect_equal( @@ -162,28 +162,28 @@ test_that("get_p_value can handle fitted objects", { ) # errors out when it ought to - obs_fit_2 <- gss[1:50, ] %>% - specify(hours ~ age) %>% + obs_fit_2 <- gss[1:50, ] |> + specify(hours ~ age) |> fit() expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_2, "both")) - obs_fit_3 <- gss[1:50, ] %>% - specify(year ~ age + college) %>% + obs_fit_3 <- gss[1:50, ] |> + specify(year ~ age + college) |> fit() expect_snapshot(error = TRUE, get_p_value(null_fits, obs_fit_3, "both")) set.seed(1) - null_fits_4 <- gss[1:50, ] %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% + null_fits_4 <- gss[1:50, ] |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> fit() - obs_fit_4 <- gss[1:50, ] %>% - specify(hours ~ age) %>% + obs_fit_4 <- gss[1:50, ] |> + specify(hours ~ age) |> fit() obs_fit_4 @@ -219,14 +219,14 @@ test_that("get_p_value can handle fitted objects", { test_that("get_p_value can handle bad args with fitted objects", { set.seed(1) - null_fits <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% + null_fits <- gss[1:50, ] |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> fit() - obs_fit <- gss[1:50, ] %>% - specify(hours ~ age + college) %>% + obs_fit <- gss[1:50, ] |> + specify(hours ~ age + college) |> fit() expect_snapshot(error = TRUE, get_p_value(null_fits, "boop", "both")) @@ -241,16 +241,16 @@ test_that("get_p_value can handle bad args with fitted objects", { test_that("get_p_value errors informatively when args are switched", { # switch obs_stat and x - obs_stat <- gss %>% - specify(response = hours) %>% + obs_stat <- gss |> + specify(response = hours) |> calculate(stat = "mean") set.seed(1) - null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 41) %>% - generate(reps = 20, type = "bootstrap") %>% + null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 41) |> + generate(reps = 20, type = "bootstrap") |> calculate(stat = "mean") expect_snapshot(error = TRUE, get_p_value(obs_stat, null_dist, "both")) @@ -270,14 +270,14 @@ test_that("get_p_value can handle theoretical distributions", { # f ------------------------------------------------------------ # direction = "right" is the only valid one f_dist <- - gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume(distribution = "F") f_obs <- - gss %>% - specify(age ~ partyid) %>% + gss |> + specify(age ~ partyid) |> calculate(stat = "F") expect_equal( @@ -296,15 +296,15 @@ test_that("get_p_value can handle theoretical distributions", { # t ------------------------------------------------------------ t_dist <- - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t") t_obs <- - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") expect_equal( @@ -356,14 +356,14 @@ test_that("get_p_value can handle theoretical distributions", { # chisq ------------------------------------------------------------ # direction = "right" is the only valid one chisq_dist <- - gss %>% - specify(college ~ finrela) %>% - hypothesize(null = "independence") %>% + gss |> + specify(college ~ finrela) |> + hypothesize(null = "independence") |> assume(distribution = "Chisq") chisq_obs <- - gss %>% - specify(college ~ finrela) %>% + gss |> + specify(college ~ finrela) |> calculate(stat = "Chisq") expect_equal( @@ -384,15 +384,15 @@ test_that("get_p_value can handle theoretical distributions", { # z ------------------------------------------------------------ z_dist <- - gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% + gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> assume("z") z_obs <- - gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% + gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> calculate(stat = "z") expect_equal( @@ -466,21 +466,21 @@ test_that("get_p_value can handle theoretical distributions", { test_that("get_p_value warns with bad theoretical distributions", { t_dist_40 <- - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t") t_dist_30 <- - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 30) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 30) |> assume("t") t_obs <- - gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") expect_silent( diff --git a/tests/testthat/test-hypothesize.R b/tests/testthat/test-hypothesize.R index bee93bd6..59b7cf36 100644 --- a/tests/testthat/test-hypothesize.R +++ b/tests/testthat/test-hypothesize.R @@ -1,47 +1,47 @@ -one_mean <- mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL +one_mean <- mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL hypothesize(null = "point", mu = 25) -one_mean_specify <- mtcars_df %>% +one_mean_specify <- mtcars_df |> specify(response = mpg) -one_median <- mtcars_df %>% - specify(response = mpg) %>% # formula alt: mpg ~ NULL +one_median <- mtcars_df |> + specify(response = mpg) |> # formula alt: mpg ~ NULL hypothesize(null = "point", med = 26) -one_prop <- mtcars_df %>% - specify(response = am, success = "1") %>% # formula alt: am ~ NULL +one_prop <- mtcars_df |> + specify(response = am, success = "1") |> # formula alt: am ~ NULL hypothesize(null = "point", p = .25) -one_prop_specify <- mtcars_df %>% +one_prop_specify <- mtcars_df |> specify(response = am, success = "1") -two_props <- mtcars_df %>% - specify(am ~ vs, success = "1") %>% # alt: response = am, explanatory = vs +two_props <- mtcars_df |> + specify(am ~ vs, success = "1") |> # alt: response = am, explanatory = vs hypothesize(null = "independence") -gof_chisq <- mtcars_df %>% - specify(cyl ~ NULL) %>% # alt: response = cyl +gof_chisq <- mtcars_df |> + specify(cyl ~ NULL) |> # alt: response = cyl hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) -indep_chisq <- mtcars_df %>% - specify(cyl ~ am) %>% # alt: response = cyl, explanatory = am +indep_chisq <- mtcars_df |> + specify(cyl ~ am) |> # alt: response = cyl, explanatory = am hypothesize(null = "independence") -two_means <- mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am +two_means <- mtcars_df |> + specify(mpg ~ am) |> # alt: response = mpg, explanatory = am hypothesize(null = "independence") -two_medians <- mtcars_df %>% - specify(mpg ~ am) %>% # alt: response = mpg, explanatory = am +two_medians <- mtcars_df |> + specify(mpg ~ am) |> # alt: response = mpg, explanatory = am hypothesize(null = "independence") -anova_f <- mtcars_df %>% - specify(mpg ~ cyl) %>% # alt: response = mpg, explanatory = cyl +anova_f <- mtcars_df |> + specify(mpg ~ cyl) |> # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") -slopes <- mtcars_df %>% - specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = cyl +slopes <- mtcars_df |> + specify(mpg ~ hp) |> # alt: response = mpg, explanatory = cyl hypothesize(null = "independence") test_that("auto `type` works (hypothesize)", { @@ -60,22 +60,22 @@ test_that("auto `type` works (hypothesize)", { test_that("hypothesize() throws an error when null is not point or independence", { expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% + mtcars_df |> + specify(response = mpg) |> hypothesize(null = "dependence") ) }) test_that("hypothesize() allows partial matching of null arg for point", { - hyp_p <- mtcars_df %>% - specify(response = mpg) %>% + hyp_p <- mtcars_df |> + specify(response = mpg) |> hypothesize(null = "po", mu = 0) expect_equal(attr(hyp_p, "null"), "point") }) test_that("hypothesize() allows partial matching of null arg for independence", { - hyp_i <- mtcars_df %>% - specify(mpg ~ vs) %>% + hyp_i <- mtcars_df |> + specify(mpg ~ vs) |> hypothesize(null = "i") expect_equal(attr(hyp_i, "null"), "independence") }) @@ -83,8 +83,8 @@ test_that("hypothesize() allows partial matching of null arg for independence", test_that("hypothesize() throws an error when multiple null values are provided", { expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% + mtcars_df |> + specify(response = mpg) |> hypothesize(null = c("point", "independence")) ) }) @@ -92,24 +92,24 @@ test_that("hypothesize() throws an error when multiple null values are provided" test_that("hypothesize() throws an error when multiple params are set", { expect_snapshot( error = TRUE, - mtcars_df %>% - specify(response = mpg) %>% + mtcars_df |> + specify(response = mpg) |> hypothesize(null = "point", mu = 25, med = 20) ) }) test_that("hypothesize() throws a warning when params are set with independence", { expect_snapshot( - res_ <- mtcars_df %>% - specify(mpg ~ vs) %>% + res_ <- mtcars_df |> + specify(mpg ~ vs) |> hypothesize(null = "independence", mu = 25) ) }) test_that("hypothesize() throws a warning when params are set with paired independence", { expect_snapshot( - res_ <- mtcars_df %>% - specify(response = mpg) %>% + res_ <- mtcars_df |> + specify(response = mpg) |> hypothesize(null = "paired independence", mu = 25) ) }) @@ -117,8 +117,8 @@ test_that("hypothesize() throws a warning when params are set with paired indepe test_that("hypothesize() throws an error when p is greater than 1", { expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = vs, success = "1") %>% + res_ <- mtcars_df |> + specify(response = vs, success = "1") |> hypothesize(null = "point", p = 1 + .Machine$double.eps) ) }) @@ -126,8 +126,8 @@ test_that("hypothesize() throws an error when p is greater than 1", { test_that("hypothesize() throws an error when p is less than 0", { expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = vs, success = "1") %>% + res_ <- mtcars_df |> + specify(response = vs, success = "1") |> hypothesize(null = "point", p = -.Machine$double.neg.eps) ) }) @@ -135,8 +135,8 @@ test_that("hypothesize() throws an error when p is less than 0", { test_that("hypothesize() throws an error when p contains missing values", { expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = vs, success = "1") %>% + res_ <- mtcars_df |> + specify(response = vs, success = "1") |> hypothesize(null = "point", p = c("0" = 0.5, "1" = NA_real_)) ) }) @@ -144,15 +144,15 @@ test_that("hypothesize() throws an error when p contains missing values", { test_that("hypothesize() throws an error when vector p does not sum to 1", { expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = vs, success = "1") %>% + res_ <- mtcars_df |> + specify(response = vs, success = "1") |> hypothesize(null = "point", p = c("0" = 0.5, "1" = 0.5 + (eps * 2))) ) }) test_that("hypothesize arguments function", { mtcars_f <- dplyr::mutate(mtcars, cyl = factor(cyl)) - mtcars_s <- mtcars_f %>% specify(response = mpg) + mtcars_s <- mtcars_f |> specify(response = mpg) matrix1 <- matrix(data = NA, nrow = 3, ncol = 3) expect_snapshot(error = TRUE, res_ <- hypothesize(matrix1)) @@ -161,52 +161,52 @@ test_that("hypothesize arguments function", { expect_snapshot( error = TRUE, - res_ <- mtcars_s %>% hypothesize(null = "point", mean = 3) + res_ <- mtcars_s |> hypothesize(null = "point", mean = 3) ) expect_snapshot( error = TRUE, - res_ <- mtcars_s %>% hypothesize(null = "independence") + res_ <- mtcars_s |> hypothesize(null = "independence") ) expect_snapshot( error = TRUE, - res_ <- mtcars_s %>% hypothesize(null = "point") + res_ <- mtcars_s |> hypothesize(null = "point") ) expect_snapshot( error = TRUE, res_ <- - mtcars_f %>% - specify(mpg ~ am) %>% + mtcars_f |> + specify(mpg ~ am) |> hypothesize(null = "paired independence") ) # Produces error on win-build expect_snapshot( error = TRUE, - res <- mtcars_s %>% hypothesize(null = c("point", "independence"), mu = 3) + res <- mtcars_s |> hypothesize(null = c("point", "independence"), mu = 3) ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - dplyr::select(vs) %>% + res_ <- mtcars_df |> + dplyr::select(vs) |> hypothesize(null = "point", mu = 1) ) expect_snapshot( error = TRUE, - res_ <- mtcars_df %>% - specify(response = vs) %>% + res_ <- mtcars_df |> + specify(response = vs) |> hypothesize(null = "point", mu = 1) ) expect_snapshot( error = TRUE, - res_ <- mtcars_s %>% hypothesize(null = "point", p = 0.2) + res_ <- mtcars_s |> hypothesize(null = "point", p = 0.2) ) - expect_snapshot(error = TRUE, res_ <- mtcars_s %>% hypothesize()) + expect_snapshot(error = TRUE, res_ <- mtcars_s |> hypothesize()) }) test_that("params correct", { @@ -226,8 +226,8 @@ test_that("sensible output", { test_that("user can specify multiple explanatory variables", { x <- - gss %>% - specify(hours ~ sex + college) %>% + gss |> + specify(hours ~ sex + college) |> hypothesize(null = "independence") expect_true(inherits(x, "infer")) @@ -239,8 +239,8 @@ test_that("user can specify multiple explanatory variables", { expect_equal(response_name(x), "hours") expect_snapshot( - res_ <- gss %>% - specify(hours ~ sex + college) %>% + res_ <- gss |> + specify(hours ~ sex + college) |> hypothesize(null = "independence", mu = 40) ) }) diff --git a/tests/testthat/test-observe.R b/tests/testthat/test-observe.R index d44a67a0..929f9de1 100644 --- a/tests/testthat/test-observe.R +++ b/tests/testthat/test-observe.R @@ -1,18 +1,18 @@ test_that("observe() output is equal to core verbs", { expect_equal( - gss %>% + gss |> observe(hours ~ NULL, stat = "mean"), - gss %>% - specify(hours ~ NULL) %>% + gss |> + specify(hours ~ NULL) |> calculate(stat = "mean") ) expect_equal( - gss %>% + gss |> observe(hours ~ NULL, stat = "t", null = "point", mu = 40), - gss %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") ) @@ -23,8 +23,8 @@ test_that("observe() output is equal to core verbs", { stat = "diff in means", order = c("degree", "no degree") ), - gss %>% - specify(age ~ college) %>% + gss |> + specify(age ~ college) |> calculate("diff in means", order = c("degree", "no degree")), ignore_attr = TRUE ) @@ -33,43 +33,43 @@ test_that("observe() output is equal to core verbs", { test_that("observe messages/warns/errors informatively", { expect_equal( expect_message( - gss %>% + gss |> observe(hours ~ NULL, stat = "mean", mu = 40) - ) %>% + ) |> conditionMessage(), expect_message( - gss %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 40) %>% + gss |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "mean") - ) %>% + ) |> conditionMessage() ) expect_equal( expect_warning( - gss %>% + gss |> observe(hours ~ NULL, stat = "t") - ) %>% + ) |> conditionMessage(), expect_warning( - gss %>% - specify(hours ~ NULL) %>% + gss |> + specify(hours ~ NULL) |> calculate(stat = "t") - ) %>% + ) |> conditionMessage() ) expect_error( expect_equal( capture.output( - gss %>% + gss |> observe(hours ~ age, stat = "diff in means"), type = "message" ), capture.output( - gss %>% - specify(hours ~ age) %>% + gss |> + specify(hours ~ age) |> calculate(stat = "diff in means"), type = "message" ), @@ -78,10 +78,10 @@ test_that("observe messages/warns/errors informatively", { expect_error( expect_equal( - gss %>% + gss |> observe(explanatory = age, stat = "diff in means"), - gss %>% - specify(explanatory = age) %>% + gss |> + specify(explanatory = age) |> calculate(stat = "diff in means") ) ) @@ -90,76 +90,76 @@ test_that("observe messages/warns/errors informatively", { test_that("observe() works with either specify() interface", { # unnamed formula argument expect_equal( - gss %>% + gss |> observe(hours ~ NULL, stat = "mean"), - gss %>% + gss |> observe(response = hours, stat = "mean"), ignore_attr = TRUE ) expect_equal( - gss %>% + gss |> observe( hours ~ college, stat = "diff in means", order = c("degree", "no degree") ), - gss %>% - specify(hours ~ college) %>% + gss |> + specify(hours ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) ) # named formula argument expect_equal( - gss %>% + gss |> observe(formula = hours ~ NULL, stat = "mean"), - gss %>% + gss |> observe(response = hours, stat = "mean"), ignore_attr = TRUE ) expect_equal( - gss %>% + gss |> observe(formula = hours ~ NULL, stat = "mean"), - gss %>% + gss |> observe(response = hours, stat = "mean"), ignore_attr = TRUE ) expect_equal( - gss %>% + gss |> observe( formula = hours ~ college, stat = "diff in means", order = c("degree", "no degree") ), - gss %>% - specify(formula = hours ~ college) %>% + gss |> + specify(formula = hours ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) ) }) test_that("observe() output is the same as the old wrappers", { expect_snapshot( - res_wrap <- gss_tbl %>% + res_wrap <- gss_tbl |> chisq_stat(college ~ partyid) ) expect_equal( - gss_tbl %>% - observe(college ~ partyid, stat = "Chisq") %>% + gss_tbl |> + observe(college ~ partyid, stat = "Chisq") |> dplyr::pull(), res_wrap ) expect_snapshot( - res_wrap_2 <- gss_tbl %>% + res_wrap_2 <- gss_tbl |> t_stat(hours ~ sex, order = c("male", "female")) ) expect_equal( - gss_tbl %>% - observe(stat = "t", hours ~ sex, order = c("male", "female")) %>% + gss_tbl |> + observe(stat = "t", hours ~ sex, order = c("male", "female")) |> dplyr::pull(), res_wrap_2 ) diff --git a/tests/testthat/test-print.R b/tests/testthat/test-print.R index 9a828328..043c4f4a 100644 --- a/tests/testthat/test-print.R +++ b/tests/testthat/test-print.R @@ -1,8 +1,8 @@ test_that("print works", { expect_output(print( - gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% + gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> generate(reps = 10, type = "permute") )) }) diff --git a/tests/testthat/test-rep_sample_n.R b/tests/testthat/test-rep_sample_n.R index 63abc8f2..e0e7dd9e 100644 --- a/tests/testthat/test-rep_sample_n.R +++ b/tests/testthat/test-rep_sample_n.R @@ -262,7 +262,7 @@ test_that("`rep_slice_sample` uses `weight_by`", { ) population_wt <- - population %>% + population |> dplyr::mutate(wts = rep(1, n_population) / n_population) set.seed(1) res3 <- rep_slice_sample( diff --git a/tests/testthat/test-shade_confidence_interval.R b/tests/testthat/test-shade_confidence_interval.R index aa1ecd06..dc9d0a88 100644 --- a/tests/testthat/test-shade_confidence_interval.R +++ b/tests/testthat/test-shade_confidence_interval.R @@ -73,16 +73,16 @@ test_that("shade_confidence_interval throws errors and warnings", { ) expect_snapshot( error = TRUE, - res_ <- gss_viz_sim %>% shade_confidence_interval(c(-1, 1)) + res_ <- gss_viz_sim |> shade_confidence_interval(c(-1, 1)) ) expect_snapshot( error = TRUE, - res_ <- gss_viz_sim %>% shade_confidence_interval(endpoints = c(-1, 1)) + res_ <- gss_viz_sim |> shade_confidence_interval(endpoints = c(-1, 1)) ) - expect_snapshot(error = TRUE, res_ <- gss_viz_sim %>% shade_ci(c(-1, 1))) + expect_snapshot(error = TRUE, res_ <- gss_viz_sim |> shade_ci(c(-1, 1))) expect_snapshot( error = TRUE, - res_ <- gss_viz_sim %>% shade_ci(endpoints = c(-1, 1)) + res_ <- gss_viz_sim |> shade_ci(endpoints = c(-1, 1)) ) }) diff --git a/tests/testthat/test-shade_p_value.R b/tests/testthat/test-shade_p_value.R index 18fb531e..bb776788 100644 --- a/tests/testthat/test-shade_p_value.R +++ b/tests/testthat/test-shade_p_value.R @@ -73,7 +73,7 @@ test_that("shade_p_value works", { # -roper p-value shading when the calculated statistic falls exactly on the # boundaries of a histogram bin (#424) - r_hat <- gss %>% + r_hat <- gss |> observe( college ~ sex, success = "no degree", @@ -83,10 +83,10 @@ test_that("shade_p_value works", { set.seed(33) - null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% + null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000) |> calculate(stat = "ratio of props", order = c("female", "male")) expect_doppelganger( @@ -167,17 +167,17 @@ test_that("shade_p_value throws errors", { error = TRUE, gss_viz_sim + shade_p_value(1, "right", fill = "x") ) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_p_value(1, "right")) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_p_value(obs_stat = 1)) + expect_snapshot(error = TRUE, gss_viz_sim |> shade_p_value(1, "right")) + expect_snapshot(error = TRUE, gss_viz_sim |> shade_p_value(obs_stat = 1)) expect_snapshot( error = TRUE, - gss_viz_sim %>% shade_p_value(obs_stat = 1, direction = "right") + gss_viz_sim |> shade_p_value(obs_stat = 1, direction = "right") ) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_pvalue(1, "right")) - expect_snapshot(error = TRUE, gss_viz_sim %>% shade_pvalue(obs_stat = 1)) + expect_snapshot(error = TRUE, gss_viz_sim |> shade_pvalue(1, "right")) + expect_snapshot(error = TRUE, gss_viz_sim |> shade_pvalue(obs_stat = 1)) expect_snapshot( error = TRUE, - gss_viz_sim %>% shade_pvalue(obs_stat = 1, direction = "right") + gss_viz_sim |> shade_pvalue(obs_stat = 1, direction = "right") ) }) diff --git a/tests/testthat/test-specify.R b/tests/testthat/test-specify.R index 2eae808a..2c6adbb8 100644 --- a/tests/testthat/test-specify.R +++ b/tests/testthat/test-specify.R @@ -1,12 +1,12 @@ -one_nonshift_mean <- mtcars_df %>% specify(response = mpg) +one_nonshift_mean <- mtcars_df |> specify(response = mpg) -one_nonshift_prop <- mtcars_df %>% specify(response = am, success = "1") +one_nonshift_prop <- mtcars_df |> specify(response = am, success = "1") -two_means_boot <- mtcars_df %>% specify(mpg ~ am) +two_means_boot <- mtcars_df |> specify(mpg ~ am) -two_props_boot <- mtcars_df %>% specify(am ~ vs, success = "1") +two_props_boot <- mtcars_df |> specify(am ~ vs, success = "1") -slope_boot <- mtcars_df %>% specify(mpg ~ hp) +slope_boot <- mtcars_df |> specify(mpg ~ hp) test_that("auto `type` works (specify)", { expect_equal(attr(one_nonshift_mean, "type"), "bootstrap") @@ -68,8 +68,8 @@ test_that("formula argument is a formula", { expect_snapshot(error = TRUE, specify(mtcars, am, success = "1")) expect_snapshot(error = TRUE, specify(mtcars, response = am, "1")) expect_silent({ - mtcars %>% - dplyr::mutate(am = factor(am)) %>% + mtcars |> + dplyr::mutate(am = factor(am)) |> specify(response = am, success = "1") }) }) @@ -85,32 +85,32 @@ test_that("specify doesn't have NSE issues (#256)", { test_that("specify messages when dropping unused levels", { expect_snapshot( - res_ <- gss %>% - dplyr::filter(partyid %in% c("rep", "dem")) %>% + res_ <- gss |> + dplyr::filter(partyid %in% c("rep", "dem")) |> specify(age ~ partyid) ) expect_snapshot( - res_ <- gss %>% - dplyr::filter(partyid %in% c("rep", "dem")) %>% + res_ <- gss |> + dplyr::filter(partyid %in% c("rep", "dem")) |> specify(partyid ~ age) ) expect_snapshot( - res_ <- gss %>% - dplyr::filter(partyid %in% c("rep", "dem")) %>% + res_ <- gss |> + dplyr::filter(partyid %in% c("rep", "dem")) |> specify(partyid ~ NULL) ) expect_silent( - gss %>% - dplyr::filter(partyid %in% c("rep", "dem")) %>% + gss |> + dplyr::filter(partyid %in% c("rep", "dem")) |> specify(age ~ NULL) ) }) test_that("user can specify multiple explanatory variables", { - x <- gss %>% specify(hours ~ sex + college) + x <- gss |> specify(hours ~ sex + college) expect_true(inherits(x, "infer")) expect_true(inherits(explanatory_variable(x), "tbl_df")) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 5e7d4d99..73d1fe90 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -106,14 +106,14 @@ test_that("hypothesize errors out when x isn't a dataframe", { test_that("p_null supplies appropriate params", { expect_equal( - gss %>% specify(partyid ~ NULL) %>% p_null(), + gss |> specify(partyid ~ NULL) |> p_null(), c(p.dem = 0.2, p.ind = 0.2, p.rep = 0.2, p.other = 0.2, p.DK = 0.2) ) }) test_that("variables are standardized as expected", { gss_types <- - gss %>% + gss |> dplyr::mutate( age = as.integer(age), is_dem = dplyr::if_else(partyid == "dem", TRUE, FALSE), @@ -144,10 +144,10 @@ test_that("group_by_replicate() helper returns correct results", { nrow_gss <- nrow(gss) gss_gen <- - gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = reps, type = "permute") %>% + gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = reps, type = "permute") |> dplyr::ungroup() expect_equal( diff --git a/tests/testthat/test-visualize.R b/tests/testthat/test-visualize.R index ec5e2ead..d1700497 100644 --- a/tests/testthat/test-visualize.R +++ b/tests/testthat/test-visualize.R @@ -2,22 +2,22 @@ library(dplyr) set.seed(42) -hours_resamp <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", med = 3) %>% - generate(reps = 10, type = "bootstrap") %>% +hours_resamp <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", med = 3) |> + generate(reps = 10, type = "bootstrap") |> calculate(stat = "median") -obs_slope <- lm(age ~ hours, data = gss_tbl) %>% - broom::tidy() %>% - dplyr::filter(term == "hours") %>% - dplyr::select(estimate) %>% +obs_slope <- lm(age ~ hours, data = gss_tbl) |> + broom::tidy() |> + dplyr::filter(term == "hours") |> + dplyr::select(estimate) |> dplyr::pull() -obs_diff <- gss_tbl %>% - group_by(college) %>% - summarize(prop = mean(college == "no degree")) %>% - summarize(diff(prop)) %>% +obs_diff <- gss_tbl |> + group_by(college) |> + summarize(prop = mean(college == "no degree")) |> + summarize(diff(prop)) |> pull() obs_z <- sqrt( @@ -29,13 +29,13 @@ obs_z <- sqrt( )$statistic ) -obs_diff_mean <- gss_tbl %>% - group_by(college) %>% - summarize(mean_sepal_width = mean(hours)) %>% - summarize(diff(mean_sepal_width)) %>% +obs_diff_mean <- gss_tbl |> + group_by(college) |> + summarize(mean_sepal_width = mean(hours)) |> + summarize(diff(mean_sepal_width)) |> pull() -obs_t <- gss_tbl %>% +obs_t <- gss_tbl |> observe(hours ~ college, order = c("no degree", "degree"), stat = "t") obs_F <- anova( @@ -48,48 +48,48 @@ test_that("visualize warns with bad arguments", { # warns when supplied deprecated args in what used to be # a valid way expect_snapshot( - res_ <- gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% + res_ <- gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "slope") |> visualize(obs_stat = obs_slope, direction = "right") ) # warning is the same when deprecated args are inappropriate expect_snapshot( - res_ <- gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% + res_ <- gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "slope") |> visualize(obs_stat = obs_slope) ) # same goes for CI args expect_snapshot( - res_ <- gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% + res_ <- gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "slope") |> visualize(endpoints = c(.01, .02)) ) # output should not change when supplied a deprecated argument - age_hours_df <- gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% + age_hours_df <- gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> calculate(stat = "slope") expect_snapshot( - res <- age_hours_df %>% + res <- age_hours_df |> visualize(endpoints = c(.01, .02)) ) expect_equal( - age_hours_df %>% + age_hours_df |> visualize(), res ) @@ -103,45 +103,45 @@ test_that("visualize basic tests", { # visualise also works expect_doppelganger("visualise", visualise(hours_resamp)) - expect_snapshot(error = TRUE, hours_resamp %>% visualize(bins = "yep")) + expect_snapshot(error = TRUE, hours_resamp |> visualize(bins = "yep")) expect_doppelganger( "vis-sim-right-1", - gss_tbl %>% - specify(age ~ hours) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "slope") %>% + gss_tbl |> + specify(age ~ hours) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "slope") |> visualize() + shade_p_value(obs_stat = obs_slope, direction = "right") ) # obs_stat not specified expect_snapshot_error( - gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("no degree", "degree")) %>% + gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in props", order = c("no degree", "degree")) |> visualize() + shade_p_value(direction = "both") ) expect_doppelganger( "vis-sim-both-1", - gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("no degree", "degree")) %>% + gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in props", order = c("no degree", "degree")) |> visualize() + shade_p_value(direction = "both", obs_stat = obs_diff) ) expect_snapshot( - res_vis_theor_none_1 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% + res_vis_theor_none_1 <- gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + calculate(stat = "z", order = c("no degree", "degree")) |> visualize(method = "theoretical") ) @@ -150,11 +150,11 @@ test_that("visualize basic tests", { # diff in props and z on different scales expect_snapshot( error = TRUE, - gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("no degree", "degree")) %>% + gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in props", order = c("no degree", "degree")) |> visualize(method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff) ) @@ -162,21 +162,21 @@ test_that("visualize basic tests", { expect_doppelganger( "vis-sim-none-1", expect_silent( - gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in props", order = c("no degree", "degree")) %>% + gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in props", order = c("no degree", "degree")) |> visualize() ) ) expect_warning( - vis_both_both_1 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% + vis_both_both_1 <- gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "z", order = c("no degree", "degree")) |> visualize(method = "both") + shade_p_value(direction = "both", obs_stat = obs_z) ) @@ -186,11 +186,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_both_2 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("degree", "no degree")) %>% + vis_both_both_2 <- gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "z", order = c("degree", "no degree")) |> visualize(method = "both") + shade_p_value(direction = "both", obs_stat = -obs_z) ) @@ -200,11 +200,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_left_1 <- gss_tbl %>% - specify(age ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("female", "male")) %>% + vis_both_left_1 <- gss_tbl |> + specify(age ~ sex) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "t", order = c("female", "male")) |> visualize(method = "both") + shade_p_value(direction = "left", obs_stat = obs_t) ) @@ -214,11 +214,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_left_1 <- gss_tbl %>% - specify(age ~ sex) %>% - hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "t", order = c("female", "male")) %>% + vis_theor_left_1 <- gss_tbl |> + specify(age ~ sex) |> + hypothesize(null = "independence") |> + # generate(reps = 100, type = "permute") |> + calculate(stat = "t", order = c("female", "male")) |> visualize(method = "theoretical") + shade_p_value(direction = "left", obs_stat = obs_t) ) @@ -228,11 +228,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_none_1 <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 1) %>% - generate(reps = 100) %>% - calculate(stat = "t") %>% + vis_both_none_1 <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 1) |> + generate(reps = 100) |> + calculate(stat = "t") |> visualize(method = "both") ) expect_doppelganger( @@ -241,9 +241,9 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_none_2 <- gss_tbl %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% + vis_theor_none_2 <- gss_tbl |> + specify(age ~ college) |> + hypothesize(null = "independence") |> visualize(method = "theoretical") ) expect_doppelganger( @@ -252,9 +252,9 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_none_3 <- gss_tbl %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + vis_theor_none_3 <- gss_tbl |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> visualize(method = "theoretical") ) expect_doppelganger( @@ -263,11 +263,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_right_1 <- gss_tbl %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% + vis_both_right_1 <- gss_tbl |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "F") |> visualize(method = "both") + shade_p_value(obs_stat = obs_F, direction = "right") ) @@ -277,11 +277,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_left_2 <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "z", order = c("no degree", "degree")) %>% + vis_both_left_2 <- gss_tbl |> + specify(sex ~ college, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "z", order = c("no degree", "degree")) |> visualize(method = "both") + shade_p_value(direction = "left", obs_stat = obs_z) ) @@ -291,11 +291,11 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_right_2 <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% + vis_both_right_2 <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "Chisq") |> visualize(method = "both") + shade_p_value(obs_stat = obs_F, direction = "right") ) @@ -305,10 +305,10 @@ test_that("visualize basic tests", { ) expect_warning( - vis_theor_right_1 <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% - # alculate(stat = "Chisq") %>% + vis_theor_right_1 <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> + # alculate(stat = "Chisq") |> visualize(method = "theoretical") + shade_p_value(obs_stat = obs_F, direction = "right") ) @@ -318,14 +318,14 @@ test_that("visualize basic tests", { ) expect_warning( - vis_both_none_2 <- gss_tbl %>% - specify(partyid ~ NULL) %>% + vis_both_none_2 <- gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) - ) %>% - generate(reps = 100, type = "draw") %>% - calculate(stat = "Chisq") %>% + ) |> + generate(reps = 100, type = "draw") |> + calculate(stat = "Chisq") |> visualize(method = "both") ) expect_doppelganger( @@ -336,26 +336,26 @@ test_that("visualize basic tests", { # traditional instead of theoretical expect_snapshot( error = TRUE, - gss_tbl %>% - specify(partyid ~ NULL) %>% + gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) - ) %>% - # generate(reps = 100, type = "draw") %>% - # calculate(stat = "Chisq") %>% + ) |> + # generate(reps = 100, type = "draw") |> + # calculate(stat = "Chisq") |> visualize(method = "traditional") ) expect_warning( - vis_theor_none_4 <- gss_tbl %>% - specify(partyid ~ NULL) %>% + vis_theor_none_4 <- gss_tbl |> + specify(partyid ~ NULL) |> hypothesize( null = "point", p = c("dem" = 0.4, "rep" = 0.4, "ind" = 0.2) - ) %>% - # generate(reps = 100, type = "draw") %>% - # calculate(stat = "Chisq") %>% + ) |> + # generate(reps = 100, type = "draw") |> + # calculate(stat = "Chisq") |> visualize(method = "theoretical") ) expect_doppelganger( @@ -365,11 +365,11 @@ test_that("visualize basic tests", { expect_doppelganger( "vis-sim-both-2", - gss_tbl %>% - specify(hours ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 10, type = "permute") %>% - calculate(stat = "diff in means", order = c("female", "male")) %>% + gss_tbl |> + specify(hours ~ sex) |> + hypothesize(null = "independence") |> + generate(reps = 10, type = "permute") |> + calculate(stat = "diff in means", order = c("female", "male")) |> visualize() + shade_p_value(direction = "both", obs_stat = obs_diff_mean) ) @@ -377,21 +377,21 @@ test_that("visualize basic tests", { # Produces warning first for not checking conditions but would also error expect_snapshot( error = TRUE, - gss_tbl %>% - specify(hours ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", order = c("female", "male")) %>% + gss_tbl |> + specify(hours ~ sex) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in means", order = c("female", "male")) |> visualize(method = "both") + shade_p_value(direction = "both", obs_stat = obs_diff_mean) ) expect_snapshot( - res_vis_theor_both_1 <- gss_tbl %>% - specify(hours ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "diff in means", order = c("female", "male")) %>% + res_vis_theor_both_1 <- gss_tbl |> + specify(hours ~ sex) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "diff in means", order = c("female", "male")) |> visualize(method = "theoretical") + shade_p_value(direction = "both", obs_stat = obs_diff_mean) ) @@ -399,11 +399,11 @@ test_that("visualize basic tests", { expect_doppelganger("vis-theor-both-1", res_vis_theor_both_1) expect_warning( - vis_theor_both_2 <- gss_tbl %>% - specify(sex ~ NULL, success = "female") %>% - hypothesize(null = "point", p = 0.8) %>% - # generate(reps = 100, type = "draw") %>% - # calculate(stat = "z") %>% + vis_theor_both_2 <- gss_tbl |> + specify(sex ~ NULL, success = "female") |> + hypothesize(null = "point", p = 0.8) |> + # generate(reps = 100, type = "draw") |> + # calculate(stat = "z") |> visualize(method = "theoretical") + shade_p_value(obs_stat = 2, direction = "both") ) @@ -414,11 +414,11 @@ test_that("visualize basic tests", { expect_doppelganger( "vis-sim-left-1", - gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 1.3) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 1.3) |> + generate(reps = 100, type = "bootstrap") |> + calculate(stat = "mean") |> visualize() + shade_p_value(direction = "left", obs_stat = mean(gss_tbl$hours)) ) @@ -433,27 +433,27 @@ test_that("mirror_obs_stat works", { test_that("obs_stat as a data.frame works", { skip_if(getRversion() < "4.1.0") - mean_petal_width <- gss_tbl %>% - specify(hours ~ NULL) %>% + mean_petal_width <- gss_tbl |> + specify(hours ~ NULL) |> calculate(stat = "mean") expect_doppelganger( "df-obs_stat-1", - gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 4) |> + generate(reps = 100, type = "bootstrap") |> + calculate(stat = "mean") |> visualize() + shade_p_value(obs_stat = mean_petal_width, direction = "both") ) mean_df_test <- data.frame(x = c(4.1, 1), y = c(1, 2)) expect_warning( - df_obs_stat_2 <- gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% - calculate(stat = "mean") %>% + df_obs_stat_2 <- gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 4) |> + generate(reps = 100, type = "bootstrap") |> + calculate(stat = "mean") |> visualize() + shade_p_value(obs_stat = mean_df_test, direction = "both") ) @@ -468,20 +468,20 @@ test_that('method = "both" behaves nicely', { expect_snapshot( error = TRUE, - gss_tbl %>% - specify(hours ~ NULL) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 100, type = "bootstrap") %>% - # calculate(stat = "mean") %>% + gss_tbl |> + specify(hours ~ NULL) |> + hypothesize(null = "point", mu = 4) |> + generate(reps = 100, type = "bootstrap") |> + # calculate(stat = "mean") |> visualize(method = "both") ) expect_snapshot( - res_method_both <- gss_tbl %>% - specify(hours ~ college) %>% - hypothesize(null = "point", mu = 4) %>% - generate(reps = 10, type = "bootstrap") %>% - calculate(stat = "t", order = c("no degree", "degree")) %>% + res_method_both <- gss_tbl |> + specify(hours ~ college) |> + hypothesize(null = "point", mu = 4) |> + generate(reps = 10, type = "bootstrap") |> + calculate(stat = "t", order = c("no degree", "degree")) |> visualize(method = "both") ) @@ -492,41 +492,41 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { skip_if(getRversion() < "4.1.0") expect_snapshot( - res_ <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% + res_ <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "Chisq") |> visualize(method = "both") + shade_p_value(obs_stat = 2, direction = "left") ) expect_snapshot( - res_ <- gss_tbl %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% + res_ <- gss_tbl |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> + calculate(stat = "F") |> visualize(method = "both") + shade_p_value(obs_stat = 2, direction = "two_sided") ) expect_snapshot( - res_ <- gss_tbl %>% - specify(sex ~ partyid, success = "female") %>% - hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "Chisq") %>% + res_ <- gss_tbl |> + specify(sex ~ partyid, success = "female") |> + hypothesize(null = "independence") |> + # generate(reps = 100, type = "permute") |> + calculate(stat = "Chisq") |> visualize(method = "theoretical") + shade_p_value(obs_stat = 2, direction = "left") ) expect_snapshot( - res_ <- gss_tbl %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - # generate(reps = 100, type = "permute") %>% - calculate(stat = "F") %>% + res_ <- gss_tbl |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + # generate(reps = 100, type = "permute") |> + calculate(stat = "F") |> visualize(method = "theoretical") + shade_p_value(obs_stat = 2, direction = "two_sided") ) @@ -535,31 +535,31 @@ test_that("Traditional right-tailed tests have warning if not right-tailed", { test_that("confidence interval plots are working", { skip_if(getRversion() < "4.1.0") - gss_tbl_boot <- gss_tbl %>% - specify(sex ~ college, success = "female") %>% - generate(reps = 100) %>% + gss_tbl_boot <- gss_tbl |> + specify(sex ~ college, success = "female") |> + generate(reps = 100) |> calculate(stat = "diff in props", order = c("no degree", "degree")) df_error <- tibble::tibble(col1 = rnorm(5), col2 = rnorm(5)) vec_error <- 1:10 - perc_ci <- gss_tbl_boot %>% get_ci() + perc_ci <- gss_tbl_boot |> get_ci() expect_snapshot( error = TRUE, - res_ <- gss_tbl_boot %>% + res_ <- gss_tbl_boot |> visualize() + shade_confidence_interval(endpoints = df_error) ) expect_snapshot( - res_ <- gss_tbl_boot %>% + res_ <- gss_tbl_boot |> visualize() + shade_confidence_interval(endpoints = vec_error) ) expect_snapshot( - res_ci_vis <- gss_tbl_boot %>% + res_ci_vis <- gss_tbl_boot |> visualize() + shade_confidence_interval(endpoints = perc_ci, direction = "between") ) @@ -571,19 +571,19 @@ test_that("title adapts to not hypothesis testing workflow", { skip_if(getRversion() < "4.1.0") set.seed(100) - gss_tbl_boot_tbl <- gss_tbl %>% - specify(response = hours) %>% + gss_tbl_boot_tbl <- gss_tbl |> + specify(response = hours) |> generate(reps = 100, type = "bootstrap") expect_doppelganger( "vis-no-hypothesize-sim", - gss_tbl_boot_tbl %>% - calculate(stat = "mean") %>% + gss_tbl_boot_tbl |> + calculate(stat = "mean") |> visualize() ) expect_snapshot( - res_vis_no_hypothesize_both <- gss_tbl_boot_tbl %>% - calculate(stat = "t") %>% + res_vis_no_hypothesize_both <- gss_tbl_boot_tbl |> + calculate(stat = "t") |> visualize(method = "both") ) @@ -607,9 +607,9 @@ test_that("warn_right_tail_test works", { test_that("visualize warns about removing `NaN`", { skip_if(getRversion() < "4.1.0") - dist <- gss_tbl_boot_tbl <- gss_tbl %>% - specify(response = hours) %>% - generate(reps = 10, type = "bootstrap") %>% + dist <- gss_tbl_boot_tbl <- gss_tbl |> + specify(response = hours) |> + generate(reps = 10, type = "bootstrap") |> calculate("mean") # A warning should be raised if there is NaN in a visualized dist @@ -630,14 +630,14 @@ test_that("visualize can handle multiple explanatory variables", { skip_if_not(identical(Sys.info()[["sysname"]], "Darwin")) # generate example objects - null_fits <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 20, type = "permute") %>% + null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 20, type = "permute") |> fit() - obs_fit <- gss %>% - specify(hours ~ age + college) %>% + obs_fit <- gss |> + specify(hours ~ age + college) |> fit() conf_ints <- @@ -650,28 +650,28 @@ test_that("visualize can handle multiple explanatory variables", { # visualize with multiple panes expect_doppelganger( "viz-fit-bare", - null_fits %>% + null_fits |> visualize() ) # with p values shaded -- test each possible direction expect_doppelganger( "viz-fit-p-val-both", - null_fits %>% + null_fits |> visualize() + shade_p_value(obs_stat = obs_fit, direction = "both") ) expect_doppelganger( "viz-fit-p-val-left", - null_fits %>% + null_fits |> visualize() + shade_p_value(obs_stat = obs_fit, direction = "left") ) expect_snapshot( res_viz_fit_p_val_right <- - null_fits %>% + null_fits |> visualize() + shade_p_value(obs_stat = obs_fit, direction = "right") ) @@ -684,7 +684,7 @@ test_that("visualize can handle multiple explanatory variables", { # with confidence intervals shaded expect_doppelganger( "viz-fit-conf-int", - null_fits %>% + null_fits |> visualize() + shade_confidence_interval(endpoints = conf_ints) ) @@ -692,10 +692,10 @@ test_that("visualize can handle multiple explanatory variables", { # with no hypothesize() expect_doppelganger( "viz-fit-no-h0", - gss %>% - specify(hours ~ age + college) %>% - generate(reps = 20, type = "bootstrap") %>% - fit() %>% + gss |> + specify(hours ~ age + college) |> + generate(reps = 20, type = "bootstrap") |> + fit() |> visualize() ) @@ -706,13 +706,13 @@ test_that("visualize can handle `assume()` output", { skip_if(getRversion() < "4.1.0") # F ---------------------------------------------------------------------- - obs_stat <- gss %>% - specify(age ~ partyid) %>% + obs_stat <- gss |> + specify(age ~ partyid) |> calculate(stat = "F") - null_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% + null_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume(distribution = "F") expect_doppelganger( @@ -726,18 +726,18 @@ test_that("visualize can handle `assume()` output", { ) # t (mean) ----------------------------------------------------------------- - obs_stat <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + obs_stat <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") - null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% + null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> assume("t") - obs_mean <- gss %>% - specify(response = hours) %>% + obs_mean <- gss |> + specify(response = hours) |> calculate(stat = "mean") ci <- @@ -791,17 +791,17 @@ test_that("visualize can handle `assume()` output", { ) # t (diff in means) ----------------------------------------------------------------- - obs_stat <- gss %>% - specify(hours ~ college) %>% + obs_stat <- gss |> + specify(hours ~ college) |> calculate(stat = "t", order = c("degree", "no degree")) - null_dist <- gss %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% + null_dist <- gss |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> assume("t") - obs_diff <- gss %>% - specify(hours ~ college) %>% + obs_diff <- gss |> + specify(hours ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) ci <- @@ -837,18 +837,18 @@ test_that("visualize can handle `assume()` output", { ) # z (prop) ----------------------------------------------------------------- - obs_stat <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% + obs_stat <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> calculate(stat = "z") - null_dist <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% + null_dist <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> assume("z") - obs_prop <- gss %>% - specify(response = sex, success = "female") %>% + obs_prop <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") ci <- @@ -884,17 +884,17 @@ test_that("visualize can handle `assume()` output", { ) # z (diff in props) -------------------------------------------------------- - obs_stat <- gss %>% - specify(college ~ sex, success = "no degree") %>% + obs_stat <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "z", order = c("female", "male")) - null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% + null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> assume("z") - obs_diff <- gss %>% - specify(college ~ sex, success = "no degree") %>% + obs_diff <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "diff in props", order = c("female", "male")) ci <- diff --git a/tests/testthat/test-wrappers.R b/tests/testthat/test-wrappers.R index 45f2f46b..7da07027 100644 --- a/tests/testthat/test-wrappers.R +++ b/tests/testthat/test-wrappers.R @@ -1,10 +1,10 @@ test_that("t_test works", { # Two Sample - expect_snapshot(res_ <- gss_tbl %>% t_test(hours ~ sex)) + expect_snapshot(res_ <- gss_tbl |> t_test(hours ~ sex)) expect_snapshot( error = TRUE, - gss_tbl %>% t_test(response = "hours", explanatory = "sex") + gss_tbl |> t_test(response = "hours", explanatory = "sex") ) new_way <- t_test(gss_tbl, hours ~ sex, order = c("male", "female")) @@ -14,8 +14,8 @@ test_that("t_test works", { explanatory = sex, order = c("male", "female") ) - old_way <- t.test(hours ~ sex, data = gss_tbl) %>% - broom::glance() %>% + old_way <- t.test(hours ~ sex, data = gss_tbl) |> + broom::glance() |> dplyr::select( statistic, t_df = parameter, @@ -35,12 +35,12 @@ test_that("t_test works", { expect_equal(new_way[["statistic"]], -new_way2[["statistic"]]) # One Sample - new_way <- gss_tbl %>% + new_way <- gss_tbl |> t_test(hours ~ NULL, mu = 0) - new_way_alt <- gss_tbl %>% + new_way_alt <- gss_tbl |> t_test(response = hours, mu = 0) - old_way <- t.test(x = gss_tbl$hours, mu = 0) %>% - broom::glance() %>% + old_way <- t.test(x = gss_tbl$hours, mu = 0) |> + broom::glance() |> dplyr::select( statistic, t_df = parameter, @@ -58,15 +58,15 @@ test_that("t_test works", { test_that("chisq_test works", { # maleependence expect_silent( - gss_tbl %>% + gss_tbl |> chisq_test(college ~ partyid) ) - new_way <- gss_tbl %>% + new_way <- gss_tbl |> chisq_test(college ~ partyid) - new_way_alt <- gss_tbl %>% + new_way_alt <- gss_tbl |> chisq_test(response = college, explanatory = partyid) - old_way <- chisq.test(x = table(gss_tbl$partyid, gss_tbl$college)) %>% - broom::glance() %>% + old_way <- chisq.test(x = table(gss_tbl$partyid, gss_tbl$college)) |> + broom::glance() |> dplyr::select(statistic, chisq_df = parameter, p_value = p.value) expect_equal(new_way, new_way_alt, tolerance = eps) @@ -74,15 +74,15 @@ test_that("chisq_test works", { # Goodness of Fit expect_silent( - gss_tbl %>% + gss_tbl |> chisq_test(response = partyid, p = c(.3, .4, .3)) ) - new_way <- gss_tbl %>% + new_way <- gss_tbl |> chisq_test(partyid ~ NULL, p = c(.3, .4, .3)) - new_way_alt <- gss_tbl %>% + new_way_alt <- gss_tbl |> chisq_test(response = partyid, p = c(.3, .4, .3)) - old_way <- chisq.test(x = table(gss_tbl$partyid), p = c(.3, .4, .3)) %>% - broom::glance() %>% + old_way <- chisq.test(x = table(gss_tbl$partyid), p = c(.3, .4, .3)) |> + broom::glance() |> dplyr::select(statistic, chisq_df = parameter, p_value = p.value) expect_equal(new_way, new_way_alt, tolerance = 1e-5) @@ -104,15 +104,15 @@ test_that("chisq_test works", { test_that("_stat functions work", { # Test of maleependence expect_snapshot( - res_ <- gss_tbl %>% chisq_stat(college ~ partyid) + res_ <- gss_tbl |> chisq_stat(college ~ partyid) ) - another_way <- gss_tbl %>% - chisq_test(college ~ partyid) %>% + another_way <- gss_tbl |> + chisq_test(college ~ partyid) |> dplyr::select(statistic) expect_snapshot( - obs_stat_way <- gss_tbl %>% chisq_stat(college ~ partyid) + obs_stat_way <- gss_tbl |> chisq_stat(college ~ partyid) ) one_more <- chisq.test( table(gss_tbl$partyid, gss_tbl$college) @@ -122,16 +122,16 @@ test_that("_stat functions work", { expect_equal(one_more, obs_stat_way, ignore_attr = TRUE) # Goodness of Fit - new_way <- gss_tbl %>% - chisq_test(partyid ~ NULL) %>% + new_way <- gss_tbl |> + chisq_test(partyid ~ NULL) |> dplyr::select(statistic) expect_snapshot( - obs_stat_way <- gss_tbl %>% + obs_stat_way <- gss_tbl |> chisq_stat(partyid ~ NULL) ) expect_snapshot( - obs_stat_way_alt <- gss_tbl %>% + obs_stat_way_alt <- gss_tbl |> chisq_stat(response = partyid) ) @@ -139,33 +139,33 @@ test_that("_stat functions work", { expect_equal(dplyr::pull(new_way), obs_stat_way_alt, ignore_attr = TRUE) # robust to the named vector - unordered_p <- gss_tbl %>% + unordered_p <- gss_tbl |> chisq_test(response = partyid, p = c(.2, .3, .5)) - ordered_p <- gss_tbl %>% + ordered_p <- gss_tbl |> chisq_test(response = partyid, p = c(ind = .2, rep = .3, dem = .5)) expect_equal(unordered_p, ordered_p, ignore_attr = TRUE) # Two sample t expect_snapshot( - res_ <- gss_tbl %>% + res_ <- gss_tbl |> t_stat( hours ~ sex, order = c("male", "female") ) ) - another_way <- gss_tbl %>% - t_test(hours ~ sex, order = c("male", "female")) %>% - dplyr::select(statistic) %>% + another_way <- gss_tbl |> + t_test(hours ~ sex, order = c("male", "female")) |> + dplyr::select(statistic) |> pull() expect_snapshot( - obs_stat_way <- gss_tbl %>% + obs_stat_way <- gss_tbl |> t_stat(hours ~ sex, order = c("male", "female")) ) expect_snapshot( - obs_stat_way_alt <- gss_tbl %>% + obs_stat_way_alt <- gss_tbl |> t_stat(response = hours, explanatory = sex, order = c("male", "female")) ) @@ -174,20 +174,20 @@ test_that("_stat functions work", { # One sample t expect_snapshot( - res_ <- gss_tbl %>% t_stat(hours ~ NULL) + res_ <- gss_tbl |> t_stat(hours ~ NULL) ) - another_way <- gss_tbl %>% - t_test(hours ~ NULL) %>% - dplyr::select(statistic) %>% + another_way <- gss_tbl |> + t_test(hours ~ NULL) |> + dplyr::select(statistic) |> pull() expect_snapshot( - obs_stat_way <- gss_tbl %>% + obs_stat_way <- gss_tbl |> t_stat(hours ~ NULL) ) expect_snapshot( - obs_stat_way_alt <- gss_tbl %>% + obs_stat_way_alt <- gss_tbl |> t_stat(response = hours) ) @@ -208,7 +208,7 @@ test_that("_stat functions work", { test_that("conf_int argument works", { expect_equal( names( - gss_tbl %>% + gss_tbl |> t_test(hours ~ sex, order = c("male", "female"), conf_int = FALSE) ), c("statistic", "t_df", "p_value", "alternative", "estimate"), @@ -216,7 +216,7 @@ test_that("conf_int argument works", { ) expect_equal( names( - gss_tbl %>% + gss_tbl |> t_test( hours ~ sex, order = c("male", "female"), @@ -235,7 +235,7 @@ test_that("conf_int argument works", { tolerance = 1e-5 ) - ci_test <- gss_tbl %>% + ci_test <- gss_tbl |> t_test( hours ~ sex, order = c("male", "female"), @@ -252,7 +252,7 @@ test_that("conf_int argument works", { expect_snapshot( error = TRUE, - res_ <- gss_tbl %>% + res_ <- gss_tbl |> t_test( hours ~ sex, order = c("female", "male"), @@ -263,15 +263,15 @@ test_that("conf_int argument works", { # Check that var.equal produces different results # Thanks for fmaleing this @EllaKaye! - gss_tbl_small <- gss_tbl %>% dplyr::slice(1:6, 90:100) + gss_tbl_small <- gss_tbl |> dplyr::slice(1:6, 90:100) expect_snapshot( - no_var_equal <- gss_tbl_small %>% + no_var_equal <- gss_tbl_small |> t_stat(hours ~ sex, order = c("female", "male")) ) expect_snapshot( - var_equal <- gss_tbl_small %>% + var_equal <- gss_tbl_small |> t_stat( hours ~ sex, order = c("female", "male"), @@ -281,12 +281,12 @@ test_that("conf_int argument works", { expect_false(no_var_equal == var_equal) - shortcut_no_var_equal <- gss_tbl_small %>% - specify(hours ~ sex) %>% + shortcut_no_var_equal <- gss_tbl_small |> + specify(hours ~ sex) |> calculate(stat = "t", order = c("female", "male")) - shortcut_var_equal <- gss_tbl_small %>% - specify(hours ~ sex) %>% + shortcut_var_equal <- gss_tbl_small |> + specify(hours ~ sex) |> calculate( stat = "t", order = c("female", "male"), @@ -308,7 +308,7 @@ bad_df <- data.frame(resp = 1:5, exp = letters[1:5]) bad_df2 <- data.frame(resp = letters[1:5], exp = 1:5) -df_l <- df %>% +df_l <- df |> dplyr::mutate(resp = dplyr::if_else(resp == "c", TRUE, FALSE)) test_that("two sample prop_test works", { @@ -368,7 +368,7 @@ test_that("two sample prop_test works", { }) # ...and some data for the one sample wrapper -df_1 <- df %>% +df_1 <- df |> select(resp) sum_df_1 <- table(df_1) @@ -478,55 +478,55 @@ test_that("prop_test z argument works as expected", { test_that("wrappers can handled ordered factors", { expect_equal( - gss_tbl %>% - dplyr::mutate(sex = factor(sex, ordered = FALSE)) %>% + gss_tbl |> + dplyr::mutate(sex = factor(sex, ordered = FALSE)) |> t_test(hours ~ sex, order = c("male", "female")), - gss_tbl %>% - dplyr::mutate(sex = factor(sex, ordered = TRUE)) %>% + gss_tbl |> + dplyr::mutate(sex = factor(sex, ordered = TRUE)) |> t_test(hours ~ sex, order = c("male", "female")) ) expect_snapshot( - ordered_t_1 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = TRUE)) %>% + ordered_t_1 <- gss_tbl |> + dplyr::mutate(income = factor(income, ordered = TRUE)) |> chisq_test(income ~ partyid) ) expect_snapshot( - ordered_f_1 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = FALSE)) %>% + ordered_f_1 <- gss_tbl |> + dplyr::mutate(income = factor(income, ordered = FALSE)) |> chisq_test(income ~ partyid) ) expect_equal(ordered_t_1, ordered_f_1) expect_snapshot( - ordered_t_2 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = TRUE)) %>% + ordered_t_2 <- gss_tbl |> + dplyr::mutate(income = factor(income, ordered = TRUE)) |> chisq_test(partyid ~ income) ) expect_snapshot( - ordered_f_2 <- gss_tbl %>% - dplyr::mutate(income = factor(income, ordered = FALSE)) %>% + ordered_f_2 <- gss_tbl |> + dplyr::mutate(income = factor(income, ordered = FALSE)) |> chisq_test(partyid ~ income) ) expect_equal(ordered_t_2, ordered_f_2) expect_equal( - df %>% - dplyr::mutate(resp = factor(resp, ordered = TRUE)) %>% + df |> + dplyr::mutate(resp = factor(resp, ordered = TRUE)) |> prop_test(resp ~ NULL, p = .5), - df %>% - dplyr::mutate(resp = factor(resp, ordered = FALSE)) %>% + df |> + dplyr::mutate(resp = factor(resp, ordered = FALSE)) |> prop_test(resp ~ NULL, p = .5) ) }) test_that("handles spaces in variable names (t_test)", { - gss_ <- gss %>% - tidyr::drop_na(college) %>% + gss_ <- gss |> + tidyr::drop_na(college) |> dplyr::mutate(`h o u r s` = hours) expect_equal( diff --git a/vignettes/anova.Rmd b/vignettes/anova.Rmd index cf063eb5..dac5fea4 100644 --- a/vignettes/anova.Rmd +++ b/vignettes/anova.Rmd @@ -32,7 +32,7 @@ To carry out an ANOVA, we'll examine the association between age and political p This is what the relationship looks like in the observed data: ```{r plot-f, echo = FALSE} -gss %>% +gss |> ggplot2::ggplot() + ggplot2::aes(x = partyid, y = age) + ggplot2::geom_boxplot() + @@ -49,9 +49,9 @@ First, to calculate the observed statistic, we can use `specify()` and `calculat ```{r calc-obs-stat-f, warning = FALSE, message = FALSE} # calculate the observed statistic -observed_f_statistic <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% +observed_f_statistic <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> calculate(stat = "F") ``` @@ -61,10 +61,10 @@ We can `generate()` an approximation of the null distribution using randomizatio ```{r generate-null-f, warning = FALSE, message = FALSE} # generate the null distribution using randomization -null_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "F") ``` @@ -74,7 +74,7 @@ To get a sense for what this distribution looks like, and where our observed sta ```{r visualize-f, warning = FALSE, message = FALSE} # visualize the null distribution and test statistic! -null_dist %>% +null_dist |> visualize() + shade_p_value(observed_f_statistic, direction = "greater") @@ -84,8 +84,8 @@ We could also visualize the observed statistic against the theoretical null dist ```{r visualize-f-theor, warning = FALSE, message = FALSE} # visualize the theoretical null distribution and test statistic! -null_dist_theory <- gss %>% - specify(age ~ partyid) %>% +null_dist_theory <- gss |> + specify(age ~ partyid) |> assume(distribution = "F") visualize(null_dist_theory) + @@ -97,7 +97,7 @@ To visualize both the randomization-based and theoretical null distributions to ```{r visualize-indep-both, warning = FALSE, message = FALSE} # visualize both null distributions and the test statistic! -null_dist %>% +null_dist |> visualize(method = "both") + shade_p_value(observed_f_statistic, direction = "greater") @@ -107,7 +107,7 @@ Either way, it looks like our observed test statistic would be quite unlikely if ```{r p-value-indep, warning = FALSE, message = FALSE} # calculate the p value from the observed statistic and null distribution -p_value <- null_dist %>% +p_value <- null_dist |> get_p_value(obs_stat = observed_f_statistic, direction = "greater") diff --git a/vignettes/chi_squared.Rmd b/vignettes/chi_squared.Rmd index 5dfb9f49..0d26802d 100644 --- a/vignettes/chi_squared.Rmd +++ b/vignettes/chi_squared.Rmd @@ -36,7 +36,7 @@ To carry out a chi-squared test of independence, we'll examine the association b This is what the relationship looks like in the sample data: ```{r plot-indep, echo = FALSE} -gss %>% +gss |> ggplot2::ggplot() + ggplot2::aes(x = finrela, fill = college) + ggplot2::geom_bar(position = "fill") + @@ -57,9 +57,9 @@ First, to calculate the observed statistic, we can use `specify()` and `calculat ```{r calc-obs-stat-indep, warning = FALSE, message = FALSE} # calculate the observed statistic -observed_indep_statistic <- gss %>% - specify(college ~ finrela) %>% - hypothesize(null = "independence") %>% +observed_indep_statistic <- gss |> + specify(college ~ finrela) |> + hypothesize(null = "independence") |> calculate(stat = "Chisq") ``` @@ -69,10 +69,10 @@ We can `generate()` the null distribution in one of two ways---using randomizati ```{r generate-null-indep, warning = FALSE, message = FALSE} # generate the null distribution using randomization -null_dist_sim <- gss %>% - specify(college ~ finrela) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist_sim <- gss |> + specify(college ~ finrela) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "Chisq") ``` @@ -80,8 +80,8 @@ Note that, in the line `specify(college ~ finrela)` above, we could use the equi ```{r generate-null-indep-t, warning = FALSE, message = FALSE} # generate the null distribution by theoretical approximation -null_dist_theory <- gss %>% - specify(college ~ finrela) %>% +null_dist_theory <- gss |> + specify(college ~ finrela) |> assume(distribution = "Chisq") ``` @@ -89,7 +89,7 @@ To get a sense for what these distributions look like, and where our observed st ```{r visualize-indep, warning = FALSE, message = FALSE} # visualize the null distribution and test statistic! -null_dist_sim %>% +null_dist_sim |> visualize() + shade_p_value(observed_indep_statistic, direction = "greater" @@ -100,9 +100,9 @@ We could also visualize the observed statistic against the theoretical null dist ```{r visualize-indep-theor, warning = FALSE, message = FALSE} # visualize the theoretical null distribution and test statistic! -gss %>% - specify(college ~ finrela) %>% - assume(distribution = "Chisq") %>% +gss |> + specify(college ~ finrela) |> + assume(distribution = "Chisq") |> visualize() + shade_p_value(observed_indep_statistic, direction = "greater" @@ -113,7 +113,7 @@ To visualize both the randomization-based and theoretical null distributions to ```{r visualize-indep-both, warning = FALSE, message = FALSE} # visualize both null distributions and the test statistic! -null_dist_sim %>% +null_dist_sim |> visualize(method = "both") + shade_p_value(observed_indep_statistic, direction = "greater" @@ -124,7 +124,7 @@ Either way, it looks like our observed test statistic would be quite unlikely if ```{r p-value-indep, warning = FALSE, message = FALSE} # calculate the p value from the observed statistic and null distribution -p_value_independence <- null_dist_sim %>% +p_value_independence <- null_dist_sim |> get_p_value( obs_stat = observed_indep_statistic, direction = "greater" @@ -153,7 +153,7 @@ chisq_test(gss, college ~ finrela) Now, moving on to a chi-squared goodness of fit test, we'll take a look at the self-identified income class of our survey respondents. Suppose our null hypothesis is that `finrela` follows a uniform distribution (i.e. there's actually an equal number of people that describe their income as far below average, below average, average, above average, far above average, or that don't know their income.) The graph below represents this hypothesis: ```{r gof-plot, echo = FALSE} -gss %>% +gss |> ggplot2::ggplot() + ggplot2::aes(x = finrela) + ggplot2::geom_bar() + @@ -170,8 +170,8 @@ First, to carry out this hypothesis test, we would calculate our observed statis ```{r observed-gof-statistic, warning = FALSE, message = FALSE} # calculating the null distribution -observed_gof_statistic <- gss %>% - specify(response = finrela) %>% +observed_gof_statistic <- gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -182,7 +182,7 @@ observed_gof_statistic <- gss %>% "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% + ) |> calculate(stat = "Chisq") ``` @@ -191,8 +191,8 @@ The observed statistic is `r observed_gof_statistic`. Now, generating a null dis ```{r null-distribution-gof, warning = FALSE, message = FALSE} # generating a null distribution, assuming each income class is equally likely -null_dist_gof <- gss %>% - specify(response = finrela) %>% +null_dist_gof <- gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -203,8 +203,8 @@ null_dist_gof <- gss %>% "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% - generate(reps = 1000, type = "draw") %>% + ) |> + generate(reps = 1000, type = "draw") |> calculate(stat = "Chisq") ``` @@ -212,7 +212,7 @@ Again, to get a sense for what these distributions look like, and where our obse ```{r visualize-indep-gof, warning = FALSE, message = FALSE} # visualize the null distribution and test statistic! -null_dist_gof %>% +null_dist_gof |> visualize() + shade_p_value(observed_gof_statistic, direction = "greater" @@ -223,7 +223,7 @@ This statistic seems like it would be quite unlikely if income class self-identi ```{r get-p-value-gof, warning = FALSE, message = FALSE} # calculate the p-value -p_value_gof <- null_dist_gof %>% +p_value_gof <- null_dist_gof |> get_p_value( observed_gof_statistic, direction = "greater" @@ -256,5 +256,3 @@ chisq_test( ) ) ``` - - diff --git a/vignettes/infer.Rmd b/vignettes/infer.Rmd index 74df0de9..eba0648a 100644 --- a/vignettes/infer.Rmd +++ b/vignettes/infer.Rmd @@ -51,15 +51,15 @@ Each row is an individual survey response, containing some basic demographic inf The `specify()` function can be used to specify which of the variables in the dataset you're interested in. If you're only interested in, say, the `age` of the respondents, you might write: ```{r specify-example, warning = FALSE, message = FALSE} -gss %>% +gss |> specify(response = age) ``` On the front-end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. Checking the class of this object, though: ```{r specify-one, warning = FALSE, message = FALSE} -gss %>% - specify(response = age) %>% +gss |> + specify(response = age) |> class() ``` @@ -69,11 +69,11 @@ If you're interested in two variables--`age` and `partyid`, for example--you can ```{r specify-two, warning = FALSE, message = FALSE} # as a formula -gss %>% +gss |> specify(age ~ partyid) # with the named arguments -gss %>% +gss |> specify(response = age, explanatory = partyid) ``` @@ -81,7 +81,7 @@ If you're doing inference on one proportion or a difference in proportions, you ```{r specify-success, warning = FALSE, message = FALSE} # specifying for inference on proportions -gss %>% +gss |> specify(response = college, success = "degree") ``` @@ -90,16 +90,16 @@ gss %>% The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`: ```{r hypothesize-independence, warning = FALSE, message = FALSE} -gss %>% - specify(college ~ partyid, success = "degree") %>% +gss |> + specify(college ~ partyid, success = "degree") |> hypothesize(null = "independence") ``` If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write: ```{r hypothesize-40-hr-week, warning = FALSE, message = FALSE} -gss %>% - specify(response = hours) %>% +gss |> + specify(response = hours) |> hypothesize(null = "point", mu = 40) ``` @@ -118,9 +118,9 @@ Continuing on with our example above, about the average number of hours worked a ```{r generate-point, warning = FALSE, message = FALSE} set.seed(1) -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% +gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> generate(reps = 1000, type = "bootstrap") ``` @@ -131,9 +131,9 @@ Note that, before `generate()`ing, we've set the seed for random number generati To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 1000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age: ```{r generate-permute, warning = FALSE, message = FALSE} -gss %>% - specify(partyid ~ age) %>% - hypothesize(null = "independence") %>% +gss |> + specify(partyid ~ age) |> + hypothesize(null = "independence") |> generate(reps = 1000, type = "permute") ``` @@ -142,20 +142,20 @@ gss %>% `calculate()` calculates summary statistics from the output of infer core functions. The function takes in a `stat` argument, which is currently one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "t", "z", "slope", or "correlation". For example, continuing our example above to calculate the null distribution of mean hours worked per week: ```{r calculate-point, warning = FALSE, message = FALSE} -gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1000, type = "bootstrap") %>% +gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") ``` The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or t and z statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write: ```{r specify-diff-in-means, warning = FALSE, message = FALSE} -gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) ``` @@ -167,15 +167,15 @@ To illustrate, we'll go back to the example of determining whether the mean numb ```{r utilities-examples} # find the point estimate -obs_mean <- gss %>% - specify(response = hours) %>% +obs_mean <- gss |> + specify(response = hours) |> calculate(stat = "mean") # generate a null distribution -null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1000, type = "bootstrap") %>% +null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") ``` @@ -184,14 +184,14 @@ Our point estimate `r obs_mean` seems *pretty* close to 40, but a little bit dif We could initially just visualize the null distribution. ```{r visualize, warning = FALSE, message = FALSE} -null_dist %>% +null_dist |> visualize() ``` Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this. ```{r visualize2, warning = FALSE, message = FALSE} -null_dist %>% +null_dist |> visualize() + shade_p_value(obs_stat = obs_mean, direction = "two-sided") ``` @@ -200,7 +200,7 @@ Notice that infer has also shaded the regions of the null distribution that are ```{r get_p_value, warning = FALSE, message = FALSE} # get a two-tailed p-value -p_value <- null_dist %>% +p_value <- null_dist |> get_p_value(obs_stat = obs_mean, direction = "two-sided") p_value @@ -213,13 +213,13 @@ To get a confidence interval around our estimate, we can write: ```{r get_conf, message = FALSE, warning = FALSE} # generate a distribution like the null distribution, # though exclude the null hypothesis from the pipeline -boot_dist <- gss %>% - specify(response = hours) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(response = hours) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") # start with the bootstrap distribution -ci <- boot_dist %>% +ci <- boot_dist |> # calculate the confidence interval around the point estimate get_confidence_interval( point_estimate = obs_mean, @@ -235,7 +235,7 @@ ci As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\alpha = .05$. To see this interval represented visually, we can use the `shade_confidence_interval()` utility: ```{r visualize-ci, warning = FALSE, message = FALSE} -boot_dist %>% +boot_dist |> visualize() + shade_confidence_interval(endpoints = ci) ``` @@ -248,9 +248,9 @@ Generally, to find a null distribution using theory-based methods, use the same ```{r, message = FALSE, warning = FALSE} # calculate an observed t statistic -obs_t <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% +obs_t <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") ``` @@ -258,8 +258,8 @@ Then, to define a theoretical $t$ distribution, we could write: ```{r, message = FALSE, warning = FALSE} # switch out calculate with assume to define a distribution -t_dist <- gss %>% - specify(response = hours) %>% +t_dist <- gss |> + specify(response = hours) |> assume(distribution = "t") ``` @@ -303,18 +303,18 @@ To accommodate randomization-based inference with multiple explanatory variables As an example, suppose that we want to fit `hours` worked per week using the respondent `age` and `college` completion status. We could first begin by fitting a linear model to the observed data. ```{r} -observed_fit <- gss %>% - specify(hours ~ age + college) %>% +observed_fit <- gss |> + specify(hours ~ age + college) |> fit() ``` Now, to generate null distributions for each of these terms, we can fit 1000 models to resamples of the `gss` dataset, where the response `hours` is permuted in each. Note that this code is the same as the above except for the addition of the `hypothesize()` and `generate()` step. ```{r} -null_fits <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> fit() null_fits diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index a8dabe58..fe6911a2 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -43,25 +43,25 @@ dplyr::glimpse(gss) Calculating the observed statistic, ```{r} -x_bar <- gss %>% - specify(response = hours) %>% +x_bar <- gss |> + specify(response = hours) |> calculate(stat = "mean") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -x_bar <- gss %>% +x_bar <- gss |> observe(response = hours, stat = "mean") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000) |> calculate(stat = "mean") ``` @@ -75,7 +75,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = x_bar, direction = "two-sided") ``` @@ -84,34 +84,34 @@ null_dist %>% Calculating the observed statistic, ```{r} -t_bar <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% +t_bar <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -t_bar <- gss %>% +t_bar <- gss |> observe(response = hours, null = "point", mu = 40, stat = "t") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000) |> calculate(stat = "t") ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(response = hours) %>% +null_dist_theory <- gss |> + specify(response = hours) |> assume("t") ``` @@ -141,14 +141,14 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = t_bar, direction = "two-sided") ``` Alternatively, using the `t_test()` wrapper: ```{r} -gss %>% +gss |> t_test(response = hours, mu = 40) ``` @@ -159,25 +159,25 @@ gss %>% Calculating the observed statistic, ```{r} -x_tilde <- gss %>% - specify(response = age) %>% +x_tilde <- gss |> + specify(response = age) |> calculate(stat = "median") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -x_tilde <- gss %>% +x_tilde <- gss |> observe(response = age, stat = "median") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = age) %>% - hypothesize(null = "point", med = 40) %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(response = age) |> + hypothesize(null = "point", med = 40) |> + generate(reps = 1000) |> calculate(stat = "median") ``` @@ -191,7 +191,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = x_tilde, direction = "two-sided") ``` @@ -204,13 +204,13 @@ Suppose that each of these survey respondents had provided the number of `hours` ```{r} set.seed(1) -gss_paired <- gss %>% +gss_paired <- gss |> mutate( hours_previous = hours + 5 - rpois(nrow(.), 4.8), diff = hours - hours_previous ) -gss_paired %>% +gss_paired |> select(hours, hours_previous, diff) ``` @@ -221,25 +221,25 @@ infer supports paired hypothesis testing via the `null = "paired independence"` Calculating the observed statistic, ```{r} -x_tilde <- gss_paired %>% - specify(response = diff) %>% +x_tilde <- gss_paired |> + specify(response = diff) |> calculate(stat = "mean") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -x_tilde <- gss_paired %>% +x_tilde <- gss_paired |> observe(response = diff, stat = "mean") ``` Then, generating the null distribution, ```{r} -null_dist <- gss_paired %>% - specify(response = diff) %>% - hypothesize(null = "paired independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss_paired |> + specify(response = diff) |> + hypothesize(null = "paired independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "mean") ``` @@ -255,7 +255,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = x_tilde, direction = "two-sided") ``` @@ -264,25 +264,25 @@ null_dist %>% Calculating the observed statistic, ```{r} -p_hat <- gss %>% - specify(response = sex, success = "female") %>% +p_hat <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -p_hat <- gss %>% +p_hat <- gss |> observe(response = sex, success = "female", stat = "prop") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> + generate(reps = 1000) |> calculate(stat = "prop") ``` @@ -296,18 +296,18 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = p_hat, direction = "two-sided") ``` Note that logical variables will be coerced to factors: ```{r} -null_dist <- gss %>% - dplyr::mutate(is_female = (sex == "female")) %>% - specify(response = is_female, success = "TRUE") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000) %>% +null_dist <- gss |> + dplyr::mutate(is_female = (sex == "female")) |> + specify(response = is_female, success = "TRUE") |> + hypothesize(null = "point", p = .5) |> + generate(reps = 1000) |> calculate(stat = "prop") ``` @@ -316,26 +316,26 @@ null_dist <- gss %>% Calculating the observed statistic, ```{r} -p_hat <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% +p_hat <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> calculate(stat = "z") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -p_hat <- gss %>% +p_hat <- gss |> observe(response = sex, success = "female", null = "point", p = .5, stat = "z") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = sex, success = "female") %>% - hypothesize(null = "point", p = .5) %>% - generate(reps = 1000, type = "draw") %>% +null_dist <- gss |> + specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> + generate(reps = 1000, type = "draw") |> calculate(stat = "z") ``` @@ -349,7 +349,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = p_hat, direction = "two-sided") ``` @@ -370,15 +370,15 @@ The `infer` package provides several statistics to work with data of this type. Calculating the observed statistic, ```{r} -d_hat <- gss %>% - specify(college ~ sex, success = "no degree") %>% +d_hat <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "diff in props", order = c("female", "male")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe( college ~ sex, success = "no degree", @@ -389,10 +389,10 @@ d_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000) |> calculate(stat = "diff in props", order = c("female", "male")) ``` @@ -406,7 +406,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = d_hat, direction = "two-sided") ``` @@ -415,15 +415,15 @@ infer also provides functionality to calculate ratios of proportions. The workfl Calculating the observed statistic, ```{r} -r_hat <- gss %>% - specify(college ~ sex, success = "no degree") %>% +r_hat <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "ratio of props", order = c("female", "male")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -r_hat <- gss %>% +r_hat <- gss |> observe(college ~ sex, success = "no degree", stat = "ratio of props", order = c("female", "male")) ``` @@ -431,10 +431,10 @@ r_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000) |> calculate(stat = "ratio of props", order = c("female", "male")) ``` @@ -448,7 +448,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = r_hat, direction = "two-sided") ``` @@ -457,18 +457,18 @@ In addition, the package provides functionality to calculate odds ratios. The wo Calculating the observed statistic, ```{r} -or_hat <- gss %>% - specify(college ~ sex, success = "no degree") %>% +or_hat <- gss |> + specify(college ~ sex, success = "no degree") |> calculate(stat = "odds ratio", order = c("female", "male")) ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000) |> calculate(stat = "odds ratio", order = c("female", "male")) ``` @@ -482,7 +482,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = or_hat, direction = "two-sided") ``` @@ -491,16 +491,16 @@ null_dist %>% Finding the standardized observed statistic, ```{r} -z_hat <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% +z_hat <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> calculate(stat = "z", order = c("female", "male")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -z_hat <- gss %>% +z_hat <- gss |> observe(college ~ sex, success = "no degree", stat = "z", order = c("female", "male")) ``` @@ -508,18 +508,18 @@ z_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(college ~ sex, success = "no degree") %>% - hypothesize(null = "independence") %>% - generate(reps = 1000) %>% +null_dist <- gss |> + specify(college ~ sex, success = "no degree") |> + hypothesize(null = "independence") |> + generate(reps = 1000) |> calculate(stat = "z", order = c("female", "male")) ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(college ~ sex, success = "no degree") %>% +null_dist_theory <- gss |> + specify(college ~ sex, success = "no degree") |> assume("z") ``` @@ -549,7 +549,7 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = z_hat, direction = "two-sided") ``` @@ -570,8 +570,8 @@ Calculating the observed statistic, Note the need to add in the hypothesized values here to compute the observed statistic. ```{r} -Chisq_hat <- gss %>% - specify(response = finrela) %>% +Chisq_hat <- gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -582,14 +582,14 @@ Chisq_hat <- gss %>% "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% + ) |> calculate(stat = "Chisq") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -Chisq_hat <- gss %>% +Chisq_hat <- gss |> observe( response = finrela, null = "point", @@ -608,8 +608,8 @@ Chisq_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(response = finrela) %>% +null_dist <- gss |> + specify(response = finrela) |> hypothesize( null = "point", p = c( @@ -620,16 +620,16 @@ null_dist <- gss %>% "far above average" = 1 / 6, "DK" = 1 / 6 ) - ) %>% - generate(reps = 1000, type = "draw") %>% + ) |> + generate(reps = 1000, type = "draw") |> calculate(stat = "Chisq") ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(response = finrela) %>% +null_dist_theory <- gss |> + specify(response = finrela) |> assume("Chisq") ``` @@ -659,7 +659,7 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` @@ -685,34 +685,34 @@ chisq_test( Calculating the observed statistic, ```{r} -Chisq_hat <- gss %>% - specify(formula = finrela ~ sex) %>% - hypothesize(null = "independence") %>% +Chisq_hat <- gss |> + specify(formula = finrela ~ sex) |> + hypothesize(null = "independence") |> calculate(stat = "Chisq") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -Chisq_hat <- gss %>% +Chisq_hat <- gss |> observe(formula = finrela ~ sex, stat = "Chisq") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(finrela ~ sex) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(finrela ~ sex) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "Chisq") ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(finrela ~ sex) %>% +null_dist_theory <- gss |> + specify(finrela ~ sex) |> assume(distribution = "Chisq") ``` @@ -742,14 +742,14 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = Chisq_hat, direction = "greater") ``` Alternatively, using the wrapper to carry out the test, ```{r} -gss %>% +gss |> chisq_test(formula = finrela ~ sex) ``` @@ -758,15 +758,15 @@ gss %>% Calculating the observed statistic, ```{r} -d_hat <- gss %>% - specify(age ~ college) %>% +d_hat <- gss |> + specify(age ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe(age ~ college, stat = "diff in means", order = c("degree", "no degree")) ``` @@ -774,10 +774,10 @@ d_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "diff in means", order = c("degree", "no degree")) ``` @@ -791,7 +791,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = d_hat, direction = "two-sided") ``` @@ -800,16 +800,16 @@ null_dist %>% Finding the standardized observed statistic, ```{r} -t_hat <- gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% +t_hat <- gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> calculate(stat = "t", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -t_hat <- gss %>% +t_hat <- gss |> observe(age ~ college, stat = "t", order = c("degree", "no degree")) ``` @@ -817,18 +817,18 @@ t_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(age ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "t", order = c("degree", "no degree")) ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(age ~ college) %>% +null_dist_theory <- gss |> + specify(age ~ college) |> assume("t") ``` @@ -858,7 +858,7 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = t_hat, direction = "two-sided") ``` @@ -869,15 +869,15 @@ Note the similarities in this plot and the previous one. Calculating the observed statistic, ```{r} -d_hat <- gss %>% - specify(age ~ college) %>% +d_hat <- gss |> + specify(age ~ college) |> calculate(stat = "diff in medians", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe(age ~ college, stat = "diff in medians", order = c("degree", "no degree")) ``` @@ -885,10 +885,10 @@ d_hat <- gss %>% Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(age ~ college) %>% # alt: response = age, explanatory = season - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ college) |> # alt: response = age, explanatory = season + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "diff in medians", order = c("degree", "no degree")) ``` @@ -902,7 +902,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = d_hat, direction = "two-sided") ``` @@ -911,34 +911,34 @@ null_dist %>% Calculating the observed statistic, ```{r} -F_hat <- gss %>% - specify(age ~ partyid) %>% +F_hat <- gss |> + specify(age ~ partyid) |> calculate(stat = "F") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -F_hat <- gss %>% +F_hat <- gss |> observe(age ~ partyid, stat = "F") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "F") ``` Alternatively, finding the null distribution using theoretical methods using the `assume()` verb, ```{r} -null_dist_theory <- gss %>% - specify(age ~ partyid) %>% - hypothesize(null = "independence") %>% +null_dist_theory <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> assume(distribution = "F") ``` @@ -968,7 +968,7 @@ Note that the above code makes use of the randomization-based null distribution. Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = F_hat, direction = "greater") ``` @@ -977,25 +977,25 @@ null_dist %>% Calculating the observed statistic, ```{r} -slope_hat <- gss %>% - specify(hours ~ age) %>% +slope_hat <- gss |> + specify(hours ~ age) |> calculate(stat = "slope") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -slope_hat <- gss %>% +slope_hat <- gss |> observe(hours ~ age, stat = "slope") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "slope") ``` @@ -1009,7 +1009,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = slope_hat, direction = "two-sided") ``` @@ -1018,25 +1018,25 @@ null_dist %>% Calculating the observed statistic, ```{r} -correlation_hat <- gss %>% - specify(hours ~ age) %>% +correlation_hat <- gss |> + specify(hours ~ age) |> calculate(stat = "correlation") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -correlation_hat <- gss %>% +correlation_hat <- gss |> observe(hours ~ age, stat = "correlation") ``` Then, generating the null distribution, ```{r} -null_dist <- gss %>% - specify(hours ~ age) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(hours ~ age) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "correlation") ``` @@ -1050,7 +1050,7 @@ visualize(null_dist) + Calculating the p-value from the null distribution and observed statistic, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = correlation_hat, direction = "two-sided") ``` @@ -1063,28 +1063,28 @@ Not currently implemented since $t$ could refer to standardized slope or standar Calculating the observed fit, ```{r} -obs_fit <- gss %>% - specify(hours ~ age + college) %>% +obs_fit <- gss |> + specify(hours ~ age + college) |> fit() ``` Generating a distribution of fits with the response variable permuted, ```{r} -null_dist <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> fit() ``` Generating a distribution of fits where each explanatory variable is permuted independently, ```{r} -null_dist2 <- gss %>% - specify(hours ~ age + college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute", variables = c(age, college)) %>% +null_dist2 <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute", variables = c(age, college)) |> fit() ``` @@ -1098,7 +1098,7 @@ visualize(null_dist) + Calculating p-values from the null distribution and observed fit, ```{r} -null_dist %>% +null_dist |> get_p_value(obs_stat = obs_fit, direction = "two-sided") ``` @@ -1111,24 +1111,24 @@ Note that this `fit()`-based workflow can be applied to use cases with differing Finding the observed statistic, ```{r} -x_bar <- gss %>% - specify(response = hours) %>% +x_bar <- gss |> + specify(response = hours) |> calculate(stat = "mean") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -x_bar <- gss %>% +x_bar <- gss |> observe(response = hours, stat = "mean") ``` Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(response = hours) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(response = hours) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") ``` @@ -1157,8 +1157,8 @@ visualize(boot_dist) + Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution, ```{r} -sampling_dist <- gss %>% - specify(response = hours) %>% +sampling_dist <- gss |> + specify(response = hours) |> assume(distribution = "t") ``` @@ -1180,16 +1180,16 @@ Note that the `t` distribution is recentered and rescaled to lie on the scale of Finding the observed statistic, ```{r} -t_hat <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% +t_hat <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -t_hat <- gss %>% +t_hat <- gss |> observe(response = hours, null = "point", mu = 40, stat = "t") @@ -1198,9 +1198,9 @@ t_hat <- gss %>% Then, generating the bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(response = hours) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(response = hours) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "t") ``` @@ -1220,7 +1220,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = t_hat) visualize(boot_dist) + @@ -1234,24 +1234,24 @@ See the above subsection (one mean) for a theory-based approach. Note that infer Finding the observed statistic, ```{r} -p_hat <- gss %>% - specify(response = sex, success = "female") %>% +p_hat <- gss |> + specify(response = sex, success = "female") |> calculate(stat = "prop") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -p_hat <- gss %>% +p_hat <- gss |> observe(response = sex, success = "female", stat = "prop") ``` Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(response = sex, success = "female") %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(response = sex, success = "female") |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "prop") ``` @@ -1271,7 +1271,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = p_hat) visualize(boot_dist) + @@ -1281,8 +1281,8 @@ visualize(boot_dist) + Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution, ```{r} -sampling_dist <- gss %>% - specify(response = sex, success = "female") %>% +sampling_dist <- gss |> + specify(response = sex, success = "female") |> assume(distribution = "z") ``` @@ -1308,15 +1308,15 @@ See the above subsection (one proportion) for a theory-based approach. Finding the observed statistic, ```{r} -d_hat <- gss %>% - specify(hours ~ college) %>% +d_hat <- gss |> + specify(hours ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe(hours ~ college, stat = "diff in means", order = c("degree", "no degree")) ``` @@ -1324,9 +1324,9 @@ d_hat <- gss %>% Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ college) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ college) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "diff in means", order = c("degree", "no degree")) ``` @@ -1346,7 +1346,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = d_hat) visualize(boot_dist) + @@ -1356,8 +1356,8 @@ visualize(boot_dist) + Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution, ```{r} -sampling_dist <- gss %>% - specify(hours ~ college) %>% +sampling_dist <- gss |> + specify(hours ~ college) |> assume(distribution = "t") ``` @@ -1379,15 +1379,15 @@ Note that the `t` distribution is recentered and rescaled to lie on the scale of Finding the observed statistic, ```{r} -d_hat <- gss %>% - specify(hours ~ college) %>% +d_hat <- gss |> + specify(hours ~ college) |> calculate(stat = "ratio of means", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe(hours ~ college, stat = "ratio of means", order = c("degree", "no degree")) ``` @@ -1395,9 +1395,9 @@ d_hat <- gss %>% Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ college) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ college) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "ratio of means", order = c("degree", "no degree")) ``` @@ -1417,7 +1417,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = d_hat) visualize(boot_dist) + @@ -1429,15 +1429,15 @@ visualize(boot_dist) + Finding the standardized point estimate, ```{r} -t_hat <- gss %>% - specify(hours ~ college) %>% +t_hat <- gss |> + specify(hours ~ college) |> calculate(stat = "t", order = c("degree", "no degree")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -t_hat <- gss %>% +t_hat <- gss |> observe(hours ~ college, stat = "t", order = c("degree", "no degree")) ``` @@ -1445,9 +1445,9 @@ t_hat <- gss %>% Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ college) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ college) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "t", order = c("degree", "no degree")) ``` @@ -1467,7 +1467,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = t_hat) visualize(boot_dist) + @@ -1481,15 +1481,15 @@ See the above subsection (diff in means) for a theory-based approach. `infer` do Finding the observed statistic, ```{r} -d_hat <- gss %>% - specify(college ~ sex, success = "degree") %>% +d_hat <- gss |> + specify(college ~ sex, success = "degree") |> calculate(stat = "diff in props", order = c("female", "male")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -d_hat <- gss %>% +d_hat <- gss |> observe(college ~ sex, success = "degree", stat = "diff in props", order = c("female", "male")) ``` @@ -1497,9 +1497,9 @@ d_hat <- gss %>% Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(college ~ sex, success = "degree") %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(college ~ sex, success = "degree") |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "diff in props", order = c("female", "male")) ``` @@ -1519,7 +1519,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = d_hat) visualize(boot_dist) + @@ -1529,8 +1529,8 @@ visualize(boot_dist) + Instead of a simulation-based bootstrap distribution, we can also define a theory-based sampling distribution, ```{r} -sampling_dist <- gss %>% - specify(college ~ sex, success = "degree") %>% +sampling_dist <- gss |> + specify(college ~ sex, success = "degree") |> assume(distribution = "z") ``` @@ -1552,15 +1552,15 @@ Note that the `z` distribution is recentered and rescaled to lie on the scale of Finding the standardized point estimate, ```{r} -z_hat <- gss %>% - specify(college ~ sex, success = "degree") %>% +z_hat <- gss |> + specify(college ~ sex, success = "degree") |> calculate(stat = "z", order = c("female", "male")) ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -z_hat <- gss %>% +z_hat <- gss |> observe(college ~ sex, success = "degree", stat = "z", order = c("female", "male")) ``` @@ -1568,9 +1568,9 @@ z_hat <- gss %>% Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(college ~ sex, success = "degree") %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(college ~ sex, success = "degree") |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "z", order = c("female", "male")) ``` @@ -1590,7 +1590,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = z_hat) visualize(boot_dist) + @@ -1604,24 +1604,24 @@ See the above subsection (diff in props) for a theory-based approach. Finding the observed statistic, ```{r} -slope_hat <- gss %>% - specify(hours ~ age) %>% +slope_hat <- gss |> + specify(hours ~ age) |> calculate(stat = "slope") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -slope_hat <- gss %>% +slope_hat <- gss |> observe(hours ~ age, stat = "slope") ``` Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ age) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ age) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "slope") ``` @@ -1641,7 +1641,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = slope_hat) visualize(boot_dist) + @@ -1653,24 +1653,24 @@ visualize(boot_dist) + Finding the observed statistic, ```{r} -correlation_hat <- gss %>% - specify(hours ~ age) %>% +correlation_hat <- gss |> + specify(hours ~ age) |> calculate(stat = "correlation") ``` Alternatively, using the `observe()` wrapper to calculate the observed statistic, ```{r} -correlation_hat <- gss %>% +correlation_hat <- gss |> observe(hours ~ age, stat = "correlation") ``` Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ age) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ age) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "correlation") ``` @@ -1690,7 +1690,7 @@ visualize(boot_dist) + Alternatively, use the bootstrap distribution to find a confidence interval using the standard error, ```{r} -standard_error_ci <- boot_dist %>% +standard_error_ci <- boot_dist |> get_ci(type = "se", point_estimate = correlation_hat) visualize(boot_dist) + @@ -1706,17 +1706,17 @@ Not currently implemented since $t$ could refer to standardized slope or standar Calculating the observed fit, ```{r} -obs_fit <- gss %>% - specify(hours ~ age + college) %>% +obs_fit <- gss |> + specify(hours ~ age + college) |> fit() ``` Then, generating a bootstrap distribution, ```{r} -boot_dist <- gss %>% - specify(hours ~ age + college) %>% - generate(reps = 1000, type = "bootstrap") %>% +boot_dist <- gss |> + specify(hours ~ age + college) |> + generate(reps = 1000, type = "bootstrap") |> fit() ``` diff --git a/vignettes/paired.Rmd b/vignettes/paired.Rmd index f247126a..983f5b5b 100644 --- a/vignettes/paired.Rmd +++ b/vignettes/paired.Rmd @@ -34,13 +34,13 @@ Two sets of observations are paired if each observation in one column has a spec ```{r} set.seed(1) -gss_paired <- gss %>% +gss_paired <- gss |> mutate( hours_previous = hours + 5 - rpois(nrow(.), 4.8), diff = hours - hours_previous ) -gss_paired %>% +gss_paired |> select(hours, hours_previous, diff) ``` @@ -52,7 +52,7 @@ Here, we pre-compute the difference between paired observations as `diff`. The d ```{r plot-diff, echo = FALSE} unique_diff <- unique(gss_paired$diff) -gss_paired %>% +gss_paired |> ggplot2::ggplot() + ggplot2::aes(x = diff) + ggplot2::geom_histogram(bins = diff(range(unique_diff))) + @@ -70,8 +70,8 @@ We calculate the observed statistic in the paired setting in the same way that w ```{r calc-obs-mean} # calculate the observed statistic observed_statistic <- - gss_paired %>% - specify(response = diff) %>% + gss_paired |> + specify(response = diff) |> calculate(stat = "mean") ``` @@ -82,10 +82,10 @@ Tests for paired data are carried out via the `null = "paired independence"` arg ```{r generate-null} # generate the null distribution null_dist <- - gss_paired %>% - specify(response = diff) %>% - hypothesize(null = "paired independence") %>% - generate(reps = 1000, type = "permute") %>% + gss_paired |> + specify(response = diff) |> + hypothesize(null = "paired independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "mean") null_dist @@ -100,7 +100,7 @@ To get a sense for what this distribution looks like, and where our observed sta ```{r visualize} # visualize the null distribution and test statistic -null_dist %>% +null_dist |> visualize() + shade_p_value(observed_statistic, direction = "two-sided") @@ -112,7 +112,7 @@ More exactly, we can calculate the p-value: ```{r p-value} # calculate the p value from the test statistic and null distribution -p_value <- null_dist %>% +p_value <- null_dist |> get_p_value(obs_stat = observed_statistic, direction = "two-sided") @@ -126,10 +126,10 @@ We can also generate a bootstrap confidence interval for the mean paired differe ```{r generate-boot} # generate a bootstrap distribution boot_dist <- - gss_paired %>% - specify(response = diff) %>% - hypothesize(null = "paired independence") %>% - generate(reps = 1000, type = "bootstrap") %>% + gss_paired |> + specify(response = diff) |> + hypothesize(null = "paired independence") |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") visualize(boot_dist) @@ -141,7 +141,7 @@ Calculating a confidence interval: ```{r confidence-interval} # calculate the confidence from the bootstrap distribution -confidence_interval <- boot_dist %>% +confidence_interval <- boot_dist |> get_confidence_interval(level = .95) confidence_interval @@ -150,7 +150,7 @@ confidence_interval By default, `get_confidence_interval()` constructs the lower and upper bounds by taking the observations at the $(1 - .95) / 2$ and $1 - ((1-.95) / 2)$th percentiles. To instead build the confidence interval using the standard error of the bootstrap distribution, we can write: ```{r} -boot_dist %>% +boot_dist |> get_confidence_interval(type = "se", point_estimate = observed_statistic, level = .95) diff --git a/vignettes/t_test.Rmd b/vignettes/t_test.Rmd index 5ba6f05f..a014815f 100644 --- a/vignettes/t_test.Rmd +++ b/vignettes/t_test.Rmd @@ -36,7 +36,7 @@ The 1-sample $t$-test can be used to test whether a sample of continuous data co As an example, we'll test whether the average American adult works 40 hours a week using data from the `gss`. To do so, we make use of the `hours` variable, giving the number of hours that respondents reported having worked in the previous week. The distribution of `hours` in the observed data looks like this: ```{r plot-1-sample, echo = FALSE} -gss %>% +gss |> ggplot2::ggplot() + ggplot2::aes(x = hours) + ggplot2::geom_histogram(bins = 20) + @@ -55,8 +55,8 @@ First, to calculate the observed statistic, we can use `specify()` and `calculat ```{r calc-obs-stat-1-sample, warning = FALSE, message = FALSE} # calculate the observed statistic -observed_statistic <- gss %>% - specify(response = hours) %>% +observed_statistic <- gss |> + specify(response = hours) |> calculate(stat = "mean") ``` @@ -66,10 +66,10 @@ We can `generate()` the null distribution using the bootstrap. In the bootstrap, ```{r generate-null-1-sample, warning = FALSE, message = FALSE} # generate the null distribution -null_dist_1_sample <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - generate(reps = 1000, type = "bootstrap") %>% +null_dist_1_sample <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") ``` @@ -77,7 +77,7 @@ To get a sense for what these distributions look like, and where our observed st ```{r visualize-1-sample, warning = FALSE, message = FALSE} # visualize the null distribution and test statistic! -null_dist_1_sample %>% +null_dist_1_sample |> visualize() + shade_p_value(observed_statistic, direction = "two-sided" @@ -88,7 +88,7 @@ It looks like our observed mean of `r observed_statistic` would be relatively un ```{r p-value-1-sample, warning = FALSE, message = FALSE} # calculate the p value from the test statistic and null distribution -p_value_1_sample <- null_dist_1_sample %>% +p_value_1_sample <- null_dist_1_sample |> get_p_value(obs_stat = observed_statistic, direction = "two-sided") @@ -107,10 +107,10 @@ An alternative approach to the `t_test()` wrapper is to calculate the observed s ```{r} # calculate the observed statistic -observed_statistic <- gss %>% - specify(response = hours) %>% - hypothesize(null = "point", mu = 40) %>% - calculate(stat = "t") %>% +observed_statistic <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + calculate(stat = "t") |> dplyr::pull() ``` @@ -129,7 +129,7 @@ Note that the resulting $t$-statistics from these two theory-based approaches ar 2-Sample $t$-tests evaluate the difference in mean values of two populations using data randomly-sampled from the population that approximately follows a normal distribution. As an example, we'll test if Americans work the same number of hours a week regardless of whether they have a college degree or not using data from the `gss`. The `college` and `hours` variables allow us to do so: ```{r plot-2-sample, echo = FALSE} -gss %>% +gss |> ggplot2::ggplot() + ggplot2::aes(x = college, y = hours) + ggplot2::geom_boxplot() + @@ -147,8 +147,8 @@ As with the one-sample test, to calculate the observed difference in means, we c ```{r calc-obs-stat-2-sample, warning = FALSE, message = FALSE} # calculate the observed statistic -observed_statistic <- gss %>% - specify(hours ~ college) %>% +observed_statistic <- gss |> + specify(hours ~ college) |> calculate(stat = "diff in means", order = c("degree", "no degree")) observed_statistic @@ -164,10 +164,10 @@ We can `generate()` the null distribution using permutation, where, for each rep ```{r generate-null-2-sample, warning = FALSE, message = FALSE} # generate the null distribution with randomization -null_dist_2_sample <- gss %>% - specify(hours ~ college) %>% - hypothesize(null = "independence") %>% - generate(reps = 1000, type = "permute") %>% +null_dist_2_sample <- gss |> + specify(hours ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> calculate(stat = "diff in means", order = c("degree", "no degree")) ``` @@ -177,7 +177,7 @@ To get a sense for what these distributions look like, and where our observed st ```{r visualize-2-sample, warning = FALSE, message = FALSE} # visualize the randomization-based null distribution and test statistic! -null_dist_2_sample %>% +null_dist_2_sample |> visualize() + shade_p_value(observed_statistic, direction = "two-sided") @@ -188,7 +188,7 @@ It looks like our observed statistic of `r observed_statistic` would be unlikely ```{r p-value-2-sample, warning = FALSE, message = FALSE} # calculate the p value from the randomization-based null # distribution and the observed statistic -p_value_2_sample <- null_dist_2_sample %>% +p_value_2_sample <- null_dist_2_sample |> get_p_value(obs_stat = observed_statistic, direction = "two-sided") @@ -212,10 +212,10 @@ An alternative approach to the `t_test()` wrapper is to calculate the observed s ```{r} # calculate the observed statistic -observed_statistic <- gss %>% - specify(hours ~ college) %>% - hypothesize(null = "point", mu = 40) %>% - calculate(stat = "t", order = c("degree", "no degree")) %>% +observed_statistic <- gss |> + specify(hours ~ college) |> + hypothesize(null = "point", mu = 40) |> + calculate(stat = "t", order = c("degree", "no degree")) |> dplyr::pull() observed_statistic From 459b940d8ac97dc6c20b6079dc48d00fd5c486cc Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:20:22 -0500 Subject: [PATCH 06/11] `devtools::document()` transition to base pipe --- R/pipe.R | 4 +-- man/assume.Rd | 42 ++++++++++++------------ man/calculate.Rd | 46 +++++++++++++------------- man/fit.infer.Rd | 32 +++++++++--------- man/generate.Rd | 36 ++++++++++---------- man/get_confidence_interval.Rd | 30 ++++++++--------- man/get_p_value.Rd | 36 ++++++++++---------- man/hypothesize.Rd | 8 ++--- man/observe.Rd | 18 +++++----- man/pipe.Rd | 2 +- man/rep_sample_n.Rd | 8 ++--- man/shade_confidence_interval.Rd | 32 +++++++++--------- man/shade_p_value.Rd | 34 +++++++++---------- man/specify.Rd | 6 ++-- man/t_stat.Rd | 6 ++-- man/t_test.Rd | 4 +-- man/visualize.Rd | 56 ++++++++++++++++---------------- 17 files changed, 200 insertions(+), 200 deletions(-) diff --git a/R/pipe.R b/R/pipe.R index f2461d26..4bab0038 100755 --- a/R/pipe.R +++ b/R/pipe.R @@ -6,8 +6,8 @@ #' #' @param lhs,rhs Inference functions and the initial data frame. #' -#' @importFrom magrittr |> -#' @name |> +#' @importFrom magrittr %>% +#' @name %>% #' @rdname pipe #' @export NULL diff --git a/man/assume.Rd b/man/assume.Rd index f6df869f..b7b7ce6b 100644 --- a/man/assume.Rd +++ b/man/assume.Rd @@ -77,37 +77,37 @@ By default, the package will use the \code{"parameter"} entry of the analogous # F distribution # with the `partyid` explanatory variable -gss \%>\% - specify(age ~ partyid) \%>\% +gss |> + specify(age ~ partyid) |> assume(distribution = "F") # Chi-squared goodness of fit distribution # on the `finrela` variable -gss \%>\% - specify(response = finrela) \%>\% +gss |> + specify(response = finrela) |> hypothesize(null = "point", p = c("far below average" = 1/6, "below average" = 1/6, "average" = 1/6, "above average" = 1/6, "far above average" = 1/6, - "DK" = 1/6)) \%>\% + "DK" = 1/6)) |> assume("Chisq") # Chi-squared test of independence # on the `finrela` and `sex` variables -gss \%>\% - specify(formula = finrela ~ sex) \%>\% +gss |> + specify(formula = finrela ~ sex) |> assume(distribution = "Chisq") # T distribution -gss \%>\% - specify(age ~ college) \%>\% +gss |> + specify(age ~ college) |> assume("t") # Z distribution -gss \%>\% - specify(response = sex, success = "female") \%>\% +gss |> + specify(response = sex, success = "female") |> assume("z") \dontrun{ @@ -117,14 +117,14 @@ gss \%>\% # for example, a 1-sample t-test ------------------------------------- # calculate the observed statistic -obs_stat <- gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +obs_stat <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") # construct a null distribution -null_dist <- gss \%>\% - specify(response = hours) \%>\% +null_dist <- gss |> + specify(response = hours) |> assume("t") # juxtapose them visually @@ -137,14 +137,14 @@ get_p_value(null_dist, obs_stat, direction = "both") # or, an F test ------------------------------------------------------ # calculate the observed statistic -obs_stat <- gss \%>\% - specify(age ~ partyid) \%>\% - hypothesize(null = "independence") \%>\% +obs_stat <- gss |> + specify(age ~ partyid) |> + hypothesize(null = "independence") |> calculate(stat = "F") # construct a null distribution -null_dist <- gss \%>\% - specify(age ~ partyid) \%>\% +null_dist <- gss |> + specify(age ~ partyid) |> assume(distribution = "F") # juxtapose them visually diff --git a/man/calculate.Rd b/man/calculate.Rd index dcc25387..404193d7 100755 --- a/man/calculate.Rd +++ b/man/calculate.Rd @@ -69,10 +69,10 @@ the following code. \if{html}{\out{
}}\preformatted{set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -95,10 +95,10 @@ will produce the same result. \if{html}{\out{
}}\preformatted{# set the seed set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -123,34 +123,34 @@ resampling with \code{generate()}. # calculate a null distribution of hours worked per week under # the null hypothesis that the mean is 40 -gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% - generate(reps = 200, type = "bootstrap") \%>\% +gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 200, type = "bootstrap") |> calculate(stat = "mean") # calculate the corresponding observed statistic -gss \%>\% - specify(response = hours) \%>\% +gss |> + specify(response = hours) |> calculate(stat = "mean") # calculate a null distribution assuming independence between age # of respondent and whether they have a college degree -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 200, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 200, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) # calculate the corresponding observed statistic -gss \%>\% - specify(age ~ college) \%>\% +gss |> + specify(age ~ college) |> calculate("diff in means", order = c("degree", "no degree")) # some statistics require a null hypothesis - gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% + gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") # more in-depth explanation of how to use the infer package diff --git a/man/fit.infer.Rd b/man/fit.infer.Rd index 50763eba..1229bd22 100644 --- a/man/fit.infer.Rd +++ b/man/fit.infer.Rd @@ -83,10 +83,10 @@ the following code. \if{html}{\out{
}}\preformatted{set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -109,10 +109,10 @@ will produce the same result. \if{html}{\out{
}}\preformatted{# set the seed set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -136,8 +136,8 @@ resampling with \code{generate()}. \examples{ # fit a linear model predicting number of hours worked per # week using respondent age and degree status. -observed_fit <- gss \%>\% - specify(hours ~ age + college) \%>\% +observed_fit <- gss |> + specify(hours ~ age + college) |> fit() observed_fit @@ -145,18 +145,18 @@ observed_fit # fit 100 models to resamples of the gss dataset, where the response # `hours` is permuted in each. note that this code is the same as # the above except for the addition of the `generate` step. -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 100, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> fit() null_fits # for logistic regression, just supply a binary response variable! # (this can also be made explicit via the `family` argument in ...) -gss \%>\% - specify(college ~ age + hours) \%>\% +gss |> + specify(college ~ age + hours) |> fit() # more in-depth explanation of how to use the infer package diff --git a/man/generate.Rd b/man/generate.Rd index be33e2fe..a2df3b67 100755 --- a/man/generate.Rd +++ b/man/generate.Rd @@ -67,10 +67,10 @@ the following code. \if{html}{\out{
}}\preformatted{set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -93,10 +93,10 @@ will produce the same result. \if{html}{\out{
}}\preformatted{# set the seed set.seed(1) -gss \%>\% - specify(age ~ college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 5, type = "permute") \%>\% +gss |> + specify(age ~ college) |> + hypothesize(null = "independence") |> + generate(reps = 5, type = "permute") |> calculate("diff in means", order = c("degree", "no degree")) }\if{html}{\out{
}} @@ -119,24 +119,24 @@ resampling with \code{generate()}. \examples{ # generate a null distribution by taking 200 bootstrap samples -gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> generate(reps = 200, type = "bootstrap") # generate a null distribution for the independence of # two variables by permuting their values 200 times -gss \%>\% - specify(partyid ~ age) \%>\% - hypothesize(null = "independence") \%>\% +gss |> + specify(partyid ~ age) |> + hypothesize(null = "independence") |> generate(reps = 200, type = "permute") # generate a null distribution via sampling from a # binomial distribution 200 times -gss \%>\% -specify(response = sex, success = "female") \%>\% - hypothesize(null = "point", p = .5) \%>\% - generate(reps = 200, type = "draw") \%>\% +gss |> +specify(response = sex, success = "female") |> + hypothesize(null = "point", p = .5) |> + generate(reps = 200, type = "draw") |> calculate(stat = "z") # more in-depth explanation of how to use the infer package diff --git a/man/get_confidence_interval.Rd b/man/get_confidence_interval.Rd index d6c037c7..957c4021 100644 --- a/man/get_confidence_interval.Rd +++ b/man/get_confidence_interval.Rd @@ -79,15 +79,15 @@ point estimates: \examples{ -boot_dist <- gss \%>\% +boot_dist <- gss |> # We're interested in the number of hours worked per week - specify(response = hours) \%>\% + specify(response = hours) |> # Generate bootstrap samples - generate(reps = 1000, type = "bootstrap") \%>\% + generate(reps = 1000, type = "bootstrap") |> # Calculate mean of each bootstrap sample calculate(stat = "mean") -boot_dist \%>\% +boot_dist |> # Calculate the confidence interval around the point estimate get_confidence_interval( # At the 95\% confidence level; percentile method @@ -95,11 +95,11 @@ boot_dist \%>\% ) # for type = "se" or type = "bias-corrected" we need a point estimate -sample_mean <- gss \%>\% - specify(response = hours) \%>\% +sample_mean <- gss |> + specify(response = hours) |> calculate(stat = "mean") -boot_dist \%>\% +boot_dist |> get_confidence_interval( point_estimate = sample_mean, # At the 95\% confidence level @@ -111,8 +111,8 @@ boot_dist \%>\% # using a theoretical distribution ----------------------------------- # define a sampling distribution -sampling_dist <- gss \%>\% - specify(response = hours) \%>\% +sampling_dist <- gss |> + specify(response = hours) |> assume("t") # get the confidence interval---note that the @@ -127,8 +127,8 @@ get_confidence_interval( # fit a linear model predicting number of hours worked per # week using respondent age and degree status. -observed_fit <- gss \%>\% - specify(hours ~ age + college) \%>\% +observed_fit <- gss |> + specify(hours ~ age + college) |> fit() observed_fit @@ -136,10 +136,10 @@ observed_fit # fit 100 models to resamples of the gss dataset, where the response # `hours` is permuted in each. note that this code is the same as # the above except for the addition of the `generate` step. -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 100, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> fit() null_fits diff --git a/man/get_p_value.Rd b/man/get_p_value.Rd index a1075c22..a298e7ae 100644 --- a/man/get_p_value.Rd +++ b/man/get_p_value.Rd @@ -77,33 +77,33 @@ raised to caution the user against reporting a p-value exactly equal to 0. # using a simulation-based null distribution ------------------------------ # find the point estimate---mean number of hours worked per week -point_estimate <- gss \%>\% - specify(response = hours) \%>\% +point_estimate <- gss |> + specify(response = hours) |> calculate(stat = "mean") # starting with the gss dataset -gss \%>\% +gss |> # ...we're interested in the number of hours worked per week - specify(response = hours) \%>\% + specify(response = hours) |> # hypothesizing that the mean is 40 - hypothesize(null = "point", mu = 40) \%>\% + hypothesize(null = "point", mu = 40) |> # generating data points for a null distribution - generate(reps = 1000, type = "bootstrap") \%>\% + generate(reps = 1000, type = "bootstrap") |> # finding the null distribution - calculate(stat = "mean") \%>\% + calculate(stat = "mean") |> get_p_value(obs_stat = point_estimate, direction = "two-sided") # using a theoretical null distribution ----------------------------------- # calculate the observed statistic -obs_stat <- gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +obs_stat <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") # define a null distribution -null_dist <- gss \%>\% - specify(response = hours) \%>\% +null_dist <- gss |> + specify(response = hours) |> assume("t") # calculate a p-value @@ -113,8 +113,8 @@ get_p_value(null_dist, obs_stat, direction = "both") # fit a linear model predicting number of hours worked per # week using respondent age and degree status. -observed_fit <- gss \%>\% - specify(hours ~ age + college) \%>\% +observed_fit <- gss |> + specify(hours ~ age + college) |> fit() observed_fit @@ -122,10 +122,10 @@ observed_fit # fit 100 models to resamples of the gss dataset, where the response # `hours` is permuted in each. note that this code is the same as # the above except for the addition of the `generate` step. -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 100, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 100, type = "permute") |> fit() null_fits diff --git a/man/hypothesize.Rd b/man/hypothesize.Rd index 826e5e50..d8ba4450 100755 --- a/man/hypothesize.Rd +++ b/man/hypothesize.Rd @@ -51,13 +51,13 @@ Learn more in \code{vignette("infer")}. } \examples{ # hypothesize independence of two variables -gss \%>\% - specify(college ~ partyid, success = "degree") \%>\% +gss |> + specify(college ~ partyid, success = "degree") |> hypothesize(null = "independence") # hypothesize a mean number of hours worked per week of 40 -gss \%>\% - specify(response = hours) \%>\% +gss |> + specify(response = hours) |> hypothesize(null = "point", mu = 40) # more in-depth explanation of how to use the infer package diff --git a/man/observe.Rd b/man/observe.Rd index 9945a85a..3803ad04 100644 --- a/man/observe.Rd +++ b/man/observe.Rd @@ -99,22 +99,22 @@ Learn more in \code{vignette("infer")}. } \examples{ # calculating the observed mean number of hours worked per week -gss \%>\% +gss |> observe(hours ~ NULL, stat = "mean") # equivalently, calculating the same statistic with the core verbs -gss \%>\% - specify(response = hours) \%>\% +gss |> + specify(response = hours) |> calculate(stat = "mean") # calculating a t statistic for hypothesized mu = 40 hours worked/week -gss \%>\% +gss |> observe(hours ~ NULL, stat = "t", null = "point", mu = 40) # equivalently, calculating the same statistic with the core verbs -gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") # similarly for a difference in means in age based on whether @@ -127,8 +127,8 @@ observe( ) # equivalently, calculating the same statistic with the core verbs -gss \%>\% - specify(age ~ college) \%>\% +gss |> + specify(age ~ college) |> calculate("diff in means", order = c("degree", "no degree")) # for a more in-depth explanation of how to use the infer package diff --git a/man/pipe.Rd b/man/pipe.Rd index 1d87f3e2..0586edc0 100755 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -7,7 +7,7 @@ \item{lhs, rhs}{Inference functions and the initial data frame.} } \description{ -Like \{dplyr\}, \{infer\} also uses the pipe (\code{\%>\%}) function +Like \{dplyr\}, \{infer\} also uses the pipe (\code{|>}) function from \code{magrittr} to turn function composition into a series of iterative statements. } diff --git a/man/rep_sample_n.Rd b/man/rep_sample_n.Rd index 84138096..a8966ffa 100644 --- a/man/rep_sample_n.Rd +++ b/man/rep_sample_n.Rd @@ -65,15 +65,15 @@ library(ggplot2) library(tibble) # take 1000 samples of size n = 50, without replacement -slices <- gss \%>\% +slices <- gss |> rep_slice_sample(n = 50, reps = 1000) slices # compute the proportion of respondents with a college # degree in each replicate -p_hats <- slices \%>\% - group_by(replicate) \%>\% +p_hats <- slices |> + group_by(replicate) |> summarize(prop_college = mean(college == "degree")) # plot sampling distribution @@ -94,7 +94,7 @@ df <- tibble( rep_slice_sample(df, n = 2, reps = 5, weight_by = c(.5, .4, .3, .2, .1)) # alternatively, pass an unquoted column name in `.data` as `weight_by` -df <- df \%>\% mutate(wts = c(.5, .4, .3, .2, .1)) +df <- df |> mutate(wts = c(.5, .4, .3, .2, .1)) rep_slice_sample(df, n = 2, reps = 5, weight_by = wts) } diff --git a/man/shade_confidence_interval.Rd b/man/shade_confidence_interval.Rd index 904896d0..08719a08 100644 --- a/man/shade_confidence_interval.Rd +++ b/man/shade_confidence_interval.Rd @@ -46,21 +46,21 @@ Learn more in \code{vignette("infer")}. } \examples{ # find the point estimate---mean number of hours worked per week -point_estimate <- gss \%>\% - specify(response = hours) \%>\% +point_estimate <- gss |> + specify(response = hours) |> calculate(stat = "mean") # ...and a bootstrap distribution -boot_dist <- gss \%>\% +boot_dist <- gss |> # ...we're interested in the number of hours worked per week - specify(response = hours) \%>\% + specify(response = hours) |> # generating data points - generate(reps = 1000, type = "bootstrap") \%>\% + generate(reps = 1000, type = "bootstrap") |> # finding the distribution from the generated data calculate(stat = "mean") # find a confidence interval around the point estimate -ci <- boot_dist \%>\% +ci <- boot_dist |> get_confidence_interval(point_estimate = point_estimate, # at the 95\% confidence level level = .95, @@ -69,12 +69,12 @@ ci <- boot_dist \%>\% # and plot it! -boot_dist \%>\% +boot_dist |> visualize() + shade_confidence_interval(ci) # or just plot the bounds -boot_dist \%>\% +boot_dist |> visualize() + shade_confidence_interval(ci, fill = NULL) @@ -82,8 +82,8 @@ boot_dist \%>\% # theoretical distributions, too---the theoretical # distribution will be recentered and rescaled to # align with the confidence interval -sampling_dist <- gss \%>\% - specify(response = hours) \%>\% +sampling_dist <- gss |> + specify(response = hours) |> assume(distribution = "t") visualize(sampling_dist) + @@ -94,17 +94,17 @@ visualize(sampling_dist) + # explanatory variables, use a `fit()`-based workflow # fit 1000 linear models with the `hours` variable permuted -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 1000, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> fit() null_fits # fit a linear model to the observed data -obs_fit <- gss \%>\% - specify(hours ~ age + college) \%>\% +obs_fit <- gss |> + specify(hours ~ age + college) |> fit() obs_fit diff --git a/man/shade_p_value.Rd b/man/shade_p_value.Rd index bcecd838..48d63189 100644 --- a/man/shade_p_value.Rd +++ b/man/shade_p_value.Rd @@ -45,34 +45,34 @@ Learn more in \code{vignette("infer")}. } \examples{ # find the point estimate---mean number of hours worked per week -point_estimate <- gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +point_estimate <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") # ...and a null distribution -null_dist <- gss \%>\% +null_dist <- gss |> # ...we're interested in the number of hours worked per week - specify(response = hours) \%>\% + specify(response = hours) |> # hypothesizing that the mean is 40 - hypothesize(null = "point", mu = 40) \%>\% + hypothesize(null = "point", mu = 40) |> # generating data points for a null distribution - generate(reps = 1000, type = "bootstrap") \%>\% + generate(reps = 1000, type = "bootstrap") |> # estimating the null distribution calculate(stat = "t") # shade the p-value of the point estimate -null_dist \%>\% +null_dist |> visualize() + shade_p_value(obs_stat = point_estimate, direction = "two-sided") # you can shade confidence intervals on top of # theoretical distributions, too! -null_dist_theory <- gss \%>\% - specify(response = hours) \%>\% +null_dist_theory <- gss |> + specify(response = hours) |> assume(distribution = "t") -null_dist_theory \%>\% +null_dist_theory |> visualize() + shade_p_value(obs_stat = point_estimate, direction = "two-sided") @@ -81,17 +81,17 @@ null_dist_theory \%>\% # explanatory variables, use a `fit()`-based workflow # fit 1000 linear models with the `hours` variable permuted -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 1000, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> fit() null_fits # fit a linear model to the observed data -obs_fit <- gss \%>\% - specify(hours ~ age + college) \%>\% +obs_fit <- gss |> + specify(hours ~ age + college) |> fit() obs_fit diff --git a/man/specify.Rd b/man/specify.Rd index dab03766..795f8635 100755 --- a/man/specify.Rd +++ b/man/specify.Rd @@ -36,15 +36,15 @@ Learn more in \code{vignette("infer")}. } \examples{ # specifying for a point estimate on one variable -gss \%>\% +gss |> specify(response = age) # specify a relationship between variables as a formula... -gss \%>\% +gss |> specify(age ~ partyid) # ...or with named arguments! -gss \%>\% +gss |> specify(response = age, explanatory = partyid) # more in-depth explanation of how to use the infer package diff --git a/man/t_stat.Rd b/man/t_stat.Rd index ae08ab4b..8a9460b6 100755 --- a/man/t_stat.Rd +++ b/man/t_stat.Rd @@ -55,13 +55,13 @@ library(tidyr) # t test statistic for true mean number of hours worked # per week of 40 -gss \%>\% +gss |> t_stat(response = hours, mu = 40) # t test statistic for number of hours worked per week # by college degree status -gss \%>\% - tidyr::drop_na(college) \%>\% +gss |> + tidyr::drop_na(college) |> t_stat(formula = hours ~ college, order = c("degree", "no degree"), alternative = "two-sided") diff --git a/man/t_test.Rd b/man/t_test.Rd index 1486e477..cffda733 100755 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -54,8 +54,8 @@ library(tidyr) # t test for number of hours worked per week # by college degree status -gss \%>\% - tidyr::drop_na(college) \%>\% +gss |> + tidyr::drop_na(college) |> t_test(formula = hours ~ college, order = c("degree", "no degree"), alternative = "two-sided") diff --git a/man/visualize.Rd b/man/visualize.Rd index 9c58c0e3..84e8b08f 100755 --- a/man/visualize.Rd +++ b/man/visualize.Rd @@ -63,35 +63,35 @@ are supplied. \examples{ # generate a null distribution -null_dist <- gss \%>\% +null_dist <- gss |> # we're interested in the number of hours worked per week - specify(response = hours) \%>\% + specify(response = hours) |> # hypothesizing that the mean is 40 - hypothesize(null = "point", mu = 40) \%>\% + hypothesize(null = "point", mu = 40) |> # generating data points for a null distribution - generate(reps = 1000, type = "bootstrap") \%>\% + generate(reps = 1000, type = "bootstrap") |> # calculating a distribution of means calculate(stat = "mean") # or a bootstrap distribution, omitting the hypothesize() step, # for use in confidence intervals -boot_dist <- gss \%>\% - specify(response = hours) \%>\% - generate(reps = 1000, type = "bootstrap") \%>\% +boot_dist <- gss |> + specify(response = hours) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "mean") # we can easily plot the null distribution by piping into visualize -null_dist \%>\% +null_dist |> visualize() # we can add layers to the plot as in ggplot, as well... # find the point estimate---mean number of hours worked per week -point_estimate <- gss \%>\% - specify(response = hours) \%>\% +point_estimate <- gss |> + specify(response = hours) |> calculate(stat = "mean") # find a confidence interval around the point estimate -ci <- boot_dist \%>\% +ci <- boot_dist |> get_confidence_interval(point_estimate = point_estimate, # at the 95\% confidence level level = .95, @@ -99,19 +99,19 @@ ci <- boot_dist \%>\% type = "se") # display a shading of the area beyond the p-value on the plot -null_dist \%>\% +null_dist |> visualize() + shade_p_value(obs_stat = point_estimate, direction = "two-sided") # ...or within the bounds of the confidence interval -null_dist \%>\% +null_dist |> visualize() + shade_confidence_interval(ci) # plot a theoretical sampling distribution by creating # a theory-based distribution with `assume()` -sampling_dist <- gss \%>\% - specify(response = hours) \%>\% +sampling_dist <- gss |> + specify(response = hours) |> assume(distribution = "t") visualize(sampling_dist) @@ -127,15 +127,15 @@ visualize(sampling_dist) + # to plot both a theory-based and simulation-based null distribution, # use a theorized statistic (i.e. one of t, z, F, or Chisq) # and supply the simulation-based null distribution -null_dist_t <- gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% - generate(reps = 1000, type = "bootstrap") \%>\% +null_dist_t <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> + generate(reps = 1000, type = "bootstrap") |> calculate(stat = "t") -obs_stat <- gss \%>\% - specify(response = hours) \%>\% - hypothesize(null = "point", mu = 40) \%>\% +obs_stat <- gss |> + specify(response = hours) |> + hypothesize(null = "point", mu = 40) |> calculate(stat = "t") visualize(null_dist_t, method = "both") @@ -148,10 +148,10 @@ visualize(null_dist_t, method = "both") + # explanatory variables, use a `fit()`-based workflow # fit 1000 models with the `hours` variable permuted -null_fits <- gss \%>\% - specify(hours ~ age + college) \%>\% - hypothesize(null = "independence") \%>\% - generate(reps = 1000, type = "permute") \%>\% +null_fits <- gss |> + specify(hours ~ age + college) |> + hypothesize(null = "independence") |> + generate(reps = 1000, type = "permute") |> fit() null_fits @@ -166,10 +166,10 @@ visualize(null_fits) library(ggplot2) # to add a ggplot2 theme to a `calculate()`-based visualization, use `+` -null_dist \%>\% visualize() + theme_dark() +null_dist |> visualize() + theme_dark() # to add a ggplot2 theme to a `fit()`-based visualization, use `&` -null_fits \%>\% visualize() & theme_dark() +null_fits |> visualize() & theme_dark() } # More in-depth explanation of how to use the infer package From e1f3131cd7af1f510973c3daf565bf044d69aa16 Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:21:10 -0500 Subject: [PATCH 07/11] `knitr::convert_chunk_header(type = "yaml")` --- vignettes/anova.Rmd | 49 +++++++++--- vignettes/chi_squared.Rmd | 88 +++++++++++++++++----- vignettes/infer.Rmd | 108 +++++++++++++++++++++------ vignettes/observed_stat_examples.Rmd | 16 ++-- vignettes/paired.Rmd | 37 ++++++--- vignettes/t_test.Rmd | 73 ++++++++++++++---- 6 files changed, 290 insertions(+), 81 deletions(-) diff --git a/vignettes/anova.Rmd b/vignettes/anova.Rmd index dac5fea4..b8d7293f 100644 --- a/vignettes/anova.Rmd +++ b/vignettes/anova.Rmd @@ -8,12 +8,18 @@ vignette: | \usepackage[utf8]{inputenc} --- -```{r settings, include=FALSE} +```{r} +#| label: settings +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ``` -```{r load-packages, echo = FALSE, message = FALSE, warning = FALSE} +```{r} +#| label: load-packages +#| echo: false +#| message: false +#| warning: false library(ggplot2) library(dplyr) library(infer) @@ -23,7 +29,10 @@ In this vignette, we'll walk through conducting an analysis of variance (ANOVA) Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this: -```{r glimpse-gss-actual, warning = FALSE, message = FALSE} +```{r} +#| label: glimpse-gss-actual +#| warning: false +#| message: false dplyr::glimpse(gss) ``` @@ -31,7 +40,9 @@ To carry out an ANOVA, we'll examine the association between age and political p This is what the relationship looks like in the observed data: -```{r plot-f, echo = FALSE} +```{r} +#| label: plot-f +#| echo: false gss |> ggplot2::ggplot() + ggplot2::aes(x = partyid, y = age) + @@ -47,7 +58,10 @@ If there were no relationship, we would expect to see the each of these boxplots First, to calculate the observed statistic, we can use `specify()` and `calculate()`. -```{r calc-obs-stat-f, warning = FALSE, message = FALSE} +```{r} +#| label: calc-obs-stat-f +#| warning: false +#| message: false # calculate the observed statistic observed_f_statistic <- gss |> specify(age ~ partyid) |> @@ -59,7 +73,10 @@ The observed $F$ statistic is `r observed_f_statistic`. Now, we want to compare We can `generate()` an approximation of the null distribution using randomization. The randomization approach permutes the response and explanatory variables, so that each person's party affiliation is matched up with a random age from the sample in order to break up any association between the two. -```{r generate-null-f, warning = FALSE, message = FALSE} +```{r} +#| label: generate-null-f +#| warning: false +#| message: false # generate the null distribution using randomization null_dist <- gss |> specify(age ~ partyid) |> @@ -72,7 +89,10 @@ Note that, in the line `specify(age ~ partyid)` above, we could use the equivale To get a sense for what this distribution looks like, and where our observed statistic falls, we can use `visualize()`: -```{r visualize-f, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-f +#| warning: false +#| message: false # visualize the null distribution and test statistic! null_dist |> visualize() + @@ -82,7 +102,10 @@ null_dist |> We could also visualize the observed statistic against the theoretical null distribution. To do so, use the `assume()` verb to define a theoretical null distribution and then pass it to `visualize()` like a null distribution outputted from `generate()` and `calculate()`. -```{r visualize-f-theor, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-f-theor +#| warning: false +#| message: false # visualize the theoretical null distribution and test statistic! null_dist_theory <- gss |> specify(age ~ partyid) |> @@ -95,7 +118,10 @@ visualize(null_dist_theory) + To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and then further provide `method = "both"` to `visualize()`. -```{r visualize-indep-both, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-indep-both +#| warning: false +#| message: false # visualize both null distributions and the test statistic! null_dist |> visualize(method = "both") + @@ -105,7 +131,10 @@ null_dist |> Either way, it looks like our observed test statistic would be quite unlikely if there were actually no association between age and political party affiliation. More exactly, we can approximate the p-value from the randomization-based approximation to the null distribution: -```{r p-value-indep, warning = FALSE, message = FALSE} +```{r} +#| label: p-value-indep +#| warning: false +#| message: false # calculate the p value from the observed statistic and null distribution p_value <- null_dist |> get_p_value(obs_stat = observed_f_statistic, diff --git a/vignettes/chi_squared.Rmd b/vignettes/chi_squared.Rmd index 0d26802d..8c63bf21 100644 --- a/vignettes/chi_squared.Rmd +++ b/vignettes/chi_squared.Rmd @@ -8,12 +8,18 @@ vignette: | \usepackage[utf8]{inputenc} --- -```{r settings, include=FALSE} +```{r} +#| label: settings +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ``` -```{r load-packages, echo = FALSE, message = FALSE, warning = FALSE} +```{r} +#| label: load-packages +#| echo: false +#| message: false +#| warning: false library(ggplot2) library(dplyr) library(infer) @@ -25,7 +31,10 @@ In this vignette, we'll walk through conducting a $\chi^2$ (chi-squared) test of Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this: -```{r glimpse-gss-actual, warning = FALSE, message = FALSE} +```{r} +#| label: glimpse-gss-actual +#| warning: false +#| message: false dplyr::glimpse(gss) ``` @@ -35,7 +44,9 @@ To carry out a chi-squared test of independence, we'll examine the association b This is what the relationship looks like in the sample data: -```{r plot-indep, echo = FALSE} +```{r} +#| label: plot-indep +#| echo: false gss |> ggplot2::ggplot() + ggplot2::aes(x = finrela, fill = college) + @@ -55,7 +66,10 @@ If there were no relationship, we would expect to see the purple bars reaching t First, to calculate the observed statistic, we can use `specify()` and `calculate()`. -```{r calc-obs-stat-indep, warning = FALSE, message = FALSE} +```{r} +#| label: calc-obs-stat-indep +#| warning: false +#| message: false # calculate the observed statistic observed_indep_statistic <- gss |> specify(college ~ finrela) |> @@ -67,7 +81,10 @@ The observed $\chi^2$ statistic is `r observed_indep_statistic`. Now, we want to We can `generate()` the null distribution in one of two ways---using randomization or theory-based methods. The randomization approach approximates the null distribution by permuting the response and explanatory variables, so that each person's educational attainment is matched up with a random income from the sample in order to break up any association between the two. -```{r generate-null-indep, warning = FALSE, message = FALSE} +```{r} +#| label: generate-null-indep +#| warning: false +#| message: false # generate the null distribution using randomization null_dist_sim <- gss |> specify(college ~ finrela) |> @@ -78,7 +95,10 @@ null_dist_sim <- gss |> Note that, in the line `specify(college ~ finrela)` above, we could use the equivalent syntax `specify(response = college, explanatory = finrela)`. The same goes in the code below, which generates the null distribution using theory-based methods instead of randomization. -```{r generate-null-indep-t, warning = FALSE, message = FALSE} +```{r} +#| label: generate-null-indep-t +#| warning: false +#| message: false # generate the null distribution by theoretical approximation null_dist_theory <- gss |> specify(college ~ finrela) |> @@ -87,7 +107,10 @@ null_dist_theory <- gss |> To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: -```{r visualize-indep, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-indep +#| warning: false +#| message: false # visualize the null distribution and test statistic! null_dist_sim |> visualize() + @@ -98,7 +121,10 @@ null_dist_sim |> We could also visualize the observed statistic against the theoretical null distribution. To do so, use the `assume()` verb to define a theoretical null distribution and then pass it to `visualize()` like a null distribution outputted from `generate()` and `calculate()`. -```{r visualize-indep-theor, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-indep-theor +#| warning: false +#| message: false # visualize the theoretical null distribution and test statistic! gss |> specify(college ~ finrela) |> @@ -111,7 +137,10 @@ gss |> To visualize both the randomization-based and theoretical null distributions to get a sense of how the two relate, we can pipe the randomization-based null distribution into `visualize()`, and further provide `method = "both"`. -```{r visualize-indep-both, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-indep-both +#| warning: false +#| message: false # visualize both null distributions and the test statistic! null_dist_sim |> visualize(method = "both") + @@ -122,7 +151,10 @@ null_dist_sim |> Either way, it looks like our observed test statistic would be quite unlikely if there were actually no association between education and income. More exactly, we can approximate the p-value with `get_p_value`: -```{r p-value-indep, warning = FALSE, message = FALSE} +```{r} +#| label: p-value-indep +#| warning: false +#| message: false # calculate the p value from the observed statistic and null distribution p_value_independence <- null_dist_sim |> get_p_value( @@ -143,7 +175,10 @@ pchisq(observed_indep_statistic$stat, 5, lower.tail = FALSE) Note that, equivalently to the theory-based approach shown above, the package supplies a wrapper function, `chisq_test`, to carry out Chi-Squared tests of independence on tidy data. The syntax goes like this: -```{r chisq-indep-wrapper, message = FALSE, warning = FALSE} +```{r} +#| label: chisq-indep-wrapper +#| message: false +#| warning: false chisq_test(gss, college ~ finrela) ``` @@ -152,7 +187,9 @@ chisq_test(gss, college ~ finrela) Now, moving on to a chi-squared goodness of fit test, we'll take a look at the self-identified income class of our survey respondents. Suppose our null hypothesis is that `finrela` follows a uniform distribution (i.e. there's actually an equal number of people that describe their income as far below average, below average, average, above average, far above average, or that don't know their income.) The graph below represents this hypothesis: -```{r gof-plot, echo = FALSE} +```{r} +#| label: gof-plot +#| echo: false gss |> ggplot2::ggplot() + ggplot2::aes(x = finrela) + @@ -168,7 +205,10 @@ It seems like a uniform distribution may not be the most appropriate description First, to carry out this hypothesis test, we would calculate our observed statistic. -```{r observed-gof-statistic, warning = FALSE, message = FALSE} +```{r} +#| label: observed-gof-statistic +#| warning: false +#| message: false # calculating the null distribution observed_gof_statistic <- gss |> specify(response = finrela) |> @@ -189,7 +229,10 @@ observed_gof_statistic <- gss |> The observed statistic is `r observed_gof_statistic`. Now, generating a null distribution, by just dropping in a call to `generate()`: -```{r null-distribution-gof, warning = FALSE, message = FALSE} +```{r} +#| label: null-distribution-gof +#| warning: false +#| message: false # generating a null distribution, assuming each income class is equally likely null_dist_gof <- gss |> specify(response = finrela) |> @@ -210,7 +253,10 @@ null_dist_gof <- gss |> Again, to get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: -```{r visualize-indep-gof, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-indep-gof +#| warning: false +#| message: false # visualize the null distribution and test statistic! null_dist_gof |> visualize() + @@ -221,7 +267,10 @@ null_dist_gof |> This statistic seems like it would be quite unlikely if income class self-identification actually followed a uniform distribution! How unlikely, though? Calculating the p-value: -```{r get-p-value-gof, warning = FALSE, message = FALSE} +```{r} +#| label: get-p-value-gof +#| warning: false +#| message: false # calculate the p-value p_value_gof <- null_dist_gof |> get_p_value( @@ -242,7 +291,10 @@ pchisq(observed_gof_statistic$stat, 5, lower.tail = FALSE) Again, equivalently to the theory-based approach shown above, the package supplies a wrapper function, `chisq_test()`, to carry out Chi-Squared goodness of fit tests on tidy data. The syntax goes like this: -```{r chisq-gof-wrapper, message = FALSE, warning = FALSE} +```{r} +#| label: chisq-gof-wrapper +#| message: false +#| warning: false chisq_test( gss, response = finrela, diff --git a/vignettes/infer.Rmd b/vignettes/infer.Rmd index eba0648a..b732ce8a 100644 --- a/vignettes/infer.Rmd +++ b/vignettes/infer.Rmd @@ -10,7 +10,8 @@ vignette: | \usepackage[utf8]{inputenc} --- -```{r include=FALSE} +```{r} +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ``` @@ -30,13 +31,20 @@ The workflow of this package is designed around this idea. Starting out with som Throughout this vignette, we make use of `gss`, a dataset supplied by `infer` containing a sample of 500 observations of 11 variables from the *General Social Survey*. -```{r load-packages, echo = FALSE, message = FALSE, warning = FALSE} +```{r} +#| label: load-packages +#| echo: false +#| message: false +#| warning: false library(dplyr) library(infer) ``` -```{r load-gss, warning = FALSE, message = FALSE} +```{r} +#| label: load-gss +#| warning: false +#| message: false # load in the dataset data(gss) @@ -50,14 +58,20 @@ Each row is an individual survey response, containing some basic demographic inf The `specify()` function can be used to specify which of the variables in the dataset you're interested in. If you're only interested in, say, the `age` of the respondents, you might write: -```{r specify-example, warning = FALSE, message = FALSE} +```{r} +#| label: specify-example +#| warning: false +#| message: false gss |> specify(response = age) ``` On the front-end, the output of `specify()` just looks like it selects off the columns in the dataframe that you've specified. Checking the class of this object, though: -```{r specify-one, warning = FALSE, message = FALSE} +```{r} +#| label: specify-one +#| warning: false +#| message: false gss |> specify(response = age) |> class() @@ -67,7 +81,10 @@ We can see that the `infer` class has been appended on top of the dataframe clas If you're interested in two variables--`age` and `partyid`, for example--you can `specify()` their relationship in one of two (equivalent) ways: -```{r specify-two, warning = FALSE, message = FALSE} +```{r} +#| label: specify-two +#| warning: false +#| message: false # as a formula gss |> specify(age ~ partyid) @@ -79,7 +96,10 @@ gss |> If you're doing inference on one proportion or a difference in proportions, you will need to use the `success` argument to specify which level of your `response` variable is a success. For instance, if you're interested in the proportion of the population with a college degree, you might use the following code: -```{r specify-success, warning = FALSE, message = FALSE} +```{r} +#| label: specify-success +#| warning: false +#| message: false # specifying for inference on proportions gss |> specify(response = college, success = "degree") @@ -89,7 +109,10 @@ gss |> The next step in the infer pipeline is often to declare a null hypothesis using `hypothesize()`. The first step is to supply one of "independence" or "point" to the `null` argument. If your null hypothesis assumes independence between two variables, then this is all you need to supply to `hypothesize()`: -```{r hypothesize-independence, warning = FALSE, message = FALSE} +```{r} +#| label: hypothesize-independence +#| warning: false +#| message: false gss |> specify(college ~ partyid, success = "degree") |> hypothesize(null = "independence") @@ -97,7 +120,10 @@ gss |> If you're doing inference on a point estimate, you will also need to provide one of `p` (the true proportion of successes, between 0 and 1), `mu` (the true mean), `med` (the true median), or `sigma` (the true standard deviation). For instance, if the null hypothesis is that the mean number of hours worked per week in our population is 40, we would write: -```{r hypothesize-40-hr-week, warning = FALSE, message = FALSE} +```{r} +#| label: hypothesize-40-hr-week +#| warning: false +#| message: false gss |> specify(response = hours) |> hypothesize(null = "point", mu = 40) @@ -115,7 +141,10 @@ Once we've asserted our null hypothesis using `hypothesize()`, we can construct Continuing on with our example above, about the average number of hours worked a week, we might write: -```{r generate-point, warning = FALSE, message = FALSE} +```{r} +#| label: generate-point +#| warning: false +#| message: false set.seed(1) gss |> @@ -130,7 +159,10 @@ Note that, before `generate()`ing, we've set the seed for random number generati To generate a null distribution for the independence of two variables, we could also randomly reshuffle the pairings of explanatory and response variables to break any existing association. For instance, to generate 1000 replicates that can be used to create a null distribution under the assumption that political party affiliation is not affected by age: -```{r generate-permute, warning = FALSE, message = FALSE} +```{r} +#| label: generate-permute +#| warning: false +#| message: false gss |> specify(partyid ~ age) |> hypothesize(null = "independence") |> @@ -141,7 +173,10 @@ gss |> `calculate()` calculates summary statistics from the output of infer core functions. The function takes in a `stat` argument, which is currently one of "mean", "median", "sum", "sd", "prop", "count", "diff in means", "diff in medians", "diff in props", "Chisq", "F", "t", "z", "slope", or "correlation". For example, continuing our example above to calculate the null distribution of mean hours worked per week: -```{r calculate-point, warning = FALSE, message = FALSE} +```{r} +#| label: calculate-point +#| warning: false +#| message: false gss |> specify(response = hours) |> hypothesize(null = "point", mu = 40) |> @@ -151,7 +186,10 @@ gss |> The output of `calculate()` here shows us the sample statistic (in this case, the mean) for each of our 1000 replicates. If you're carrying out inference on differences in means, medians, or proportions, or t and z statistics, you will need to supply an `order` argument, giving the order in which the explanatory variables should be subtracted. For instance, to find the difference in mean age of those that have a college degree and those that don't, we might write: -```{r specify-diff-in-means, warning = FALSE, message = FALSE} +```{r} +#| label: specify-diff-in-means +#| warning: false +#| message: false gss |> specify(age ~ college) |> hypothesize(null = "independence") |> @@ -165,7 +203,8 @@ infer also offers several utilities to extract the meaning out of summary statis To illustrate, we'll go back to the example of determining whether the mean number of hours worked per week is 40 hours. -```{r utilities-examples} +```{r} +#| label: utilities-examples # find the point estimate obs_mean <- gss |> specify(response = hours) |> @@ -183,14 +222,20 @@ Our point estimate `r obs_mean` seems *pretty* close to 40, but a little bit dif We could initially just visualize the null distribution. -```{r visualize, warning = FALSE, message = FALSE} +```{r} +#| label: visualize +#| warning: false +#| message: false null_dist |> visualize() ``` Where does our sample's observed statistic lie on this distribution? We can use the `obs_stat` argument to specify this. -```{r visualize2, warning = FALSE, message = FALSE} +```{r} +#| label: visualize2 +#| warning: false +#| message: false null_dist |> visualize() + shade_p_value(obs_stat = obs_mean, direction = "two-sided") @@ -198,7 +243,10 @@ null_dist |> Notice that infer has also shaded the regions of the null distribution that are as (or more) extreme than our observed statistic. (Also, note that we now use the `+` operator to apply the `shade_p_value()` function. This is because `visualize` outputs a plot object from `ggplot2` instead of a data frame, and the `+` operator is needed to add the p-value layer to the plot object.) The red bar looks like it's slightly far out on the right tail of the null distribution, so observing a sample mean of `r obs_mean` hours would be somewhat unlikely if the mean was actually 40 hours. How unlikely, though? -```{r get_p_value, warning = FALSE, message = FALSE} +```{r} +#| label: get_p_value +#| warning: false +#| message: false # get a two-tailed p-value p_value <- null_dist |> get_p_value(obs_stat = obs_mean, direction = "two-sided") @@ -210,7 +258,10 @@ It looks like the p-value is `r p_value`, which is pretty small---if the true me To get a confidence interval around our estimate, we can write: -```{r get_conf, message = FALSE, warning = FALSE} +```{r} +#| label: get_conf +#| message: false +#| warning: false # generate a distribution like the null distribution, # though exclude the null hypothesis from the pipeline boot_dist <- gss |> @@ -234,7 +285,10 @@ ci As you can see, 40 hours per week is not contained in this interval, which aligns with our previous conclusion that this finding is significant at the confidence level $\alpha = .05$. To see this interval represented visually, we can use the `shade_confidence_interval()` utility: -```{r visualize-ci, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-ci +#| warning: false +#| message: false boot_dist |> visualize() + shade_confidence_interval(endpoints = ci) @@ -246,7 +300,9 @@ infer also provides functionality to use theoretical methods for `"Chisq"`, `"F" Generally, to find a null distribution using theory-based methods, use the same code that you would use to find the observed statistic elsewhere, replacing calls to `calculate()` with `assume()`. For example, to calculate the observed $t$ statistic (a standardized mean): -```{r, message = FALSE, warning = FALSE} +```{r} +#| message: false +#| warning: false # calculate an observed t statistic obs_t <- gss |> specify(response = hours) |> @@ -256,7 +312,9 @@ obs_t <- gss |> Then, to define a theoretical $t$ distribution, we could write: -```{r, message = FALSE, warning = FALSE} +```{r} +#| message: false +#| warning: false # switch out calculate with assume to define a distribution t_dist <- gss |> specify(response = hours) |> @@ -265,7 +323,9 @@ t_dist <- gss |> From here, the theoretical distribution interfaces in the same way that simulation-based null distributions do. For example, to interface with p-values: -```{r, message = FALSE, warning = FALSE} +```{r} +#| message: false +#| warning: false # visualize the theoretical null distribution visualize(t_dist) + shade_p_value(obs_stat = obs_t, direction = "greater") @@ -276,7 +336,9 @@ get_p_value(t_dist, obs_t, "greater") Confidence intervals lie on the scale of the data rather than on the standardized scale of the theoretical distribution, so be sure to use the unstandardized observed statistic when working with confidence intervals. -```{r, message = FALSE, warning = FALSE} +```{r} +#| message: false +#| warning: false # find the theory-based confidence interval theor_ci <- get_confidence_interval( diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index fe6911a2..4d81cc42 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -13,7 +13,8 @@ vignette: | #### Introduction -```{r include=FALSE} +```{r} +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5, message = FALSE, warning = FALSE) options(digits = 4) @@ -23,12 +24,15 @@ This vignette is intended to provide a set of examples that nearly exhaustively Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this: -```{r load-packages, echo = FALSE} +```{r} +#| label: load-packages +#| echo: false library(dplyr) library(infer) ``` -```{r load-gss} +```{r} +#| label: load-gss # load in the dataset data(gss) @@ -355,7 +359,8 @@ null_dist |> The package also supplies a wrapper around `prop.test()` for tests of a single proportion on tidy data. -```{r prop_test_1_grp} +```{r} +#| label: prop_test_1_grp prop_test(gss, college ~ NULL, p = .2) @@ -557,7 +562,8 @@ Note the similarities in this plot and the previous one. The package also supplies a wrapper around `prop.test` to allow for tests of equality of proportions on tidy data. -```{r prop_test_2_grp} +```{r} +#| label: prop_test_2_grp prop_test(gss, college ~ sex, order = c("female", "male")) diff --git a/vignettes/paired.Rmd b/vignettes/paired.Rmd index 983f5b5b..6088c98b 100644 --- a/vignettes/paired.Rmd +++ b/vignettes/paired.Rmd @@ -8,12 +8,18 @@ vignette: | \usepackage[utf8]{inputenc} --- -```{r settings, include=FALSE} +```{r} +#| label: settings +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ``` -```{r load-packages, echo = FALSE, message = FALSE, warning = FALSE} +```{r} +#| label: load-packages +#| echo: false +#| message: false +#| warning: false library(ggplot2) library(dplyr) library(infer) @@ -25,7 +31,10 @@ In this vignette, we'll walk through conducting a randomization-based paired tes Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this: -```{r glimpse-gss-actual, warning = FALSE, message = FALSE} +```{r} +#| label: glimpse-gss-actual +#| warning: false +#| message: false dplyr::glimpse(gss) ``` @@ -50,7 +59,9 @@ To carry out inference on paired data with infer, we pre-compute the difference Here, we pre-compute the difference between paired observations as `diff`. The distribution of `diff` in the observed data looks like this: -```{r plot-diff, echo = FALSE} +```{r} +#| label: plot-diff +#| echo: false unique_diff <- unique(gss_paired$diff) gss_paired |> ggplot2::ggplot() + @@ -67,7 +78,8 @@ From the looks of the distribution, most respondents worked a similar number of We calculate the observed statistic in the paired setting in the same way that we would outside of the paired setting. Using `specify()` and `calculate()`: -```{r calc-obs-mean} +```{r} +#| label: calc-obs-mean # calculate the observed statistic observed_statistic <- gss_paired |> @@ -79,7 +91,8 @@ The observed statistic is `r observed_statistic`. Now, we want to compare this s Tests for paired data are carried out via the `null = "paired independence"` argument to `hypothesize()`. -```{r generate-null} +```{r} +#| label: generate-null # generate the null distribution null_dist <- gss_paired |> @@ -98,7 +111,8 @@ For each replicate, `generate()` carries out `type = "permute"` with `null = "pa To get a sense for what this distribution looks like, and where our observed statistic falls, we can use `visualize()`: -```{r visualize} +```{r} +#| label: visualize # visualize the null distribution and test statistic null_dist |> visualize() + @@ -110,7 +124,8 @@ It looks like our observed mean of `r observed_statistic` would be relatively un More exactly, we can calculate the p-value: -```{r p-value} +```{r} +#| label: p-value # calculate the p value from the test statistic and null distribution p_value <- null_dist |> get_p_value(obs_stat = observed_statistic, @@ -123,7 +138,8 @@ Thus, if the change in mean number of hours worked per week over this time perio We can also generate a bootstrap confidence interval for the mean paired difference using `type = "bootstrap"` in `generate()`. As before, we use the pre-computed differences when generating bootstrap resamples: -```{r generate-boot} +```{r} +#| label: generate-boot # generate a bootstrap distribution boot_dist <- gss_paired |> @@ -139,7 +155,8 @@ Note that, unlike the null distribution of test statistics generated earlier wit Calculating a confidence interval: -```{r confidence-interval} +```{r} +#| label: confidence-interval # calculate the confidence from the bootstrap distribution confidence_interval <- boot_dist |> get_confidence_interval(level = .95) diff --git a/vignettes/t_test.Rmd b/vignettes/t_test.Rmd index a014815f..0e3f2196 100644 --- a/vignettes/t_test.Rmd +++ b/vignettes/t_test.Rmd @@ -8,12 +8,18 @@ vignette: | \usepackage[utf8]{inputenc} --- -```{r settings, include=FALSE} +```{r} +#| label: settings +#| include: false knitr::opts_chunk$set(fig.width = 6, fig.height = 4.5) options(digits = 4) ``` -```{r load-packages, echo = FALSE, message = FALSE, warning = FALSE} +```{r} +#| label: load-packages +#| echo: false +#| message: false +#| warning: false library(ggplot2) library(dplyr) library(infer) @@ -25,7 +31,10 @@ In this vignette, we'll walk through conducting $t$-tests and their randomizatio Throughout this vignette, we'll make use of the `gss` dataset supplied by infer, which contains a sample of data from the General Social Survey. See `?gss` for more information on the variables included and their source. Note that this data (and our examples on it) are for demonstration purposes only, and will not necessarily provide accurate estimates unless weighted properly. For these examples, let's suppose that this dataset is a representative sample of a population we want to learn about: American adults. The data looks like this: -```{r glimpse-gss-actual, warning = FALSE, message = FALSE} +```{r} +#| label: glimpse-gss-actual +#| warning: false +#| message: false dplyr::glimpse(gss) ``` @@ -35,7 +44,9 @@ The 1-sample $t$-test can be used to test whether a sample of continuous data co As an example, we'll test whether the average American adult works 40 hours a week using data from the `gss`. To do so, we make use of the `hours` variable, giving the number of hours that respondents reported having worked in the previous week. The distribution of `hours` in the observed data looks like this: -```{r plot-1-sample, echo = FALSE} +```{r} +#| label: plot-1-sample +#| echo: false gss |> ggplot2::ggplot() + ggplot2::aes(x = hours) + @@ -53,7 +64,10 @@ infer's randomization-based analogue to the 1-sample $t$-test is a 1-sample mean First, to calculate the observed statistic, we can use `specify()` and `calculate()`. -```{r calc-obs-stat-1-sample, warning = FALSE, message = FALSE} +```{r} +#| label: calc-obs-stat-1-sample +#| warning: false +#| message: false # calculate the observed statistic observed_statistic <- gss |> specify(response = hours) |> @@ -64,7 +78,10 @@ The observed statistic is `r observed_statistic`. Now, we want to compare this s We can `generate()` the null distribution using the bootstrap. In the bootstrap, for each replicate, a sample of size equal to the input sample size is drawn (with replacement) from the input sample data. This allows us to get a sense of how much variability we'd expect to see in the entire population so that we can then understand how unlikely our sample mean would be. -```{r generate-null-1-sample, warning = FALSE, message = FALSE} +```{r} +#| label: generate-null-1-sample +#| warning: false +#| message: false # generate the null distribution null_dist_1_sample <- gss |> specify(response = hours) |> @@ -75,7 +92,10 @@ null_dist_1_sample <- gss |> To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`: -```{r visualize-1-sample, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-1-sample +#| warning: false +#| message: false # visualize the null distribution and test statistic! null_dist_1_sample |> visualize() + @@ -86,7 +106,10 @@ null_dist_1_sample |> It looks like our observed mean of `r observed_statistic` would be relatively unlikely if the true mean was actually 40 hours a week. More exactly, we can calculate the p-value: -```{r p-value-1-sample, warning = FALSE, message = FALSE} +```{r} +#| label: p-value-1-sample +#| warning: false +#| message: false # calculate the p value from the test statistic and null distribution p_value_1_sample <- null_dist_1_sample |> get_p_value(obs_stat = observed_statistic, @@ -99,7 +122,10 @@ Thus, if the true mean number of hours worked per week was really 40, our approx Analogously to the steps shown above, the package supplies a wrapper function, `t_test`, to carry out 1-sample $t$-tests on tidy data. Rather than using randomization, the wrappers carry out the theory-based $t$-test. The syntax looks like this: -```{r t-test-wrapper, message = FALSE, warning = FALSE} +```{r} +#| label: t-test-wrapper +#| message: false +#| warning: false t_test(gss, response = hours, mu = 40) ``` @@ -128,7 +154,9 @@ Note that the resulting $t$-statistics from these two theory-based approaches ar 2-Sample $t$-tests evaluate the difference in mean values of two populations using data randomly-sampled from the population that approximately follows a normal distribution. As an example, we'll test if Americans work the same number of hours a week regardless of whether they have a college degree or not using data from the `gss`. The `college` and `hours` variables allow us to do so: -```{r plot-2-sample, echo = FALSE} +```{r} +#| label: plot-2-sample +#| echo: false gss |> ggplot2::ggplot() + ggplot2::aes(x = college, y = hours) + @@ -145,7 +173,10 @@ infer's randomization-based analogue to the 2-sample $t$-test is a difference in As with the one-sample test, to calculate the observed difference in means, we can use `specify()` and `calculate()`. -```{r calc-obs-stat-2-sample, warning = FALSE, message = FALSE} +```{r} +#| label: calc-obs-stat-2-sample +#| warning: false +#| message: false # calculate the observed statistic observed_statistic <- gss |> specify(hours ~ college) |> @@ -162,7 +193,10 @@ Now, we want to compare this difference in means to a null distribution, generat We can `generate()` the null distribution using permutation, where, for each replicate, each value of degree status will be randomly reassigned (without replacement) to a new number of hours worked per week in the sample in order to break any association between the two. -```{r generate-null-2-sample, warning = FALSE, message = FALSE} +```{r} +#| label: generate-null-2-sample +#| warning: false +#| message: false # generate the null distribution with randomization null_dist_2_sample <- gss |> specify(hours ~ college) |> @@ -175,7 +209,10 @@ Again, note that, in the lines `specify(hours ~ college)` in the above chunk, we To get a sense for what these distributions look like, and where our observed statistic falls, we can use `visualize()`. -```{r visualize-2-sample, warning = FALSE, message = FALSE} +```{r} +#| label: visualize-2-sample +#| warning: false +#| message: false # visualize the randomization-based null distribution and test statistic! null_dist_2_sample |> visualize() + @@ -185,7 +222,10 @@ null_dist_2_sample |> It looks like our observed statistic of `r observed_statistic` would be unlikely if there was truly no relationship between degree status and number of hours worked. More exactly, we can calculate the p-value; theoretical p-values are not yet supported, so we'll use the randomization-based null distribution to do calculate the p-value. -```{r p-value-2-sample, warning = FALSE, message = FALSE} +```{r} +#| label: p-value-2-sample +#| warning: false +#| message: false # calculate the p value from the randomization-based null # distribution and the observed statistic p_value_2_sample <- null_dist_2_sample |> @@ -199,7 +239,10 @@ Thus, if there were really no relationship between the number of hours worked a Note that, similarly to the steps shown above, the package supplies a wrapper function, `t_test()`, to carry out 2-sample $t$-tests on tidy data. The syntax looks like this: -```{r 2-sample-t-test-wrapper, message = FALSE, warning = FALSE} +```{r} +#| label: 2-sample-t-test-wrapper +#| message: false +#| warning: false t_test(x = gss, formula = hours ~ college, order = c("degree", "no degree"), From 9229ad2d6fad3fcac4c1a34a05cbdfdcbc06a8fb Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:21:41 -0500 Subject: [PATCH 08/11] update license year --- LICENSE | 2 +- LICENSE.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index 6c4271be..6073eaa3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2021 +YEAR: 2025 COPYRIGHT HOLDER: infer authors diff --git a/LICENSE.md b/LICENSE.md index 6fe1102e..52035204 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,6 +1,6 @@ # MIT License -Copyright (c) 2021 infer authors +Copyright (c) 2025 infer authors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 220d0ead64c01f11a523e7dd4e233ce19fa3bded Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:22:00 -0500 Subject: [PATCH 09/11] `usethis::use_tidy_description()` --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9c1fbabf..a9772c38 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -65,9 +65,9 @@ Suggests: VignetteBuilder: knitr Config/Needs/website: tidyverse/tidytemplate +Config/testthat/edition: 3 +Config/usethis/last-upkeep: 2025-04-25 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Config/testthat/edition: 3 -Config/usethis/last-upkeep: 2025-04-25 From fbd1944e452ef15117c67f3efe7edead7793c8eb Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:24:38 -0500 Subject: [PATCH 10/11] `usethis::use_tidy_github_actions()` Mostly just updates GHA versions, but also renames `check-standard.yaml` to `R-CMD-check.yaml` --- .../{check-standard.yaml => R-CMD-check.yaml} | 28 +++++++++++++------ .github/workflows/pkgdown.yaml | 11 +++++--- .github/workflows/pr-commands.yaml | 12 ++++++-- .github/workflows/test-coverage.yaml | 28 +++++++++++++------ README.Rmd | 2 +- README.md | 24 +++++++++------- codecov.yml | 21 +++++++++----- 7 files changed, 84 insertions(+), 42 deletions(-) rename .github/workflows/{check-standard.yaml => R-CMD-check.yaml} (54%) diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/R-CMD-check.yaml similarity index 54% rename from .github/workflows/check-standard.yaml rename to .github/workflows/R-CMD-check.yaml index e89eb525..69cfc6ad 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,13 +1,17 @@ # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +# +# NOTE: This workflow is overkill for most R packages and +# check-standard.yaml is likely a better choice. +# usethis::use_github_action("check-standard") will install it. on: push: - branches: main + branches: [main, master] pull_request: - branches: main - workflow_dispatch: -name: R-CMD-check +name: R-CMD-check.yaml + +permissions: read-all jobs: R-CMD-check: @@ -19,20 +23,25 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} - {os: windows-latest, r: 'release'} + # use 4.0 or 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: 'oldrel-4'} - - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-latest, r: 'release'} - - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -50,3 +59,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 087f0b05..bfc9f4db 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -4,12 +4,13 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] release: types: [published] workflow_dispatch: -name: pkgdown +name: pkgdown.yaml + +permissions: read-all jobs: pkgdown: @@ -19,8 +20,10 @@ jobs: group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-pandoc@v2 @@ -39,7 +42,7 @@ jobs: - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' - uses: JamesIves/github-pages-deploy-action@v4.4.1 + uses: JamesIves/github-pages-deploy-action@v4.5.0 with: clean: false branch: gh-pages diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml index 71f335b3..2edd93f2 100644 --- a/.github/workflows/pr-commands.yaml +++ b/.github/workflows/pr-commands.yaml @@ -4,7 +4,9 @@ on: issue_comment: types: [created] -name: Commands +name: pr-commands.yaml + +permissions: read-all jobs: document: @@ -13,8 +15,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: @@ -50,8 +54,10 @@ jobs: runs-on: ubuntu-latest env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/pr-fetch@v2 with: diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 2c5bb502..0ab748d6 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -4,9 +4,10 @@ on: push: branches: [main, master] pull_request: - branches: [main, master] -name: test-coverage +name: test-coverage.yaml + +permissions: read-all jobs: test-coverage: @@ -15,7 +16,7 @@ jobs: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: @@ -23,28 +24,39 @@ jobs: - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::covr + extra-packages: any::covr, any::xml2 needs: coverage - name: Test coverage run: | - covr::codecov( + cov <- covr::package_coverage( quiet = FALSE, clean = FALSE, - install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") ) + print(cov) + covr::to_cobertura(cov) shell: Rscript {0} + - uses: codecov/codecov-action@v5 + with: + # Fail if error if not on PR, or if on PR and token is given + fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} + files: ./cobertura.xml + plugins: noop + disable_search: true + token: ${{ secrets.CODECOV_TOKEN }} + - name: Show testthat output if: always() run: | ## -------------------------------------------------------------------- - find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true shell: bash - name: Upload test results if: failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: coverage-test-failures path: ${{ runner.temp }}/package diff --git a/README.Rmd b/README.Rmd index 1ff680df..7bd751e8 100755 --- a/README.Rmd +++ b/README.Rmd @@ -12,7 +12,7 @@ output: github_document -[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/check-standard.yaml) +[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer) [![Coverage Status](https://img.shields.io/codecov/c/github/tidymodels/infer/main.svg)](https://app.codecov.io/github/tidymodels/infer/?branch=main) diff --git a/README.md b/README.md index a3038148..b618e062 100755 --- a/README.md +++ b/README.md @@ -2,20 +2,22 @@ # infer R Package A hexagonal logo. A silhouette of a fir tree sits atop green text, reading 'infer'. The logo has a white background and green border. + + + -[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/check-standard.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/check-standard.yaml) +[![R-CMD-check](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidymodels/infer/actions/workflows/R-CMD-check.yaml) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/infer)](https://cran.r-project.org/package=infer) [![Coverage Status](https://img.shields.io/codecov/c/github/tidymodels/infer/main.svg)](https://app.codecov.io/github/tidymodels/infer/?branch=main) The objective of this package is to perform statistical inference using -an expressive statistical grammar that coheres with the `tidyverse` -design framework. The package is centered around 4 main verbs, -supplemented with many utilities to visualize and extract value from -their outputs. +an expressive statistical grammar that coheres with the tidyverse design +framework. The package is centered around 4 main verbs, supplemented +with many utilities to visualize and extract value from their outputs. - `specify()` allows you to specify the variable, or relationship between variables, that you’re interested in. @@ -32,6 +34,7 @@ To learn more about the principles underlying the package design, see A diagram showing four steps to carry out randomization-based inference: specify hypothesis, generate data, calculate statistic, and visualize. From left to right, each step is connected by an arrow, while the diagram indicates that generating data and calculating statistics can happen iteratively.

+

@@ -47,14 +50,14 @@ Statistics](https://openintro-ims.netlify.app/). ------------------------------------------------------------------------ -To install the current stable version of `infer` from CRAN: +To install the current stable version of infer from CRAN: ``` r install.packages("infer") ``` -To install the developmental stable version of `infer`, make sure to -install `remotes` first. The `pkgdown` website for this version is at +To install the developmental stable version of infer, make sure to +install remotes first. The pkgdown website for this version is at [infer.tidymodels.org](https://infer.tidymodels.org/). ``` r @@ -150,6 +153,7 @@ visualize(null_dist) + A histogram showing a distribution of F statistics, right-tailed and centered around one. The x axis ranges from zero to five. The region of the histogram to the right of the observed statistic, just above two, is shaded red to represent the p-value.

+

@@ -165,9 +169,9 @@ null_dist |> ## # A tibble: 1 × 1 ## p_value ## - ## 1 0.06 + ## 1 0.059 -Note that the formula and non-formula interfaces (i.e. `age ~ partyid` +Note that the formula and non-formula interfaces (i.e., `age ~ partyid` vs. `response = age, explanatory = partyid`) work for all implemented inference procedures in `infer`. Use whatever is more natural for you. If you will be doing modeling using functions like `lm()` and `glm()`, diff --git a/codecov.yml b/codecov.yml index dfed00ea..04c55859 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,7 +1,14 @@ -comment: - layout: "reach, diff, flags, files" - behavior: default - require_changes: false # if true: only post the comment if coverage changes - require_base: no # [yes :: must have a base report to post] - require_head: yes # [yes :: must have a head report to post] - branches: null \ No newline at end of file +comment: false + +coverage: + status: + project: + default: + target: auto + threshold: 1% + informational: true + patch: + default: + target: auto + threshold: 1% + informational: true From dd63f497e71908fa7c2517791c5e723b9860baca Mon Sep 17 00:00:00 2001 From: simonpcouch Date: Fri, 25 Apr 2025 13:32:01 -0500 Subject: [PATCH 11/11] remove use of pipe placeholder --- vignettes/observed_stat_examples.Rmd | 2 +- vignettes/paired.Rmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/observed_stat_examples.Rmd b/vignettes/observed_stat_examples.Rmd index 4d81cc42..6dedf879 100644 --- a/vignettes/observed_stat_examples.Rmd +++ b/vignettes/observed_stat_examples.Rmd @@ -210,7 +210,7 @@ set.seed(1) gss_paired <- gss |> mutate( - hours_previous = hours + 5 - rpois(nrow(.), 4.8), + hours_previous = hours + 5 - rpois(nrow(gss), 4.8), diff = hours - hours_previous ) diff --git a/vignettes/paired.Rmd b/vignettes/paired.Rmd index 6088c98b..74f16392 100644 --- a/vignettes/paired.Rmd +++ b/vignettes/paired.Rmd @@ -45,7 +45,7 @@ set.seed(1) gss_paired <- gss |> mutate( - hours_previous = hours + 5 - rpois(nrow(.), 4.8), + hours_previous = hours + 5 - rpois(nrow(gss), 4.8), diff = hours - hours_previous )