diff --git a/DESCRIPTION b/DESCRIPTION index 7bf1cd4..d655465 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,6 +18,7 @@ Imports: DescTools(>= 0.99.58), dplyr (>= 1.1.4), gt (>= 0.11.1), + haven (>= 2.5.4), shiny (>= 1.9.1), shinybusy (>= 0.3.3), shinyWidgets (>= 0.8.7) diff --git a/NAMESPACE b/NAMESPACE index 9c2424e..9f29339 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ importFrom(gt,render_gt) importFrom(gt,sub_missing) importFrom(gt,sub_values) importFrom(gt,tab_options) +importFrom(haven,write_sav) importFrom(shinyWidgets,colorPickr) importFrom(shinyWidgets,dropdownButton) importFrom(shinyWidgets,radioGroupButtons) diff --git a/NEWS.md b/NEWS.md index e292e19..337edd6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,35 @@ editor_options: 7 - Models: linear model, logistic regression, Kmeans, Trees +## 2025.01.29-1 + +### Bug Fixes + +1 - **Analysis > Normality Test > Shapiro: error if all values are equal**: +Shapiro does not accept all equal values. Now with check and msg_error +([#15](https://github.com/lgschuck/spada/issues/15)) + +### Improvements + +1 - **Correlation** module: chane name of Alternatives and chance card header title +to Correlation Test + +2 - **Export file** module: now writes Sav (**haven package**) and uses checkbox to compress RDS + +3 - **Normality Test** module: better names and better checks for Shapiro-Wilk Test +([#15](https://github.com/lgschuck/spada/issues/15)) + +4 - **spada.R**: now make.names for the variables inside datasets + +5 - **spada_ui.R**: new name in the menu for Correlation (now Correlation Test) + +6 - **utils.R**: new internal functions: make_var_names and test_all_equal + +7 - **Z Test** module: better visuals and new checks for Mean and Std Deviation inputs + +8 - **DESCRIPTION**: insert [haven](https://haven.tidyverse.org) +package as new dependencie + ## 2025.01.28-1 ### Improvements diff --git a/R/correlation_module.R b/R/correlation_module.R index 23bacc3..a83eee6 100644 --- a/R/correlation_module.R +++ b/R/correlation_module.R @@ -4,7 +4,7 @@ correlation_ui <- function(id) { ns <- NS(id) card( full_screen = T, - card_header('Correlation', class = 'mini-header'), + card_header('Correlation Test', class = 'mini-header'), layout_sidebar( bg = '#02517d', sidebar = sidebar(uiOutput(ns('parameters')), bg = '#e3e3e4'), @@ -21,9 +21,9 @@ correlation_ui <- function(id) { 'Kendall' = 'kendall', 'Spearman' = 'spearman')), radioButtons(ns('radio_alternative'), 'Alternative', - c('Two.sided' = 'two.sided', + c('Two sided' = 'two.sided', 'Less' = 'less', - 'Greater' = 'greater')), + 'Greater' = 'greater'), inline = T), numericInput(ns('confidence'), 'Confidence Interval - %', value = 95, 0, 100, 5, width = '200px'), layout_columns( diff --git a/R/export_file_module.R b/R/export_file_module.R index 3688723..13a3b33 100644 --- a/R/export_file_module.R +++ b/R/export_file_module.R @@ -7,7 +7,7 @@ export_file_ui <- function(id) { fluidRow( column(3, textInput(ns('file_name'), 'File name', value = 'dataset')), column(3, radioButtons(ns('radio_format'), 'File format', - c('csv', 'RDS', 'RDS Compressed'), inline = T)) + c('csv', 'RDS', 'sav'), inline = T)) ), conditionalPanel( condition = sprintf("input['%s'] == 'csv'", ns('radio_format')), @@ -29,6 +29,26 @@ export_file_ui <- function(id) { ) ) ), + conditionalPanel( + condition = sprintf("input['%s'] == 'RDS'", ns('radio_format')), + card( + card_header('RDS Parameters', class = 'mini-header'), + card_body( + checkboxInput(ns('checkbox_rds_compress'), 'Compress') + ), + ) + ), + conditionalPanel( + condition = sprintf("input['%s'] == 'sav'", ns('radio_format')), + card( + card_header('Sav Parameters', class = 'mini-header'), + card_body( + radioButtons(ns('radio_sav_compress'), 'Compress', + c('Byte' = 'byte', 'None' = 'none', 'Zsav' = 'zsav'), + inline = T) + ), + ) + ), card_footer(downloadButton(ns('down_handler'), 'Export Active dataset', icon('download'))) ) @@ -41,11 +61,13 @@ export_file_server <- function(id, df_active) { output$down_handler <- downloadHandler( filename = function() { - paste(input$file_name, + paste0(input$file_name, if(input$radio_format == 'csv'){ '.csv' } else if (input$radio_format %in% c('RDS', 'RDS Compressed')){ '.RDS' + } else if (input$radio_format %in% c('sav')){ + '.sav' }) }, content = function(file) { @@ -58,9 +80,9 @@ export_file_server <- function(id, df_active) { scipen = as.integer(input$radio_scientific) ) } else if (input$radio_format == 'RDS'){ - saveRDS(df_active(), file, compress = F) - } else if (input$radio_format == 'RDS Compressed') { - saveRDS(df_active(), file, compress = T) + saveRDS(df_active(), file, compress = input$checkbox_rds_compress) + } else if (input$radio_format == 'sav') { + write_sav(df_active(), file, compress = input$radio_sav_compress) } } ) diff --git a/R/normality_test_module.R b/R/normality_test_module.R index b145b50..13c65d9 100644 --- a/R/normality_test_module.R +++ b/R/normality_test_module.R @@ -85,7 +85,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { }) var_analysis <- reactive({ - req(df_active) + req(df_active()) df_names <- df_active() |> names() @@ -96,8 +96,9 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { var <- reactive({ req(df_active()) - req(input$sel_var1) - temp <- df_active()[[input$sel_var1]] + req(input$sel_var) + + temp <- df_active()[[input$sel_var]] temp[!is.na(temp)] }) @@ -106,7 +107,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { output$parameters <- renderUI({ req(var_analysis()) tagList( - selectInput(ns('sel_var1'), 'Variable 1', var_analysis()), + selectInput(ns('sel_var'), 'Variable 1', var_analysis()), p('* Showing only numeric variables') ) }) @@ -114,7 +115,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { # histogram --------------------------------------------------------------- output$hist <- renderPlot({ req(df_active()) - req(input$sel_var1) + req(input$sel_var) req(var()) validate(need(input$bins > 0, 'Bins must be 1 or higher')) @@ -125,7 +126,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { col = color_fill(), xlab = 'Values', ylab = 'Density', - main = paste('Histogram of', input$sel_var1) + main = paste('Histogram of', input$sel_var) ) curve(dnorm(x, mean = mean(var()), sd = sd(var())), @@ -137,10 +138,10 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { # qq plot ----------------------------------------------------------------- output$qq_plot <- renderPlot({ - req(input$sel_var1) + req(input$sel_var) req(var()) - qqnorm(var(), main = paste('Normal QQ Plot:', input$sel_var1), + qqnorm(var(), main = paste('Normal QQ Plot:', input$sel_var), col = color_fill()) qqline(var(), col = color_line()) @@ -149,7 +150,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { # ks test ----------------------------------------------------------------- ks_results <- reactive({ - req(input$sel_var1) + req(input$sel_var) req(var()) if(anyDuplicated(var()) > 0){ @@ -165,7 +166,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { df$results <- rownames(df) names(df) <- c('values', 'results') - df[df$results == 'data.name', ]$values <- paste(input$sel_var1) + df[df$results == 'data.name', ]$values <- paste(input$sel_var) results = list( 'results' = df, @@ -179,7 +180,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { df$results <- rownames(df) names(df) <- c('values', 'results') - df[df$results == 'data.name', ]$values <- paste(input$sel_var1) + df[df$results == 'data.name', ]$values <- paste(input$sel_var) results = list('results' = df) } @@ -250,22 +251,27 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { # sw test ----------------------------------------------------------------- sw_results <- reactive({ - req(input$sel_var1) + req(input$sel_var) req(var()) req(var_len()) - validate( - need(between(var_len(), 3, 5000), - paste0('Sample size must be between 3 and 5000 (actual: ', - var_len(), ')') - ) - ) + if (var_len() < 3 || var_len() > 5000) { + msg(paste0('Sample size must be between 3 and 5000 (actual: ', var_len(), ')'), 3) + + return() + } + + if (test_all_equal(var())) { + msg('Shapiro-Wilk test: the values can not be all equal') + return() + } + df <- shapiro.test(var()) |> unlist() |> as.data.frame() df$results <- rownames(df) names(df) <- c('values', 'results') - df[df$results == 'data.name', ]$values <- paste(input$sel_var1) + df[df$results == 'data.name', ]$values <- paste(input$sel_var) df }) |> bindEvent(input$btn_sw) @@ -280,7 +286,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { }) output$conditional_staticard_sw <- renderUI({ - req(ks_results()) + req(sw_results()) tagList( statiCard(sw_results() |> filter(results %in% c('statistic.W')) |> @@ -315,6 +321,7 @@ normality_test_server <- function(id, df, df_metadata, color_fill, color_line) { save_gt_ui(ns('sw_save_gt')) }) + # help file of shapiro.test observe({ showModal(modalDialog( HTML(get_help_file('stats', 'shapiro.test')), diff --git a/R/spada.R b/R/spada.R index 640d3b8..67d57a2 100644 --- a/R/spada.R +++ b/R/spada.R @@ -32,7 +32,8 @@ #' fmt_number fmt_percent gt gt_output gtsave opt_interactive #' render_gt sub_missing sub_values tab_options #' - +#' @importFrom haven write_sav +#' #' @importFrom shinyWidgets colorPickr updateColorPickr show_toast dropdownButton #' radioGroupButtons statiCard #' @@ -45,11 +46,12 @@ spada <- function(...) { datasets <- list(...) - if(length(datasets) == 0) datasets <- list('iris' = datasets::iris, 'mtcars' = datasets::mtcars) + if(length(datasets) == 0) datasets <- list('iris' = datasets::iris, + 'mtcars' = datasets::mtcars) stopifnot('Objects must be data.frame and have at least 1 row each' = sapply(datasets, is.data.frame) |> all() && all(sapply(datasets, nrow) > 0)) - # set names + # set datasets names if(is.null(names(datasets))){ names(datasets) <- lapply(substitute(list(...))[-1], deparse) |> unlist() |> @@ -61,7 +63,9 @@ spada <- function(...) { names(datasets) <- make.names(names(datasets), unique = T) } - gc() + + # make sure all datasets variables are valid names + datasets <- lapply(datasets, make_var_names) ### Run App ----------------------------------------------------------------- shinyApp(spada_ui(), spada_server(datasets), diff --git a/R/spada_ui.R b/R/spada_ui.R index 3f217bc..0d3f6f5 100644 --- a/R/spada_ui.R +++ b/R/spada_ui.R @@ -129,7 +129,7 @@ spada_ui <- function(){ nav_panel('Descriptive Stats', icon = bs_icon('graph-up'), descriptive_stats_ui('pA_desc_stats')), - nav_panel('Correlation', + nav_panel('Correlation Test', icon = bs_icon('magnet'), correlation_ui('pA_correlation')), nav_panel('Normality Test', diff --git a/R/utils.R b/R/utils.R index 055b442..03100c2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -259,14 +259,14 @@ ttip <- function(TRIGGER, ..., ID = NULL, PLACE = 'top'){ } -# get function help ------------------------------------------------------- +# get function help ----------------------------------------------------------- get_help_file <- function(pak, fun){ utils::capture.output( tools::Rd2HTML(tools::Rd_db(pak)[[paste0(fun, '.Rd')]]) ) } -# format decimals --------------------------------------------------------- +# format decimals ------------------------------------------------------------- f_dec <- function(x, dig = 0){ if(is.numeric(x) && !is.na(x) |> all()){ format(round(x, dig), nsmall = dig, scientific = F) @@ -275,6 +275,17 @@ f_dec <- function(x, dig = 0){ } } +# make var names -------------------------------------------------------------- +make_var_names <- function(df){ + names(df) <- names(df) |> make.names(unique = T) + return(df) +} + +# test all equal -------------------------------------------------------------- +test_all_equal <- function(x){ + all(x == x[1]) +} + # palettes -------------------------------------------------------------------- gray_palette <- c('#ffffff', '#585858', '#232323') diff --git a/R/z_test_module.R b/R/z_test_module.R index 079570e..82a23ff 100644 --- a/R/z_test_module.R +++ b/R/z_test_module.R @@ -16,12 +16,21 @@ z_test_ui <- function(id) { sidebar = sidebar( width = 400, h5('Parameters', style = 'margin-bottom: -18px;'), - numericInput(ns('mu'), 'Hypothesized Mean', 0), - numericInput(ns('sd'), 'Std Deviation of Population', 1), + layout_columns( + numericInput( + ns('mu'), + list('Mean', bs_icon('info-circle') |> + ttip('Hypothesized Mean')), 0), + numericInput( + ns('sd'), + list('Std Deviation', bs_icon('info-circle') |> + ttip('Standard Deviation of Population')), + value = 1, min = 0) + ), radioButtons(ns('radio_alternative'), 'Alternative', - c('Two.sided' = 'two.sided', + c('Two sided' = 'two.sided', 'Less' = 'less', - 'Greater' = 'greater')), + 'Greater' = 'greater'), inline = T), numericInput(ns('confidence'), 'Confidence Interval - %', value = 95, 0, 100, 5, width = '200px'), layout_columns( @@ -104,8 +113,12 @@ z_test_server <- function(id, df, df_metadata, color_fill, color_line) { req(input$sel_var) req(input$radio_alternative) - if(input$sd == 0){ - msg_error('Standard Deviation can not be 0', 2) + if(!isTruthy(input$mu)){ + msg_error('Inform a value for the Mean') + } else if(!isTruthy(input$sd)){ + msg_error('Inform a value for the Std Deviation') + } else if(input$sd <= 0){ + msg_error('Standard Deviation must be positive ( > 0)', 2) } else if(!isTruthy(input$confidence) || !between(input$confidence, 0, 100)) { msg_error('Confidence interval must be between 0 and 100%', 2) diff --git a/README.Rmd b/README.Rmd index 73c3a22..53ddaa4 100644 --- a/README.Rmd +++ b/README.Rmd @@ -46,7 +46,7 @@ devtools::install_github("lgschuck/spada") For a specific release visit [Releases](https://github.com/lgschuck/spada/releases) and change the ref parameter bellow for the tag name: - + ``` r devtools::install_github("lgschuck/spada", ref = "2025.01.13-1") diff --git a/README.md b/README.md index 26fa764..1b9ea2c 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ For a specific release visit [Releases](https://github.com/lgschuck/spada/releases) and change the ref parameter bellow for the tag name: - + ``` r devtools::install_github("lgschuck/spada", ref = "2025.01.13-1") diff --git a/docs/index.html b/docs/index.html index d9d0dd0..93a0e88 100644 --- a/docs/index.html +++ b/docs/index.html @@ -108,7 +108,7 @@
For a specific release visit Releases and change the ref parameter bellow for the tag name:
-
devtools::install_github("lgschuck/spada", ref = "2025.01.13-1")
1 - Analysis > Normality Test > Shapiro: error if all values are equal: Shapiro does not accept all equal values. Now with check and msg_error (#15)
+1 - Correlation module: chane name of Alternatives and chance card header title to Correlation Test
+2 - Export file module: now writes Sav (haven package) and uses checkbox to compress RDS
+3 - Normality Test module: better names and better checks for Shapiro-Wilk Test (#15)
+4 - spada.R: now make.names for the variables inside datasets
+5 - spada_ui.R: new name in the menu for Correlation (now Correlation Test)
+6 - utils.R: new internal functions: make_var_names and test_all_equal
+7 - Z Test module: better visuals and new checks for Mean and Std Deviation inputs
+8 - DESCRIPTION: insert haven package as new dependencie
+1 - Exploratory > Stats Table: digits input has no effect: the input was been passed to gt functions (fmt_numeric) but the value column was char given the paste command for Mode values. Fixed with new function f_dec. (#13)
2 - Analysis > Correlation: not enough finite observations: stats::cor.test for Pearson demands at least 3 valid values (Spearman and Kendal a least 2 valid values). New check throughs a error message if less than 3 valid values for all methods. (#14)
1 - Correlation module: now check if Standard Deviation of any informed variable is zero, avoiding warning. Also fixed (#14).
2 - Exploratory module: now with req (for main variable and variable 2) in render_plot (output$gt_dist)
3 - Stats table module: align columns, use ‘-’ for sub_missing becasue the long dash is not an ASCII and could not be replicated in sub_values (devtools::check). Also fixed (#13)
@@ -95,12 +113,12 @@1 - Exploratory Page > Boxplot by groups: error when plot Integer vs Numeric: The error occurs because there is more unique values in Variable 2 than in colors() function tha is used to sample colors. Changed to replace = T. (#11)
2 - Exploratory Page > Stats table: Mode NA for numeric, date, logical and complex var: the gt table was receiving tha NA value as character and the function sub_missing() does not have effect on those values. Now the Mode is passed as character only if it is not NA. (#12)
1 - Descriptive Stats module: now Mode returns NA (not as character) and only paste/collapse values if Mode exists. Inserted sub_missing() in gt_stats for better look and consistency with other views
2 - Stats table module: now Mode returns NA (not as character) and only paste/collapse values if Mode exists
3 - Exploratory module: now the Variable 2 can not be float in Boxplot by Groups, because does not seam reasonable to have an infinite number of groups. Related to (#11)
@@ -109,11 +127,11 @@1 - Filter rows: operators NA and not NA requires a value to apply the filter: fixed, filter_rows_module.R refactored, now with much more robust check for filters, operators and values (#10)
1 - New dependencie: package DescTools
2 - Descriptive Stats module: now with Mode (DescTools package) for numeri, character and factor variables
3 - Filter Rows module: refactored to check operators and values. New operators: Outlier (and Not Outlier) and Logical (TRUE and FALSE)
@@ -124,7 +142,7 @@1 - Correlation module: parameters in card sidebar for better use of space
2 - df_info function: new test file and now returns empty data.frame (accepted by gt_info) in case of no columns in the entry data.frame
3 - Normality test module: now Ks and Shapiro-wilk tests have gt table, save button and statiCards
@@ -134,7 +152,7 @@1 - Correlation module: new layout, new table with test results, help button (help documentation), save table button and statiCards (shnywidgets) with Correlation and p value.
2 - Descriptive Stats module: now with variable selected by default
3 - Normality test module: new Help button in Ks test and Shapiro-Wilk test
@@ -145,7 +163,7 @@1 - Startup page: now with startup page with shinybusy package (new dependencie)
2 - radioGroupButtons: change some radioButtons (shiny) for radioGroupButtons (shinyWidgets) in data_overview_module and exploratory_module for better look
3 - Page config module: colorPickr now with ‘save’ mode for better reset of values and other visual changes fo better look
@@ -158,7 +176,7 @@1 - New module: Filter Rows
2 - Convert module: always align right the preview table
3 - Descriptive Stats module: now all options inicially as TRUE
@@ -173,7 +191,7 @@1 - New module: Normality test
2 - Module Correlation > Scatter Plot now has a button to render plot
3 - Module Descriptive Stats now has a button to render table
@@ -183,11 +201,11 @@1 - Edit > Filter: Factor var - no levels after choose operator: fixed (inserted req(operator)). Now the levels are shown when a factor var and an operator are selected. (#9)
1 - testthat: create structure to run tests (test-fina.R as initial test)
2 - New modules: Order Rows, Convert Cols and Exploratory
3 - Correlation module: insert req in scatter plot
@@ -202,7 +220,7 @@1 - New modules: Sidebar and Navbar Df Info
2 - New function filter_rows in utils.R
3 - New reactives: df_active_ncol, df_active_resume_data and df$df_trigger (to use for updates)
@@ -224,11 +242,11 @@1 - Analysis > Exploratory: error ins stats table when percentile out of range 0-100: now test the range and if the input isTruphy (#8)
1 - Stats table now is a module
2 - new module Correlation
3 - new module Descriptive Stats
@@ -239,14 +257,14 @@1 - Metadata - object color_fn not found: new icon for logical and color format (function data_color) applied only if there is valid (non NA) min and max values (#4)
2 - Edit > Convert - error in preview complex variable convertion: fixed converting complex to character in the preview given that gt table in opt_interactive does not show complex properly (#5)
3 - Edit > Filter: error in filtering complex: now only show/allow operators ‘== (Equal)’, ‘!= (Not Equal)’, ‘Is NA (is.na)’, ‘Not NA (! is.na)’, ‘In (%in%)’ and ‘Not In (! %in%)’ (same for character and factors) (#6)
4 - Edit > Filter: accept blank value: now the value must have length 1 or bigger (#7)
1 - utils functions
2 - page_config_module: correction of a typo
3 - spada function
@@ -258,11 +276,11 @@1 - Data Overview - after Edit only refresh if updat in rows or sample: fixed with insertion of buttons inside output$pD_over_gt. (#3)
1 - export_file_module: separator order now semicolon as default
2 - new import_file_module: allows input csv and RDS files
3 - page_config_module: new visual and size of input file as parameter
@@ -275,18 +293,18 @@1 - Analysis page - q1 object not found: back to calculate q1 and q3. (#1)
2 - Metadata - Error in zeros count: now df_info function uses suna(x == 0) instead of length(x[x == 0]) (#2)
1 - utils functions
df_info now uses suna instead of length, this change fix errors and provide gain in speed.
deletion of format_color_bar and main_value_box functions given that they are now in use anymore
1 - General
Created zzz.R and inserted utils::globalVariables for global variables (check note)
Value boxes: resized to give more space for other elements
1 - ‘Edit’ Page > Convert
1 - utils functions
1 - New functionality: copy dataset (Data page)
2 - Config page now is a module
3 - New Some reactives now with bindCache
1 - gt cannot show complex in opt_interactive, now convert to char before apply gt function
1 - utils functions
change color in value boxes (to gray) for better looking
number of rows (value box) now with decimals
1 - utils functions
df_info: improvement in performance (something like half the time in big datasets - 1e6 rows)
new function: gt_info to generate metadata with gt package