@@ -10,20 +10,29 @@ use std::collections::HashMap;
10
10
use amalthea:: comm:: data_explorer_comm;
11
11
use amalthea:: comm:: data_explorer_comm:: ColumnDisplayType ;
12
12
use amalthea:: comm:: data_explorer_comm:: ColumnSummaryStats ;
13
+ use amalthea:: comm:: data_explorer_comm:: FormatOptions ;
13
14
use harp:: exec:: RFunction ;
14
15
use harp:: exec:: RFunctionExt ;
15
16
use harp:: object:: RObject ;
17
+ use harp:: utils:: r_names2;
18
+ use harp:: vector:: CharacterVector ;
19
+ use harp:: vector:: Vector ;
16
20
use libr:: SEXP ;
17
21
use stdext:: unwrap;
18
22
23
+ use crate :: data_explorer:: format:: format_string;
19
24
use crate :: modules:: ARK_ENVS ;
20
25
21
- pub fn summary_stats ( column : SEXP , display_type : ColumnDisplayType ) -> ColumnSummaryStats {
22
- match summary_stats_ ( column, display_type) {
26
+ pub fn summary_stats (
27
+ column : SEXP ,
28
+ display_type : ColumnDisplayType ,
29
+ format_options : & FormatOptions ,
30
+ ) -> ColumnSummaryStats {
31
+ match summary_stats_ ( column, display_type, format_options) {
23
32
Ok ( stats) => stats,
24
33
Err ( e) => {
25
- // we want to log the error but return an empty summary stats so
26
- // that the user can still see the rest of the data
34
+ // We want to log the error but return an empty summary stats so
35
+ // that the user can still see the rest of the data.
27
36
log:: error!( "Error getting summary stats: {:?}" , e) ;
28
37
empty_column_summary_stats ( )
29
38
} ,
@@ -33,9 +42,10 @@ pub fn summary_stats(column: SEXP, display_type: ColumnDisplayType) -> ColumnSum
33
42
fn summary_stats_ (
34
43
column : SEXP ,
35
44
display_type : ColumnDisplayType ,
45
+ format_options : & FormatOptions ,
36
46
) -> anyhow:: Result < ColumnSummaryStats > {
37
47
match display_type {
38
- ColumnDisplayType :: Number => Ok ( summary_stats_number ( column) ?. into ( ) ) ,
48
+ ColumnDisplayType :: Number => Ok ( summary_stats_number ( column, format_options ) ?. into ( ) ) ,
39
49
ColumnDisplayType :: String => Ok ( summary_stats_string ( column) ?. into ( ) ) ,
40
50
ColumnDisplayType :: Boolean => Ok ( summary_stats_boolean ( column) ?. into ( ) ) ,
41
51
ColumnDisplayType :: Date => Ok ( summary_stats_date ( column) ?. into ( ) ) ,
@@ -44,9 +54,23 @@ fn summary_stats_(
44
54
}
45
55
}
46
56
47
- fn summary_stats_number ( column : SEXP ) -> anyhow:: Result < SummaryStatsNumber > {
48
- let stats = call_summary_fn ( "summary_stats_number" , column) ?;
49
- let r_stats: HashMap < String , String > = stats. try_into ( ) ?;
57
+ fn summary_stats_number (
58
+ column : SEXP ,
59
+ format_options : & FormatOptions ,
60
+ ) -> anyhow:: Result < SummaryStatsNumber > {
61
+ let r_stats = call_summary_fn ( "summary_stats_number" , column) ?;
62
+
63
+ let names = unsafe { CharacterVector :: new_unchecked ( r_names2 ( r_stats. sexp ) ) } ;
64
+ let values = format_string ( r_stats. sexp , format_options) ;
65
+
66
+ let r_stats: HashMap < String , String > = names
67
+ . iter ( )
68
+ . zip ( values. into_iter ( ) )
69
+ . map ( |( name, value) | match name {
70
+ Some ( name) => ( name, value) ,
71
+ None => ( "unk" . to_string ( ) , value) ,
72
+ } )
73
+ . collect ( ) ;
50
74
51
75
Ok ( SummaryStatsNumber ( data_explorer_comm:: SummaryStatsNumber {
52
76
min_value : r_stats[ "min_value" ] . clone ( ) ,
@@ -204,18 +228,28 @@ mod tests {
204
228
use super :: * ;
205
229
use crate :: test:: r_test;
206
230
231
+ fn default_options ( ) -> FormatOptions {
232
+ FormatOptions {
233
+ large_num_digits : 2 ,
234
+ small_num_digits : 4 ,
235
+ max_integral_digits : 7 ,
236
+ thousands_sep : Some ( "," . to_string ( ) ) ,
237
+ }
238
+ }
239
+
207
240
#[ test]
208
241
fn test_numeric_summary ( ) {
209
242
r_test ( || {
210
243
let column = r_parse_eval0 ( "c(1,2,3,4,5, NA)" , R_ENVS . global ) . unwrap ( ) ;
211
- let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: Number ) . unwrap ( ) ;
244
+ let stats =
245
+ summary_stats_ ( column. sexp , ColumnDisplayType :: Number , & default_options ( ) ) . unwrap ( ) ;
212
246
let expected: ColumnSummaryStats =
213
247
SummaryStatsNumber ( data_explorer_comm:: SummaryStatsNumber {
214
- min_value : "1.000000 " . to_string ( ) ,
215
- max_value : "5.000000 " . to_string ( ) ,
216
- mean : "3.000000 " . to_string ( ) ,
217
- median : "3.000000 " . to_string ( ) ,
218
- stdev : "1.581139 " . to_string ( ) ,
248
+ min_value : "1.00 " . to_string ( ) ,
249
+ max_value : "5.00 " . to_string ( ) ,
250
+ mean : "3.00 " . to_string ( ) ,
251
+ median : "3.00 " . to_string ( ) ,
252
+ stdev : "1.58 " . to_string ( ) ,
219
253
} )
220
254
. into ( ) ;
221
255
assert_eq ! ( stats, expected) ;
@@ -226,7 +260,8 @@ mod tests {
226
260
fn test_string_summary ( ) {
227
261
r_test ( || {
228
262
let column = r_parse_eval0 ( "c('a', 'b', 'c', 'd', '')" , R_ENVS . global ) . unwrap ( ) ;
229
- let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: String ) . unwrap ( ) ;
263
+ let stats =
264
+ summary_stats_ ( column. sexp , ColumnDisplayType :: String , & default_options ( ) ) . unwrap ( ) ;
230
265
let expected: ColumnSummaryStats =
231
266
SummaryStatsString ( data_explorer_comm:: SummaryStatsString {
232
267
num_empty : 1 ,
@@ -241,7 +276,8 @@ mod tests {
241
276
fn test_boolean_summary ( ) {
242
277
r_test ( || {
243
278
let column = r_parse_eval0 ( "c(TRUE, FALSE, TRUE, TRUE, NA)" , R_ENVS . global ) . unwrap ( ) ;
244
- let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: Boolean ) . unwrap ( ) ;
279
+ let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: Boolean , & default_options ( ) )
280
+ . unwrap ( ) ;
245
281
let expected: ColumnSummaryStats =
246
282
SummaryStatsBoolean ( data_explorer_comm:: SummaryStatsBoolean {
247
283
true_count : 3 ,
@@ -260,7 +296,8 @@ mod tests {
260
296
R_ENVS . global ,
261
297
)
262
298
. unwrap ( ) ;
263
- let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: Date ) . unwrap ( ) ;
299
+ let stats =
300
+ summary_stats_ ( column. sexp , ColumnDisplayType :: Date , & default_options ( ) ) . unwrap ( ) ;
264
301
let expected: ColumnSummaryStats =
265
302
SummaryStatsDate ( data_explorer_comm:: SummaryStatsDate {
266
303
min_date : "2021-01-01" . to_string ( ) ,
@@ -282,7 +319,9 @@ mod tests {
282
319
R_ENVS . global ,
283
320
)
284
321
. unwrap ( ) ;
285
- let stats = summary_stats_ ( column. sexp , ColumnDisplayType :: Datetime ) . unwrap ( ) ;
322
+ let stats =
323
+ summary_stats_ ( column. sexp , ColumnDisplayType :: Datetime , & default_options ( ) )
324
+ . unwrap ( ) ;
286
325
let expected: ColumnSummaryStats =
287
326
SummaryStatsDatetime ( data_explorer_comm:: SummaryStatsDatetime {
288
327
num_unique : 2 ,
0 commit comments