From fe32795384981559fb07c210977308043f48b34d Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 4 Jun 2024 16:22:32 -0300 Subject: [PATCH 01/19] Move format_column to it's own file --- crates/ark/src/data_explorer/format.rs | 171 ++++++++++++++++++ crates/ark/src/data_explorer/mod.rs | 1 + .../ark/src/data_explorer/r_data_explorer.rs | 169 ++--------------- 3 files changed, 185 insertions(+), 156 deletions(-) create mode 100644 crates/ark/src/data_explorer/format.rs diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs new file mode 100644 index 000000000..7626445a0 --- /dev/null +++ b/crates/ark/src/data_explorer/format.rs @@ -0,0 +1,171 @@ +// +// format.rs +// +// Copyright (C) 2024 by Posit Software, PBC +// +// + +use amalthea::comm::data_explorer_comm::ColumnValue; +use amalthea::comm::data_explorer_comm::FormatOptions; +use harp::exec::RFunction; +use harp::exec::RFunctionExt; +use harp::object::r_dbl_is_finite; +use harp::object::r_dbl_is_nan; +use harp::object::r_length; +use harp::object::r_list_get; +use harp::object::RObject; +use harp::utils::r_classes; +use harp::utils::r_format; +use harp::utils::r_is_null; +use harp::utils::r_typeof; +use harp::vector::formatted_vector::FormattedVector; +use harp::vector::formatted_vector::FormattedVectorCharacterOptions; +use harp::vector::formatted_vector::FormattedVectorOptions; +use harp::vector::CharacterVector; +use harp::vector::ComplexVector; +use harp::vector::IntegerVector; +use harp::vector::LogicalVector; +use harp::vector::NumericVector; +use harp::vector::Vector; +use libr::Rf_xlength; +use libr::SEXP; +use libr::*; + +use crate::modules::ARK_ENVS; + +pub fn format_column(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { + let formatted: Vec = match r_typeof(x) { + VECSXP => { + match r_classes(x) { + Some(_) => { + // If column has a class, we just call format on it. + RObject::from(r_format(x)?).try_into()? + }, + None => { + // For list columns we do something similar to tibbles, ie + // show the element . + RFunction::new("", "format_list_column") + .add(x) + .call_in(ARK_ENVS.positron_ns)? + .try_into()? + }, + } + }, + _ => { + let formatter = FormattedVector::new_with_options(x, FormattedVectorOptions { + character: FormattedVectorCharacterOptions { quote: false }, + })?; + formatter.iter().collect() + }, + }; + + let special_value_codes = special_values(x); + let output = formatted + .into_iter() + .zip(special_value_codes.into_iter()) + .map(|(val, code)| match code { + SpecialValueTypes::NotSpecial => ColumnValue::FormattedValue(val), + _ => ColumnValue::SpecialValueCode(code.into()), + }) + .collect(); + + Ok(output) +} + +#[derive(Clone)] +enum SpecialValueTypes { + NotSpecial, + NULL, + NA, + NaN, + Inf, + NegInf, +} + +// Find the special code values mapping to integer here: +// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 +impl Into for SpecialValueTypes { + fn into(self) -> i64 { + match self { + SpecialValueTypes::NotSpecial => -1, + SpecialValueTypes::NULL => 0, + SpecialValueTypes::NA => 1, + SpecialValueTypes::NaN => 2, + SpecialValueTypes::Inf => 10, + SpecialValueTypes::NegInf => 11, + } + } +} + +// Returns an iterator that checks for special values in a vector. +fn special_values(object: SEXP) -> Vec { + match r_typeof(object) { + REALSXP => { + let data = unsafe { NumericVector::new_unchecked(object) }; + data.iter() + .map(|x| match x { + Some(v) => { + if r_dbl_is_nan(v) { + SpecialValueTypes::NaN + } else if !r_dbl_is_finite(v) { + if v < 0.0 { + SpecialValueTypes::NegInf + } else { + SpecialValueTypes::Inf + } + } else { + SpecialValueTypes::NotSpecial + } + }, + None => SpecialValueTypes::NA, + }) + .collect() + }, + STRSXP => { + let data = unsafe { CharacterVector::new_unchecked(object) }; + data.iter() + .map(|x| match x { + Some(_) => SpecialValueTypes::NotSpecial, + None => SpecialValueTypes::NA, + }) + .collect() + }, + INTSXP => { + let data = unsafe { IntegerVector::new_unchecked(object) }; + data.iter() + .map(|x| match x { + Some(_) => SpecialValueTypes::NotSpecial, + None => SpecialValueTypes::NA, + }) + .collect() + }, + LGLSXP => { + let data = unsafe { LogicalVector::new_unchecked(object) }; + data.iter() + .map(|x| match x { + Some(_) => SpecialValueTypes::NotSpecial, + None => SpecialValueTypes::NA, + }) + .collect() + }, + CPLXSXP => { + let data = unsafe { ComplexVector::new_unchecked(object) }; + data.iter() + .map(|x| match x { + Some(_) => SpecialValueTypes::NotSpecial, + None => SpecialValueTypes::NA, + }) + .collect() + }, + VECSXP => (0..r_length(object)) + .map(|i| { + if r_is_null(r_list_get(object, i)) { + SpecialValueTypes::NULL + } else { + SpecialValueTypes::NotSpecial + } + }) + .collect(), + _ => vec![SpecialValueTypes::NotSpecial; unsafe { Rf_xlength(object) as usize }], + } +} diff --git a/crates/ark/src/data_explorer/mod.rs b/crates/ark/src/data_explorer/mod.rs index d4601c035..4c736dac4 100644 --- a/crates/ark/src/data_explorer/mod.rs +++ b/crates/ark/src/data_explorer/mod.rs @@ -6,4 +6,5 @@ // pub mod export_selection; +pub mod format; pub mod r_data_explorer; diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index 2844cfed8..1dcfda898 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -26,6 +26,7 @@ use amalthea::comm::data_explorer_comm::ExportDataSelectionParams; use amalthea::comm::data_explorer_comm::ExportFormat; use amalthea::comm::data_explorer_comm::ExportedData; use amalthea::comm::data_explorer_comm::FilterResult; +use amalthea::comm::data_explorer_comm::FormatOptions; use amalthea::comm::data_explorer_comm::GetColumnProfilesFeatures; use amalthea::comm::data_explorer_comm::GetColumnProfilesParams; use amalthea::comm::data_explorer_comm::GetDataValuesParams; @@ -53,29 +54,13 @@ use crossbeam::channel::Sender; use crossbeam::select; use harp::exec::RFunction; use harp::exec::RFunctionExt; -use harp::object::r_dbl_is_finite; -use harp::object::r_dbl_is_nan; -use harp::object::r_length; -use harp::object::r_list_get; use harp::object::RObject; use harp::r_symbol; use harp::tbl_get_column; -use harp::utils::r_classes; -use harp::utils::r_format; use harp::utils::r_inherits; -use harp::utils::r_is_null; use harp::utils::r_is_object; use harp::utils::r_is_s4; use harp::utils::r_typeof; -use harp::vector::formatted_vector::FormattedVector; -use harp::vector::formatted_vector::FormattedVectorCharacterOptions; -use harp::vector::formatted_vector::FormattedVectorOptions; -use harp::vector::CharacterVector; -use harp::vector::ComplexVector; -use harp::vector::IntegerVector; -use harp::vector::LogicalVector; -use harp::vector::NumericVector; -use harp::vector::Vector; use harp::TableInfo; use harp::TableKind; use libr::*; @@ -88,6 +73,7 @@ use stdext::unwrap; use uuid::Uuid; use crate::data_explorer::export_selection; +use crate::data_explorer::format; use crate::interface::RMain; use crate::lsp::events::EVENTS; use crate::modules::ARK_ENVS; @@ -452,7 +438,7 @@ impl RDataExplorer { row_start_index, num_rows, column_indices, - format_options: _, // TODO: add support for format options + format_options, // TODO: add support for format options }) => { // TODO: Support for data frames with over 2B rows let row_start_index: i32 = row_start_index.try_into()?; @@ -461,7 +447,14 @@ impl RDataExplorer { .into_iter() .map(i32::try_from) .collect::, _>>()?; - r_task(|| self.r_get_data_values(row_start_index, num_rows, column_indices)) + r_task(|| { + self.r_get_data_values( + row_start_index, + num_rows, + column_indices, + format_options, + ) + }) }, DataExplorerBackendRequest::SetSortColumns(SetSortColumnsParams { sort_keys: keys, @@ -1000,6 +993,7 @@ impl RDataExplorer { row_start_index: i32, num_rows: i32, column_indices: Vec, + format_options: FormatOptions, ) -> anyhow::Result { let table = self.table.get().clone(); let object = *table; @@ -1042,7 +1036,7 @@ impl RDataExplorer { let mut column_data: Vec> = Vec::new(); for i in 0..num_cols { let column = tbl_get_column(object.sexp, i, self.shape.kind)?; - let formatted = format_column(column.sexp)?; + let formatted = format::format_column(column.sexp, &format_options)?; column_data.push(formatted.clone()); } @@ -1089,45 +1083,6 @@ impl RDataExplorer { } } -fn format_column(x: SEXP) -> anyhow::Result> { - let formatted: Vec = match r_typeof(x) { - VECSXP => { - match r_classes(x) { - Some(_) => { - // If column has a class, we just call format on it. - RObject::from(r_format(x)?).try_into()? - }, - None => { - // For list columns we do something similar to tibbles, ie - // show the element . - RFunction::new("", "format_list_column") - .add(x) - .call_in(ARK_ENVS.positron_ns)? - .try_into()? - }, - } - }, - _ => { - let formatter = FormattedVector::new_with_options(x, FormattedVectorOptions { - character: FormattedVectorCharacterOptions { quote: false }, - })?; - formatter.iter().collect() - }, - }; - - let special_value_codes = special_values(x); - let output = formatted - .into_iter() - .zip(special_value_codes.into_iter()) - .map(|(val, code)| match code { - SpecialValueTypes::NotSpecial => ColumnValue::FormattedValue(val), - _ => ColumnValue::SpecialValueCode(code.into()), - }) - .collect(); - - Ok(output) -} - // This returns the type of an _element_ of the column. In R atomic // vectors do not have a distinct internal type but we pretend that they // do for the purpose of integrating with Positron types. @@ -1245,101 +1200,3 @@ pub unsafe extern "C" fn ps_view_data_frame( Ok(R_NilValue) } - -#[derive(Clone)] -enum SpecialValueTypes { - NotSpecial, - NULL, - NA, - NaN, - Inf, - NegInf, -} - -// Find the special code values mapping to integer here: -// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 -impl Into for SpecialValueTypes { - fn into(self) -> i64 { - match self { - SpecialValueTypes::NotSpecial => -1, - SpecialValueTypes::NULL => 0, - SpecialValueTypes::NA => 1, - SpecialValueTypes::NaN => 2, - SpecialValueTypes::Inf => 10, - SpecialValueTypes::NegInf => 11, - } - } -} - -// Returns an iterator that checks for special values in a vector. -fn special_values(object: SEXP) -> Vec { - match r_typeof(object) { - REALSXP => { - let data = unsafe { NumericVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(v) => { - if r_dbl_is_nan(v) { - SpecialValueTypes::NaN - } else if !r_dbl_is_finite(v) { - if v < 0.0 { - SpecialValueTypes::NegInf - } else { - SpecialValueTypes::Inf - } - } else { - SpecialValueTypes::NotSpecial - } - }, - None => SpecialValueTypes::NA, - }) - .collect() - }, - STRSXP => { - let data = unsafe { CharacterVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - INTSXP => { - let data = unsafe { IntegerVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - LGLSXP => { - let data = unsafe { LogicalVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - CPLXSXP => { - let data = unsafe { ComplexVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - VECSXP => (0..r_length(object)) - .map(|i| { - if r_is_null(r_list_get(object, i)) { - SpecialValueTypes::NULL - } else { - SpecialValueTypes::NotSpecial - } - }) - .collect(), - _ => vec![SpecialValueTypes::NotSpecial; unsafe { Rf_xlength(object) as usize }], - } -} From 6294cd2d93dc7389bc2baa34c4fd1af3b51cb82a Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Wed, 5 Jun 2024 08:51:38 -0300 Subject: [PATCH 02/19] Move to an R implementation for formatting --- crates/ark/src/data_explorer/format.rs | 67 +++++++++++-------- .../src/modules/positron/r_data_explorer.R | 62 +++++++++++++++++ 2 files changed, 101 insertions(+), 28 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 7626445a0..50a2959ca 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -18,9 +18,6 @@ use harp::utils::r_classes; use harp::utils::r_format; use harp::utils::r_is_null; use harp::utils::r_typeof; -use harp::vector::formatted_vector::FormattedVector; -use harp::vector::formatted_vector::FormattedVectorCharacterOptions; -use harp::vector::formatted_vector::FormattedVectorOptions; use harp::vector::CharacterVector; use harp::vector::ComplexVector; use harp::vector::IntegerVector; @@ -33,33 +30,11 @@ use libr::*; use crate::modules::ARK_ENVS; +// Format a column of data for display in the data explorer. pub fn format_column(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { - let formatted: Vec = match r_typeof(x) { - VECSXP => { - match r_classes(x) { - Some(_) => { - // If column has a class, we just call format on it. - RObject::from(r_format(x)?).try_into()? - }, - None => { - // For list columns we do something similar to tibbles, ie - // show the element . - RFunction::new("", "format_list_column") - .add(x) - .call_in(ARK_ENVS.positron_ns)? - .try_into()? - }, - } - }, - _ => { - let formatter = FormattedVector::new_with_options(x, FormattedVectorOptions { - character: FormattedVectorCharacterOptions { quote: false }, - })?; - formatter.iter().collect() - }, - }; - + let formatted = format_values(x, format_options)?; let special_value_codes = special_values(x); + let output = formatted .into_iter() .zip(special_value_codes.into_iter()) @@ -72,6 +47,42 @@ pub fn format_column(x: SEXP, format_options: &FormatOptions) -> anyhow::Result< Ok(output) } +fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { + // If the object has classes we dispatch to the `format` method + if let Some(_) = r_classes(x) { + return Ok(RObject::from(r_format(x)?).try_into()?); + } + + let formatted: Vec = match r_typeof(x) { + INTSXP => RFunction::from("format_integer") + .add(x) + .param("thousands_sep", format_options.thousands_sep.clone()) + .call_in(ARK_ENVS.positron_ns)? + .try_into()?, + REALSXP => RFunction::from("format_real") + .add(x) + .param("thousands_sep", format_options.thousands_sep.clone()) + .param("large_num_digits", format_options.large_num_digits as i32) + .param("small_num_digits", format_options.small_num_digits as i32) + .param( + "max_integral_digits", + format_options.max_integral_digits as i32, + ) + .call_in(ARK_ENVS.positron_ns)? + .try_into()?, + // For list columns we do something similar to tibbles, ie + // show the element . + VECSXP => RFunction::new("", "format_list_column") + .add(x) + .call_in(ARK_ENVS.positron_ns)? + .try_into()?, + // For all other values we rely on base R formatting + _ => RObject::from(r_format(x)?).try_into()?, + }; + + Ok(formatted) +} + #[derive(Clone)] enum SpecialValueTypes { NotSpecial, diff --git a/crates/ark/src/modules/positron/r_data_explorer.R b/crates/ark/src/modules/positron/r_data_explorer.R index 43aa988e3..130a2895d 100644 --- a/crates/ark/src/modules/positron/r_data_explorer.R +++ b/crates/ark/src/modules/positron/r_data_explorer.R @@ -279,6 +279,68 @@ format_list_column <- function(x) { }) } +# Specialized function to format integer values, supporting format options +# that the front-end might provide. +# This function must be vectorized on `x`. +format_integer <- function(x, thousands_sep) { + base::format(x, big.mark = thousands_sep) +} + +#' Format a real number for display +#' +#' This function must be vectorized on `x`. +#' +#' @param large_num_digits Fixed number of decimal places to display for numbers over 1, or in +#' scientific notation. +#' @param small_num_digits Fixed number of decimal places to display for numbers under 1. +#' @param max_integral_digits Maximum number of integral digits to display before switching to +#' scientific notation. +#' @param thousands_sep Thousands separator string +format_real <- function(x, large_num_digits, small_num_digits, max_integral_digits, thousands_sep) { + # format numbers larger than 1 + formatted <- character(length(x)) + + # The limit for large numbers before switching to scientific + # notation + upper_threshold <- 10^max_integral_digits + + # The limit for small numbers before switching to scientific + # notation + lower_threshold <- 10^(-(small_num_digits - 1)) + + abs_x <- abs(x) + formatted <- format_if( + formatted, x, abs_x >= upper_threshold & is.finite(x), + scientific = TRUE, nsmall = large_num_digits + ) + formatted <- format_if( + formatted, x, abs_x > 1 & abs_x < upper_threshold, + scientific = FALSE, nsmall = large_num_digits, big.mark = thousands_sep + ) + formatted <- format_if( + formatted, x, abs_x <= 1 & abs_x >= lower_threshold, + scientific = FALSE, nsmall = small_num_digits + ) + formatted <- format_if( + formatted, x, abs_x < lower_threshold & is.finite(x), + scientific = TRUE, nsmall = large_num_digits + ) + formatted <- format_if( # special case 0 to align with other medium numbers + formatted, x, abs_x == 0, + scientific = FALSE, nsmall = small_num_digits + ) + + formatted +} + +format_if <- function(formatted, x, condition, ...) { + if (any(condition, na.rm = TRUE)) { # avoids copying formatted if condition doesn't match anything + pos <- which(condition) + formatted[pos] <- base::format(x[pos], ...) + } + formatted +} + export_selection <- function(x, format = c("csv", "tsv", "html"), include_header = TRUE) { format <- match.arg(format) From f665c8e2ad1580860acc24d54f1627973a00d57f Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Wed, 5 Jun 2024 08:52:01 -0300 Subject: [PATCH 03/19] Adapt tests for new defaults --- crates/ark/tests/data_explorer.rs | 42 +++++++++++++++---------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/crates/ark/tests/data_explorer.rs b/crates/ark/tests/data_explorer.rs index 336b9753c..ddb878a86 100644 --- a/crates/ark/tests/data_explorer.rs +++ b/crates/ark/tests/data_explorer.rs @@ -207,9 +207,9 @@ fn test_data_explorer() { DataExplorerBackendReply::GetDataValuesReply(data) => { // The first three sorted rows should be 10.4, 10.4, and 13.3. assert_eq!(data.columns.len(), 2); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("10.4".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("10.4".to_string())); - assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("13.3".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("10.40".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("10.40".to_string())); + assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("13.30".to_string())); // Row labels should be sorted as well. if has_row_names { @@ -254,9 +254,9 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 2); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("19.2".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("18.7".to_string())); - assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("17.3".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("19.20".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("18.70".to_string())); + assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("17.30".to_string())); } ); }; @@ -301,8 +301,8 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 2); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("58".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("59".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("58.00".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("59.00".to_string())); // Row labels should be present. let labels = data.row_labels.unwrap(); @@ -380,8 +380,8 @@ fn test_data_explorer() { // The first column (height) should contain the only two rows // where the height is less than 60. assert_eq!(data.columns.len(), 2); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("59".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("58".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("59.00".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("58.00".to_string())); // Row labels should be present. The row labels represent the // rows in the original data set, so after sorting we expect the @@ -461,9 +461,9 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 1); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("0".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("1".to_string())); - assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("2".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("0.0000".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("1.0000".to_string())); + assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("2.00".to_string())); } ); @@ -494,9 +494,9 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 1); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("2".to_string())); - assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("3".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.0000".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("2.00".to_string())); + assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("3.00".to_string())); } ); @@ -608,10 +608,10 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 2); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("97".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("97".to_string())); - assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("98".to_string())); - assert_eq!(data.columns[0][3], ColumnValue::FormattedValue("98".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("97.00".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("97.00".to_string())); + assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("98.00".to_string())); + assert_eq!(data.columns[0][3], ColumnValue::FormattedValue("98.00".to_string())); } ); @@ -1304,7 +1304,7 @@ fn test_data_explorer_special_values() { DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 6); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.0000".to_string())); assert_eq!(data.columns[0][1], ColumnValue::SpecialValueCode(1)); assert_eq!(data.columns[0][2], ColumnValue::SpecialValueCode(2)); assert_eq!(data.columns[0][3], ColumnValue::SpecialValueCode(10)); From a19b712aa83aa1f1d42ff6966448c93edc521d70 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Wed, 5 Jun 2024 11:27:57 -0300 Subject: [PATCH 04/19] Add an R implementation matching Python tests --- crates/ark/src/data_explorer/format.rs | 93 +++++++++++++++++++ .../src/modules/positron/r_data_explorer.R | 26 +++--- 2 files changed, 106 insertions(+), 13 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 50a2959ca..b99d60ad1 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -180,3 +180,96 @@ fn special_values(object: SEXP) -> Vec { _ => vec![SpecialValueTypes::NotSpecial; unsafe { Rf_xlength(object) as usize }], } } + +#[cfg(test)] +mod tests { + use harp::environment::R_ENVS; + use harp::eval::r_parse_eval0; + + use super::*; + use crate::test::r_test; + + #[test] + fn test_real_formatting() { + r_test(|| { + // this test needs to match the Python equivalent in + // https://github.com/posit-dev/positron/blob/5192792967b6778608d643b821e84ebb6d5f7025/extensions/positron-python/python_files/positron/positron_ipykernel/tests/test_data_explorer.py#L742-L743 + let assert_float_formatting = |options: FormatOptions, expected: Vec| { + let testing_values = r_parse_eval0( + r#"c( + 0, + 1.0, + 1.01, + 1.012, + 0.0123, + 0.01234, + 0.0001, + 0.00001, + 9999.123, + 9999.999, + 9999999, + 10000000 + )"#, + R_ENVS.global, + ) + .unwrap(); + + let formatted = format_column(testing_values.sexp, &options).unwrap(); + assert_eq!(formatted, expected); + }; + + let options = FormatOptions { + large_num_digits: 2, + small_num_digits: 4, + max_integral_digits: 7, + thousands_sep: None, + }; + let expected = vec![ + "0.00", + "1.00", + "1.01", + "1.01", + "0.0123", + "0.0123", + "0.0001", + "1.00e-05", + "9999.12", + "10000.00", + "9999999.00", + "1.00e+07", + ] + .iter() + .map(|x| ColumnValue::FormattedValue(x.to_string())) + .collect::>(); + + assert_float_formatting(options, expected); + + let options = FormatOptions { + large_num_digits: 3, + small_num_digits: 4, + max_integral_digits: 7, + thousands_sep: Some("_".to_string()), + }; + + let expected = vec![ + "0.000", + "1.000", + "1.010", + "1.012", + "0.0123", + "0.0123", + "0.0001", + "1.000e-05", + "9_999.123", + "9_999.999", + "9_999_999.000", + "1.000e+07", + ] + .iter() + .map(|x| ColumnValue::FormattedValue(x.to_string())) + .collect::>(); + + assert_float_formatting(options, expected); + }) + } +} diff --git a/crates/ark/src/modules/positron/r_data_explorer.R b/crates/ark/src/modules/positron/r_data_explorer.R index 130a2895d..bf8b2883d 100644 --- a/crates/ark/src/modules/positron/r_data_explorer.R +++ b/crates/ark/src/modules/positron/r_data_explorer.R @@ -296,7 +296,7 @@ format_integer <- function(x, thousands_sep) { #' @param max_integral_digits Maximum number of integral digits to display before switching to #' scientific notation. #' @param thousands_sep Thousands separator string -format_real <- function(x, large_num_digits, small_num_digits, max_integral_digits, thousands_sep) { +format_real <- function(x, large_num_digits, small_num_digits, max_integral_digits, thousands_sep = "") { # format numbers larger than 1 formatted <- character(length(x)) @@ -306,37 +306,37 @@ format_real <- function(x, large_num_digits, small_num_digits, max_integral_digi # The limit for small numbers before switching to scientific # notation - lower_threshold <- 10^(-(small_num_digits - 1)) + lower_threshold <- 10^(-(small_num_digits + 1)) abs_x <- abs(x) formatted <- format_if( - formatted, x, abs_x >= upper_threshold & is.finite(x), - scientific = TRUE, nsmall = large_num_digits + formatted, x, abs_x >= upper_threshold & is.finite(x), .fun = base::sprintf, + fmt = paste0("%.", large_num_digits, "e") ) formatted <- format_if( - formatted, x, abs_x > 1 & abs_x < upper_threshold, - scientific = FALSE, nsmall = large_num_digits, big.mark = thousands_sep + formatted, x, abs_x >= 1 & abs_x < upper_threshold, + scientific = FALSE, nsmall = large_num_digits, big.mark = thousands_sep, digits = large_num_digits - 1 ) formatted <- format_if( - formatted, x, abs_x <= 1 & abs_x >= lower_threshold, - scientific = FALSE, nsmall = small_num_digits + formatted, x, abs_x < 1 & abs_x > lower_threshold, + scientific = FALSE, nsmall = small_num_digits, digits = small_num_digits - 1 ) formatted <- format_if( - formatted, x, abs_x < lower_threshold & is.finite(x), - scientific = TRUE, nsmall = large_num_digits + formatted, x, abs_x <= lower_threshold & is.finite(x), .fun = base::sprintf, + fmt = paste0("%.", large_num_digits, "e") ) formatted <- format_if( # special case 0 to align with other medium numbers formatted, x, abs_x == 0, - scientific = FALSE, nsmall = small_num_digits + scientific = FALSE, nsmall = large_num_digits, digits = large_num_digits - 1 ) formatted } -format_if <- function(formatted, x, condition, ...) { +format_if <- function(formatted, x, condition, ..., .fun = base::format) { if (any(condition, na.rm = TRUE)) { # avoids copying formatted if condition doesn't match anything pos <- which(condition) - formatted[pos] <- base::format(x[pos], ...) + formatted[pos] <- .fun(x[pos], ..., trim = TRUE) } formatted } From 16e9affbadd3b0a7d42a3aad61b3b0eb2fddb6ac Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Wed, 5 Jun 2024 15:46:13 -0300 Subject: [PATCH 05/19] Move to own formatter --- crates/ark/src/data_explorer/format.rs | 472 +++++++++++++----- .../ark/src/data_explorer/r_data_explorer.rs | 2 +- crates/ark/tests/data_explorer.rs | 8 +- 3 files changed, 344 insertions(+), 138 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index b99d60ad1..6b4540c05 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -14,6 +14,7 @@ use harp::object::r_dbl_is_nan; use harp::object::r_length; use harp::object::r_list_get; use harp::object::RObject; +use harp::r_null; use harp::utils::r_classes; use harp::utils::r_format; use harp::utils::r_is_null; @@ -24,163 +25,278 @@ use harp::vector::IntegerVector; use harp::vector::LogicalVector; use harp::vector::NumericVector; use harp::vector::Vector; -use libr::Rf_xlength; use libr::SEXP; use libr::*; use crate::modules::ARK_ENVS; -// Format a column of data for display in the data explorer. -pub fn format_column(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { - let formatted = format_values(x, format_options)?; - let special_value_codes = special_values(x); - - let output = formatted - .into_iter() - .zip(special_value_codes.into_iter()) - .map(|(val, code)| match code { - SpecialValueTypes::NotSpecial => ColumnValue::FormattedValue(val), - _ => ColumnValue::SpecialValueCode(code.into()), - }) - .collect(); +pub fn format_column(x: SEXP, format_options: &FormatOptions) -> Vec { + format_values(x, format_options).unwrap_or(unknown_format(x)) +} - Ok(output) +fn unknown_format(x: SEXP) -> Vec { + vec![ColumnValue::FormattedValue("????".to_string()); r_length(x) as usize] } -fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { - // If the object has classes we dispatch to the `format` method +// Format a column of data for display in the data explorer. +fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { if let Some(_) = r_classes(x) { - return Ok(RObject::from(r_format(x)?).try_into()?); + let formatted: Vec = RObject::from(r_format(x)?).try_into()?; + return Ok(formatted + .into_iter() + .map(ColumnValue::FormattedValue) + .collect()); + } + + match r_typeof(x) { + REALSXP => Ok(format_dbl(x, format_options)), + INTSXP => Ok(format_int(x, format_options)), + STRSXP => Ok(format_chr(x)), + LGLSXP => Ok(format_lgl(x)), + CPLXSXP => Ok(format_cpl(x)), + VECSXP => Ok(format_vec(x)), + _ => Err(anyhow::anyhow!("Unsupported column type")), + } +} + +fn format_vec(x: SEXP) -> Vec { + let len = r_length(x); + let mut output = Vec::::with_capacity(len as usize); + + for i in 0..len { + let elt = r_list_get(x, i); + let formatted = if r_is_null(elt) { + SpecialValueTypes::NULL.into() + } else { + ColumnValue::FormattedValue(format_vec_elt(elt)) + }; + output.push(formatted); } - let formatted: Vec = match r_typeof(x) { - INTSXP => RFunction::from("format_integer") - .add(x) - .param("thousands_sep", format_options.thousands_sep.clone()) - .call_in(ARK_ENVS.positron_ns)? - .try_into()?, - REALSXP => RFunction::from("format_real") - .add(x) - .param("thousands_sep", format_options.thousands_sep.clone()) - .param("large_num_digits", format_options.large_num_digits as i32) - .param("small_num_digits", format_options.small_num_digits as i32) - .param( - "max_integral_digits", - format_options.max_integral_digits as i32, - ) - .call_in(ARK_ENVS.positron_ns)? - .try_into()?, - // For list columns we do something similar to tibbles, ie - // show the element . - VECSXP => RFunction::new("", "format_list_column") - .add(x) - .call_in(ARK_ENVS.positron_ns)? - .try_into()?, - // For all other values we rely on base R formatting - _ => RObject::from(r_format(x)?).try_into()?, + output +} + +fn format_vec_elt(x: SEXP) -> String { + // We don't use `r_classes` because we want to see, eg 'numeric' for + // numeric vectors, not an empty value. + let class: Vec = RFunction::new("base", "class") + .add(x) + .call_in(ARK_ENVS.positron_ns) + // failling to evaluate classes will return NULL + .unwrap_or(RObject::from(r_null())) + .try_into() + // if we fail to get the class we will show ???? + .unwrap_or(vec![]); + + let class_str = if class.is_empty() { + "????:".to_string() + } else { + class[0].clone() + }; + + // we call `dim` and not `r_dim()` because we want to see the values + // from the dispatched method. + let dims: Vec = RFunction::from("dim") + .add(x) + .call_in(ARK_ENVS.positron_ns) + // if we fail to get the dims we will show the length of the object instead + .unwrap_or(RObject::from(r_null())) + .try_into() + .unwrap_or(vec![]); + + let dim_str: String = if dims.is_empty() { + r_length(x).to_string() + } else { + dims.iter() + .map(i32::to_string) + .collect::>() + .join(" x ") }; - Ok(formatted) + format!("<{} [{}]>", class_str, dim_str) } -#[derive(Clone)] -enum SpecialValueTypes { - NotSpecial, - NULL, - NA, - NaN, - Inf, - NegInf, +fn format_cpl(x: SEXP) -> Vec { + unsafe { + ComplexVector::new_unchecked(x) + .iter() + .map(|x| match x { + Some(v) => { + ColumnValue::FormattedValue(format!("{}+{}i", v.r.to_string(), v.i.to_string())) + }, + None => SpecialValueTypes::NA.into(), + }) + .collect() + } } -// Find the special code values mapping to integer here: -// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 -impl Into for SpecialValueTypes { - fn into(self) -> i64 { - match self { - SpecialValueTypes::NotSpecial => -1, - SpecialValueTypes::NULL => 0, - SpecialValueTypes::NA => 1, - SpecialValueTypes::NaN => 2, - SpecialValueTypes::Inf => 10, - SpecialValueTypes::NegInf => 11, - } +fn format_lgl(x: SEXP) -> Vec { + unsafe { + LogicalVector::new_unchecked(x) + .iter() + .map(|x| match x { + Some(v) => match v { + true => ColumnValue::FormattedValue("TRUE".to_string()), + false => ColumnValue::FormattedValue("FALSE".to_string()), + }, + None => SpecialValueTypes::NA.into(), + }) + .collect() } } -// Returns an iterator that checks for special values in a vector. -fn special_values(object: SEXP) -> Vec { - match r_typeof(object) { - REALSXP => { - let data = unsafe { NumericVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(v) => { - if r_dbl_is_nan(v) { - SpecialValueTypes::NaN - } else if !r_dbl_is_finite(v) { - if v < 0.0 { - SpecialValueTypes::NegInf - } else { - SpecialValueTypes::Inf - } - } else { - SpecialValueTypes::NotSpecial - } - }, - None => SpecialValueTypes::NA, - }) - .collect() - }, - STRSXP => { - let data = unsafe { CharacterVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - INTSXP => { - let data = unsafe { IntegerVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - LGLSXP => { - let data = unsafe { LogicalVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() - }, - CPLXSXP => { - let data = unsafe { ComplexVector::new_unchecked(object) }; - data.iter() - .map(|x| match x { - Some(_) => SpecialValueTypes::NotSpecial, - None => SpecialValueTypes::NA, - }) - .collect() +fn format_chr(x: SEXP) -> Vec { + unsafe { + CharacterVector::new_unchecked(x) + .iter() + .map(|x| match x { + Some(v) => ColumnValue::FormattedValue(v), + None => SpecialValueTypes::NA.into(), + }) + .collect() + } +} + +fn format_int(x: SEXP, options: &FormatOptions) -> Vec { + unsafe { + IntegerVector::new_unchecked(x) + .iter() + .map(|x| format_int_elt(x, options)) + .collect() + } +} + +fn format_int_elt(x: Option, options: &FormatOptions) -> ColumnValue { + match x { + None => SpecialValueTypes::NA.into(), + Some(v) => ColumnValue::FormattedValue(apply_thousands_sep( + v.to_string(), + options.thousands_sep.clone(), + )), + } +} + +fn format_dbl(x: SEXP, options: &FormatOptions) -> Vec { + unsafe { + NumericVector::new_unchecked(x) + .iter() + .map(|x| format_dbl_elt(x, options)) + .collect() + } +} + +fn format_dbl_elt(x: Option, options: &FormatOptions) -> ColumnValue { + match x { + None => SpecialValueTypes::NA.into(), + Some(v) => { + if r_dbl_is_nan(v) { + SpecialValueTypes::NaN.into() + } else if r_dbl_is_finite(v) { + // finite values that are not NaN nor NA + format_dbl_value(v, options) + } else if v > 0.0 { + SpecialValueTypes::Inf.into() + } else { + SpecialValueTypes::NegInf.into() + } }, - VECSXP => (0..r_length(object)) - .map(|i| { - if r_is_null(r_list_get(object, i)) { - SpecialValueTypes::NULL - } else { - SpecialValueTypes::NotSpecial + } +} + +fn format_dbl_value(x: f64, options: &FormatOptions) -> ColumnValue { + // The limit for large numbers before switching to scientific + // notation + let upper_threshold = f64::powf(10.0, options.max_integral_digits as f64); + + // The limit for small numbers before switching to scientific + // notation + let lower_threshold = f64::powf(10.0, -(options.small_num_digits as f64)); + + let large_num_digits = options.large_num_digits as usize; + let small_num_digits = options.small_num_digits as usize; + + let abs_x = x.abs(); + + let formatted = if abs_x >= upper_threshold { + // large numbers use scientific notation + // rust makes 1e7 instead of 1e+7 which aligns baddly + let v = format!("{:.large_num_digits$e}", x).replace("e", "e+"); + pad_exponent(v) + } else if abs_x >= 1.0 { + // this is considered medium numbers and they use a fixed amount of + // digits after the decimal point + apply_thousands_sep( + format!("{:.large_num_digits$}", x), + options.thousands_sep.clone(), + ) + } else if abs_x >= lower_threshold { + // small numbers but not that small are formatted with a different + // amount of digits after the decimal point + apply_thousands_sep( + format!("{:.small_num_digits$}", x), + options.thousands_sep.clone(), + ) + } else if abs_x == 0.0 { + // zero is special cased to behave like a medium number. + format!("{:.large_num_digits$}", x) + } else { + // very small numbers use scientific notation + let v = format!("{:.large_num_digits$e}", x); + pad_exponent(v) + }; + + ColumnValue::FormattedValue(formatted) +} + +fn apply_thousands_sep(x: String, sep: Option) -> String { + match sep { + None => x, + Some(sep) => { + let mut formatted = String::new(); + + // find the decimal point if any + let decimal_point = x.find('.').unwrap_or(x.len()); + + // now walk from the decimal point until walk the string + // backwards adding the thousands separator + let mut count: usize = 0; + for (i, c) in x.chars().rev().enumerate() { + // while before the decimal point, just copy the string + if i < (x.len() - decimal_point) { + formatted.push(c); + continue; } - }) - .collect(), - _ => vec![SpecialValueTypes::NotSpecial; unsafe { Rf_xlength(object) as usize }], + + // now start counting and add a `sep` every three + if count % 3 == 0 && count != 0 { + formatted.push_str(&sep); + } + + formatted.push(c); + count += 1; + } + formatted.chars().rev().collect::() + }, } } +// exponents of the scientific notation should have a minimum of length 2 +// to match the other implementations +// the string must have already been processed to include the e+ in positive values +fn pad_exponent(x: String) -> String { + // find the exponent position + let e_pos = x.find('e').unwrap(); + if (e_pos + 1 + 2) < x.len() { + return x; // the exponent already have 2 digits + } + + // add zeros to the exponent + let mut formatted = x.clone(); + formatted.insert(e_pos + 2, '0'); + + formatted +} + #[cfg(test)] mod tests { use harp::environment::R_ENVS; @@ -189,6 +305,55 @@ mod tests { use super::*; use crate::test::r_test; + fn default_options() -> FormatOptions { + FormatOptions { + large_num_digits: 2, + small_num_digits: 4, + max_integral_digits: 7, + thousands_sep: None, + } + } + + #[test] + fn test_pad_exponents() { + assert_eq!(pad_exponent("1.00e+111".to_string()), "1.00e+111"); + assert_eq!(pad_exponent("1.00e+11".to_string()), "1.00e+11"); + assert_eq!(pad_exponent("1.00e+01".to_string()), "1.00e+01"); + assert_eq!(pad_exponent("1.00e+1".to_string()), "1.00e+01"); + assert_eq!(pad_exponent("1.00e+00".to_string()), "1.00e+00"); + assert_eq!(pad_exponent("1.00e-01".to_string()), "1.00e-01"); + assert_eq!(pad_exponent("1.00e-1".to_string()), "1.00e-01"); + assert_eq!(pad_exponent("1.00e-00".to_string()), "1.00e-00"); + } + + #[test] + fn test_thousands_sep() { + assert_eq!( + apply_thousands_sep("1000000".to_string(), Some(",".to_string())), + "1,000,000" + ); + assert_eq!( + apply_thousands_sep("1000000.000".to_string(), Some(",".to_string())), + "1,000,000.000" + ); + assert_eq!( + apply_thousands_sep("1.00".to_string(), Some(",".to_string())), + "1.00" + ); + assert_eq!( + apply_thousands_sep("1000.00".to_string(), Some(",".to_string())), + "1,000.00" + ); + assert_eq!( + apply_thousands_sep("100.00".to_string(), Some(",".to_string())), + "100.00" + ); + assert_eq!( + apply_thousands_sep("1000000.00".to_string(), None), + "1000000.00" + ); + } + #[test] fn test_real_formatting() { r_test(|| { @@ -214,7 +379,7 @@ mod tests { ) .unwrap(); - let formatted = format_column(testing_values.sexp, &options).unwrap(); + let formatted = format_column(testing_values.sexp, &options); assert_eq!(formatted, expected); }; @@ -272,4 +437,45 @@ mod tests { assert_float_formatting(options, expected); }) } + + #[test] + fn test_list_formatting() { + r_test(|| { + let data = r_parse_eval0("list(0, NULL)", R_ENVS.global).unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("".to_string()), + SpecialValueTypes::NULL.into() + ]); + }) + } +} + +#[derive(Clone)] +enum SpecialValueTypes { + NULL, + NA, + NaN, + Inf, + NegInf, +} + +// Find the special code values mapping to integer here: +// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 +impl Into for SpecialValueTypes { + fn into(self) -> i64 { + match self { + SpecialValueTypes::NULL => 0, + SpecialValueTypes::NA => 1, + SpecialValueTypes::NaN => 2, + SpecialValueTypes::Inf => 10, + SpecialValueTypes::NegInf => 11, + } + } +} + +impl Into for SpecialValueTypes { + fn into(self) -> ColumnValue { + ColumnValue::SpecialValueCode(self.into()) + } } diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index 1dcfda898..c1f320b20 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -1036,7 +1036,7 @@ impl RDataExplorer { let mut column_data: Vec> = Vec::new(); for i in 0..num_cols { let column = tbl_get_column(object.sexp, i, self.shape.kind)?; - let formatted = format::format_column(column.sexp, &format_options)?; + let formatted = format::format_column(column.sexp, &format_options); column_data.push(formatted.clone()); } diff --git a/crates/ark/tests/data_explorer.rs b/crates/ark/tests/data_explorer.rs index ddb878a86..6365bcd07 100644 --- a/crates/ark/tests/data_explorer.rs +++ b/crates/ark/tests/data_explorer.rs @@ -461,8 +461,8 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 1); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("0.0000".to_string())); - assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("1.0000".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("0.00".to_string())); + assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("1.00".to_string())); assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("2.00".to_string())); } ); @@ -494,7 +494,7 @@ fn test_data_explorer() { assert_match!(socket_rpc(&socket, req), DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 1); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.0000".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.00".to_string())); assert_eq!(data.columns[0][1], ColumnValue::FormattedValue("2.00".to_string())); assert_eq!(data.columns[0][2], ColumnValue::FormattedValue("3.00".to_string())); } @@ -1304,7 +1304,7 @@ fn test_data_explorer_special_values() { DataExplorerBackendReply::GetDataValuesReply(data) => { assert_eq!(data.columns.len(), 6); - assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.0000".to_string())); + assert_eq!(data.columns[0][0], ColumnValue::FormattedValue("1.00".to_string())); assert_eq!(data.columns[0][1], ColumnValue::SpecialValueCode(1)); assert_eq!(data.columns[0][2], ColumnValue::SpecialValueCode(2)); assert_eq!(data.columns[0][3], ColumnValue::SpecialValueCode(10)); From 5198197f779a7a270ce4d8af6a454fc1c082b5e9 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Thu, 6 Jun 2024 20:57:46 -0300 Subject: [PATCH 06/19] Support format options for the summary panel + refactor --- crates/ark/src/data_explorer/format.rs | 331 ++++++++++++++---- .../ark/src/data_explorer/r_data_explorer.rs | 24 +- .../src/modules/positron/r_data_explorer.R | 85 +---- crates/ark/tests/data_explorer.rs | 10 +- 4 files changed, 297 insertions(+), 153 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 6b4540c05..5e2864dda 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -5,6 +5,8 @@ // // +use std::collections::HashMap; + use amalthea::comm::data_explorer_comm::ColumnValue; use amalthea::comm::data_explorer_comm::FormatOptions; use harp::exec::RFunction; @@ -18,6 +20,7 @@ use harp::r_null; use harp::utils::r_classes; use harp::utils::r_format; use harp::utils::r_is_null; +use harp::utils::r_names2; use harp::utils::r_typeof; use harp::vector::CharacterVector; use harp::vector::ComplexVector; @@ -30,22 +33,41 @@ use libr::*; use crate::modules::ARK_ENVS; +// Used by the get_data_values method to format columns for displaying in the grid. pub fn format_column(x: SEXP, format_options: &FormatOptions) -> Vec { + format(x, format_options) + .into_iter() + .map(Into::into) + .collect() +} + +// Used by the summary_profile method to format the summary statistics for display. +pub fn format_stats(x: SEXP, format_options: &FormatOptions) -> HashMap { + let out = format(x, format_options); + let mut stats = HashMap::new(); + unsafe { + CharacterVector::new_unchecked(r_names2(x)) + .iter() + .zip(out.into_iter()) + .for_each(|(nm, value)| { + stats.insert(nm.unwrap_or("????".to_string()), value.into()); + }); + } + stats +} + +fn format(x: SEXP, format_options: &FormatOptions) -> Vec { format_values(x, format_options).unwrap_or(unknown_format(x)) } -fn unknown_format(x: SEXP) -> Vec { - vec![ColumnValue::FormattedValue("????".to_string()); r_length(x) as usize] +fn unknown_format(x: SEXP) -> Vec { + vec![FormattedValue::Unkown; r_length(x) as usize] } // Format a column of data for display in the data explorer. -fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { +fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result> { if let Some(_) = r_classes(x) { - let formatted: Vec = RObject::from(r_format(x)?).try_into()?; - return Ok(formatted - .into_iter() - .map(ColumnValue::FormattedValue) - .collect()); + return Ok(format_object(x)); } match r_typeof(x) { @@ -59,16 +81,64 @@ fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result Vec { +fn format_object(x: SEXP) -> Vec { + // we call r_format() to dispatch the format method + let formatted: Vec = match r_format(x) { + Ok(fmt) => match RObject::from(fmt).try_into() { + Ok(x) => x, + Err(_) => return unknown_format(x), + }, + Err(_) => return unknown_format(x), + }; + + // but we also want to show special value codes. we call base::is.na() to dispatch + // the is.na() function and then replace those with FormattedValues::NA. + let is_na = RFunction::from("is_na_checked") + .add(x) + .call_in(ARK_ENVS.positron_ns); + + match is_na { + Ok(is_na) => { + formatted + .into_iter() + .zip(unsafe { LogicalVector::new_unchecked(is_na.sexp).iter() }) + .map(|(v, is_na)| { + // we don't expect is.na to return NA's, but if it happens, we treat it as false + if is_na.unwrap_or(false) { + FormattedValue::NA + } else { + // base::format defaults to using `trim=FALSE` + // so it will add spaces to the end of the strings so all elements of the vector + // have the same fixed width. We don't want this behavior in the data explorer, + // We tried passing `trim=TRUE` but this is unfortunatelly is not supported for eg. `factors`: + // > format(factor(c("aaaa", "a")), trim = TRUE) + // [1] "aaaa" "a " + // + // so we will just trim the spaces manually, which is not ideal, but it's better than + // having the values misaligned + FormattedValue::Value(v.trim_matches(|x| x == ' ').to_string()) + } + }) + .collect() + }, + Err(_) => { + // if we fail to get the is.na() values we will just return the formatted values + // without additional treatments. + formatted.into_iter().map(FormattedValue::Value).collect() + }, + } +} + +fn format_vec(x: SEXP) -> Vec { let len = r_length(x); - let mut output = Vec::::with_capacity(len as usize); + let mut output = Vec::::with_capacity(len as usize); for i in 0..len { let elt = r_list_get(x, i); let formatted = if r_is_null(elt) { - SpecialValueTypes::NULL.into() + FormattedValue::NULL } else { - ColumnValue::FormattedValue(format_vec_elt(elt)) + FormattedValue::Value(format_vec_elt(elt)) }; output.push(formatted); } @@ -116,48 +186,48 @@ fn format_vec_elt(x: SEXP) -> String { format!("<{} [{}]>", class_str, dim_str) } -fn format_cpl(x: SEXP) -> Vec { +fn format_cpl(x: SEXP) -> Vec { unsafe { ComplexVector::new_unchecked(x) .iter() .map(|x| match x { Some(v) => { - ColumnValue::FormattedValue(format!("{}+{}i", v.r.to_string(), v.i.to_string())) + FormattedValue::Value(format!("{}+{}i", v.r.to_string(), v.i.to_string())) }, - None => SpecialValueTypes::NA.into(), + None => FormattedValue::NA, }) .collect() } } -fn format_lgl(x: SEXP) -> Vec { +fn format_lgl(x: SEXP) -> Vec { unsafe { LogicalVector::new_unchecked(x) .iter() .map(|x| match x { Some(v) => match v { - true => ColumnValue::FormattedValue("TRUE".to_string()), - false => ColumnValue::FormattedValue("FALSE".to_string()), + true => FormattedValue::Value("TRUE".to_string()), + false => FormattedValue::Value("FALSE".to_string()), }, - None => SpecialValueTypes::NA.into(), + None => FormattedValue::NA, }) .collect() } } -fn format_chr(x: SEXP) -> Vec { +fn format_chr(x: SEXP) -> Vec { unsafe { CharacterVector::new_unchecked(x) .iter() .map(|x| match x { - Some(v) => ColumnValue::FormattedValue(v), - None => SpecialValueTypes::NA.into(), + Some(v) => FormattedValue::Value(v), + None => FormattedValue::NA, }) .collect() } } -fn format_int(x: SEXP, options: &FormatOptions) -> Vec { +fn format_int(x: SEXP, options: &FormatOptions) -> Vec { unsafe { IntegerVector::new_unchecked(x) .iter() @@ -166,17 +236,17 @@ fn format_int(x: SEXP, options: &FormatOptions) -> Vec { } } -fn format_int_elt(x: Option, options: &FormatOptions) -> ColumnValue { +fn format_int_elt(x: Option, options: &FormatOptions) -> FormattedValue { match x { - None => SpecialValueTypes::NA.into(), - Some(v) => ColumnValue::FormattedValue(apply_thousands_sep( + None => FormattedValue::NA, + Some(v) => FormattedValue::Value(apply_thousands_sep( v.to_string(), options.thousands_sep.clone(), )), } } -fn format_dbl(x: SEXP, options: &FormatOptions) -> Vec { +fn format_dbl(x: SEXP, options: &FormatOptions) -> Vec { unsafe { NumericVector::new_unchecked(x) .iter() @@ -185,25 +255,25 @@ fn format_dbl(x: SEXP, options: &FormatOptions) -> Vec { } } -fn format_dbl_elt(x: Option, options: &FormatOptions) -> ColumnValue { +fn format_dbl_elt(x: Option, options: &FormatOptions) -> FormattedValue { match x { - None => SpecialValueTypes::NA.into(), + None => FormattedValue::NA, Some(v) => { if r_dbl_is_nan(v) { - SpecialValueTypes::NaN.into() + FormattedValue::NaN } else if r_dbl_is_finite(v) { // finite values that are not NaN nor NA format_dbl_value(v, options) } else if v > 0.0 { - SpecialValueTypes::Inf.into() + FormattedValue::Inf } else { - SpecialValueTypes::NegInf.into() + FormattedValue::NegInf } }, } } -fn format_dbl_value(x: f64, options: &FormatOptions) -> ColumnValue { +fn format_dbl_value(x: f64, options: &FormatOptions) -> FormattedValue { // The limit for large numbers before switching to scientific // notation let upper_threshold = f64::powf(10.0, options.max_integral_digits as f64); @@ -245,7 +315,7 @@ fn format_dbl_value(x: f64, options: &FormatOptions) -> ColumnValue { pad_exponent(v) }; - ColumnValue::FormattedValue(formatted) + FormattedValue::Value(formatted) } fn apply_thousands_sep(x: String, sep: Option) -> String { @@ -267,6 +337,14 @@ fn apply_thousands_sep(x: String, sep: Option) -> String { continue; } + // for negative numbers, break the iteration. + // continue should have the same effect as `break` as there shouldn't exist + // any character before `-`. + if c == '-' { + formatted.push(c); + continue; + } + // now start counting and add a `sep` every three if count % 3 == 0 && count != 0 { formatted.push_str(&sep); @@ -297,6 +375,49 @@ fn pad_exponent(x: String) -> String { formatted } +// This type is only internally used with the intent of being easy to convert to +// ColumnValue or String when needed. +#[derive(Clone)] +enum FormattedValue { + Unkown, + NULL, + NA, + NaN, + Inf, + NegInf, + Value(String), +} + +// Find the special code values mapping to integer here: +// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 +impl Into for FormattedValue { + fn into(self) -> ColumnValue { + match self { + FormattedValue::Unkown => ColumnValue::FormattedValue(self.into()), + FormattedValue::NULL => ColumnValue::SpecialValueCode(0), + FormattedValue::NA => ColumnValue::SpecialValueCode(1), + FormattedValue::NaN => ColumnValue::SpecialValueCode(2), + FormattedValue::Inf => ColumnValue::SpecialValueCode(10), + FormattedValue::NegInf => ColumnValue::SpecialValueCode(11), + FormattedValue::Value(v) => ColumnValue::FormattedValue(v), + } + } +} + +impl Into for FormattedValue { + fn into(self) -> String { + match self { + FormattedValue::NULL => "NULL".to_string(), + FormattedValue::NA => "NA".to_string(), + FormattedValue::NaN => "NaN".to_string(), + FormattedValue::Inf => "Inf".to_string(), + FormattedValue::NegInf => "-Inf".to_string(), + FormattedValue::Unkown => "????".to_string(), + FormattedValue::Value(v) => v, + } + } +} + #[cfg(test)] mod tests { use harp::environment::R_ENVS; @@ -310,7 +431,7 @@ mod tests { large_num_digits: 2, small_num_digits: 4, max_integral_digits: 7, - thousands_sep: None, + thousands_sep: Some(",".to_string()), } } @@ -438,44 +559,132 @@ mod tests { }) } + #[test] + fn test_float_special_values() { + r_test(|| { + let data = r_parse_eval0( + "c(NA_real_, NaN, Inf, -Inf, 0, 1, 1000000000, -1000000000)", + R_ENVS.global, + ) + .unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + FormattedValue::NA.into(), + FormattedValue::NaN.into(), + FormattedValue::Inf.into(), + FormattedValue::NegInf.into(), + ColumnValue::FormattedValue("0.00".to_string()), + ColumnValue::FormattedValue("1.00".to_string()), + ColumnValue::FormattedValue("1.00e+09".to_string()), + ColumnValue::FormattedValue("-1.00e+09".to_string()) + ]); + }) + } + #[test] fn test_list_formatting() { r_test(|| { - let data = r_parse_eval0("list(0, NULL)", R_ENVS.global).unwrap(); + let data = r_parse_eval0("list(0, NULL, NA_real_)", R_ENVS.global).unwrap(); let formatted = format_column(data.sexp, &default_options()); assert_eq!(formatted, vec![ ColumnValue::FormattedValue("".to_string()), - SpecialValueTypes::NULL.into() + FormattedValue::NULL.into(), + ColumnValue::FormattedValue("".to_string()) ]); }) } -} -#[derive(Clone)] -enum SpecialValueTypes { - NULL, - NA, - NaN, - Inf, - NegInf, -} + #[test] + fn test_integer_formatting() { + r_test(|| { + let data = r_parse_eval0( + "as.integer(c(1, 1000, 0, -100000, NA, 1000000))", + R_ENVS.global, + ) + .unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("1".to_string()), + ColumnValue::FormattedValue("1,000".to_string()), + ColumnValue::FormattedValue("0".to_string()), + ColumnValue::FormattedValue("-100,000".to_string()), + FormattedValue::NA.into(), + ColumnValue::FormattedValue("1,000,000".to_string()) + ]); + }) + } -// Find the special code values mapping to integer here: -// https://github.com/posit-dev/positron/blob/46eb4dc0b071984be0f083c7836d74a19ef1509f/src/vs/workbench/services/positronDataExplorer/common/dataExplorerCache.ts#L59-L60 -impl Into for SpecialValueTypes { - fn into(self) -> i64 { - match self { - SpecialValueTypes::NULL => 0, - SpecialValueTypes::NA => 1, - SpecialValueTypes::NaN => 2, - SpecialValueTypes::Inf => 10, - SpecialValueTypes::NegInf => 11, - } + #[test] + fn test_chr_formatting() { + r_test(|| { + let data = r_parse_eval0("c('a', 'b', 'c', NA, 'd', 'e')", R_ENVS.global).unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("a".to_string()), + ColumnValue::FormattedValue("b".to_string()), + ColumnValue::FormattedValue("c".to_string()), + FormattedValue::NA.into(), + ColumnValue::FormattedValue("d".to_string()), + ColumnValue::FormattedValue("e".to_string()) + ]); + }) } -} -impl Into for SpecialValueTypes { - fn into(self) -> ColumnValue { - ColumnValue::SpecialValueCode(self.into()) + #[test] + fn test_factors_formatting() { + r_test(|| { + let data = + r_parse_eval0("factor(c('aaaaa', 'b', 'c', NA, 'd', 'e'))", R_ENVS.global).unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("aaaaa".to_string()), + ColumnValue::FormattedValue("b".to_string()), + ColumnValue::FormattedValue("c".to_string()), + FormattedValue::NA.into(), + ColumnValue::FormattedValue("d".to_string()), + ColumnValue::FormattedValue("e".to_string()) + ]); + }) + } + + #[test] + fn test_cpl_formatting() { + // TODO: In the future we might want to use scientific notation for complex numbers + // although I'm not sure it's really helpful: + // > 1000000000+1000000000i + // [1] 1e+09+1e+09i + r_test(|| { + let data = r_parse_eval0( + "c(1+1i, 2+2i, 3+3i, NA, 1000000000+1000000000i, 5+5i)", + R_ENVS.global, + ) + .unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("1+1i".to_string()), + ColumnValue::FormattedValue("2+2i".to_string()), + ColumnValue::FormattedValue("3+3i".to_string()), + FormattedValue::NA.into(), + ColumnValue::FormattedValue("1000000000+1000000000i".to_string()), + ColumnValue::FormattedValue("5+5i".to_string()) + ]); + }) + } + + #[test] + fn test_lgl_formatting() { + r_test(|| { + let data = + r_parse_eval0("c(TRUE, FALSE, NA, TRUE, FALSE, TRUE)", R_ENVS.global).unwrap(); + let formatted = format_column(data.sexp, &default_options()); + assert_eq!(formatted, vec![ + ColumnValue::FormattedValue("TRUE".to_string()), + ColumnValue::FormattedValue("FALSE".to_string()), + FormattedValue::NA.into(), + ColumnValue::FormattedValue("TRUE".to_string()), + ColumnValue::FormattedValue("FALSE".to_string()), + ColumnValue::FormattedValue("TRUE".to_string()) + ]); + }) } } diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index c1f320b20..cf8658808 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -74,6 +74,7 @@ use uuid::Uuid; use crate::data_explorer::export_selection; use crate::data_explorer::format; +use crate::data_explorer::format::format_stats; use crate::interface::RMain; use crate::lsp::events::EVENTS; use crate::modules::ARK_ENVS; @@ -438,7 +439,7 @@ impl RDataExplorer { row_start_index, num_rows, column_indices, - format_options, // TODO: add support for format options + format_options, }) => { // TODO: Support for data frames with over 2B rows let row_start_index: i32 = row_start_index.try_into()?; @@ -497,7 +498,7 @@ impl RDataExplorer { }, DataExplorerBackendRequest::GetColumnProfiles(GetColumnProfilesParams { profiles: requests, - format_options: _, // TODO: add support for format options + format_options, }) => { let profiles = requests .into_iter() @@ -523,8 +524,9 @@ impl RDataExplorer { } }, ColumnProfileType::SummaryStats => { - let summary_stats = - r_task(|| self.r_summary_stats(request.column_index as i32)); + let summary_stats = r_task(|| { + self.r_summary_stats(request.column_index as i32, &format_options) + }); ColumnProfileResult { null_count: None, summary_stats: match summary_stats { @@ -651,11 +653,15 @@ impl RDataExplorer { }) .call_in(ARK_ENVS.positron_ns)?; - // Return the count of nulls and NA values + // Retur n the count of nulls and NA values Ok(result.try_into()?) } - fn r_summary_stats(&self, column_index: i32) -> anyhow::Result { + fn r_summary_stats( + &self, + column_index: i32, + format_options: &FormatOptions, + ) -> anyhow::Result { // Get the column to compute summary stats for let column = tbl_get_column(self.table.get().sexp, column_index, self.shape.kind)?; let dtype = display_type(column.sexp); @@ -681,8 +687,10 @@ impl RDataExplorer { match dtype { ColumnDisplayType::Number => { - let r_stats: HashMap = - call_summary_fn("number_summary_stats")?.try_into()?; + let r_stats: HashMap = format_stats( + call_summary_fn("number_summary_stats")?.sexp, + &format_options, + ); stats.number_stats = Some(SummaryStatsNumber { min_value: r_stats["min_value"].clone(), diff --git a/crates/ark/src/modules/positron/r_data_explorer.R b/crates/ark/src/modules/positron/r_data_explorer.R index bf8b2883d..27fb66650 100644 --- a/crates/ark/src/modules/positron/r_data_explorer.R +++ b/crates/ark/src/modules/positron/r_data_explorer.R @@ -47,13 +47,13 @@ number_summary_stats <- function(column, filtered_indices) { col <- col_filter_indices(column, filtered_indices) - format(c( + c( min_value = min(col, na.rm = TRUE), max_value = max(col, na.rm = TRUE), mean = mean(col, na.rm = TRUE), median = stats::median(col, na.rm = TRUE), stdev = stats::sd(col, na.rm = TRUE) - )) + ) } string_summary_stats <- function(column, filtered_indices) { @@ -262,83 +262,10 @@ col_filter_indices <- function(col, idx = NULL) { x[i, j, drop = FALSE] } -format_list_column <- function(x) { - map_chr(x, function(x) { - d <- dim(x) - if (is.null(d)) { - d <- length(x) - } - - paste0( - "<", - class(x)[1], - " [", - paste0(d, collapse = " x "), - "]>" - ) - }) -} - -# Specialized function to format integer values, supporting format options -# that the front-end might provide. -# This function must be vectorized on `x`. -format_integer <- function(x, thousands_sep) { - base::format(x, big.mark = thousands_sep) -} - -#' Format a real number for display -#' -#' This function must be vectorized on `x`. -#' -#' @param large_num_digits Fixed number of decimal places to display for numbers over 1, or in -#' scientific notation. -#' @param small_num_digits Fixed number of decimal places to display for numbers under 1. -#' @param max_integral_digits Maximum number of integral digits to display before switching to -#' scientific notation. -#' @param thousands_sep Thousands separator string -format_real <- function(x, large_num_digits, small_num_digits, max_integral_digits, thousands_sep = "") { - # format numbers larger than 1 - formatted <- character(length(x)) - - # The limit for large numbers before switching to scientific - # notation - upper_threshold <- 10^max_integral_digits - - # The limit for small numbers before switching to scientific - # notation - lower_threshold <- 10^(-(small_num_digits + 1)) - - abs_x <- abs(x) - formatted <- format_if( - formatted, x, abs_x >= upper_threshold & is.finite(x), .fun = base::sprintf, - fmt = paste0("%.", large_num_digits, "e") - ) - formatted <- format_if( - formatted, x, abs_x >= 1 & abs_x < upper_threshold, - scientific = FALSE, nsmall = large_num_digits, big.mark = thousands_sep, digits = large_num_digits - 1 - ) - formatted <- format_if( - formatted, x, abs_x < 1 & abs_x > lower_threshold, - scientific = FALSE, nsmall = small_num_digits, digits = small_num_digits - 1 - ) - formatted <- format_if( - formatted, x, abs_x <= lower_threshold & is.finite(x), .fun = base::sprintf, - fmt = paste0("%.", large_num_digits, "e") - ) - formatted <- format_if( # special case 0 to align with other medium numbers - formatted, x, abs_x == 0, - scientific = FALSE, nsmall = large_num_digits, digits = large_num_digits - 1 - ) - - formatted -} - -format_if <- function(formatted, x, condition, ..., .fun = base::format) { - if (any(condition, na.rm = TRUE)) { # avoids copying formatted if condition doesn't match anything - pos <- which(condition) - formatted[pos] <- .fun(x[pos], ..., trim = TRUE) - } - formatted +is_na_checked <- function(x) { + result <- is.na(x) + stopifnot(is.logical(result), length(x) == length(result)) + result } export_selection <- function(x, format = c("csv", "tsv", "html"), include_header = TRUE) { diff --git a/crates/ark/tests/data_explorer.rs b/crates/ark/tests/data_explorer.rs index 6365bcd07..f6e22d205 100644 --- a/crates/ark/tests/data_explorer.rs +++ b/crates/ark/tests/data_explorer.rs @@ -790,11 +790,11 @@ fn test_data_explorer() { assert!(number_stats.is_some()); let number_stats = number_stats.unwrap(); assert_eq!(number_stats, SummaryStatsNumber { - min_value: String::from("1"), - max_value: String::from("3"), - mean: String::from("2"), - median: String::from("2"), - stdev: String::from("1"), + min_value: String::from("1.00"), + max_value: String::from("3.00"), + mean: String::from("2.00"), + median: String::from("2.00"), + stdev: String::from("1.00"), }); // The second column is a character column From 23ae211b703f41d58a562e7cc4304f19523c91ba Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 7 Jun 2024 08:19:12 -0300 Subject: [PATCH 07/19] typo --- crates/ark/src/data_explorer/r_data_explorer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index cf8658808..e67784100 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -653,7 +653,7 @@ impl RDataExplorer { }) .call_in(ARK_ENVS.positron_ns)?; - // Retur n the count of nulls and NA values + // Return the count of nulls and NA values Ok(result.try_into()?) } From 21c5df91108e35ca23d89984872479864488ee6f Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 7 Jun 2024 09:04:10 -0300 Subject: [PATCH 08/19] Some small improvements --- crates/ark/src/data_explorer/format.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 5e2864dda..ef110b1de 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -363,7 +363,13 @@ fn apply_thousands_sep(x: String, sep: Option) -> String { // the string must have already been processed to include the e+ in positive values fn pad_exponent(x: String) -> String { // find the exponent position - let e_pos = x.find('e').unwrap(); + let e_pos = match x.find('e') { + Some(v) => v, + None => return x, // if no e is found, return the string as is + }; + + // "1e-12" the e_pos (1) + 3 is < x.len() (5) + // "1e-1" the e_pos (1) + 3 is == x.len() (4) if (e_pos + 1 + 2) < x.len() { return x; // the exponent already have 2 digits } From f2de7b24413bf3d4aeaff35cda8e8708dc4e9994 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 09:22:38 -0300 Subject: [PATCH 09/19] Improve comments in the `apply_thousands_separator` function --- crates/ark/src/data_explorer/format.rs | 27 +++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index ef110b1de..3f0cf949a 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -324,28 +324,29 @@ fn apply_thousands_sep(x: String, sep: Option) -> String { Some(sep) => { let mut formatted = String::new(); - // find the decimal point if any + // Find the decimal point if any let decimal_point = x.find('.').unwrap_or(x.len()); - // now walk from the decimal point until walk the string - // backwards adding the thousands separator + // Walk backwards on the string to add the thousands separator let mut count: usize = 0; for (i, c) in x.chars().rev().enumerate() { - // while before the decimal point, just copy the string + // Now walk backwards until we reach the decimal point. + // After the point, start adding the thousands separator. if i < (x.len() - decimal_point) { formatted.push(c); continue; } - // for negative numbers, break the iteration. - // continue should have the same effect as `break` as there shouldn't exist + // For negative numbers, break the iteration. + // `continue` should have the same effect as `break` as there shouldn't exist // any character before `-`. + // This avoids '-100' to be formatted as '-,100'. if c == '-' { formatted.push(c); continue; } - // now start counting and add a `sep` every three + // Add a `sep` every three characters if count % 3 == 0 && count != 0 { formatted.push_str(&sep); } @@ -479,6 +480,18 @@ mod tests { apply_thousands_sep("1000000.00".to_string(), None), "1000000.00" ); + assert_eq!( + apply_thousands_sep("-100".to_string(), Some(",".to_string())), + "-100" + ); + assert_eq!( + apply_thousands_sep("-100000".to_string(), Some(",".to_string())), + "-100,000" + ); + assert_eq!( + apply_thousands_sep("-100000.00".to_string(), Some(",".to_string())), + "-100,000.00" + ); } #[test] From f5a39fe09968952533a6964567cc057b58af436d Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 09:25:03 -0300 Subject: [PATCH 10/19] Apply suggestions from code review Co-authored-by: Lionel Henry --- crates/ark/src/data_explorer/format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 3f0cf949a..76720e73b 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -110,7 +110,7 @@ fn format_object(x: SEXP) -> Vec { // base::format defaults to using `trim=FALSE` // so it will add spaces to the end of the strings so all elements of the vector // have the same fixed width. We don't want this behavior in the data explorer, - // We tried passing `trim=TRUE` but this is unfortunatelly is not supported for eg. `factors`: + // We tried passing `trim=TRUE` but this is unfortunately not supported for eg. `factors`: // > format(factor(c("aaaa", "a")), trim = TRUE) // [1] "aaaa" "a " // From d5ec59bc0b35ad1ef47c663d43b86c7a1f1852bd Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 09:27:34 -0300 Subject: [PATCH 11/19] `format_vec` -> `format_list` --- crates/ark/src/data_explorer/format.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 76720e73b..21cf24246 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -76,7 +76,7 @@ fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result Ok(format_chr(x)), LGLSXP => Ok(format_lgl(x)), CPLXSXP => Ok(format_cpl(x)), - VECSXP => Ok(format_vec(x)), + VECSXP => Ok(format_list(x)), _ => Err(anyhow::anyhow!("Unsupported column type")), } } @@ -129,7 +129,7 @@ fn format_object(x: SEXP) -> Vec { } } -fn format_vec(x: SEXP) -> Vec { +fn format_list(x: SEXP) -> Vec { let len = r_length(x); let mut output = Vec::::with_capacity(len as usize); @@ -138,7 +138,7 @@ fn format_vec(x: SEXP) -> Vec { let formatted = if r_is_null(elt) { FormattedValue::NULL } else { - FormattedValue::Value(format_vec_elt(elt)) + FormattedValue::Value(format_list_elt(elt)) }; output.push(formatted); } @@ -146,7 +146,7 @@ fn format_vec(x: SEXP) -> Vec { output } -fn format_vec_elt(x: SEXP) -> String { +fn format_list_elt(x: SEXP) -> String { // We don't use `r_classes` because we want to see, eg 'numeric' for // numeric vectors, not an empty value. let class: Vec = RFunction::new("base", "class") From 415ee07bc15eb012fcc04f5f28c6c5dbb178d36c Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 09:36:32 -0300 Subject: [PATCH 12/19] Improve comments for `format_object` --- crates/ark/src/data_explorer/format.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 21cf24246..04183964f 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -82,7 +82,7 @@ fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result Vec { - // we call r_format() to dispatch the format method + // We call r_format() to dispatch the format method let formatted: Vec = match r_format(x) { Ok(fmt) => match RObject::from(fmt).try_into() { Ok(x) => x, @@ -91,8 +91,8 @@ fn format_object(x: SEXP) -> Vec { Err(_) => return unknown_format(x), }; - // but we also want to show special value codes. we call base::is.na() to dispatch - // the is.na() function and then replace those with FormattedValues::NA. + // But we also want to show special value codes. We call `base::is.na()` to dispatch + // the `is.na()` function and then replace those with `FormattedValues::NA`. let is_na = RFunction::from("is_na_checked") .add(x) .call_in(ARK_ENVS.positron_ns); @@ -103,18 +103,18 @@ fn format_object(x: SEXP) -> Vec { .into_iter() .zip(unsafe { LogicalVector::new_unchecked(is_na.sexp).iter() }) .map(|(v, is_na)| { - // we don't expect is.na to return NA's, but if it happens, we treat it as false + // We don't expect is.na to return NA's, but if it happens, we treat it as false if is_na.unwrap_or(false) { FormattedValue::NA } else { - // base::format defaults to using `trim=FALSE` - // so it will add spaces to the end of the strings so all elements of the vector - // have the same fixed width. We don't want this behavior in the data explorer, + // `base::format` defaults to using `trim=FALSE` + // So it will add spaces to the end of the strings causing all elements of the vector + // to have the same fixed width. We don't want this behavior in the data explorer, // We tried passing `trim=TRUE` but this is unfortunately not supported for eg. `factors`: // > format(factor(c("aaaa", "a")), trim = TRUE) // [1] "aaaa" "a " // - // so we will just trim the spaces manually, which is not ideal, but it's better than + // So we will just trim the spaces manually, which is not ideal, but it's better than // having the values misaligned FormattedValue::Value(v.trim_matches(|x| x == ' ').to_string()) } @@ -122,7 +122,7 @@ fn format_object(x: SEXP) -> Vec { .collect() }, Err(_) => { - // if we fail to get the is.na() values we will just return the formatted values + // If we fail to get the is.na() values we will just return the formatted values // without additional treatments. formatted.into_iter().map(FormattedValue::Value).collect() }, From 453b01b14d1a3ba6010665e34ef136972d3c9b2c Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 13:00:11 -0300 Subject: [PATCH 13/19] Improve list formatting logic --- crates/ark/src/data_explorer/format.rs | 62 +++++++++++++------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 04183964f..c2d14026f 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -30,6 +30,7 @@ use harp::vector::NumericVector; use harp::vector::Vector; use libr::SEXP; use libr::*; +use stdext::unwrap; use crate::modules::ARK_ENVS; @@ -91,42 +92,43 @@ fn format_object(x: SEXP) -> Vec { Err(_) => return unknown_format(x), }; + let formatted = formatted.into_iter().map(|v| { + // `base::format` defaults to using `trim=FALSE` + // So it will add spaces to the end of the strings causing all elements of the vector + // to have the same fixed width. We don't want this behavior in the data explorer, + // We tried passing `trim=TRUE` but this is unfortunately not supported for eg. `factors`: + // > format(factor(c("aaaa", "a")), trim = TRUE) + // [1] "aaaa" "a " + // + // So we will just trim the spaces manually, which is not ideal, but it's better than + // having the values misaligned + FormattedValue::Value(v.trim_matches(|x| x == ' ').to_string()) + }); + // But we also want to show special value codes. We call `base::is.na()` to dispatch // the `is.na()` function and then replace those with `FormattedValues::NA`. let is_na = RFunction::from("is_na_checked") .add(x) .call_in(ARK_ENVS.positron_ns); - match is_na { - Ok(is_na) => { - formatted - .into_iter() - .zip(unsafe { LogicalVector::new_unchecked(is_na.sexp).iter() }) - .map(|(v, is_na)| { - // We don't expect is.na to return NA's, but if it happens, we treat it as false - if is_na.unwrap_or(false) { - FormattedValue::NA - } else { - // `base::format` defaults to using `trim=FALSE` - // So it will add spaces to the end of the strings causing all elements of the vector - // to have the same fixed width. We don't want this behavior in the data explorer, - // We tried passing `trim=TRUE` but this is unfortunately not supported for eg. `factors`: - // > format(factor(c("aaaa", "a")), trim = TRUE) - // [1] "aaaa" "a " - // - // So we will just trim the spaces manually, which is not ideal, but it's better than - // having the values misaligned - FormattedValue::Value(v.trim_matches(|x| x == ' ').to_string()) - } - }) - .collect() - }, - Err(_) => { - // If we fail to get the is.na() values we will just return the formatted values - // without additional treatments. - formatted.into_iter().map(FormattedValue::Value).collect() - }, - } + let is_na = unwrap!(is_na, Err(_) => { + // If we fail to evaluate `is.na()` we will just return the formatted values + // as is. + return formatted.collect(); + }); + + unsafe { LogicalVector::new_unchecked(is_na.sexp) } + .iter() + .zip(formatted) + .map(|(is_na, v)| { + // We don't expect is.na to return NA's, but if it happens, we treat it as false + // and return the formatted values as is. + match is_na.unwrap_or(false) { + true => FormattedValue::NA, + false => v, + } + }) + .collect() } fn format_list(x: SEXP) -> Vec { From 001d7eeea59654d3a7acbf72ec8160378e04b8e4 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 21:40:15 -0300 Subject: [PATCH 14/19] Move vectors constructors closer to type checks --- crates/ark/src/data_explorer/format.rs | 93 +++++++++++--------------- 1 file changed, 39 insertions(+), 54 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index c2d14026f..a8f7f0ce9 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -72,11 +72,17 @@ fn format_values(x: SEXP, format_options: &FormatOptions) -> anyhow::Result Ok(format_dbl(x, format_options)), - INTSXP => Ok(format_int(x, format_options)), - STRSXP => Ok(format_chr(x)), - LGLSXP => Ok(format_lgl(x)), - CPLXSXP => Ok(format_cpl(x)), + REALSXP => Ok(format_dbl( + unsafe { NumericVector::new_unchecked(x) }, + format_options, + )), + INTSXP => Ok(format_int( + unsafe { IntegerVector::new_unchecked(x) }, + format_options, + )), + STRSXP => Ok(format_chr(unsafe { CharacterVector::new_unchecked(x) })), + LGLSXP => Ok(format_lgl(unsafe { LogicalVector::new_unchecked(x) })), + CPLXSXP => Ok(format_cpl(unsafe { ComplexVector::new_unchecked(x) })), VECSXP => Ok(format_list(x)), _ => Err(anyhow::anyhow!("Unsupported column type")), } @@ -188,54 +194,38 @@ fn format_list_elt(x: SEXP) -> String { format!("<{} [{}]>", class_str, dim_str) } -fn format_cpl(x: SEXP) -> Vec { - unsafe { - ComplexVector::new_unchecked(x) - .iter() - .map(|x| match x { - Some(v) => { - FormattedValue::Value(format!("{}+{}i", v.r.to_string(), v.i.to_string())) - }, - None => FormattedValue::NA, - }) - .collect() - } +fn format_cpl(x: ComplexVector) -> Vec { + x.iter() + .map(|x| match x { + Some(v) => FormattedValue::Value(format!("{}+{}i", v.r.to_string(), v.i.to_string())), + None => FormattedValue::NA, + }) + .collect() } -fn format_lgl(x: SEXP) -> Vec { - unsafe { - LogicalVector::new_unchecked(x) - .iter() - .map(|x| match x { - Some(v) => match v { - true => FormattedValue::Value("TRUE".to_string()), - false => FormattedValue::Value("FALSE".to_string()), - }, - None => FormattedValue::NA, - }) - .collect() - } +fn format_lgl(x: LogicalVector) -> Vec { + x.iter() + .map(|x| match x { + Some(v) => match v { + true => FormattedValue::Value("TRUE".to_string()), + false => FormattedValue::Value("FALSE".to_string()), + }, + None => FormattedValue::NA, + }) + .collect() } -fn format_chr(x: SEXP) -> Vec { - unsafe { - CharacterVector::new_unchecked(x) - .iter() - .map(|x| match x { - Some(v) => FormattedValue::Value(v), - None => FormattedValue::NA, - }) - .collect() - } +fn format_chr(x: CharacterVector) -> Vec { + x.iter() + .map(|x| match x { + Some(v) => FormattedValue::Value(v), + None => FormattedValue::NA, + }) + .collect() } -fn format_int(x: SEXP, options: &FormatOptions) -> Vec { - unsafe { - IntegerVector::new_unchecked(x) - .iter() - .map(|x| format_int_elt(x, options)) - .collect() - } +fn format_int(x: IntegerVector, options: &FormatOptions) -> Vec { + x.iter().map(|x| format_int_elt(x, options)).collect() } fn format_int_elt(x: Option, options: &FormatOptions) -> FormattedValue { @@ -248,13 +238,8 @@ fn format_int_elt(x: Option, options: &FormatOptions) -> FormattedValue { } } -fn format_dbl(x: SEXP, options: &FormatOptions) -> Vec { - unsafe { - NumericVector::new_unchecked(x) - .iter() - .map(|x| format_dbl_elt(x, options)) - .collect() - } +fn format_dbl(x: NumericVector, options: &FormatOptions) -> Vec { + x.iter().map(|x| format_dbl_elt(x, options)).collect() } fn format_dbl_elt(x: Option, options: &FormatOptions) -> FormattedValue { From 6f9bc7ea59a410b5fd24971ebdfd81a8c64b9c2d Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 21:48:25 -0300 Subject: [PATCH 15/19] Use a global fallback format string. --- crates/ark/src/data_explorer/format.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index a8f7f0ce9..3bde96857 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -34,6 +34,8 @@ use stdext::unwrap; use crate::modules::ARK_ENVS; +const FALLBACK_FORMAT_STRING: &str = "????"; + // Used by the get_data_values method to format columns for displaying in the grid. pub fn format_column(x: SEXP, format_options: &FormatOptions) -> Vec { format(x, format_options) @@ -51,7 +53,10 @@ pub fn format_stats(x: SEXP, format_options: &FormatOptions) -> HashMap String { .unwrap_or(vec![]); let class_str = if class.is_empty() { - "????:".to_string() + format!("{}:", FALLBACK_FORMAT_STRING) } else { class[0].clone() }; @@ -406,7 +411,7 @@ impl Into for FormattedValue { FormattedValue::NaN => "NaN".to_string(), FormattedValue::Inf => "Inf".to_string(), FormattedValue::NegInf => "-Inf".to_string(), - FormattedValue::Unkown => "????".to_string(), + FormattedValue::Unkown => FALLBACK_FORMAT_STRING.to_string(), FormattedValue::Value(v) => v, } } From 9d0847b75156cd8184d9dcedca1b1fceb2d74233 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 21:51:08 -0300 Subject: [PATCH 16/19] Don't clone as it's not needed. --- crates/ark/src/data_explorer/format.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 3bde96857..56b0b65aa 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -368,7 +368,7 @@ fn pad_exponent(x: String) -> String { } // add zeros to the exponent - let mut formatted = x.clone(); + let mut formatted = x; formatted.insert(e_pos + 2, '0'); formatted From 43fe996da0d4c23270b9365c42358d5515444561 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 23:09:11 -0300 Subject: [PATCH 17/19] Expose a more generic version and move assumptions closer to the R function call. --- crates/ark/src/data_explorer/format.rs | 23 ++++--------------- .../ark/src/data_explorer/r_data_explorer.rs | 22 ++++++++++++++---- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 56b0b65aa..2baa7bd89 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -5,8 +5,6 @@ // // -use std::collections::HashMap; - use amalthea::comm::data_explorer_comm::ColumnValue; use amalthea::comm::data_explorer_comm::FormatOptions; use harp::exec::RFunction; @@ -20,7 +18,6 @@ use harp::r_null; use harp::utils::r_classes; use harp::utils::r_format; use harp::utils::r_is_null; -use harp::utils::r_names2; use harp::utils::r_typeof; use harp::vector::CharacterVector; use harp::vector::ComplexVector; @@ -45,21 +42,11 @@ pub fn format_column(x: SEXP, format_options: &FormatOptions) -> Vec HashMap { - let out = format(x, format_options); - let mut stats = HashMap::new(); - unsafe { - CharacterVector::new_unchecked(r_names2(x)) - .iter() - .zip(out.into_iter()) - .for_each(|(nm, value)| { - stats.insert( - nm.unwrap_or(FALLBACK_FORMAT_STRING.to_string()), - value.into(), - ); - }); - } - stats +pub fn format_string(x: SEXP, format_options: &FormatOptions) -> Vec { + format(x, format_options) + .into_iter() + .map(Into::into) + .collect() } fn format(x: SEXP, format_options: &FormatOptions) -> Vec { diff --git a/crates/ark/src/data_explorer/r_data_explorer.rs b/crates/ark/src/data_explorer/r_data_explorer.rs index e67784100..3d4788235 100644 --- a/crates/ark/src/data_explorer/r_data_explorer.rs +++ b/crates/ark/src/data_explorer/r_data_explorer.rs @@ -60,7 +60,10 @@ use harp::tbl_get_column; use harp::utils::r_inherits; use harp::utils::r_is_object; use harp::utils::r_is_s4; +use harp::utils::r_names2; use harp::utils::r_typeof; +use harp::vector::CharacterVector; +use harp::vector::Vector; use harp::TableInfo; use harp::TableKind; use libr::*; @@ -74,7 +77,7 @@ use uuid::Uuid; use crate::data_explorer::export_selection; use crate::data_explorer::format; -use crate::data_explorer::format::format_stats; +use crate::data_explorer::format::format_string; use crate::interface::RMain; use crate::lsp::events::EVENTS; use crate::modules::ARK_ENVS; @@ -687,10 +690,19 @@ impl RDataExplorer { match dtype { ColumnDisplayType::Number => { - let r_stats: HashMap = format_stats( - call_summary_fn("number_summary_stats")?.sexp, - &format_options, - ); + let r_stats = call_summary_fn("number_summary_stats")?; + + let names = unsafe { CharacterVector::new_unchecked(r_names2(r_stats.sexp)) }; + let values = format_string(r_stats.sexp, format_options); + + let r_stats: HashMap = names + .iter() + .zip(values.into_iter()) + .map(|(name, value)| match name { + Some(name) => (name, value), + None => ("unk".to_string(), value), + }) + .collect(); stats.number_stats = Some(SummaryStatsNumber { min_value: r_stats["min_value"].clone(), From fcbdaf58aa4700b4d2e93751d496f742c36a3049 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Tue, 11 Jun 2024 23:19:50 -0300 Subject: [PATCH 18/19] Use `harp::list_get()` --- crates/ark/src/data_explorer/format.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 2baa7bd89..217f11143 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -12,7 +12,6 @@ use harp::exec::RFunctionExt; use harp::object::r_dbl_is_finite; use harp::object::r_dbl_is_nan; use harp::object::r_length; -use harp::object::r_list_get; use harp::object::RObject; use harp::r_null; use harp::utils::r_classes; @@ -134,7 +133,7 @@ fn format_list(x: SEXP) -> Vec { let mut output = Vec::::with_capacity(len as usize); for i in 0..len { - let elt = r_list_get(x, i); + let elt = harp::list_get(x, i); let formatted = if r_is_null(elt) { FormattedValue::NULL } else { From dcfb07f0daa9fa38c194ae4756a38a8492af8c62 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Wed, 12 Jun 2024 09:04:45 -0300 Subject: [PATCH 19/19] Use if/else instead --- crates/ark/src/data_explorer/format.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index 217f11143..a07f14601 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -120,9 +120,10 @@ fn format_object(x: SEXP) -> Vec { .map(|(is_na, v)| { // We don't expect is.na to return NA's, but if it happens, we treat it as false // and return the formatted values as is. - match is_na.unwrap_or(false) { - true => FormattedValue::NA, - false => v, + if is_na.unwrap_or(false) { + FormattedValue::NA + } else { + v } }) .collect()