Skip to content

Commit 2848aaf

Browse files
authored
Chore: update wasm-supported crates, add tests (#14005)
* Chore: update wasm-supported crates * format
1 parent de6d511 commit 2848aaf

3 files changed

Lines changed: 79 additions & 12 deletions

File tree

datafusion/wasmtest/Cargo.toml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,21 @@ chrono = { version = "0.4", features = ["wasmbind"] }
4343
# code size when deploying.
4444
console_error_panic_hook = { version = "0.1.1", optional = true }
4545
datafusion = { workspace = true }
46+
datafusion-catalog = { workspace = true }
4647
datafusion-common = { workspace = true, default-features = true }
48+
datafusion-common-runtime = { workspace = true }
4749
datafusion-execution = { workspace = true }
4850
datafusion-expr = { workspace = true }
51+
datafusion-expr-common = { workspace = true }
52+
datafusion-functions = { workspace = true }
53+
datafusion-functions-aggregate = { workspace = true }
54+
datafusion-functions-aggregate-common = { workspace = true }
55+
datafusion-functions-table = { workspace = true }
4956
datafusion-optimizer = { workspace = true, default-features = true }
5057
datafusion-physical-expr = { workspace = true, default-features = true }
58+
datafusion-physical-expr-common = { workspace = true }
5159
datafusion-physical-plan = { workspace = true }
5260
datafusion-sql = { workspace = true }
53-
5461
# getrandom must be compiled with js feature
5562
getrandom = { version = "0.2.8", features = ["js"] }
5663

datafusion/wasmtest/README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,5 +87,13 @@ The following DataFusion crates are verified to work in a wasm-pack environment
8787
- `datafusion-physical-expr`
8888
- `datafusion-physical-plan`
8989
- `datafusion-sql`
90-
91-
The difficulty with getting the remaining DataFusion crates compiled to WASM is that they have non-optional dependencies on the [`parquet`](https://docs.rs/crate/parquet/) crate with its default features enabled. Several of the default parquet crate features require native dependencies that are not compatible with WASM, in particular the `lz4` and `zstd` features. If we can arrange our feature flags to make it possible to depend on parquet with these features disabled, then it should be possible to compile the core `datafusion` crate to WASM as well.
90+
- `datafusion-expr-common`
91+
- `datafusion-physical-expr-common`
92+
- `datafusion-functions`
93+
- `datafusion-functions-aggregate`
94+
- `datafusion-functions-aggregate-common`
95+
- `datafusion-functions-table`
96+
- `datafusion-catalog`
97+
- `datafusion-common-runtime`
98+
99+
The `datafusion-ffi` crate cannot compile for the wasm32-unknown-unknown target because it relies on lzma-sys, which depends on native C libraries (liblzma). The wasm32-unknown-unknown target lacks a standard C library (stdlib.h) and a POSIX-like environment, preventing the native code from being compiled.

datafusion/wasmtest/src/lib.rs

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ use datafusion_sql::sqlparser::dialect::GenericDialect;
2626
use datafusion_sql::sqlparser::parser::Parser;
2727
use std::sync::Arc;
2828
use wasm_bindgen::prelude::*;
29-
3029
pub fn set_panic_hook() {
3130
// When the `console_error_panic_hook` feature is enabled, we can call the
3231
// `set_panic_hook` function at least once during initialization, and then
@@ -77,7 +76,14 @@ pub fn basic_parse() {
7776
#[cfg(test)]
7877
mod test {
7978
use super::*;
80-
use datafusion::execution::context::SessionContext;
79+
use datafusion::{
80+
arrow::{
81+
array::{ArrayRef, Int32Array, RecordBatch, StringArray},
82+
datatypes::{DataType, Field, Schema},
83+
},
84+
datasource::MemTable,
85+
execution::context::SessionContext,
86+
};
8187
use datafusion_execution::{
8288
config::SessionConfig, disk_manager::DiskManagerConfig,
8389
runtime_env::RuntimeEnvBuilder,
@@ -95,19 +101,21 @@ mod test {
95101
basic_parse();
96102
}
97103

98-
#[wasm_bindgen_test(unsupported = tokio::test)]
99-
async fn basic_execute() {
100-
let sql = "SELECT 2 + 2;";
101-
102-
// Execute SQL (using datafusion)
104+
fn get_ctx() -> Arc<SessionContext> {
103105
let rt = RuntimeEnvBuilder::new()
104106
.with_disk_manager(DiskManagerConfig::Disabled)
105107
.build_arc()
106108
.unwrap();
107109
let session_config = SessionConfig::new().with_target_partitions(1);
108-
let session_context =
109-
Arc::new(SessionContext::new_with_config_rt(session_config, rt));
110+
Arc::new(SessionContext::new_with_config_rt(session_config, rt))
111+
}
112+
#[wasm_bindgen_test(unsupported = tokio::test)]
113+
async fn basic_execute() {
114+
let sql = "SELECT 2 + 2;";
115+
116+
// Execute SQL (using datafusion)
110117

118+
let session_context = get_ctx();
111119
let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
112120

113121
let logical_plan = session_context
@@ -124,4 +132,48 @@ mod test {
124132
let task_ctx = session_context.task_ctx();
125133
let _ = collect(physical_plan, task_ctx).await.unwrap();
126134
}
135+
136+
#[wasm_bindgen_test(unsupported = tokio::test)]
137+
async fn basic_df_function_execute() {
138+
let sql = "SELECT abs(-1.0);";
139+
let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
140+
let ctx = get_ctx();
141+
let logical_plan = ctx.state().statement_to_plan(statement).await.unwrap();
142+
let data_frame = ctx.execute_logical_plan(logical_plan).await.unwrap();
143+
let physical_plan = data_frame.create_physical_plan().await.unwrap();
144+
145+
let task_ctx = ctx.task_ctx();
146+
let _ = collect(physical_plan, task_ctx).await.unwrap();
147+
}
148+
149+
#[wasm_bindgen_test(unsupported = tokio::test)]
150+
async fn test_basic_aggregate() {
151+
let sql =
152+
"SELECT FIRST_VALUE(value) OVER (ORDER BY id) as first_val FROM test_table;";
153+
154+
let schema = Arc::new(Schema::new(vec![
155+
Field::new("id", DataType::Int32, false),
156+
Field::new("value", DataType::Utf8, false),
157+
]));
158+
159+
let data: Vec<ArrayRef> = vec![
160+
Arc::new(Int32Array::from(vec![1])),
161+
Arc::new(StringArray::from(vec!["a"])),
162+
];
163+
164+
let batch = RecordBatch::try_new(schema.clone(), data).unwrap();
165+
let table = MemTable::try_new(schema.clone(), vec![vec![batch]]).unwrap();
166+
167+
let ctx = get_ctx();
168+
ctx.register_table("test_table", Arc::new(table)).unwrap();
169+
170+
let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
171+
172+
let logical_plan = ctx.state().statement_to_plan(statement).await.unwrap();
173+
let data_frame = ctx.execute_logical_plan(logical_plan).await.unwrap();
174+
let physical_plan = data_frame.create_physical_plan().await.unwrap();
175+
176+
let task_ctx = ctx.task_ctx();
177+
let _ = collect(physical_plan, task_ctx).await.unwrap();
178+
}
127179
}

0 commit comments

Comments
 (0)