diff --git a/Cargo.toml b/Cargo.toml index 240d079..00133bb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ # under the License. [workspace] -members = ["core", "ffi", "jni"] +members = ["core", "ffi", "jni", "cli"] resolver = "2" [profile.release] diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 0000000..97485ab --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "paimon-mosaic-cli" +version = "0.2.0" +edition = "2021" +description = "Mosaic file format — command line inspector (cat/meta/schema)" +license = "Apache-2.0" + +[[bin]] +name = "mosaic" +path = "src/main.rs" + +[dependencies] +paimon-mosaic-core = { path = "../core" } +arrow-array = "58" +arrow-schema = "58" +clap = { version = "4", features = ["derive"] } diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..fc38491 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,72 @@ + + +# mosaic CLI + +A native command-line inspector for Mosaic files. It drives the read-only +`MosaicReader` API, so it needs no JVM and ships as a single native binary. + +## Build & run + +```bash +cargo run -p paimon-mosaic-cli -- # from source +cargo install --path cli # install `mosaic` +mosaic +``` + +## Commands + +| Command | Shows | Reads | +|---------|-------|-------| +| `schema` | column names, Arrow types, nullability, bucket | footer only | +| `meta` | row groups, rows, per-column stats (null/min/max) | footer + index | +| `footer` | magic, version, buckets, compression | footer only | +| `buckets`| per-bucket layout and member columns | footer + index | +| `pages` | per-column encoding + on-disk slot size | bucket data | +| `dictionary` | dictionary entries of a dict column (`-c`) | bucket data | +| `column-size` | on-disk bytes per column | footer + index | +| `cat` / `head` | first N rows as a table | column data | + +Every command accepts `--json`. `cat`/`head` take `-n ` and `-c a,b` +(projection); `dictionary` takes `-c `. + +```text +$ mosaic schema data.mosaic +5 columns, 4 buckets + id: Int32 not null [bucket 0] + name: Utf8 [bucket 2] + kind: Utf8 [bucket 1] + +$ mosaic buckets data.mosaic +row group 0: + bucket 0: paged 373B [flag, id] + bucket 1: paged 32B [kind] + +$ mosaic pages data.mosaic +row group 0: + flag: bucket 0 encoding=const slot=16B + kind: bucket 1 encoding=dict slot=28B + +$ mosaic cat data.mosaic -n 2 --json +{"id":0,"name":"user_0","kind":"a","score":0,"flag":7} +{"id":1,"name":"user_1","kind":"b","score":1.5,"flag":7} +``` + +For C/C++ or Java callers, embed the format directly via the `ffi` +(`mosaic.h`) or `jni` crates rather than shelling out to this CLI. diff --git a/cli/src/fmt.rs b/cli/src/fmt.rs new file mode 100644 index 0000000..0eb914e --- /dev/null +++ b/cli/src/fmt.rs @@ -0,0 +1,247 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::{Array, RecordBatch}; +use paimon_mosaic_core::values::Value; + +/// Render a stats min/max [`Value`] to a short, human-readable string. +pub fn render_value(v: &Value) -> String { + match v { + Value::Null => "null".to_string(), + Value::Boolean(b) => b.to_string(), + Value::TinyInt(x) => x.to_string(), + Value::SmallInt(x) => x.to_string(), + Value::Integer(x) => x.to_string(), + Value::BigInt(x) => x.to_string(), + Value::Float(x) => x.to_string(), + Value::Double(x) => x.to_string(), + Value::Date(x) => format!("{} (epoch-day)", x), + Value::Time(x) => format!("{} (ms)", x), + Value::String(b) => String::from_utf8_lossy(b).into_owned(), + Value::Bytes(b) | Value::DecimalLarge(b) => format!("0x{}", hex(b)), + Value::DecimalCompact(x) => x.to_string(), + Value::TimestampMillis(x) => format!("{} (ms)", x), + Value::TimestampMicros(x) => format!("{} (us)", x), + Value::TimestampNanos { millis, nanos_of_milli } => { + format!("{}ms+{}ns", millis, nanos_of_milli) + } + } +} + +fn hex(b: &[u8]) -> String { + b.iter().map(|x| format!("{:02x}", x)).collect() +} + +/// Human-readable encoding name for a `spec::ENCODING_*` id. +pub fn encoding_name(e: u8) -> &'static str { + use paimon_mosaic_core::spec::*; + match e { + ENCODING_PLAIN => "plain", + ENCODING_CONST => "const", + ENCODING_DICT => "dict", + ENCODING_ALL_NULL => "all_null", + _ => "?", + } +} + +/// Escape a string as a JSON string literal (quotes included). +pub fn json_str(s: &str) -> String { + let mut o = String::with_capacity(s.len() + 2); + o.push('"'); + for c in s.chars() { + match c { + '"' => o.push_str("\\\""), + '\\' => o.push_str("\\\\"), + '\n' => o.push_str("\\n"), + '\r' => o.push_str("\\r"), + '\t' => o.push_str("\\t"), + c if (c as u32) < 0x20 => o.push_str(&format!("\\u{:04x}", c as u32)), + c => o.push(c), + } + } + o.push('"'); + o +} + +/// Pretty-print a slice of record batches as an aligned ASCII table. +pub fn pretty_table(batches: &[RecordBatch], max_rows: usize) -> String { + let schema = batches[0].schema(); + let headers: Vec = schema.fields().iter().map(|f| f.name().clone()).collect(); + let ncols = headers.len(); + + let mut rows: Vec> = Vec::new(); + 'outer: for batch in batches { + for r in 0..batch.num_rows() { + if rows.len() >= max_rows { + break 'outer; + } + let mut row = Vec::with_capacity(ncols); + for c in 0..ncols { + row.push(cell(batch.column(c).as_ref(), r)); + } + rows.push(row); + } + } + + let mut widths: Vec = headers.iter().map(|h| h.chars().count()).collect(); + for row in &rows { + for (i, v) in row.iter().enumerate() { + widths[i] = widths[i].max(v.chars().count()); + } + } + + let sep = |out: &mut String| { + out.push('+'); + for w in &widths { + out.push_str(&"-".repeat(w + 2)); + out.push('+'); + } + out.push('\n'); + }; + let line = |out: &mut String, cells: &[String]| { + out.push('|'); + for (i, c) in cells.iter().enumerate() { + out.push_str(&format!(" {: String { + let schema = batches[0].schema(); + let names: Vec = schema.fields().iter().map(|f| f.name().clone()).collect(); + let mut out = String::new(); + let mut got = 0usize; + 'outer: for batch in batches { + for r in 0..batch.num_rows() { + if got >= max_rows { + break 'outer; + } + out.push('{'); + for (c, name) in names.iter().enumerate() { + if c > 0 { + out.push(','); + } + out.push_str(&json_str(name)); + out.push(':'); + out.push_str(&cell_json(batch.column(c).as_ref(), r)); + } + out.push_str("}\n"); + got += 1; + } + } + out +} + +/// Render one Arrow cell as a JSON value (numbers bare, strings quoted, null). +fn cell_json(arr: &dyn Array, row: usize) -> String { + use arrow_schema::DataType::*; + if arr.is_null(row) { + return "null".to_string(); + } + match arr.data_type() { + Utf8 | Date32 => json_str(&cell(arr, row)), + _ => cell(arr, row), + } +} + +/// Render one Arrow cell to a string by downcasting on the column type. +fn cell(arr: &dyn Array, row: usize) -> String { + use arrow_array::*; + use arrow_schema::DataType::*; + if arr.is_null(row) { + return "".to_string(); + } + macro_rules! d { + ($ty:ty) => { + arr.as_any().downcast_ref::<$ty>().unwrap().value(row).to_string() + }; + } + match arr.data_type() { + Boolean => d!(BooleanArray), + Int8 => d!(Int8Array), + Int16 => d!(Int16Array), + Int32 => d!(Int32Array), + Int64 => d!(Int64Array), + Float32 => d!(Float32Array), + Float64 => d!(Float64Array), + Date32 => d!(Date32Array), + Utf8 => arr.as_any().downcast_ref::().unwrap().value(row).to_string(), + _ => "?".to_string(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::{Int32Array, StringArray}; + use arrow_schema::{DataType, Field, Schema}; + use std::sync::Arc; + + fn sample() -> RecordBatch { + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, true), + ]); + RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(vec![1, 2])), + Arc::new(StringArray::from(vec![Some("ann"), None])), + ], + ) + .unwrap() + } + + #[test] + fn json_str_escapes() { + assert_eq!(json_str("a\"b\n"), "\"a\\\"b\\n\""); + assert_eq!(json_str("x"), "\"x\""); + } + + #[test] + fn render_value_types() { + assert_eq!(render_value(&Value::Integer(5)), "5"); + assert_eq!(render_value(&Value::String(b"hi".to_vec())), "hi"); + assert_eq!(render_value(&Value::Null), "null"); + } + + #[test] + fn ndjson_renders_null_and_quotes() { + let out = ndjson(&[sample()], 10); + assert_eq!(out, "{\"id\":1,\"name\":\"ann\"}\n{\"id\":2,\"name\":null}\n"); + } + + #[test] + fn pretty_table_truncates_and_aligns() { + let t = pretty_table(&[sample()], 1); + assert!(t.contains("| id ")); + assert!(t.contains("| 1 ")); + assert!(!t.contains("| 2 ")); + } +} diff --git a/cli/src/input.rs b/cli/src/input.rs new file mode 100644 index 0000000..b6e3722 --- /dev/null +++ b/cli/src/input.rs @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::fs::File; +use std::io; +use std::path::Path; + +use paimon_mosaic_core::reader::InputFile; + +/// A read-only [`InputFile`] backed by a real file using positional reads. +/// +/// `read_exact_at` does not move a shared cursor, so concurrent calls from the +/// reader's coalescing threads are safe — satisfying the `Sync` bound. +pub struct FileInput { + file: File, + len: u64, +} + +impl FileInput { + pub fn open(path: &Path) -> io::Result { + let file = File::open(path)?; + let len = file.metadata()?.len(); + Ok(Self { file, len }) + } + + pub fn len(&self) -> u64 { + self.len + } +} + +impl InputFile for FileInput { + fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<()> { + #[cfg(unix)] + { + use std::os::unix::fs::FileExt; + self.file.read_exact_at(buf, offset) + } + #[cfg(windows)] + { + use std::os::windows::fs::FileExt; + let mut read = 0; + while read < buf.len() { + let n = self.file.seek_read(&mut buf[read..], offset + read as u64)?; + if n == 0 { + return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read past end")); + } + read += n; + } + Ok(()) + } + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 0000000..5806386 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,352 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod fmt; +mod input; + +use std::path::PathBuf; +use std::process::ExitCode; + +use arrow_array::RecordBatch; +use clap::{Parser, Subcommand}; +use paimon_mosaic_core::reader::{MosaicReader, ReaderAccess}; + +use crate::input::FileInput; + +/// Mosaic file inspector — the cat/meta/schema/pages toolkit (cf. parquet-cli). +#[derive(Parser)] +#[command(name = "mosaic", version, about)] +struct Cli { + #[command(subcommand)] + cmd: Cmd, +} + +#[derive(Subcommand)] +enum Cmd { + /// Print the column names, types, nullability and bucket assignment. + Schema { + file: PathBuf, + #[arg(long)] + json: bool, + }, + /// Print row-group / bucket / stats metadata. + Meta { + file: PathBuf, + #[arg(long)] + json: bool, + }, + /// Print per-column encoding and slot size for each row group. + Pages { + file: PathBuf, + #[arg(long)] + json: bool, + }, + /// Print the first N rows as a table. + Cat { + file: PathBuf, + /// Number of rows to print. + #[arg(short = 'n', long, default_value_t = 10)] + num: usize, + /// Comma-separated columns to project. + #[arg(short, long)] + columns: Option, + #[arg(long)] + json: bool, + }, + /// Print the first N rows (alias of cat). + Head { + file: PathBuf, + #[arg(short = 'n', long, default_value_t = 10)] + num: usize, + #[arg(short, long)] + columns: Option, + #[arg(long)] + json: bool, + }, + /// Print the file footer: version, buckets, compression, offsets. + Footer { + file: PathBuf, + #[arg(long)] + json: bool, + }, + /// Print on-disk bytes per column (summed over row groups). + ColumnSize { + file: PathBuf, + #[arg(long)] + json: bool, + }, + /// Print the dictionary of a dict-encoded column. + Dictionary { + file: PathBuf, + /// Column name to dump. + #[arg(short = 'c', long)] + column: String, + #[arg(long)] + json: bool, + }, + /// Print bucket layout per row group (Mosaic's column grouping). + Buckets { + file: PathBuf, + #[arg(long)] + json: bool, + }, +} + +fn main() -> ExitCode { + let cli = Cli::parse(); + let res = match cli.cmd { + Cmd::Schema { file, json } => schema(&file, json), + Cmd::Meta { file, json } => meta(&file, json), + Cmd::Pages { file, json } => pages(&file, json), + Cmd::Cat { file, num, columns, json } => cat(&file, num, columns, json), + Cmd::Head { file, num, columns, json } => cat(&file, num, columns, json), + Cmd::Footer { file, json } => footer(&file, json), + Cmd::ColumnSize { file, json } => column_size(&file, json), + Cmd::Dictionary { file, column, json } => dictionary(&file, &column, json), + Cmd::Buckets { file, json } => buckets(&file, json), + }; + match res { + Ok(()) => ExitCode::SUCCESS, + Err(e) => { + eprintln!("error: {e}"); + ExitCode::FAILURE + } + } +} + +fn open(file: &PathBuf) -> std::io::Result> { + let input = FileInput::open(file)?; + let len = input.len(); + MosaicReader::new(input, len) +} + +/// Columns in original (write) order rather than the name-sorted layout. +fn original_order(s: &paimon_mosaic_core::schema::MosaicSchema) -> Vec { + let mut by_sorted = vec![0usize; s.columns.len()]; + for (orig, &sorted) in s.original_order.iter().enumerate() { + by_sorted[sorted] = orig; + } + let mut cols: Vec = (0..s.columns.len()).collect(); + cols.sort_by_key(|&i| by_sorted[i]); + cols +} + +fn schema(file: &PathBuf, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let s = reader.schema(); + let cols = original_order(s); + if json { + let items: Vec = cols.iter().map(|&i| { + let c = &s.columns[i]; + format!("{{\"name\":{},\"type\":{},\"nullable\":{},\"bucket\":{}}}", + fmt::json_str(&c.name), fmt::json_str(&format!("{:?}", c.data_type)), c.nullable, c.bucket_id) + }).collect(); + println!("{{\"columns\":{},\"buckets\":{},\"fields\":[{}]}}", s.columns.len(), s.num_buckets, items.join(",")); + return Ok(()); + } + println!("{} columns, {} buckets", s.columns.len(), s.num_buckets); + for i in cols { + let c = &s.columns[i]; + let null = if c.nullable { "" } else { " not null" }; + println!(" {}: {:?}{} [bucket {}]", c.name, c.data_type, null, c.bucket_id); + } + Ok(()) +} + +fn meta(file: &PathBuf, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let s = reader.schema(); + let nrg = reader.num_row_groups(); + let total: usize = (0..nrg).map(|i| reader.row_group_num_rows(i).unwrap_or(0)).sum(); + if json { + let mut rgs = Vec::new(); + for rg in 0..nrg { + let st: Vec = reader.row_group_stats(rg)?.iter().map(|x| { + let mm = match (&x.min, &x.max) { + (Some(lo), Some(hi)) => format!(",\"min\":{},\"max\":{}", fmt::json_str(&fmt::render_value(lo)), fmt::json_str(&fmt::render_value(hi))), + _ => String::new(), + }; + format!("{{\"column\":{},\"nulls\":{}{}}}", fmt::json_str(&s.columns[x.column_index].name), x.null_count, mm) + }).collect(); + rgs.push(format!("{{\"rows\":{},\"stats\":[{}]}}", reader.row_group_num_rows(rg)?, st.join(","))); + } + println!("{{\"rows\":{},\"columns\":{},\"buckets\":{},\"row_groups\":[{}]}}", total, s.columns.len(), s.num_buckets, rgs.join(",")); + return Ok(()); + } + println!("file: {} rows, {} columns, {} buckets, {} row groups", total, s.columns.len(), s.num_buckets, nrg); + for rg in 0..nrg { + println!("row group {rg}: {} rows", reader.row_group_num_rows(rg)?); + for st in reader.row_group_stats(rg)? { + let mm = match (&st.min, &st.max) { + (Some(lo), Some(hi)) => format!("min={} max={}", fmt::render_value(lo), fmt::render_value(hi)), + _ => "no min/max".to_string(), + }; + println!(" {}: nulls={} {}", s.columns[st.column_index].name, st.null_count, mm); + } + } + Ok(()) +} + +fn pages(file: &PathBuf, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let s = reader.schema(); + let nrg = reader.num_row_groups(); + if json { + let mut rgs = Vec::new(); + for rg in 0..nrg { + let items: Vec = reader.page_infos(rg)?.iter().map(|p| { + format!("{{\"column\":{},\"bucket\":{},\"encoding\":{},\"slot_size\":{}}}", + fmt::json_str(&s.columns[p.column_index].name), p.bucket, fmt::json_str(fmt::encoding_name(p.encoding)), p.slot_size) + }).collect(); + rgs.push(format!("[{}]", items.join(","))); + } + println!("{{\"row_groups\":[{}]}}", rgs.join(",")); + return Ok(()); + } + for rg in 0..nrg { + println!("row group {rg}:"); + for p in reader.page_infos(rg)? { + let c = &s.columns[p.column_index]; + println!(" {}: bucket {} encoding={} slot={}B", c.name, p.bucket, fmt::encoding_name(p.encoding), p.slot_size); + } + } + Ok(()) +} + +fn cat(file: &PathBuf, num: usize, columns: Option, json: bool) -> std::io::Result<()> { + let mut reader = open(file)?; + if let Some(list) = &columns { + let names: Vec<&str> = list.split(',').map(|x| x.trim()).filter(|x| !x.is_empty()).collect(); + reader.project(&names)?; + } + let mut batches: Vec = Vec::new(); + let mut got = 0usize; + for rg in 0..reader.num_row_groups() { + if got >= num { + break; + } + let batch = reader.row_group_reader(rg)?.read_columns()?; + got += batch.num_rows(); + batches.push(batch); + } + if batches.is_empty() { + if !json { + println!("(no rows)"); + } + } else if json { + print!("{}", fmt::ndjson(&batches, num)); + } else { + print!("{}", fmt::pretty_table(&batches, num)); + } + Ok(()) +} + +fn footer(file: &PathBuf, json: bool) -> std::io::Result<()> { + use paimon_mosaic_core::spec::{COMPRESSION_ZSTD, MAGIC, VERSION}; + let reader = open(file)?; + let s = reader.schema(); + let comp = if reader.compression() == COMPRESSION_ZSTD { "zstd" } else { "none" }; + let magic = std::str::from_utf8(&MAGIC).unwrap_or("MOSA"); + if json { + println!("{{\"magic\":{},\"version\":{},\"buckets\":{},\"row_groups\":{},\"compression\":{}}}", + fmt::json_str(magic), VERSION, s.num_buckets, reader.num_row_groups(), fmt::json_str(comp)); + } else { + println!("magic={} version={} buckets={} row_groups={} compression={}", + magic, VERSION, s.num_buckets, reader.num_row_groups(), comp); + } + Ok(()) +} + +fn column_size(file: &PathBuf, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let s = reader.schema(); + let mut bytes = vec![0usize; s.columns.len()]; + for rg in 0..reader.num_row_groups() { + for p in reader.page_infos(rg)? { + bytes[p.column_index] += p.slot_size; + } + } + let cols = original_order(s); + if json { + let items: Vec = cols.iter().map(|&i| format!("{{\"column\":{},\"bytes\":{}}}", fmt::json_str(&s.columns[i].name), bytes[i])).collect(); + println!("[{}]", items.join(",")); + } else { + for i in cols { + println!(" {}: {} B", s.columns[i].name, bytes[i]); + } + } + Ok(()) +} + +fn dictionary(file: &PathBuf, column: &str, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let col = reader.schema().columns.iter().position(|c| c.name == column) + .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("column '{column}' not found")))?; + if json { + let mut rgs = Vec::new(); + for rg in 0..reader.num_row_groups() { + match reader.dictionary(rg, col)? { + Some(vals) => { + let e: Vec = vals.iter().map(|v| fmt::json_str(&fmt::render_value(v))).collect(); + rgs.push(format!("[{}]", e.join(","))); + } + None => rgs.push("null".to_string()), + } + } + println!("{{\"column\":{},\"row_groups\":[{}]}}", fmt::json_str(column), rgs.join(",")); + return Ok(()); + } + for rg in 0..reader.num_row_groups() { + match reader.dictionary(rg, col)? { + Some(vals) => { + println!("row group {rg}: {} entries", vals.len()); + for (i, v) in vals.iter().enumerate() { + println!(" {i}: {}", fmt::render_value(v)); + } + } + None => println!("row group {rg}: not dict-encoded"), + } + } + Ok(()) +} + +fn buckets(file: &PathBuf, json: bool) -> std::io::Result<()> { + let reader = open(file)?; + let s = reader.schema(); + let name = |i: usize| s.columns[i].name.clone(); + let mut rgs = Vec::new(); + for rg in 0..reader.num_row_groups() { + let infos = reader.bucket_infos(rg)?; + if json { + let items: Vec = infos.iter().map(|b| { + let cols: Vec = b.columns.iter().map(|&i| fmt::json_str(&name(i))).collect(); + format!("{{\"bucket\":{},\"kind\":{},\"size\":{},\"columns\":[{}]}}", b.bucket, fmt::json_str(b.kind), b.size, cols.join(",")) + }).collect(); + rgs.push(format!("[{}]", items.join(","))); + } else { + println!("row group {rg}:"); + for b in &infos { + let cols: Vec = b.columns.iter().map(|&i| name(i)).collect(); + println!(" bucket {}: {} {}B [{}]", b.bucket, b.kind, b.size, cols.join(", ")); + } + } + } + if json { + println!("{{\"row_groups\":[{}]}}", rgs.join(",")); + } + Ok(()) +} diff --git a/cli/tests/e2e.rs b/cli/tests/e2e.rs new file mode 100644 index 0000000..f630503 --- /dev/null +++ b/cli/tests/e2e.rs @@ -0,0 +1,190 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! End-to-end tests: drive the `mosaic` binary against a fixture file and +//! assert stdout. Zero external dev-deps — uses CARGO_BIN_EXE and std only. + +use std::fs::File; +use std::io::Write; +use std::process::Command; +use std::sync::Arc; + +use arrow_array::{Int32Array, RecordBatch, StringArray}; +use arrow_schema::{DataType, Field, Schema}; +use paimon_mosaic_core::writer::{MosaicWriter, OutputFile, WriterOptions}; + +struct FileOut { + f: File, + pos: u64, +} +impl OutputFile for FileOut { + fn write(&mut self, d: &[u8]) -> std::io::Result<()> { + self.f.write_all(d)?; + self.pos += d.len() as u64; + Ok(()) + } + fn flush(&mut self) -> std::io::Result<()> { + self.f.flush() + } + fn pos(&self) -> u64 { + self.pos + } +} + +/// Write a small fixture and return its path under the test temp dir. +fn fixture(name: &str) -> String { + let path = format!("{}/mosaic_e2e_{}.mosaic", std::env::temp_dir().display(), name); + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("kind", DataType::Utf8, true), + Field::new("flag", DataType::Int32, true), + ]); + let out = FileOut { f: File::create(&path).unwrap(), pos: 0 }; + let opts = WriterOptions { + num_buckets: 3, + page_size_threshold: 1, + stats_columns: vec!["id".into()], + ..Default::default() + }; + let mut w = MosaicWriter::new(out, &schema, opts).unwrap(); + let n = 200; + let ids: Vec = (0..n).collect(); + let kinds: Vec<&str> = (0..n).map(|i| ["a", "b", "c"][(i % 3) as usize]).collect(); + let flags = vec![7; n as usize]; + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + Arc::new(Int32Array::from(ids)), + Arc::new(StringArray::from(kinds)), + Arc::new(Int32Array::from(flags)), + ], + ) + .unwrap(); + w.write_batch(&batch).unwrap(); + w.close().unwrap(); + path +} + +fn run(args: &[&str]) -> (String, String, bool) { + let out = Command::new(env!("CARGO_BIN_EXE_mosaic")).args(args).output().unwrap(); + ( + String::from_utf8(out.stdout).unwrap(), + String::from_utf8(out.stderr).unwrap(), + out.status.success(), + ) +} + +#[test] +fn schema_lists_columns() { + let f = fixture("schema"); + let (out, _, ok) = run(&["schema", &f]); + assert!(ok); + assert!(out.contains("3 columns, 3 buckets")); + assert!(out.contains("id: Int32 not null")); + assert!(out.contains("kind: Utf8")); +} + +#[test] +fn meta_shows_stats() { + let f = fixture("meta"); + let (out, _, ok) = run(&["meta", &f]); + assert!(ok); + assert!(out.contains("200 rows")); + assert!(out.contains("id: nulls=0 min=0 max=199")); +} + +#[test] +fn pages_shows_encodings() { + let f = fixture("pages"); + let (out, _, ok) = run(&["pages", &f]); + assert!(ok); + assert!(out.contains("flag: bucket 0 encoding=const")); + assert!(out.contains("kind: bucket 2 encoding=dict")); +} + +#[test] +fn cat_truncates_and_projects() { + let f = fixture("cat"); + let (out, _, ok) = run(&["cat", &f, "-n", "2"]); + assert!(ok); + assert!(out.contains("| id | kind | flag |")); + assert_eq!(out.matches('\n').count(), 6); // 3 borders + header + 2 rows + let (proj, _, _) = run(&["cat", &f, "-c", "kind,id", "-n", "1"]); + assert!(proj.contains("| kind | id |")); +} + +#[test] +fn cat_json_is_ndjson() { + let f = fixture("json"); + let (out, _, ok) = run(&["cat", &f, "-n", "2", "--json"]); + assert!(ok); + assert_eq!(out, "{\"id\":0,\"kind\":\"a\",\"flag\":7}\n{\"id\":1,\"kind\":\"b\",\"flag\":7}\n"); +} + +#[test] +fn missing_file_fails() { + let (_, err, ok) = run(&["schema", "/no/such/file.mosaic"]); + assert!(!ok); + assert!(err.contains("error:")); +} + +#[test] +fn footer_shows_format() { + let f = fixture("footer"); + let (out, _, ok) = run(&["footer", &f]); + assert!(ok); + assert!(out.contains("magic=MOSA")); + assert!(out.contains("buckets=3")); + assert!(out.contains("compression=zstd")); + let (j, _, ok) = run(&["footer", &f, "--json"]); + assert!(ok); + assert!(j.contains("\"magic\":\"MOSA\"") && j.contains("\"compression\":\"zstd\"")); +} + +#[test] +fn dictionary_dumps_entries() { + let f = fixture("dict"); + let (out, _, ok) = run(&["dictionary", &f, "-c", "kind"]); + assert!(ok); + assert!(out.contains("3 entries")); + assert!(out.contains("a") && out.contains("b") && out.contains("c")); + let (j, _, ok) = run(&["dictionary", &f, "-c", "kind", "--json"]); + assert!(ok); + assert_eq!(j, "{\"column\":\"kind\",\"row_groups\":[[\"a\",\"b\",\"c\"]]}\n"); +} + +#[test] +fn column_size_sums_bytes() { + let f = fixture("size"); + let (out, _, ok) = run(&["column-size", &f]); + assert!(ok); + assert!(out.contains("id:") && out.contains("kind:")); + assert!(out.contains("flag: 0 B")); // const column has no slot +} + +#[test] +fn buckets_show_layout() { + let f = fixture("buckets"); + let (out, _, ok) = run(&["buckets", &f]); + assert!(ok); + assert!(out.contains("row group 0:")); + assert!(out.contains("[flag]") && out.contains("[id]") && out.contains("[kind]")); + assert!(out.contains("monolithic") || out.contains("paged")); + let (j, _, ok) = run(&["buckets", &f, "--json"]); + assert!(ok); + assert!(j.contains("\"bucket\":0") && j.contains("\"columns\":")); +} diff --git a/core/src/bucket_reader.rs b/core/src/bucket_reader.rs index 115bae4..2655ebb 100644 --- a/core/src/bucket_reader.rs +++ b/core/src/bucket_reader.rs @@ -507,6 +507,16 @@ impl BucketReader { self.num_rows - null_count } + /// Per-column encoding ids (in this bucket's column order). See `spec::ENCODING_*`. + pub fn encodings(&self) -> &[u8] { + &self.encodings + } + + /// Dictionary entries for one column (empty if it is not dict-encoded). + pub fn dict_values(&self, col: usize) -> &[Value] { + &self.dict_values[col] + } + pub fn read_all_columns(&self) -> io::Result> { let num_rows = self.num_rows; let mut result = Vec::with_capacity(self.num_columns); @@ -768,6 +778,16 @@ impl ColumnPageReader { } } + /// Encoding id of this column page. See `spec::ENCODING_*`. + pub fn encoding(&self) -> u8 { + self.encoding + } + + /// Dictionary entries for a dict-encoded page (empty for other encodings). + pub fn dict_values(&self) -> &[Value] { + &self.dict_values + } + pub fn read_all(&self) -> io::Result { let num_rows = self.num_rows; let variant = data_variant_for_type(&self.col_type); diff --git a/core/src/reader.rs b/core/src/reader.rs index e111b9d..ac89dfa 100644 --- a/core/src/reader.rs +++ b/core/src/reader.rs @@ -179,6 +179,27 @@ fn read_merged_ranges( Ok((merged, fetched)) } +/// Physical placement of one column within one row group. +pub struct PageInfo { + pub column_index: usize, + pub bucket: usize, + /// `spec::ENCODING_*`: plain / const / dict / all_null. + pub encoding: u8, + /// Paged-bucket on-disk slot size in bytes; 0 for monolithic/empty buckets. + pub slot_size: usize, +} + +/// Layout of one bucket within one row group. +pub struct BucketInfo { + pub bucket: usize, + /// "empty" | "monolithic" | "paged". + pub kind: &'static str, + /// On-disk compressed size in bytes (0 for empty buckets). + pub size: usize, + /// Member column indices (global, name-sorted order). + pub columns: Vec, +} + pub struct RowGroupMeta { pub num_rows: usize, pub bucket_offsets: Vec, @@ -447,6 +468,97 @@ impl MosaicReader { &self.input } + /// Footer compression code (`spec::COMPRESSION_*`). + pub fn compression(&self) -> u8 { + self.compression + } + + /// Per-bucket layout for a row group: kind, on-disk size and member columns + /// (global indices). The bucket is Mosaic's defining structure — exposed for + /// the `buckets` command. + pub fn bucket_infos(&self, rg_index: usize) -> io::Result> { + if rg_index >= self.row_group_metas.len() { + return Err(io::Error::new(io::ErrorKind::InvalidInput, "row group index out of range")); + } + let meta = &self.row_group_metas[rg_index]; + Ok((0..self.num_buckets).map(|b| { + let (kind, size) = match meta.bucket_layouts[b] { + BucketLayout::Empty => ("empty", 0), + BucketLayout::Monolithic { compressed_size, .. } => ("monolithic", compressed_size), + BucketLayout::Paged { total_size } => ("paged", total_size), + }; + BucketInfo { bucket: b, kind, size, columns: self.schema.bucket_to_global[b].clone() } + }).collect()) + } + + /// Dictionary entries for one column in one row group, or `None` if that + /// column is not dict-encoded there. Used by the `dictionary` command. + pub fn dictionary(&self, rg_index: usize, col: usize) -> io::Result>> { + let rg = self.row_group_reader_projected(rg_index, &[col])?; + Ok(rg.take_dictionary(col)) + } + + /// Per-column physical layout for a row group: bucket, encoding and on-disk + /// slot size. Reads and decompresses each non-empty bucket; used by tooling + /// (the `pages` command). Columns are reported in global (name-sorted) order. + pub fn page_infos(&self, rg_index: usize) -> io::Result> { + if rg_index >= self.row_group_metas.len() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "row group index out of range", + )); + } + let meta = &self.row_group_metas[rg_index]; + let mut out = Vec::with_capacity(self.schema.columns.len()); + for b in 0..self.num_buckets { + let globals = &self.schema.bucket_to_global[b]; + match meta.bucket_layouts[b] { + BucketLayout::Empty => { + for &gi in globals { + out.push(PageInfo { column_index: gi, bucket: b, encoding: ENCODING_ALL_NULL, slot_size: 0 }); + } + } + BucketLayout::Monolithic { compressed_size, uncompressed_size } => { + let buf = read_range(&self.input, meta.bucket_offsets[b], compressed_size)?; + let data = match self.compression { + COMPRESSION_NONE => buf, + COMPRESSION_ZSTD => zstd::bulk::decompress(&buf, uncompressed_size) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?, + _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "unsupported compression")), + }; + let col_types: Vec = globals.iter().map(|&gi| self.schema.columns[gi].data_type.clone()).collect(); + let reader = BucketReader::new(col_types, data, meta.num_rows)?; + for (local, &gi) in globals.iter().enumerate() { + out.push(PageInfo { column_index: gi, bucket: b, encoding: reader.encodings()[local], slot_size: 0 }); + } + } + BucketLayout::Paged { total_size } => { + let dir_size = globals.len() * 4; + let dir = read_range(&self.input, meta.bucket_offsets[b], dir_size)?; + let mut sizes = Vec::with_capacity(globals.len()); + for i in 0..globals.len() { + sizes.push(u32::from_le_bytes(dir[i * 4..i * 4 + 4].try_into().unwrap()) as usize); + } + let mut foff = meta.bucket_offsets[b] + dir_size as u64; + for (local, &gi) in globals.iter().enumerate() { + let enc = if sizes[local] == 0 { + ENCODING_ALL_NULL + } else { + let slot = read_range(&self.input, foff, sizes[local])?; + let ct = self.schema.columns[gi].data_type.clone(); + Self::parse_column_slot(&slot, &ct, meta.num_rows)?.encoding() + }; + out.push(PageInfo { column_index: gi, bucket: b, encoding: enc, slot_size: sizes[local] }); + foff += sizes[local] as u64; + } + let _ = total_size; + } + } + } + out.sort_by_key(|p| p.column_index); + Ok(out) + } + fn parse_column_slot( slot_data: &[u8], col_type: &DataType, @@ -965,6 +1077,22 @@ impl RowGroupReader { } } + /// Dictionary entries for a projected column, or `None` if not dict-encoded. + pub fn take_dictionary(&self, global_col: usize) -> Option> { + let bucket = self.schema.columns[global_col].bucket_id; + let local = self.bucket_to_global[bucket].iter().position(|&g| g == global_col)?; + match self.bucket_states[bucket].as_ref()? { + BucketState::Paged { column_readers } => { + let d = column_readers[local].as_ref()?.dict_values(); + if d.is_empty() { None } else { Some(d.to_vec()) } + } + BucketState::Monolithic { reader } => { + let d = reader.dict_values(local); + if d.is_empty() { None } else { Some(d.to_vec()) } + } + } + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/core/src/reader_tests.rs b/core/src/reader_tests.rs index 55eecee..fde6b55 100644 --- a/core/src/reader_tests.rs +++ b/core/src/reader_tests.rs @@ -4180,3 +4180,54 @@ fn test_row_group_num_rows_out_of_range() { assert!(reader.row_group_num_rows(1).is_err()); assert!(reader.row_group_num_rows(999).is_err()); } + +#[test] +fn test_page_infos_encodings() { + use crate::spec::{ENCODING_CONST, ENCODING_DICT, ENCODING_PLAIN}; + let columns = vec![ + ("id".to_string(), DataType::Int32, false), // unique -> plain + ("kind".to_string(), DataType::Utf8, true), // low cardinality -> dict + ("flag".to_string(), DataType::Int32, true), // constant -> const + ]; + let out = MemOutputFile::new(); + let mut writer = MosaicWriter::new( + out, + &columns_to_arrow_schema(&columns), + WriterOptions { + num_buckets: 3, + page_size_threshold: 1, // force paged buckets + ..Default::default() + }, + ) + .unwrap(); + + let rows: Vec> = (0..200) + .map(|i| { + vec![ + Value::Integer(i), + Value::String(["a", "b", "c"][(i % 3) as usize].as_bytes().to_vec()), + Value::Integer(7), + ] + }) + .collect(); + write_values(&mut writer, &columns, &rows); + writer.close().unwrap(); + let data = writer.output().buf.clone(); + let len = data.len() as u64; + let reader = MosaicReader::new(ByteArrayInputFile::new(data), len).unwrap(); + + let infos = reader.page_infos(0).unwrap(); + assert_eq!(infos.len(), 3); + // page_infos is sorted by column_index; name-sorted order: flag, id, kind + let by_name = |n: &str| { + infos + .iter() + .find(|p| reader.schema().columns[p.column_index].name == n) + .unwrap() + }; + assert_eq!(by_name("id").encoding, ENCODING_PLAIN); + assert_eq!(by_name("kind").encoding, ENCODING_DICT); + assert_eq!(by_name("flag").encoding, ENCODING_CONST); + assert!(by_name("id").slot_size > 0); + assert!(reader.page_infos(999).is_err()); +} diff --git a/docs/cli.html b/docs/cli.html new file mode 100644 index 0000000..6e7e7dd --- /dev/null +++ b/docs/cli.html @@ -0,0 +1,174 @@ + + + + + + + + CLI - Paimon Mosaic + + + + + +
+ + + +
+
+

CLI

+

Inspect Mosaic files from the terminal with the mosaic binary. A native, JVM-free toolkit driving the read-only MosaicReader API.

+ +

Install

+
# run from source
+cargo run -p paimon-mosaic-cli -- schema data.mosaic
+
+# install the `mosaic` binary
+cargo install --path cli
+mosaic schema data.mosaic
+ +

Commands

+ + + + + + + + + + + + + + +
CommandShowsReads
schemacolumn names, Arrow types, nullability, bucketfooter only
metarow groups, rows, per-column statsfooter + index
footermagic, version, buckets, compressionfooter only
bucketsper-bucket layout and member columnsfooter + index
pagesper-column encoding + slot sizebucket data
dictionarydictionary entries of a dict columnbucket data
column-sizeon-disk bytes per columnfooter + index
cat / headfirst N rows as a tablecolumn data
+

Every command accepts --json. cat/head take -n <N> and -c a,b (projection); dictionary takes -c <col>.

+ +

schema

+

Columns, Arrow types, nullability and bucket assignment, in original input order. Footer only.

+
$ mosaic schema data.mosaic
+5 columns, 4 buckets
+  id: Int32 not null [bucket 0]
+  name: Utf8 [bucket 2]
+  kind: Utf8 [bucket 1]
+  score: Float64 [bucket 3]
+  flag: Int32 [bucket 0]
+
+$ mosaic schema data.mosaic --json
+{"columns":5,"buckets":4,"fields":[{"name":"id","type":"Int32","nullable":false,"bucket":0}, ...]}
+ +

meta

+

Total rows, row groups, and per-column stats (null count / min / max) for columns configured with stats.

+
$ mosaic meta data.mosaic
+file: 200 rows, 5 columns, 4 buckets, 1 row groups
+row group 0: 200 rows
+    id: nulls=0 min=0 max=199
+    score: nulls=0 min=0 max=298.5
+ +

footer

+

The 32-byte file footer: magic, format version, bucket count, row groups and compression.

+
$ mosaic footer data.mosaic
+magic=MOSA version=1 buckets=4 row_groups=1 compression=zstd
+ +

buckets

+

Per row group, each bucket's layout (empty / monolithic / paged), on-disk size and member columns. Mosaic groups columns into buckets by name order.

+
$ mosaic buckets data.mosaic
+row group 0:
+    bucket 0: paged 373B [flag, id]
+    bucket 1: paged 32B [kind]
+    bucket 2: paged 220B [name]
+    bucket 3: paged 542B [score]
+ +

pages

+

Per-column physical encoding (plain / const / dict / all_null) and on-disk slot size.

+
$ mosaic pages data.mosaic
+row group 0:
+    flag: bucket 0 encoding=const slot=16B
+    id: bucket 0 encoding=plain slot=349B
+    kind: bucket 1 encoding=dict slot=28B
+    name: bucket 2 encoding=plain slot=216B
+    score: bucket 3 encoding=plain slot=538B
+ +

dictionary

+

Dump the dictionary of a dict-encoded column. Non-dict columns report as such.

+
$ mosaic dictionary data.mosaic -c kind
+row group 0: 3 entries
+    0: a
+    1: b
+    2: c
+
+$ mosaic dictionary data.mosaic -c kind --json
+{"column":"kind","row_groups":[["a","b","c"]]}
+ +

column-size

+

On-disk bytes per column, summed across row groups.

+
$ mosaic column-size data.mosaic
+  id: 349 B
+  name: 216 B
+  kind: 28 B
+  score: 538 B
+  flag: 16 B
+ +

cat / head

+

Read the first N rows as a table. -n sets the count, -c projects columns, --json emits newline-delimited JSON. head is an alias of cat.

+
$ mosaic cat data.mosaic -n 2
++----+--------+------+-------+------+
+| id | name   | kind | score | flag |
++----+--------+------+-------+------+
+| 0  | user_0 | a    | 0     | 7    |
+| 1  | user_1 | b    | 1.5   | 7    |
++----+--------+------+-------+------+
+
+$ mosaic cat data.mosaic -n 2 -c name,score   # projection
+
+$ mosaic cat data.mosaic -n 2 --json
+{"id":0,"name":"user_0","kind":"a","score":0,"flag":7}
+{"id":1,"name":"user_1","kind":"b","score":1.5,"flag":7}
+ +
+ Embedding instead + For C/C++ or Java callers, embed the format directly via the ffi + (mosaic.h) or jni crates rather than shelling out to this CLI. +
+
+
+ + diff --git a/docs/cpp-api.html b/docs/cpp-api.html index 9b3fa4c..5ad8b23 100644 --- a/docs/cpp-api.html +++ b/docs/cpp-api.html @@ -42,6 +42,7 @@

Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/creating-a-release.html b/docs/creating-a-release.html index 8b69a4d..91750df 100644 --- a/docs/creating-a-release.html +++ b/docs/creating-a-release.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/design.html b/docs/design.html index 84c5e4f..4f142ec 100644 --- a/docs/design.html +++ b/docs/design.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/index.html b/docs/index.html index 55e9ec8..0c3ff7b 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/java-api.html b/docs/java-api.html index f654b99..b413bf8 100644 --- a/docs/java-api.html +++ b/docs/java-api.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/python-api.html b/docs/python-api.html index 7d50afe..e70b372 100644 --- a/docs/python-api.html +++ b/docs/python-api.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/releases.html b/docs/releases.html index 2e6b35b..b5dd083 100644 --- a/docs/releases.html +++ b/docs/releases.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases
  • diff --git a/docs/verifying-a-release-candidate.html b/docs/verifying-a-release-candidate.html index 6f620de..b9c3ead 100644 --- a/docs/verifying-a-release-candidate.html +++ b/docs/verifying-a-release-candidate.html @@ -42,6 +42,7 @@

    Paimon Mosaic

  • Java API
  • Python API
  • C++ API
  • +
  • CLI
  • Releases