diff --git a/Cargo.toml b/Cargo.toml
index 240d079..00133bb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,7 +16,7 @@
 # under the License.
 
 [workspace]
-members = ["core", "ffi", "jni"]
+members = ["core", "ffi", "jni", "cli"]
 resolver = "2"
 
 [profile.release]
diff --git a/cli/Cargo.toml b/cli/Cargo.toml
new file mode 100644
index 0000000..97485ab
--- /dev/null
+++ b/cli/Cargo.toml
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "paimon-mosaic-cli"
+version = "0.2.0"
+edition = "2021"
+description = "Mosaic file format — command line inspector (cat/meta/schema)"
+license = "Apache-2.0"
+
+[[bin]]
+name = "mosaic"
+path = "src/main.rs"
+
+[dependencies]
+paimon-mosaic-core = { path = "../core" }
+arrow-array = "58"
+arrow-schema = "58"
+clap = { version = "4", features = ["derive"] }
diff --git a/cli/README.md b/cli/README.md
new file mode 100644
index 0000000..fc38491
--- /dev/null
+++ b/cli/README.md
@@ -0,0 +1,72 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# mosaic CLI
+
+A native command-line inspector for Mosaic files. It drives the read-only
+`MosaicReader` API, so it needs no JVM and ships as a single native binary.
+
+## Build & run
+
+```bash
+cargo run -p paimon-mosaic-cli -- <command> <file>   # from source
+cargo install --path cli                             # install `mosaic`
+mosaic <command> <file>
+```
+
+## Commands
+
+| Command | Shows | Reads |
+|---------|-------|-------|
+| `schema` | column names, Arrow types, nullability, bucket | footer only |
+| `meta`   | row groups, rows, per-column stats (null/min/max) | footer + index |
+| `footer` | magic, version, buckets, compression | footer only |
+| `buckets`| per-bucket layout and member columns | footer + index |
+| `pages`  | per-column encoding + on-disk slot size | bucket data |
+| `dictionary` | dictionary entries of a dict column (`-c`) | bucket data |
+| `column-size` | on-disk bytes per column | footer + index |
+| `cat` / `head` | first N rows as a table (NDJSON with `--json`) | column data |
+
+Every command accepts `--json`. `cat`/`head` take `-n <N>` (rows, default 10) and
+`-c a,b` (column projection); `dictionary` requires `-c <col>`.
+
+```text
+$ mosaic schema data.mosaic
+5 columns, 4 buckets
+  id: Int32 not null [bucket 0]
+  name: Utf8 [bucket 2]
+  kind: Utf8 [bucket 1]
+
+$ mosaic buckets data.mosaic
+row group 0:
+    bucket 0: paged 373B [flag, id]
+    bucket 1: paged 32B [kind]
+
+$ mosaic pages data.mosaic
+row group 0:
+    flag: bucket 0 encoding=const slot=16B
+    kind: bucket 1 encoding=dict slot=28B
+
+$ mosaic cat data.mosaic -n 2 --json
+{"id":0,"name":"user_0","kind":"a","score":0,"flag":7}
+{"id":1,"name":"user_1","kind":"b","score":1.5,"flag":7}
+```
+
+For C/C++ or Java callers, embed the format directly via the `ffi`
+(`mosaic.h`) or `jni` crates rather than shelling out to this CLI.
diff --git a/cli/src/fmt.rs b/cli/src/fmt.rs
new file mode 100644
index 0000000..0eb914e
--- /dev/null
+++ b/cli/src/fmt.rs
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::{Array, RecordBatch};
+use paimon_mosaic_core::values::Value;
+
+/// Format a [`Value`] (stats min/max or dictionary entry) as short display text.
+///
+/// Dates and times carry a unit suffix; raw byte payloads are shown as `0x…` hex.
+pub fn render_value(v: &Value) -> String {
+    match v {
+        Value::Null => String::from("null"),
+        Value::Boolean(flag) => flag.to_string(),
+        Value::TinyInt(n) => n.to_string(),
+        Value::SmallInt(n) => n.to_string(),
+        Value::Integer(n) => n.to_string(),
+        Value::BigInt(n) => n.to_string(),
+        Value::Float(n) => n.to_string(),
+        Value::Double(n) => n.to_string(),
+        Value::DecimalCompact(n) => n.to_string(),
+        Value::Date(days) => format!("{days} (epoch-day)"),
+        Value::Time(ms) => format!("{ms} (ms)"),
+        Value::TimestampMillis(ms) => format!("{ms} (ms)"),
+        Value::TimestampMicros(us) => format!("{us} (us)"),
+        Value::TimestampNanos { millis, nanos_of_milli } => format!("{millis}ms+{nanos_of_milli}ns"),
+        Value::String(text) => String::from_utf8_lossy(text).into_owned(),
+        Value::Bytes(raw) | Value::DecimalLarge(raw) => format!("0x{}", hex(raw)),
+    }
+}
+
+fn hex(b: &[u8]) -> String {
+    b.iter().fold(String::with_capacity(b.len() * 2), |acc, byte| acc + &format!("{byte:02x}"))
+}
+
+/// Map a `spec::ENCODING_*` id to its display name; unknown ids render as `"?"`.
+pub fn encoding_name(e: u8) -> &'static str {
+    use paimon_mosaic_core::spec;
+    const NAMES: [(u8, &str); 4] = [
+        (spec::ENCODING_PLAIN, "plain"),
+        (spec::ENCODING_CONST, "const"),
+        (spec::ENCODING_DICT, "dict"),
+        (spec::ENCODING_ALL_NULL, "all_null"),
+    ];
+    NAMES.iter().find(|(id, _)| *id == e).map_or("?", |(_, name)| *name)
+}
+
+/// Encode `s` as a JSON string literal, surrounding quotes included; quote,
+/// backslash and control characters below U+0020 are escaped.
+pub fn json_str(s: &str) -> String {
+    let mut lit = String::with_capacity(s.len() + 2);
+    lit.push('"');
+    for ch in s.chars() {
+        match ch {
+            '"' | '\\' => lit.extend(['\\', ch]),
+            '\n' => lit.push_str("\\n"),
+            '\r' => lit.push_str("\\r"),
+            '\t' => lit.push_str("\\t"),
+            '\0'..='\x1f' => lit.push_str(&format!("\\u{:04x}", u32::from(ch))),
+            _ => lit.push(ch),
+        }
+    }
+    lit.push('"');
+    lit
+}
+
+/// Render up to `max_rows` rows of `batches` as an aligned ASCII table.
+/// Headers come from the first batch's schema; empty input yields an empty string.
+pub fn pretty_table(batches: &[RecordBatch], max_rows: usize) -> String {
+    if batches.is_empty() {
+        return String::new(); // no batch means no schema to take headers from
+    }
+    let schema = batches[0].schema();
+    let headers: Vec<String> = schema.fields().iter().map(|f| f.name().clone()).collect();
+    let ncols = headers.len();
+
+    let mut rows: Vec<Vec<String>> = Vec::new();
+    'outer: for batch in batches {
+        for r in 0..batch.num_rows() {
+            if rows.len() >= max_rows {
+                break 'outer;
+            }
+            rows.push((0..ncols).map(|c| cell(batch.column(c).as_ref(), r)).collect());
+        }
+    }
+
+    let mut widths: Vec<usize> = headers.iter().map(|h| h.chars().count()).collect();
+    for row in &rows {
+        for (i, v) in row.iter().enumerate() {
+            widths[i] = widths[i].max(v.chars().count());
+        }
+    }
+
+    let sep = |out: &mut String| {
+        out.push('+');
+        for w in &widths {
+            out.push_str(&"-".repeat(w + 2));
+            out.push('+');
+        }
+        out.push('\n');
+    };
+    let line = |out: &mut String, cells: &[String]| {
+        out.push('|');
+        for (i, c) in cells.iter().enumerate() {
+            out.push_str(&format!(" {:<w$} |", c, w = widths[i]));
+        }
+        out.push('\n');
+    };
+
+    let mut out = String::new();
+    sep(&mut out);
+    line(&mut out, &headers);
+    sep(&mut out);
+    for row in &rows {
+        line(&mut out, row);
+    }
+    sep(&mut out);
+    out
+}
+
+/// Render up to `max_rows` rows as newline-delimited JSON, one object per row.
+/// Keys come from the first batch's schema; empty input yields an empty string.
+pub fn ndjson(batches: &[RecordBatch], max_rows: usize) -> String {
+    if batches.is_empty() {
+        return String::new(); // no batch means no schema to take key names from
+    }
+    let schema = batches[0].schema();
+    let names: Vec<String> = schema.fields().iter().map(|f| f.name().clone()).collect();
+    let mut out = String::new();
+    let mut got = 0usize;
+    'outer: for batch in batches {
+        for r in 0..batch.num_rows() {
+            if got >= max_rows {
+                break 'outer;
+            }
+            let fields: Vec<String> = names
+                .iter()
+                .enumerate()
+                .map(|(c, n)| format!("{}:{}", json_str(n), cell_json(batch.column(c).as_ref(), r)))
+                .collect();
+            out.push_str(&format!("{{{}}}\n", fields.join(",")));
+            got += 1;
+        }
+    }
+    out
+}
+
+/// Render one Arrow cell as valid JSON: `null`, a bare bool/finite number, else a quoted string.
+fn cell_json(arr: &dyn Array, row: usize) -> String {
+    use arrow_schema::DataType::*;
+    let text = cell(arr, row);
+    match arr.data_type() {
+        _ if arr.is_null(row) => "null".to_string(),
+        Boolean | Int8 | Int16 | Int32 | Int64 => text,
+        Float32 | Float64 if text.parse::<f64>().map_or(false, f64::is_finite) => text,
+        _ => json_str(&text), // strings, dates, NaN/inf and unsupported ("?") cells are quoted
+    }
+}
+
+/// Render one Arrow cell as display text, dispatching on the column's data type.
+/// Nulls render as the empty string and unsupported types as `"?"`.
+fn cell(arr: &dyn Array, row: usize) -> String {
+    use arrow_array::*;
+    use arrow_schema::DataType as T;
+    // Downcast to the concrete array type and stringify the value at `row`.
+    macro_rules! show {
+        ($array:ty) => {
+            arr.as_any().downcast_ref::<$array>().unwrap().value(row).to_string()
+        };
+    }
+    match arr.data_type() {
+        _ if arr.is_null(row) => String::new(),
+        T::Boolean => show!(BooleanArray),
+        T::Int8 => show!(Int8Array),
+        T::Int16 => show!(Int16Array),
+        T::Int32 => show!(Int32Array),
+        T::Int64 => show!(Int64Array),
+        T::Float32 => show!(Float32Array),
+        T::Float64 => show!(Float64Array),
+        T::Date32 => show!(Date32Array),
+        T::Utf8 => show!(StringArray),
+        _ => String::from("?"),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_array::{Int32Array, StringArray};
+    use arrow_schema::{DataType, Field, Schema};
+    use std::sync::Arc;
+
+    /// Two-row batch: `id` = [1, 2] (non-null) and `name` = ["ann", null].
+    fn sample() -> RecordBatch {
+        let fields = vec![
+            Field::new("id", DataType::Int32, false),
+            Field::new("name", DataType::Utf8, true),
+        ];
+        let ids = Int32Array::from(vec![1, 2]);
+        let names = StringArray::from(vec![Some("ann"), None]);
+        RecordBatch::try_new(Arc::new(Schema::new(fields)), vec![Arc::new(ids), Arc::new(names)])
+            .unwrap()
+    }
+
+    /// Quotes and newlines are escaped; plain text is only wrapped in quotes.
+    #[test]
+    fn json_str_escapes() {
+        assert_eq!(json_str("x"), "\"x\"");
+        assert_eq!(json_str("a\"b\n"), "\"a\\\"b\\n\"");
+    }
+
+    /// Nulls, integers and UTF-8 strings render to their plain text forms.
+    #[test]
+    fn render_value_types() {
+        assert_eq!(render_value(&Value::Null), "null");
+        assert_eq!(render_value(&Value::Integer(5)), "5");
+        assert_eq!(render_value(&Value::String(b"hi".to_vec())), "hi");
+    }
+
+    /// Numbers stay bare, strings are quoted and a null cell becomes `null`.
+    #[test]
+    fn ndjson_renders_null_and_quotes() {
+        let rendered = ndjson(&[sample()], 10);
+        assert_eq!(rendered, "{\"id\":1,\"name\":\"ann\"}\n{\"id\":2,\"name\":null}\n");
+    }
+
+    /// With `max_rows` = 1 only the first row appears, padded to the header width.
+    #[test]
+    fn pretty_table_truncates_and_aligns() {
+        let table = pretty_table(&[sample()], 1);
+        assert!(table.contains("| id ") && table.contains("| 1  "));
+        assert!(!table.contains("| 2 "));
+    }
+}
diff --git a/cli/src/input.rs b/cli/src/input.rs
new file mode 100644
index 0000000..b6e3722
--- /dev/null
+++ b/cli/src/input.rs
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fs::File;
+use std::io;
+use std::path::Path;
+
+use paimon_mosaic_core::reader::InputFile;
+
+/// A read-only [`InputFile`] backed by a real file using positional reads.
+///
+/// Every read names its own offset (`read_exact_at` on Unix, `seek_read` on Windows)
+/// instead of depending on a shared cursor, so concurrent reader calls stay safe (`Sync`).
+pub struct FileInput {
+    file: File, // handle opened by `FileInput::open`
+    len: u64, // file length in bytes, captured from metadata at open time
+}
+
+impl FileInput {
+    /// Open `path` read-only and capture its length from the file metadata.
+    pub fn open(path: &Path) -> io::Result<Self> {
+        let file = File::open(path)?;
+        file.metadata().map(|meta| Self { len: meta.len(), file })
+    }
+    /// File length in bytes, as recorded when the file was opened.
+    pub fn len(&self) -> u64 {
+        self.len
+    }
+}
+
+impl InputFile for FileInput { // NOTE(review): only unix and windows bodies exist; other targets get an empty body
+    fn read_at(&self, offset: u64, buf: &mut [u8]) -> io::Result<()> { // fill all of `buf` from `offset`, or fail
+        #[cfg(unix)]
+        {
+            use std::os::unix::fs::FileExt;
+            self.file.read_exact_at(buf, offset) // positional read of exactly `buf.len()` bytes
+        }
+        #[cfg(windows)]
+        {
+            use std::os::windows::fs::FileExt;
+            let mut read = 0; // bytes of `buf` filled so far
+            while read < buf.len() {
+                let n = self.file.seek_read(&mut buf[read..], offset + read as u64)?; // may be a short read
+                if n == 0 { // end of file reached before `buf` was filled
+                    return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "read past end"));
+                }
+                read += n;
+            }
+            Ok(())
+        }
+    }
+}
diff --git a/cli/src/main.rs b/cli/src/main.rs
new file mode 100644
index 0000000..5806386
--- /dev/null
+++ b/cli/src/main.rs
@@ -0,0 +1,352 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+mod fmt;
+mod input;
+
+use std::path::PathBuf;
+use std::process::ExitCode;
+
+use arrow_array::RecordBatch;
+use clap::{Parser, Subcommand};
+use paimon_mosaic_core::reader::{MosaicReader, ReaderAccess};
+
+use crate::input::FileInput;
+
+/// Mosaic file inspector — the cat/meta/schema/pages toolkit (cf. parquet-cli).
+#[derive(Parser)]
+#[command(name = "mosaic", version, about)]
+struct Cli {
+    #[command(subcommand)]
+    cmd: Cmd, // the selected subcommand together with its parsed arguments
+}
+
+#[derive(Subcommand)]
+enum Cmd {
+    /// Print the column names, types, nullability and bucket assignment.
+    Schema {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print row-group / bucket / stats metadata.
+    Meta {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print per-column encoding and slot size for each row group.
+    Pages {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print the first N rows as a table.
+    Cat {
+        file: PathBuf,
+        /// Number of rows to print.
+        #[arg(short = 'n', long, default_value_t = 10)]
+        num: usize,
+        /// Comma-separated columns to project.
+        #[arg(short, long)]
+        columns: Option<String>,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print the first N rows (alias of cat).
+    Head {
+        file: PathBuf,
+        #[arg(short = 'n', long, default_value_t = 10, help = "Number of rows to print")]
+        num: usize,
+        #[arg(short, long, help = "Comma-separated columns to project")]
+        columns: Option<String>,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print the file footer summary: magic, version, buckets, row groups, compression.
+    Footer {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print on-disk bytes per column (summed over row groups).
+    ColumnSize {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print the dictionary of a dict-encoded column.
+    Dictionary {
+        file: PathBuf,
+        /// Column name to dump.
+        #[arg(short = 'c', long)]
+        column: String,
+        #[arg(long)]
+        json: bool,
+    },
+    /// Print bucket layout per row group (Mosaic's column grouping).
+    Buckets {
+        file: PathBuf,
+        #[arg(long)]
+        json: bool,
+    },
+}
+
+/// Parse the command line, run the selected subcommand, and map its outcome to
+/// the process exit code; a failure is reported on stderr as `error: …`.
+fn main() -> ExitCode {
+    let outcome = match Cli::parse().cmd {
+        Cmd::Schema { file, json } => schema(&file, json),
+        Cmd::Meta { file, json } => meta(&file, json),
+        Cmd::Pages { file, json } => pages(&file, json),
+        // `head` is a plain alias: both variants run the same implementation.
+        Cmd::Cat { file, num, columns, json } => cat(&file, num, columns, json),
+        Cmd::Head { file, num, columns, json } => cat(&file, num, columns, json),
+        Cmd::Footer { file, json } => footer(&file, json),
+        Cmd::ColumnSize { file, json } => column_size(&file, json),
+        Cmd::Dictionary { file, column, json } => dictionary(&file, &column, json),
+        Cmd::Buckets { file, json } => buckets(&file, json),
+    };
+    if let Err(e) = outcome {
+        eprintln!("error: {e}");
+        return ExitCode::FAILURE;
+    }
+    ExitCode::SUCCESS
+}
+
+fn open(file: &std::path::Path) -> std::io::Result<MosaicReader<FileInput>> { // `&PathBuf` callers coerce
+    let input = FileInput::open(file)?;
+    let len = input.len(); // read the length before `input` is moved into the reader
+    MosaicReader::new(input, len)
+}
+
+/// Column indices arranged in original (write) order instead of the name-sorted layout.
+fn original_order(s: &paimon_mosaic_core::schema::MosaicSchema) -> Vec<usize> {
+    let ncols = s.columns.len();
+    // rank[sorted] = original position recorded for the column at sorted index `sorted`.
+    let mut rank = vec![0usize; ncols];
+    s.original_order.iter().enumerate().for_each(|(orig, &sorted)| rank[sorted] = orig);
+    let mut order: Vec<usize> = (0..ncols).collect();
+    order.sort_by_key(|&idx| rank[idx]);
+    order
+}
+
+/// `schema` command: list every column (write order) with type, nullability and bucket.
+fn schema(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let s = reader.schema();
+    let ordered = original_order(s).into_iter().map(|i| &s.columns[i]);
+    if !json {
+        println!("{} columns, {} buckets", s.columns.len(), s.num_buckets);
+        for c in ordered {
+            let null = if c.nullable { "" } else { " not null" };
+            println!("  {}: {:?}{} [bucket {}]", c.name, c.data_type, null, c.bucket_id);
+        }
+        return Ok(());
+    }
+    let fields: Vec<String> = ordered.map(|c| {
+        let ty = fmt::json_str(&format!("{:?}", c.data_type));
+        format!("{{\"name\":{},\"type\":{},\"nullable\":{},\"bucket\":{}}}",
+            fmt::json_str(&c.name), ty, c.nullable, c.bucket_id)
+    }).collect();
+    println!("{{\"columns\":{},\"buckets\":{},\"fields\":[{}]}}", s.columns.len(), s.num_buckets, fields.join(","));
+    Ok(())
+}
+
+/// `meta` command: file totals, then the row count and per-column stats of each row group.
+fn meta(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let (s, nrg) = (reader.schema(), reader.num_row_groups());
+    let total = (0..nrg).try_fold(0usize, |acc, rg| reader.row_group_num_rows(rg).map(|n| acc + n))?; // a failed read aborts, never counts as 0
+    if json {
+        let mut rgs = Vec::new();
+        for rg in 0..nrg {
+            let st: Vec<String> = reader.row_group_stats(rg)?.iter().map(|x| {
+                let mm = match (&x.min, &x.max) {
+                    (Some(lo), Some(hi)) => format!(",\"min\":{},\"max\":{}", fmt::json_str(&fmt::render_value(lo)), fmt::json_str(&fmt::render_value(hi))),
+                    _ => String::new(),
+                };
+                format!("{{\"column\":{},\"nulls\":{}{}}}", fmt::json_str(&s.columns[x.column_index].name), x.null_count, mm)
+            }).collect();
+            rgs.push(format!("{{\"rows\":{},\"stats\":[{}]}}", reader.row_group_num_rows(rg)?, st.join(",")));
+        }
+        println!("{{\"rows\":{},\"columns\":{},\"buckets\":{},\"row_groups\":[{}]}}", total, s.columns.len(), s.num_buckets, rgs.join(","));
+        return Ok(());
+    }
+    println!("file: {} rows, {} columns, {} buckets, {} row groups", total, s.columns.len(), s.num_buckets, nrg);
+    for rg in 0..nrg {
+        println!("row group {rg}: {} rows", reader.row_group_num_rows(rg)?);
+        for st in reader.row_group_stats(rg)? {
+            let mm = match (&st.min, &st.max) {
+                (Some(lo), Some(hi)) => format!("min={} max={}", fmt::render_value(lo), fmt::render_value(hi)),
+                _ => "no min/max".to_string(),
+            };
+            println!("    {}: nulls={} {}", s.columns[st.column_index].name, st.null_count, mm);
+        }
+    }
+    Ok(())
+}
+
+/// `pages` command: per row group, each column's bucket, encoding and slot size.
+fn pages(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let s = reader.schema();
+    if !json {
+        for rg in 0..reader.num_row_groups() {
+            println!("row group {rg}:");
+            for p in reader.page_infos(rg)? {
+                println!("    {}: bucket {} encoding={} slot={}B", s.columns[p.column_index].name, p.bucket, fmt::encoding_name(p.encoding), p.slot_size);
+            }
+        }
+        return Ok(());
+    }
+    let mut rendered = Vec::new();
+    for rg in 0..reader.num_row_groups() {
+        let mut items = Vec::new();
+        for p in reader.page_infos(rg)? {
+            let (col, enc) = (fmt::json_str(&s.columns[p.column_index].name), fmt::json_str(fmt::encoding_name(p.encoding)));
+            items.push(format!("{{\"column\":{},\"bucket\":{},\"encoding\":{},\"slot_size\":{}}}", col, p.bucket, enc, p.slot_size));
+        }
+        rendered.push(format!("[{}]", items.join(",")));
+    }
+    println!("{{\"row_groups\":[{}]}}", rendered.join(","));
+    Ok(())
+}
+
+/// `cat` / `head`: print the first `num` rows, optionally projected to `columns`.
+/// Whole row groups are read until at least `num` rows are buffered; the
+/// formatter then truncates the output to `num` rows.
+fn cat(file: &PathBuf, num: usize, columns: Option<String>, json: bool) -> std::io::Result<()> {
+    let mut reader = open(file)?;
+    if let Some(list) = columns.as_deref() {
+        let names: Vec<&str> = list.split(',').map(str::trim).filter(|n| !n.is_empty()).collect();
+        reader.project(&names)?;
+    }
+    let mut batches: Vec<RecordBatch> = Vec::new();
+    let mut buffered = 0usize;
+    for rg in 0..reader.num_row_groups() {
+        if buffered >= num {
+            break;
+        }
+        let batch = reader.row_group_reader(rg)?.read_columns()?;
+        buffered += batch.num_rows();
+        batches.push(batch);
+    }
+    match (batches.is_empty(), json) {
+        (true, true) => {}
+        (true, false) => println!("(no rows)"),
+        (false, true) => print!("{}", fmt::ndjson(&batches, num)),
+        (false, false) => print!("{}", fmt::pretty_table(&batches, num)),
+    }
+    Ok(())
+}
+
+/// `footer` command: print magic, version, bucket count, row-group count and compression.
+/// NOTE(review): magic/version are printed from the `spec` constants, not from the file's bytes.
+fn footer(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    use paimon_mosaic_core::spec::{COMPRESSION_ZSTD, MAGIC, VERSION};
+    let reader = open(file)?;
+    let (buckets, groups) = (reader.schema().num_buckets, reader.num_row_groups());
+    let comp = if reader.compression() == COMPRESSION_ZSTD { "zstd" } else { "none" };
+    let magic = std::str::from_utf8(&MAGIC).unwrap_or("MOSA");
+    if !json {
+        println!("magic={} version={} buckets={} row_groups={} compression={}", magic, VERSION, buckets, groups, comp);
+        return Ok(());
+    }
+    println!("{{\"magic\":{},\"version\":{},\"buckets\":{},\"row_groups\":{},\"compression\":{}}}", fmt::json_str(magic), VERSION, buckets, groups, fmt::json_str(comp));
+    Ok(())
+}
+
+/// `column-size` command: on-disk slot bytes per column, summed across row groups.
+fn column_size(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let s = reader.schema();
+    let mut totals = vec![0usize; s.columns.len()];
+    for rg in 0..reader.num_row_groups() {
+        for page in reader.page_infos(rg)? {
+            totals[page.column_index] += page.slot_size;
+        }
+    }
+    // Report in write order, pairing each column name with its byte total.
+    let report = original_order(s).into_iter().map(|i| (&s.columns[i].name, totals[i]));
+    if json {
+        let items: Vec<String> = report.map(|(name, n)| format!("{{\"column\":{},\"bytes\":{}}}", fmt::json_str(name), n)).collect();
+        println!("[{}]", items.join(","));
+    } else {
+        report.for_each(|(name, n)| println!("  {}: {} B", name, n));
+    }
+    Ok(())
+}
+
+/// `dictionary` command: dump `column`'s dictionary per row group; an unknown column is `InvalidInput`.
+fn dictionary(file: &PathBuf, column: &str, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let missing = || std::io::Error::new(std::io::ErrorKind::InvalidInput, format!("column '{column}' not found"));
+    let col = reader.schema().columns.iter().position(|c| c.name == column).ok_or_else(missing)?;
+    let groups = 0..reader.num_row_groups();
+    if !json {
+        for rg in groups {
+            match reader.dictionary(rg, col)? {
+                None => println!("row group {rg}: not dict-encoded"),
+                Some(vals) => {
+                    println!("row group {rg}: {} entries", vals.len());
+                    vals.iter().enumerate().for_each(|(i, v)| println!("    {i}: {}", fmt::render_value(v)));
+                }
+            }
+        }
+        return Ok(());
+    }
+    let mut rgs = Vec::new();
+    for rg in groups {
+        rgs.push(match reader.dictionary(rg, col)? {
+            Some(vals) => {
+                let entries: Vec<String> = vals.iter().map(|v| fmt::json_str(&fmt::render_value(v))).collect();
+                format!("[{}]", entries.join(","))
+            }
+            None => "null".to_string(),
+        });
+    }
+    println!("{{\"column\":{},\"row_groups\":[{}]}}", fmt::json_str(column), rgs.join(","));
+    Ok(())
+}
+
+/// `buckets` command: per row group, each bucket's kind, size and member columns.
+fn buckets(file: &PathBuf, json: bool) -> std::io::Result<()> {
+    let reader = open(file)?;
+    let s = reader.schema();
+    let mut groups = Vec::new();
+    for rg in 0..reader.num_row_groups() {
+        let infos = reader.bucket_infos(rg)?;
+        if json {
+            let items: Vec<String> = infos.iter().map(|b| {
+                let cols: Vec<String> = b.columns.iter().map(|&i| fmt::json_str(&s.columns[i].name)).collect();
+                format!("{{\"bucket\":{},\"kind\":{},\"size\":{},\"columns\":[{}]}}", b.bucket, fmt::json_str(b.kind), b.size, cols.join(","))
+            }).collect();
+            groups.push(format!("[{}]", items.join(",")));
+            continue;
+        }
+        println!("row group {rg}:");
+        for b in &infos {
+            let cols: Vec<&str> = b.columns.iter().map(|&i| s.columns[i].name.as_str()).collect();
+            println!("    bucket {}: {} {}B [{}]", b.bucket, b.kind, b.size, cols.join(", "));
+        }
+    }
+    if json {
+        println!("{{\"row_groups\":[{}]}}", groups.join(","));
+    }
+    Ok(())
+}
diff --git a/cli/tests/e2e.rs b/cli/tests/e2e.rs
new file mode 100644
index 0000000..f630503
--- /dev/null
+++ b/cli/tests/e2e.rs
@@ -0,0 +1,190 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! End-to-end tests: drive the `mosaic` binary against a fixture file and
+//! assert stdout. Zero external dev-deps — uses CARGO_BIN_EXE and std only.
+
+use std::fs::File;
+use std::io::Write;
+use std::process::Command;
+use std::sync::Arc;
+
+use arrow_array::{Int32Array, RecordBatch, StringArray};
+use arrow_schema::{DataType, Field, Schema};
+use paimon_mosaic_core::writer::{MosaicWriter, OutputFile, WriterOptions};
+
+struct FileOut { // file-backed `OutputFile` used only to build test fixtures
+    f: File, // destination file
+    pos: u64, // bytes written so far, reported by `pos()`
+}
+impl OutputFile for FileOut {
+    /// Append `d` in full, then advance the tracked position by its length.
+    fn write(&mut self, d: &[u8]) -> std::io::Result<()> {
+        self.f.write_all(d).map(|()| self.pos += d.len() as u64)
+    }
+    /// Flush the underlying file.
+    fn flush(&mut self) -> std::io::Result<()> {
+        Write::flush(&mut self.f)
+    }
+    fn pos(&self) -> u64 {
+        self.pos
+    }
+}
+
+/// Write a small three-column fixture to the system temp dir and return its path.
+fn fixture(name: &str) -> String {
+    let path = std::env::temp_dir().join(format!("mosaic_e2e_{name}.mosaic")); // join, not "/"-concat
+    let schema = Schema::new(vec![
+        Field::new("id", DataType::Int32, false),
+        Field::new("kind", DataType::Utf8, true),
+        Field::new("flag", DataType::Int32, true),
+    ]);
+    let out = FileOut { f: File::create(&path).unwrap(), pos: 0 };
+    let opts = WriterOptions {
+        num_buckets: 3,
+        page_size_threshold: 1,
+        stats_columns: vec!["id".into()],
+        ..Default::default()
+    };
+    let mut w = MosaicWriter::new(out, &schema, opts).unwrap();
+    let n = 200;
+    let ids: Vec<i32> = (0..n).collect();
+    let kinds: Vec<&str> = (0..n).map(|i| ["a", "b", "c"][(i % 3) as usize]).collect();
+    let flags = vec![7; n as usize];
+    let batch = RecordBatch::try_new(
+        Arc::new(schema),
+        vec![
+            Arc::new(Int32Array::from(ids)),
+            Arc::new(StringArray::from(kinds)),
+            Arc::new(Int32Array::from(flags)),
+        ],
+    )
+    .unwrap();
+    w.write_batch(&batch).unwrap();
+    w.close().unwrap();
+    path.to_string_lossy().into_owned()
+}
+
+/// Run the `mosaic` binary with `args`; returns (stdout, stderr, exited successfully).
+fn run(args: &[&str]) -> (String, String, bool) {
+    let out = Command::new(env!("CARGO_BIN_EXE_mosaic")).args(args).output().unwrap();
+    // Both streams must be valid UTF-8; anything else fails the test here.
+    let text = |bytes: Vec<u8>| String::from_utf8(bytes).unwrap();
+    let ok = out.status.success();
+    (text(out.stdout), text(out.stderr), ok)
+}
+
+#[test]
+fn schema_lists_columns() {
+    let path = fixture("schema");
+    let (stdout, _stderr, success) = run(&["schema", &path]);
+    assert!(success);
+    for needle in ["3 columns, 3 buckets", "id: Int32 not null", "kind: Utf8"] {
+        assert!(stdout.contains(needle));
+    }
+}
+
+#[test]
+fn meta_shows_stats() {
+    // `id` is the fixture's only stats column, so its line carries min/max.
+    let path = fixture("meta");
+    let (stdout, _, success) = run(&["meta", &path]);
+    assert!(success && stdout.contains("200 rows"));
+    assert!(stdout.contains("id: nulls=0 min=0 max=199"));
+}
+
+#[test]
+fn pages_shows_encodings() {
+    // The constant `flag` column and the three-valued `kind` column get compact encodings.
+    let path = fixture("pages");
+    let (stdout, _, success) = run(&["pages", &path]);
+    assert!(success);
+    assert!(["flag: bucket 0 encoding=const", "kind: bucket 2 encoding=dict"].iter().all(|s| stdout.contains(*s)));
+}
+
+#[test]
+fn cat_truncates_and_projects() {
+    let f = fixture("cat");
+    let (out, _, ok) = run(&["cat", &f, "-n", "2"]);
+    assert!(ok);
+    assert!(out.contains("| id | kind | flag |"));
+    assert_eq!(out.matches('\n').count(), 6); // 3 borders + header + 2 rows
+    let (proj, err, ok) = run(&["cat", &f, "-c", "kind,id", "-n", "1"]);
+    assert!(ok && proj.contains("| kind | id |"), "projected cat failed: {err}"); // exit status is now checked
+}
+
+#[test]
+fn cat_json_is_ndjson() {
+    let path = fixture("json");
+    let (stdout, _, success) = run(&["cat", &path, "-n", "2", "--json"]);
+    let expected = "{\"id\":0,\"kind\":\"a\",\"flag\":7}\n{\"id\":1,\"kind\":\"b\",\"flag\":7}\n";
+    assert_eq!((success, stdout.as_str()), (true, expected));
+}
+
+#[test]
+fn missing_file_fails() {
+    // A nonexistent path must exit non-zero and report through stderr.
+    let (_stdout, stderr, success) = run(&["schema", "/no/such/file.mosaic"]);
+    assert!(!success && stderr.contains("error:"));
+}
+
+#[test]
+fn footer_shows_format() {
+    let path = fixture("footer");
+    let (text, _, text_ok) = run(&["footer", &path]);
+    assert!(text_ok);
+    for pair in ["magic=MOSA", "buckets=3", "compression=zstd"] {
+        assert!(text.contains(pair));
+    }
+    // The JSON form carries the same facts as quoted members.
+    let (j, _, json_ok) = run(&["footer", &path, "--json"]);
+    assert!(json_ok && j.contains("\"magic\":\"MOSA\"") && j.contains("\"compression\":\"zstd\""));
+}
+
+#[test]
+fn dictionary_dumps_entries() {
+    let path = fixture("dict");
+    let (text, _, text_ok) = run(&["dictionary", &path, "-c", "kind"]);
+    assert!(text_ok);
+    assert!(text.contains("3 entries"));
+    assert!(["a", "b", "c"].iter().all(|entry| text.contains(*entry)));
+    // `kind` cycles through three values, so the one row group lists three entries.
+    let (j, _, json_ok) = run(&["dictionary", &path, "-c", "kind", "--json"]);
+    assert_eq!((json_ok, j.as_str()), (true, "{\"column\":\"kind\",\"row_groups\":[[\"a\",\"b\",\"c\"]]}\n"));
+}
+
+#[test]
+fn column_size_sums_bytes() {
+    let path = fixture("size");
+    let (stdout, _, success) = run(&["column-size", &path]);
+    assert!(success);
+    assert!(["id:", "kind:"].iter().all(|label| stdout.contains(*label)));
+    assert!(stdout.contains("flag: 0 B")); // const column has no slot
+}
+
+#[test]
+fn buckets_show_layout() {
+    let path = fixture("buckets");
+    let (text, _, text_ok) = run(&["buckets", &path]);
+    assert!(text_ok);
+    assert!(text.contains("row group 0:"));
+    // Three columns over three buckets: each bucket lists exactly one member.
+    assert!(["[flag]", "[id]", "[kind]"].iter().all(|members| text.contains(*members)));
+    assert!(["monolithic", "paged"].iter().any(|kind| text.contains(*kind)));
+    let (j, _, json_ok) = run(&["buckets", &path, "--json"]);
+    assert!(json_ok && j.contains("\"bucket\":0") && j.contains("\"columns\":"));
+}
diff --git a/core/src/bucket_reader.rs b/core/src/bucket_reader.rs
index 115bae4..2655ebb 100644
--- a/core/src/bucket_reader.rs
+++ b/core/src/bucket_reader.rs
@@ -507,6 +507,16 @@ impl BucketReader {
         self.num_rows - null_count
     }
 
+    /// Encoding id (one of `spec::ENCODING_*`) of every column, in this bucket's column order.
+    pub fn encodings(&self) -> &[u8] {
+        &self.encodings
+    }
+
+    /// Dictionary entries for bucket-local column `col` (empty if not dict-encoded). Indexing panics if `col` is out of range.
+    pub fn dict_values(&self, col: usize) -> &[Value] {
+        &self.dict_values[col]
+    }
+
     pub fn read_all_columns(&self) -> io::Result<Vec<ArrayRef>> {
         let num_rows = self.num_rows;
         let mut result = Vec::with_capacity(self.num_columns);
@@ -768,6 +778,16 @@ impl ColumnPageReader {
         }
     }
 
+    /// Encoding id of this column page, returned as the raw byte (one of `spec::ENCODING_*`).
+    pub fn encoding(&self) -> u8 {
+        self.encoding
+    }
+
+    /// Borrowed dictionary entries of a dict-encoded page (an empty slice for other encodings).
+    pub fn dict_values(&self) -> &[Value] {
+        &self.dict_values
+    }
+
     pub fn read_all(&self) -> io::Result<ArrayRef> {
         let num_rows = self.num_rows;
         let variant = data_variant_for_type(&self.col_type);
diff --git a/core/src/reader.rs b/core/src/reader.rs
index e111b9d..ac89dfa 100644
--- a/core/src/reader.rs
+++ b/core/src/reader.rs
@@ -179,6 +179,27 @@ fn read_merged_ranges<I: InputFile + ?Sized>(
     Ok((merged, fetched))
 }
 
+/// Physical placement of one column within one row group, as produced by `page_infos`.
+pub struct PageInfo {
+    pub column_index: usize, // global column index (an entry of the schema's `bucket_to_global`)
+    pub bucket: usize, // index of the bucket holding this column
+    /// `spec::ENCODING_*`: plain / const / dict / all_null.
+    pub encoding: u8,
+    /// Paged-bucket on-disk slot size in bytes; 0 for monolithic/empty buckets.
+    pub slot_size: usize,
+}
+
+/// Layout of one bucket within one row group, as produced by `bucket_infos`.
+pub struct BucketInfo {
+    pub bucket: usize, // bucket index in `0..num_buckets`
+    /// Layout kind, one of the literals "empty" | "monolithic" | "paged".
+    pub kind: &'static str,
+    /// On-disk size in bytes: compressed size (monolithic), total size (paged), 0 (empty).
+    pub size: usize,
+    /// Member column indices (global, name-sorted order).
+    pub columns: Vec<usize>,
+}
+
 pub struct RowGroupMeta {
     pub num_rows: usize,
     pub bucket_offsets: Vec<u64>,
@@ -447,6 +468,97 @@ impl<I: InputFile> MosaicReader<I> {
         &self.input
     }
 
+    /// Footer compression code, returned as the raw byte (one of `spec::COMPRESSION_*`).
+    pub fn compression(&self) -> u8 {
+        self.compression
+    }
+
+    /// Layout kind, on-disk size and member columns (global indices) of every bucket in
+    /// row group `rg_index`, for the `buckets` command; `InvalidInput` if out of range.
+    pub fn bucket_infos(&self, rg_index: usize) -> io::Result<Vec<BucketInfo>> {
+        let meta = self.row_group_metas.get(rg_index).ok_or_else(|| {
+            io::Error::new(io::ErrorKind::InvalidInput, "row group index out of range")
+        })?;
+        let mut infos = Vec::with_capacity(self.num_buckets);
+        for bucket in 0..self.num_buckets {
+            let (kind, size) = match meta.bucket_layouts[bucket] {
+                BucketLayout::Empty => ("empty", 0),
+                BucketLayout::Monolithic { compressed_size, .. } => ("monolithic", compressed_size),
+                BucketLayout::Paged { total_size } => ("paged", total_size),
+            };
+            infos.push(BucketInfo { bucket, kind, size, columns: self.schema.bucket_to_global[bucket].clone() });
+        }
+        Ok(infos)
+    }
+
+    /// Returns the dictionary of global column `col` in row group `rg_index`, or `Ok(None)` when
+    /// it is not dict-encoded there. Opens a reader projected onto that single column (`dictionary` command).
+    pub fn dictionary(&self, rg_index: usize, col: usize) -> io::Result<Option<Vec<Value>>> {
+        let projected = self.row_group_reader_projected(rg_index, &[col])?;
+        Ok(projected.take_dictionary(col))
+    }
+
+    /// Per-column physical layout for a row group: bucket, encoding and on-disk
+    /// slot size. Decompresses each non-empty monolithic bucket and reads every
+    /// non-empty slot of a paged bucket; used by tooling (the `pages` command).
+    /// Columns are reported in global (name-sorted) order.
+    ///
+    /// # Errors
+    /// `InvalidInput` if `rg_index` is out of range; `InvalidData` for an unsupported
+    /// compression code, failed decompression or a short slot directory; read/parse errors propagate.
+    pub fn page_infos(&self, rg_index: usize) -> io::Result<Vec<PageInfo>> {
+        let meta = self.row_group_metas.get(rg_index).ok_or_else(|| {
+            io::Error::new(io::ErrorKind::InvalidInput, "row group index out of range")
+        })?;
+        let mut out = Vec::with_capacity(self.schema.columns.len());
+        for b in 0..self.num_buckets {
+            let globals = &self.schema.bucket_to_global[b];
+            match meta.bucket_layouts[b] {
+                BucketLayout::Empty => {
+                    for &gi in globals {
+                        out.push(PageInfo { column_index: gi, bucket: b, encoding: ENCODING_ALL_NULL, slot_size: 0 });
+                    }
+                }
+                BucketLayout::Monolithic { compressed_size, uncompressed_size } => {
+                    let buf = read_range(&self.input, meta.bucket_offsets[b], compressed_size)?;
+                    let data = match self.compression {
+                        COMPRESSION_NONE => buf,
+                        COMPRESSION_ZSTD => zstd::bulk::decompress(&buf, uncompressed_size)
+                            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?,
+                        _ => return Err(io::Error::new(io::ErrorKind::InvalidData, "unsupported compression")),
+                    };
+                    let col_types: Vec<DataType> = globals.iter().map(|&gi| self.schema.columns[gi].data_type.clone()).collect();
+                    let reader = BucketReader::new(col_types, data, meta.num_rows)?;
+                    for (local, &gi) in globals.iter().enumerate() {
+                        out.push(PageInfo { column_index: gi, bucket: b, encoding: reader.encodings()[local], slot_size: 0 });
+                    }
+                }
+                BucketLayout::Paged { .. } => {
+                    // Directory of little-endian u32 slot sizes (one per member column); slots follow in order.
+                    let dir_size = globals.len() * 4;
+                    let dir = read_range(&self.input, meta.bucket_offsets[b], dir_size)?;
+                    if dir.len() < dir_size {
+                        return Err(io::Error::new(io::ErrorKind::InvalidData, "truncated slot directory"));
+                    }
+                    let mut foff = meta.bucket_offsets[b] + dir_size as u64;
+                    for (entry, &gi) in dir.chunks_exact(4).zip(globals) {
+                        let slot_size = u32::from_le_bytes([entry[0], entry[1], entry[2], entry[3]]) as usize;
+                        let encoding = if slot_size == 0 {
+                            ENCODING_ALL_NULL
+                        } else {
+                            let slot = read_range(&self.input, foff, slot_size)?;
+                            Self::parse_column_slot(&slot, &self.schema.columns[gi].data_type, meta.num_rows)?.encoding()
+                        };
+                        out.push(PageInfo { column_index: gi, bucket: b, encoding, slot_size });
+                        foff += slot_size as u64;
+                    }
+                }
+            }
+        }
+        out.sort_by_key(|p| p.column_index);
+        Ok(out)
+    }
+
     fn parse_column_slot(
         slot_data: &[u8],
         col_type: &DataType,
@@ -965,6 +1077,22 @@ impl RowGroupReader {
         }
     }
 
+    /// Dictionary entries for a projected column, or `None` if not dict-encoded.
+    /// Also `None` when `global_col` is not a schema column or when this reader
+    /// holds no state for the column's bucket/page. Entries are cloned, not moved
+    /// out, despite the `take_` name.
+    pub fn take_dictionary(&self, global_col: usize) -> Option<Vec<Value>> {
+        // Checked lookup: an unknown column index yields `None` instead of a panic.
+        let bucket = self.schema.columns.get(global_col)?.bucket_id;
+        let local = self.bucket_to_global[bucket].iter().position(|&g| g == global_col)?;
+        let entries = match self.bucket_states[bucket].as_ref()? {
+            BucketState::Paged { column_readers } => column_readers[local].as_ref()?.dict_values(),
+            BucketState::Monolithic { reader } => reader.dict_values(local),
+        };
+        // Both reader kinds hand back an empty slice for a column that is not dict-encoded.
+        if entries.is_empty() { None } else { Some(entries.to_vec()) }
+    }
+
     pub fn num_rows(&self) -> usize {
         self.num_rows
     }
diff --git a/core/src/reader_tests.rs b/core/src/reader_tests.rs
index 55eecee..fde6b55 100644
--- a/core/src/reader_tests.rs
+++ b/core/src/reader_tests.rs
@@ -4180,3 +4180,54 @@ fn test_row_group_num_rows_out_of_range() {
     assert!(reader.row_group_num_rows(1).is_err());
     assert!(reader.row_group_num_rows(999).is_err());
 }
+
+#[test]
+fn test_page_infos_encodings() {
+    use crate::spec::{ENCODING_CONST, ENCODING_DICT, ENCODING_PLAIN};
+    // One column per encoding the writer is expected to pick.
+    let columns = vec![
+        ("id".to_string(), DataType::Int32, false),  // all values distinct -> plain
+        ("kind".to_string(), DataType::Utf8, true),  // three distinct values -> dict
+        ("flag".to_string(), DataType::Int32, true), // one repeated value -> const
+    ];
+    let schema = columns_to_arrow_schema(&columns);
+    // A page size threshold of 1 forces paged buckets.
+    let options = WriterOptions {
+        num_buckets: 3,
+        page_size_threshold: 1,
+        ..Default::default()
+    };
+    let mut writer = MosaicWriter::new(MemOutputFile::new(), &schema, options).unwrap();
+
+    let kinds = ["a", "b", "c"];
+    let mut rows: Vec<Vec<Value>> = Vec::with_capacity(200);
+    for i in 0..200 {
+        let id = Value::Integer(i);
+        let kind = Value::String(kinds[(i % 3) as usize].as_bytes().to_vec());
+        rows.push(vec![id, kind, Value::Integer(7)]);
+    }
+    write_values(&mut writer, &columns, &rows);
+    writer.close().unwrap();
+
+    // Reopen the written bytes through the reader.
+    let bytes = writer.output().buf.clone();
+    let file_len = bytes.len() as u64;
+    let reader = MosaicReader::new(ByteArrayInputFile::new(bytes), file_len).unwrap();
+
+    let infos = reader.page_infos(0).unwrap();
+    assert_eq!(infos.len(), 3);
+    // Entries come back sorted by column_index (name-sorted: flag, id, kind), so
+    // look each one up by column name rather than by position.
+    let info_for = |name: &str| {
+        let schema_cols = &reader.schema().columns;
+        infos.iter().find(|p| schema_cols[p.column_index].name == name).unwrap()
+    };
+    let (id, kind, flag) = (info_for("id"), info_for("kind"), info_for("flag"));
+    assert_eq!(id.encoding, ENCODING_PLAIN);
+    assert_eq!(kind.encoding, ENCODING_DICT);
+    assert_eq!(flag.encoding, ENCODING_CONST);
+    assert!(id.slot_size > 0);
+
+    // An unknown row group index is reported as an error.
+    assert!(reader.page_infos(999).is_err());
+}
diff --git a/docs/cli.html b/docs/cli.html
new file mode 100644
index 0000000..6e7e7dd
--- /dev/null
+++ b/docs/cli.html
@@ -0,0 +1,174 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>CLI - Paimon Mosaic</title>
+    <link rel="stylesheet" href="css/style.css">
+    <script src="js/main.js"></script>
+</head>
+<body>
+    <button class="menu-toggle" aria-label="Menu">&#9776;</button>
+    <div class="overlay"></div>
+
+    <aside class="sidebar">
+        <div class="sidebar-header">
+            <h2>Paimon Mosaic</h2>
+            <p>Columnar-bucket hybrid format</p>
+        </div>
+        <nav>
+            <ul>
+                <li><a href="index.html">Home</a></li>
+                <li><a href="design.html">Design</a></li>
+                <li><a href="java-api.html">Java API</a></li>
+                <li><a href="python-api.html">Python API</a></li>
+                <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html" class="active">CLI</a></li>
+                <li><a href="releases.html">Releases</a></li>
+            </ul>
+        </nav>
+        <div class="sidebar-footer">
+            <button class="theme-toggle">Dark Mode</button>
+        </div>
+    </aside>
+
+    <main class="main">
+        <div class="content">
+            <h1>CLI</h1>
+            <p class="subtitle">Inspect Mosaic files from the terminal with the <code>mosaic</code> binary, a native, JVM-free toolkit built on the read-only <code>MosaicReader</code> API.</p>
+
+            <h2>Install</h2>
+<pre><code><span class="cmt"># run from source</span>
+cargo run -p paimon-mosaic-cli -- schema data.mosaic
+
+<span class="cmt"># install the `mosaic` binary</span>
+cargo install --path cli
+mosaic schema data.mosaic</code></pre>
+
+            <h2>Commands</h2>
+            <table>
+                <thead>
+                    <tr><th>Command</th><th>Shows</th><th>Reads</th></tr>
+                </thead>
+                <tbody>
+                    <tr><td><code>schema</code></td><td>column names, Arrow types, nullability, bucket</td><td>footer only</td></tr>
+                    <tr><td><code>meta</code></td><td>row groups, rows, per-column stats</td><td>footer + index</td></tr>
+                    <tr><td><code>footer</code></td><td>magic, version, buckets, compression</td><td>footer only</td></tr>
+                    <tr><td><code>buckets</code></td><td>per-bucket layout and member columns</td><td>footer + index</td></tr>
+                    <tr><td><code>pages</code></td><td>per-column encoding + slot size</td><td>bucket data</td></tr>
+                    <tr><td><code>dictionary</code></td><td>dictionary entries of a dict column</td><td>bucket data</td></tr>
+                    <tr><td><code>column-size</code></td><td>on-disk bytes per column</td><td>footer + index</td></tr>
+                    <tr><td><code>cat</code> / <code>head</code></td><td>first N rows as a table</td><td>column data</td></tr>
+                </tbody>
+            </table>
+            <p>Every command accepts <code>--json</code>. <code>cat</code>/<code>head</code> take <code>-n &lt;N&gt;</code> and <code>-c a,b</code> (projection); <code>dictionary</code> takes <code>-c &lt;col&gt;</code>.</p>
+
+            <h2>schema</h2>
+            <p>Columns, Arrow types, nullability and bucket assignment, in original input order. Footer only.</p>
+<pre><code><span class="cmt">$ mosaic schema data.mosaic</span>
+5 columns, 4 buckets
+  id: Int32 not null [bucket 0]
+  name: Utf8 [bucket 2]
+  kind: Utf8 [bucket 1]
+  score: Float64 [bucket 3]
+  flag: Int32 [bucket 0]
+
+<span class="cmt">$ mosaic schema data.mosaic --json</span>
+{"columns":5,"buckets":4,"fields":[{"name":"id","type":"Int32","nullable":false,"bucket":0}, ...]}</code></pre>
+
+            <h2>meta</h2>
+            <p>Total rows, row groups, and per-column stats (null count / min / max) for columns configured with stats.</p>
+<pre><code><span class="cmt">$ mosaic meta data.mosaic</span>
+file: 200 rows, 5 columns, 4 buckets, 1 row groups
+row group 0: 200 rows
+    id: nulls=0 min=0 max=199
+    score: nulls=0 min=0 max=298.5</code></pre>
+
+            <h2>footer</h2>
+            <p>The 32-byte file footer: magic, format version, bucket count, row groups and compression.</p>
+<pre><code><span class="cmt">$ mosaic footer data.mosaic</span>
+magic=MOSA version=1 buckets=4 row_groups=1 compression=zstd</code></pre>
+
+            <h2>buckets</h2>
+            <p>Per row group, each bucket's layout (empty / monolithic / paged), on-disk size and member columns. Mosaic groups columns into buckets by name order.</p>
+<pre><code><span class="cmt">$ mosaic buckets data.mosaic</span>
+row group 0:
+    bucket 0: paged 373B [flag, id]
+    bucket 1: paged 32B [kind]
+    bucket 2: paged 220B [name]
+    bucket 3: paged 542B [score]</code></pre>
+
+            <h2>pages</h2>
+            <p>Per-column physical encoding (<code>plain</code> / <code>const</code> / <code>dict</code> / <code>all_null</code>) and on-disk slot size.</p>
+<pre><code><span class="cmt">$ mosaic pages data.mosaic</span>
+row group 0:
+    flag: bucket 0 encoding=const slot=16B
+    id: bucket 0 encoding=plain slot=349B
+    kind: bucket 1 encoding=dict slot=28B
+    name: bucket 2 encoding=plain slot=216B
+    score: bucket 3 encoding=plain slot=538B</code></pre>
+
+            <h2>dictionary</h2>
+            <p>Dump the dictionary of a dict-encoded column. Columns that are not dict-encoded are reported as such.</p>
+<pre><code><span class="cmt">$ mosaic dictionary data.mosaic -c kind</span>
+row group 0: 3 entries
+    0: a
+    1: b
+    2: c
+
+<span class="cmt">$ mosaic dictionary data.mosaic -c kind --json</span>
+{"column":"kind","row_groups":[["a","b","c"]]}</code></pre>
+
+            <h2>column-size</h2>
+            <p>On-disk bytes per column, summed across row groups.</p>
+<pre><code><span class="cmt">$ mosaic column-size data.mosaic</span>
+  id: 349 B
+  name: 216 B
+  kind: 28 B
+  score: 538 B
+  flag: 16 B</code></pre>
+
+            <h2>cat / head</h2>
+            <p>Read the first N rows as a table. <code>-n</code> sets the count, <code>-c</code> projects columns, <code>--json</code> emits newline-delimited JSON. <code>head</code> is an alias of <code>cat</code>.</p>
+<pre><code><span class="cmt">$ mosaic cat data.mosaic -n 2</span>
++----+--------+------+-------+------+
+| id | name   | kind | score | flag |
++----+--------+------+-------+------+
+| 0  | user_0 | a    | 0     | 7    |
+| 1  | user_1 | b    | 1.5   | 7    |
++----+--------+------+-------+------+
+
+<span class="cmt">$ mosaic cat data.mosaic -n 2 -c name,score</span>   <span class="cmt"># projection</span>
+
+<span class="cmt">$ mosaic cat data.mosaic -n 2 --json</span>
+{"id":0,"name":"user_0","kind":"a","score":0,"flag":7}
+{"id":1,"name":"user_1","kind":"b","score":1.5,"flag":7}</code></pre>
+
+            <div class="tip">
+                <strong>Embedding instead</strong>
+                For C/C++ or Java callers, embed the format directly via the <code>ffi</code>
+                (<code>mosaic.h</code>) or <code>jni</code> crates rather than shelling out to this CLI.
+            </div>
+        </div>
+    </main>
+</body>
+</html>
diff --git a/docs/cpp-api.html b/docs/cpp-api.html
index 9b3fa4c..5ad8b23 100644
--- a/docs/cpp-api.html
+++ b/docs/cpp-api.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/creating-a-release.html b/docs/creating-a-release.html
index 8b69a4d..91750df 100644
--- a/docs/creating-a-release.html
+++ b/docs/creating-a-release.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/design.html b/docs/design.html
index 84c5e4f..4f142ec 100644
--- a/docs/design.html
+++ b/docs/design.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/index.html b/docs/index.html
index 55e9ec8..0c3ff7b 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/java-api.html b/docs/java-api.html
index f654b99..b413bf8 100644
--- a/docs/java-api.html
+++ b/docs/java-api.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html" class="active">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/python-api.html b/docs/python-api.html
index 7d50afe..e70b372 100644
--- a/docs/python-api.html
+++ b/docs/python-api.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html" class="active">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/releases.html b/docs/releases.html
index 2e6b35b..b5dd083 100644
--- a/docs/releases.html
+++ b/docs/releases.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html" class="active">Releases</a></li>
             </ul>
         </nav>
diff --git a/docs/verifying-a-release-candidate.html b/docs/verifying-a-release-candidate.html
index 6f620de..b9c3ead 100644
--- a/docs/verifying-a-release-candidate.html
+++ b/docs/verifying-a-release-candidate.html
@@ -42,6 +42,7 @@ <h2>Paimon Mosaic</h2>
                 <li><a href="java-api.html">Java API</a></li>
                 <li><a href="python-api.html">Python API</a></li>
                 <li><a href="cpp-api.html">C++ API</a></li>
+                <li><a href="cli.html">CLI</a></li>
                 <li><a href="releases.html">Releases</a></li>
             </ul>
         </nav>

Command	Shows	Reads
`schema`	column names, Arrow types, nullability, bucket	footer only
`meta`	row groups, rows, per-column stats	footer + index
`footer`	magic, version, buckets, compression	footer only
`buckets`	per-bucket layout and member columns	footer + index
`pages`	per-column encoding + slot size	bucket data
`dictionary`	dictionary entries of a dict column	bucket data
`column-size`	on-disk bytes per column	footer + index
`cat` / `head`	first N rows as a table	column data