diff --git a/Cargo.lock b/Cargo.lock index 133aa0aa16202..fcf13dd3cc382 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4340,6 +4340,7 @@ dependencies = [ "databend-storages-common-table-meta", "divan", "enum-as-inner", + "enum_dispatch", "fastrace", "futures", "futures-util", @@ -4351,6 +4352,7 @@ dependencies = [ "opendal", "parking_lot 0.12.3", "parquet", + "paste", "rand 0.8.5", "serde", "serde_json", diff --git a/scripts/selfhost/restore_logs.sh b/scripts/selfhost/restore_logs.sh index 324b369b5ddf0..948216eab4856 100755 --- a/scripts/selfhost/restore_logs.sh +++ b/scripts/selfhost/restore_logs.sh @@ -2,52 +2,52 @@ # Simple logging log() { - echo "[$(date '+%H:%M:%S')] $1" + echo "[$(date '+%H:%M:%S')] $1" } log_error() { - echo "[$(date '+%H:%M:%S')] ERROR: $1" >&2 + echo "[$(date '+%H:%M:%S')] ERROR: $1" >&2 } log_step() { - echo "[$(date '+%H:%M:%S')] [$1/$2] $3" + echo "[$(date '+%H:%M:%S')] [$1/$2] $3" } # Parse arguments while [[ $# -gt 0 ]]; do - case "$1" in - --dsn) - DSN="$2" - shift 2 - ;; - --stage) - STAGE="$2" - shift 2 - ;; - *) - if [[ "$1" =~ ^[0-9]{8}$ ]]; then - DATE_ARG="$1" - shift - else - log_error "Unknown parameter: $1" - exit 1 - fi - ;; - esac + case "$1" in + --dsn) + DSN="$2" + shift 2 + ;; + --stage) + STAGE="$2" + shift 2 + ;; + *) + if [[ "$1" =~ ^[0-9]{8}$ ]]; then + DATE_ARG="$1" + shift + else + log_error "Unknown parameter: $1" + exit 1 + fi + ;; + esac done # Validate parameters if [[ -z "$STAGE" || -z "$DATE_ARG" ]]; then - log_error "Missing required parameters: --stage or yyyymmdd date" - exit 1 + log_error "Missing required parameters: --stage or yyyymmdd date" + exit 1 fi if [[ -z "$DSN" ]]; then - DSN="$BENDSQL_DSN" - if [[ -z "$DSN" ]]; then - log_error "DSN not provided and BENDSQL_DSN not set" - exit 1 - fi + DSN="$BENDSQL_DSN" + if [[ -z "$DSN" ]]; then + log_error "DSN not provided and BENDSQL_DSN not set" + exit 1 + fi fi # Format date @@ -66,8 +66,8 @@ DOWNLOAD_SQL="PRESIGN DOWNLOAD @${STAGE}/${TAR_FILE}" DOWNLOAD_URL=$(bendsql --dsn "${DSN}" --query="${DOWNLOAD_SQL}" | awk '{print $3}') if [[ -z "$DOWNLOAD_URL" ]]; then - log_error "Failed to generate download URL for ${TAR_FILE}" - exit 1 + log_error "Failed to generate download URL for ${TAR_FILE}" + exit 1 fi log "Download URL generated successfully" @@ -76,8 +76,8 @@ log_step "2" "6" "Downloading ${TAR_FILE} from stage @${STAGE}" curl -s -o "${TAR_FILE}" "${DOWNLOAD_URL}" if [[ ! 
-f "${TAR_FILE}" ]]; then - log_error "Failed to download ${TAR_FILE}" - exit 1 + log_error "Failed to download ${TAR_FILE}" + exit 1 fi FILE_SIZE=$(du -h "${TAR_FILE}" | cut -f1) @@ -98,21 +98,21 @@ TARGET_DIRS=("columns" "user_functions" "query_raw_logs" "query_logs" "query_pro PREFIX="" for target_dir in "${TARGET_DIRS[@]}"; do - SAMPLE_FILE=$(find "${TEMP_DIR}" -path "*/${target_dir}/*" -type f | head -1) - if [[ -n "$SAMPLE_FILE" ]]; then - RELATIVE_PATH="${SAMPLE_FILE#${TEMP_DIR}/}" - PREFIX=$(echo "$RELATIVE_PATH" | sed "s|/${target_dir}/.*||" | sed "s|${target_dir}/.*||") - if [[ -n "$PREFIX" ]]; then - PREFIX="${PREFIX}/" - fi - break - fi + SAMPLE_FILE=$(find "${TEMP_DIR}" -path "*/${target_dir}/*" -type f | head -1) + if [[ -n "$SAMPLE_FILE" ]]; then + RELATIVE_PATH="${SAMPLE_FILE#${TEMP_DIR}/}" + PREFIX=$(echo "$RELATIVE_PATH" | sed "s|/${target_dir}/.*||" | sed "s|${target_dir}/.*||") + if [[ -n "$PREFIX" ]]; then + PREFIX="${PREFIX}/" + fi + break + fi done if [[ -n "$PREFIX" ]]; then - log "Path prefix detected: '${PREFIX}' - will be stripped during upload" + log "Path prefix detected: '${PREFIX}' - will be stripped during upload" else - log "No path prefix detected - using original file paths" + log "No path prefix detected - using original file paths" fi # Step 5: Upload files @@ -129,32 +129,32 @@ UPLOAD_SUCCESS=0 UPLOAD_FAILED=0 find "${TEMP_DIR}" -type f | while read -r FILE; do - CURRENT_FILE=$((CURRENT_FILE + 1)) - RELATIVE_PATH="${FILE#${TEMP_DIR}/}" - - if [[ -n "$PREFIX" && "$RELATIVE_PATH" == ${PREFIX}* ]]; then - UPLOAD_PATH="${RELATIVE_PATH#${PREFIX}}" - else - UPLOAD_PATH="$RELATIVE_PATH" - fi - - printf "\rUploading: %d/%d files (Success: %d, Failed: %d)" "$CURRENT_FILE" "$TOTAL_FILES" "$UPLOAD_SUCCESS" "$UPLOAD_FAILED" - - UPLOAD_SQL="PRESIGN UPLOAD @${UPLOAD_STAGE}/${UPLOAD_PATH}" - UPLOAD_URL=$(bendsql --dsn "${DSN}" --query="${UPLOAD_SQL}" | awk '{print $3}') - - if [[ -n "$UPLOAD_URL" ]]; then - if curl -s -X PUT -T "${FILE}" "${UPLOAD_URL}"; then - UPLOAD_SUCCESS=$((UPLOAD_SUCCESS + 1)) - else - UPLOAD_FAILED=$((UPLOAD_FAILED + 1)) - fi - else - UPLOAD_FAILED=$((UPLOAD_FAILED + 1)) - fi + CURRENT_FILE=$((CURRENT_FILE + 1)) + RELATIVE_PATH="${FILE#${TEMP_DIR}/}" + + if [[ -n "$PREFIX" && "$RELATIVE_PATH" == ${PREFIX}* ]]; then + UPLOAD_PATH="${RELATIVE_PATH#${PREFIX}}" + else + UPLOAD_PATH="$RELATIVE_PATH" + fi + + printf "\rUploading: %d/%d files (Success: %d, Failed: %d)" "$CURRENT_FILE" "$TOTAL_FILES" "$UPLOAD_SUCCESS" "$UPLOAD_FAILED" + + UPLOAD_SQL="PRESIGN UPLOAD @${UPLOAD_STAGE}/${UPLOAD_PATH}" + UPLOAD_URL=$(bendsql --dsn "${DSN}" --query="${UPLOAD_SQL}" | awk '{print $3}') + + if [[ -n "$UPLOAD_URL" ]]; then + if curl -s -X PUT -T "${FILE}" "${UPLOAD_URL}"; then + UPLOAD_SUCCESS=$((UPLOAD_SUCCESS + 1)) + else + UPLOAD_FAILED=$((UPLOAD_FAILED + 1)) + fi + else + UPLOAD_FAILED=$((UPLOAD_FAILED + 1)) + fi done -echo # New line after progress +echo # New line after progress log "Upload completed: ${UPLOAD_SUCCESS} successful, ${UPLOAD_FAILED} failed" # Cleanup @@ -171,23 +171,23 @@ log "Created database: ${RESTORE_DATABASE}" # Restore tables declare -A TABLE_MAP=( - ["columns"]="system.columns:columns" - ["user_functions"]="system.user_functions:user_functions" - ["log_history"]="system_history.log_history:query_raw_logs" - ["query_history"]="system_history.query_history:query_logs" - ["profile_history"]="system_history.profile_history:query_profile_logs" + ["columns"]="system.columns:columns" + 
["user_functions"]="system.user_functions:user_functions" + ["log_history"]="system_history.log_history:query_raw_logs" + ["query_history"]="system_history.query_history:query_logs" + ["profile_history"]="system_history.profile_history:query_profile_logs" ) for table_name in "${!TABLE_MAP[@]}"; do - IFS=':' read -r source_table source_path <<< "${TABLE_MAP[$table_name]}" - - log "Restoring table: ${RESTORE_DATABASE}.${table_name} from @${UPLOAD_STAGE}/${source_path}" - - bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="CREATE TABLE ${table_name} LIKE ${source_table};" >/dev/null 2>&1 - bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="COPY INTO ${table_name} FROM @${UPLOAD_STAGE}/${source_path};" >/dev/null 2>&1 - - ROW_COUNT=$(bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="SELECT COUNT(*) FROM ${table_name};" | tail -1) - log "Table ${table_name} restored: ${ROW_COUNT} rows" + IFS=':' read -r source_table source_path <<<"${TABLE_MAP[$table_name]}" + + log "Restoring table: ${RESTORE_DATABASE}.${table_name} from @${UPLOAD_STAGE}/${source_path}" + + bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="CREATE TABLE ${table_name} LIKE ${source_table};" >/dev/null 2>&1 + bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="COPY INTO ${table_name} FROM @${UPLOAD_STAGE}/${source_path};" >/dev/null 2>&1 + + ROW_COUNT=$(bendsql --dsn "${DSN}" --database "${RESTORE_DATABASE}" --query="SELECT COUNT(*) FROM ${table_name};" | tail -1) + log "Table ${table_name} restored: ${ROW_COUNT} rows" done log "Log restoration completed successfully" diff --git a/src/common/metrics/src/metrics/storage.rs b/src/common/metrics/src/metrics/storage.rs index 8059be0b39645..d7ae34d57f9ec 100644 --- a/src/common/metrics/src/metrics/storage.rs +++ b/src/common/metrics/src/metrics/storage.rs @@ -337,6 +337,14 @@ static BLOCK_VIRTUAL_COLUMN_WRITE_MILLISECONDS: LazyLock = LazyLock:: register_histogram_in_milliseconds("fuse_block_virtual_column_write_milliseconds") }); +// Block statistics metrics. +static BLOCK_STATS_WRITE_NUMS: LazyLock = + LazyLock::new(|| register_counter("fuse_block_stats_write_nums")); +static BLOCK_STATS_WRITE_BYTES: LazyLock = + LazyLock::new(|| register_counter("fuse_block_stats_write_bytes")); +static BLOCK_STATS_WRITE_MILLISECONDS: LazyLock = + LazyLock::new(|| register_histogram_in_milliseconds("fuse_block_stats_write_milliseconds")); + /// Common metrics. pub fn metrics_inc_omit_filter_rowgroups(c: u64) { OMIT_FILTER_ROWGROUPS.inc_by(c); @@ -907,3 +915,16 @@ pub fn metrics_inc_block_virtual_column_write_bytes(c: u64) { pub fn metrics_inc_block_virtual_column_write_milliseconds(c: u64) { BLOCK_VIRTUAL_COLUMN_WRITE_MILLISECONDS.observe(c as f64); } + +/// Block stats metrics. 
+pub fn metrics_inc_block_stats_write_nums(c: u64) { + BLOCK_STATS_WRITE_NUMS.inc_by(c); +} + +pub fn metrics_inc_block_stats_write_bytes(c: u64) { + BLOCK_STATS_WRITE_BYTES.inc_by(c); +} + +pub fn metrics_inc_block_stats_write_milliseconds(c: u64) { + BLOCK_STATS_WRITE_MILLISECONDS.observe(c as f64); +} diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table.rs index 141f987a8b3e3..19ee34c627efa 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table.rs @@ -40,6 +40,7 @@ pub struct SnapshotReferencedFiles { pub segments: HashSet, pub blocks: HashSet, pub blocks_index: HashSet, + pub blocks_stats: HashSet, } impl SnapshotReferencedFiles { @@ -54,6 +55,9 @@ impl SnapshotReferencedFiles { for file in &self.blocks_index { files.push(file.clone()); } + for file in &self.blocks_stats { + files.push(file.clone()); + } files } } @@ -132,6 +136,7 @@ pub async fn get_snapshot_referenced_files( segments, blocks: locations_referenced.block_location, blocks_index: locations_referenced.bloom_location, + blocks_stats: locations_referenced.stats_location, })) } @@ -164,10 +169,11 @@ pub async fn do_gc_orphan_files( None => return Ok(()), }; let status = format!( - "gc orphan: read referenced files:{},{},{}, cost:{:?}", + "gc orphan: read referenced files:{},{},{},{}, cost:{:?}", referenced_files.segments.len(), referenced_files.blocks.len(), referenced_files.blocks_index.len(), + referenced_files.blocks_stats.len(), start.elapsed() ); ctx.set_status_info(&status); @@ -268,6 +274,36 @@ pub async fn do_gc_orphan_files( ); ctx.set_status_info(&status); + // 5. Purge orphan block stats files. + // 5.1 Get orphan block stats files to be purged + let stats_locations_to_be_purged = get_orphan_files_to_be_purged( + fuse_table, + location_gen.block_statistics_location_prefix(), + referenced_files.blocks_stats, + retention_time, + ) + .await?; + let status = format!( + "gc orphan: read stats_locations_to_be_purged:{}, cost:{:?}", + stats_locations_to_be_purged.len(), + start.elapsed() + ); + ctx.set_status_info(&status); + + // 5.2 Delete all the orphan block stats files to be purged + let purged_file_num = stats_locations_to_be_purged.len(); + fuse_table + .try_purge_location_files( + ctx.clone(), + HashSet::from_iter(stats_locations_to_be_purged.into_iter()), + ) + .await?; + let status = format!( + "gc orphan: purged block stats files:{}, cost:{:?}", + purged_file_num, + start.elapsed() + ); + ctx.set_status_info(&status); Ok(()) } @@ -286,10 +322,11 @@ pub async fn do_dry_run_orphan_files( None => return Ok(()), }; let status = format!( - "dry_run orphan: read referenced files:{},{},{}, cost:{:?}", + "dry_run orphan: read referenced files:{},{},{},{}, cost:{:?}", referenced_files.segments.len(), referenced_files.blocks.len(), referenced_files.blocks_index.len(), + referenced_files.blocks_stats.len(), start.elapsed() ); ctx.set_status_info(&status); @@ -351,6 +388,23 @@ pub async fn do_dry_run_orphan_files( purge_files.extend(index_locations_to_be_purged); + // 5. Get purge orphan block stats files. 
+ let stats_locations_to_be_purged = get_orphan_files_to_be_purged( + fuse_table, + location_gen.block_statistics_location_prefix(), + referenced_files.blocks_stats, + retention_time, + ) + .await?; + let status = format!( + "dry_run orphan: read stats_locations_to_be_purged:{}, cost:{:?}", + stats_locations_to_be_purged.len(), + start.elapsed() + ); + ctx.set_status_info(&status); + + purge_files.extend(stats_locations_to_be_purged); + Ok(()) } diff --git a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs index e03ce27f8466b..da2d43f7f708e 100644 --- a/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs +++ b/src/query/ee/src/storages/fuse/operations/vacuum_table_v2.rs @@ -336,6 +336,9 @@ pub async fn do_vacuum2( } indexes_to_gc .push(TableMetaLocationGenerator::gen_bloom_index_location_from_block_location(loc)); + + indexes_to_gc + .push(TableMetaLocationGenerator::gen_block_stats_location_from_block_location(loc)); } ctx.set_status_info(&format!( diff --git a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs index 87c2268ceee0d..dfdbbf35266e9 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/vacuum.rs @@ -66,6 +66,7 @@ async fn test_fuse_do_vacuum_drop_tables() -> Result<()> { 1, 1, 1, + 1, None, None, ) @@ -90,6 +91,7 @@ async fn test_fuse_do_vacuum_drop_tables() -> Result<()> { 1, 1, 1, + 1, None, None, ) @@ -108,6 +110,7 @@ async fn test_fuse_do_vacuum_drop_tables() -> Result<()> { 0, 0, 0, + 0, None, None, ) diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index bff7c60ff1703..cd12b0ee8508b 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -1694,6 +1694,23 @@ impl Column { _ => (false, None), } } + + /// Checks if the average length of a string column exceeds 256 bytes. + /// If it does, the bloom index for the column will not be established. + pub fn check_large_string(&self) -> bool { + let (inner, len) = if let Column::Nullable(c) = self { + (&c.column, c.validity.true_count()) + } else { + (self, self.len()) + }; + if let Column::String(v) = inner { + let bytes_per_row = v.total_bytes_len() / len.max(1); + if bytes_per_row > 256 { + return true; + } + } + false + } } /// Serialize a column to a base64 string. 
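Note on the new Column::check_large_string helper added in the values.rs hunk above: it gates bloom index creation, and for nullable columns it divides the total string bytes by the number of non-null rows (validity.true_count()) rather than the full row count. A minimal sketch of the heuristic; the free-standing function name and signature here are illustrative only, the real method operates on Column/NullableColumn as shown in the hunk.

// Illustrative sketch: skip bloom indexing when strings average more than
// 256 bytes per non-null row. `total_bytes_len` and `non_null_rows` stand in
// for StringColumn::total_bytes_len() and Bitmap::true_count() in the diff.
fn is_large_string(total_bytes_len: usize, non_null_rows: usize) -> bool {
    let bytes_per_row = total_bytes_len / non_null_rows.max(1);
    bytes_per_row > 256
}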
diff --git a/src/query/service/src/interpreters/common/table_option_validation.rs b/src/query/service/src/interpreters/common/table_option_validation.rs index ab332a1dce642..1a967a1c469a2 100644 --- a/src/query/service/src/interpreters/common/table_option_validation.rs +++ b/src/query/service/src/interpreters/common/table_option_validation.rs @@ -24,6 +24,7 @@ use databend_common_exception::ErrorCode; use databend_common_expression::TableSchemaRef; use databend_common_io::constants::DEFAULT_BLOCK_ROW_COUNT; use databend_common_settings::Settings; +use databend_common_sql::ApproxDistinctColumns; use databend_common_sql::BloomIndexColumns; use databend_common_storages_fuse::FUSE_OPT_KEY_BLOCK_IN_MEM_SIZE_THRESHOLD; use databend_common_storages_fuse::FUSE_OPT_KEY_BLOCK_PER_SEGMENT; @@ -35,6 +36,8 @@ use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_AVG_DEPTH_THRESHOLD; use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_PER_BLOCK; use databend_common_storages_fuse::FUSE_OPT_KEY_ROW_PER_PAGE; use databend_storages_common_index::BloomIndex; +use databend_storages_common_index::RangeIndex; +use databend_storages_common_table_meta::table::OPT_KEY_APPROX_DISTINCT_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING; use databend_storages_common_table_meta::table::OPT_KEY_CLUSTER_TYPE; @@ -68,6 +71,7 @@ pub static CREATE_FUSE_OPTIONS: LazyLock> = LazyLock::new( r.insert(FUSE_OPT_KEY_ENABLE_AUTO_VACUUM); r.insert(OPT_KEY_BLOOM_INDEX_COLUMNS); + r.insert(OPT_KEY_APPROX_DISTINCT_COLUMNS); r.insert(OPT_KEY_TABLE_COMPRESSION); r.insert(OPT_KEY_STORAGE_FORMAT); r.insert(OPT_KEY_DATABASE_ID); @@ -213,6 +217,16 @@ pub fn is_valid_bloom_index_columns( Ok(()) } +pub fn is_valid_approx_distinct_columns( + options: &BTreeMap, + schema: TableSchemaRef, +) -> databend_common_exception::Result<()> { + if let Some(value) = options.get(OPT_KEY_APPROX_DISTINCT_COLUMNS) { + ApproxDistinctColumns::verify_definition(value, schema, RangeIndex::supported_table_type)?; + } + Ok(()) +} + pub fn is_valid_change_tracking( options: &BTreeMap, ) -> databend_common_exception::Result<()> { diff --git a/src/query/service/src/interpreters/interpreter_table_create.rs b/src/query/service/src/interpreters/interpreter_table_create.rs index de7c27cdd1777..faf37f95ee9d5 100644 --- a/src/query/service/src/interpreters/interpreter_table_create.rs +++ b/src/query/service/src/interpreters/interpreter_table_create.rs @@ -66,6 +66,7 @@ use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; use log::error; use log::info; +use crate::interpreters::common::table_option_validation::is_valid_approx_distinct_columns; use crate::interpreters::common::table_option_validation::is_valid_block_per_segment; use crate::interpreters::common::table_option_validation::is_valid_bloom_index_columns; use crate::interpreters::common::table_option_validation::is_valid_change_tracking; @@ -467,7 +468,8 @@ impl CreateTableInterpreter { is_valid_block_per_segment(&table_meta.options)?; is_valid_row_per_block(&table_meta.options)?; // check bloom_index_columns. 
- is_valid_bloom_index_columns(&table_meta.options, schema)?; + is_valid_bloom_index_columns(&table_meta.options, schema.clone())?; + is_valid_approx_distinct_columns(&table_meta.options, schema)?; is_valid_change_tracking(&table_meta.options)?; // check random seed is_valid_random_seed(&table_meta.options)?; diff --git a/src/query/service/src/interpreters/interpreter_table_set_options.rs b/src/query/service/src/interpreters/interpreter_table_set_options.rs index d75f94772cddf..93185f25feeb8 100644 --- a/src/query/service/src/interpreters/interpreter_table_set_options.rs +++ b/src/query/service/src/interpreters/interpreter_table_set_options.rs @@ -49,6 +49,7 @@ use databend_storages_common_table_meta::table::OPT_KEY_STORAGE_FORMAT; use databend_storages_common_table_meta::table::OPT_KEY_TEMP_PREFIX; use log::error; +use crate::interpreters::common::table_option_validation::is_valid_approx_distinct_columns; use crate::interpreters::common::table_option_validation::is_valid_block_per_segment; use crate::interpreters::common::table_option_validation::is_valid_bloom_index_columns; use crate::interpreters::common::table_option_validation::is_valid_create_opt; @@ -163,6 +164,7 @@ impl Interpreter for SetOptionsInterpreter { // check bloom_index_columns. is_valid_bloom_index_columns(&self.plan.set_options, table.schema())?; + is_valid_approx_distinct_columns(&self.plan.set_options, table.schema())?; if let Some(new_snapshot_location) = set_segment_format(self.ctx.clone(), table.clone(), &self.plan.set_options).await? diff --git a/src/query/service/src/test_kits/block_writer.rs b/src/query/service/src/test_kits/block_writer.rs index 6f81b9a8f1dbe..75914348ed1f2 100644 --- a/src/query/service/src/test_kits/block_writer.rs +++ b/src/query/service/src/test_kits/block_writer.rs @@ -19,14 +19,17 @@ use databend_common_expression::FunctionContext; use databend_common_expression::TableSchemaRef; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_io::constants::DEFAULT_BLOCK_INDEX_BUFFER_SIZE; +use databend_common_sql::ApproxDistinctColumns; use databend_common_sql::BloomIndexColumns; use databend_common_storages_fuse::io::serialize_block; +use databend_common_storages_fuse::io::BlockStatisticsState; use databend_common_storages_fuse::io::TableMetaLocationGenerator; use databend_common_storages_fuse::io::WriteSettings; use databend_common_storages_fuse::FuseStorageFormat; use databend_storages_common_blocks::blocks_to_parquet; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; +use databend_storages_common_index::RangeIndex; use databend_storages_common_table_meta::meta::BlockMeta; use databend_storages_common_table_meta::meta::ClusterStatistics; use databend_storages_common_table_meta::meta::Compression; @@ -85,6 +88,9 @@ impl<'a> BlockWriter<'a> { let (bloom_filter_index_size, bloom_filter_index_location, meta) = self .build_block_index(data_accessor, schema.clone(), &block, block_id) .await?; + let (block_stats_size, block_stats_location) = self + .build_block_stats(data_accessor, schema.clone(), &block, block_id) + .await?; let write_settings = WriteSettings { storage_format, @@ -112,6 +118,8 @@ impl<'a> BlockWriter<'a> { None, None, None, + block_stats_location, + block_stats_size, Compression::Lz4Raw, Some(Utc::now()), ); @@ -154,4 +162,29 @@ impl<'a> BlockWriter<'a> { Ok((0u64, None, None)) } } + + pub async fn build_block_stats( + &self, + data_accessor: &Operator, + schema: TableSchemaRef, + block: &DataBlock, + 
block_id: Uuid, + ) -> Result<(u64, Option)> { + let location = self.location_generator.block_stats_location(&block_id); + + let hll_columns = ApproxDistinctColumns::All; + let ndv_columns_map = + hll_columns.distinct_column_fields(schema.clone(), RangeIndex::supported_table_type)?; + let maybe_block_stats = + BlockStatisticsState::from_data_block(location, block, &ndv_columns_map)?; + if let Some(block_stats) = maybe_block_stats { + let size = block_stats.block_stats_size(); + data_accessor + .write(&block_stats.location.0, block_stats.data) + .await?; + Ok((size, Some(block_stats.location))) + } else { + Ok((0u64, None)) + } + } } diff --git a/src/query/service/src/test_kits/check.rs b/src/query/service/src/test_kits/check.rs index a8a82cf60727a..6943810c7d949 100644 --- a/src/query/service/src/test_kits/check.rs +++ b/src/query/service/src/test_kits/check.rs @@ -24,6 +24,7 @@ use databend_common_meta_app::storage::StorageParams; use databend_common_storages_fuse::operations::load_last_snapshot_hint; use databend_common_storages_fuse::FuseTable; use databend_common_storages_fuse::FUSE_TBL_BLOCK_PREFIX; +use databend_common_storages_fuse::FUSE_TBL_BLOCK_STATISTICS_PREFIX; use databend_common_storages_fuse::FUSE_TBL_SEGMENT_PREFIX; use databend_common_storages_fuse::FUSE_TBL_SNAPSHOT_PREFIX; use databend_common_storages_fuse::FUSE_TBL_SNAPSHOT_STATISTICS_PREFIX; @@ -79,6 +80,7 @@ pub async fn check_data_dir( segment_count: u32, block_count: u32, index_count: u32, + block_stat_count: u32, check_last_snapshot: Option<()>, check_table_statistic_file: Option<()>, ) -> Result<()> { @@ -92,12 +94,14 @@ pub async fn check_data_dir( let mut sg_count = 0; let mut b_count = 0; let mut i_count = 0; + let mut b_stat_count = 0; let mut table_statistic_files = vec![]; let prefix_snapshot = FUSE_TBL_SNAPSHOT_PREFIX; let prefix_snapshot_statistics = FUSE_TBL_SNAPSHOT_STATISTICS_PREFIX; let prefix_segment = FUSE_TBL_SEGMENT_PREFIX; let prefix_block = FUSE_TBL_BLOCK_PREFIX; let prefix_index = FUSE_TBL_XOR_BLOOM_INDEX_PREFIX; + let prefix_block_stats = FUSE_TBL_BLOCK_STATISTICS_PREFIX; for entry in WalkDir::new(root) { let entry = entry.unwrap(); if entry.file_type().is_file() { @@ -109,6 +113,8 @@ pub async fn check_data_dir( ss_count += 1; } else if path.starts_with(prefix_segment) { sg_count += 1; + } else if path.starts_with(prefix_block_stats) { + b_stat_count += 1; } else if path.starts_with(prefix_block) { b_count += 1; } else if path.starts_with(prefix_index) { @@ -142,6 +148,12 @@ pub async fn check_data_dir( case_name ); + assert_eq!( + b_stat_count, block_stat_count, + "case [{}], check block statistics count", + case_name + ); + assert_eq!( i_count, index_count, "case [{}], check index count", diff --git a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs index 520231794d2cb..3f958654bf778 100644 --- a/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs +++ b/src/query/service/tests/it/storages/fuse/bloom_index_meta_size.rs @@ -340,6 +340,8 @@ fn build_test_segment_info( vector_index_size: None, vector_index_location: None, virtual_block_meta: None, + block_stats_location: None, + block_stats_size: 0, compression: Compression::Lz4, create_on: Some(Utc::now()), }; diff --git a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs index 0a4d46b8c6f40..dc50928f2b4e1 100644 --- 
a/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs +++ b/src/query/service/tests/it/storages/fuse/meta/column_oriented.rs @@ -288,6 +288,39 @@ fn check_block_level_meta( assert!(is_null); } + // check block stats location + let block_stats_location = column_oriented_segment + .col_by_name(&[BLOCK_STATS_LOCATION]) + .unwrap(); + for (block_stats_location, block_meta) in block_stats_location.iter().zip(block_metas.iter()) { + let block_stats_location = block_stats_location.as_tuple(); + if let Some(block_stats_location) = block_stats_location { + assert_eq!( + block_stats_location[0].as_string().unwrap(), + &block_meta.block_stats_location.as_ref().unwrap().0 + ); + assert_eq!( + block_stats_location[1] + .as_number() + .unwrap() + .as_u_int64() + .unwrap(), + &block_meta.block_stats_location.as_ref().unwrap().1 + ); + } else { + assert!(block_meta.block_stats_location.is_none()); + } + } + + // check block stats size + let block_stats_size = column_oriented_segment + .col_by_name(&[BLOCK_STATS_SIZE]) + .unwrap(); + for (block_stats_size, block_meta) in block_stats_size.iter().zip(block_metas.iter()) { + let block_stats_size = block_stats_size.as_number().unwrap().as_u_int64().unwrap(); + assert_eq!(block_stats_size, &block_meta.block_stats_size); + } + // check compression let compression = column_oriented_segment.col_by_name(&[COMPRESSION]).unwrap(); for (compression, block_meta) in compression.iter().zip(block_metas.iter()) { @@ -372,7 +405,7 @@ async fn test_segment_cache() -> Result<()> { ) .await?; let cached = cache.get(&location).unwrap(); - assert_eq!(cached.segment_schema.fields.len(), 10); + assert_eq!(cached.segment_schema.fields.len(), 12); assert_eq!(cached.segment_schema, segment_schema(&TableSchema::empty())); check_summary(&block_metas, &cached); check_block_level_meta(&block_metas, &cached); @@ -385,7 +418,7 @@ async fn test_segment_cache() -> Result<()> { let _column_oriented_segment = read_column_oriented_segment(operator.clone(), &location, &projection, true).await?; let cached = cache.get(&location).unwrap(); - assert_eq!(cached.segment_schema.fields.len(), 12); + assert_eq!(cached.segment_schema.fields.len(), 14); let column_1 = table_schema.field_of_column_id(col_id).unwrap(); let stat_1 = column_oriented_segment @@ -409,7 +442,7 @@ async fn test_segment_cache() -> Result<()> { read_column_oriented_segment(operator.clone(), &location, &projection, true).await?; let cached = cache.get(&location).unwrap(); // column 2 does not have stats - assert_eq!(cached.segment_schema.fields.len(), 13); + assert_eq!(cached.segment_schema.fields.len(), 15); check_summary(&block_metas, &cached); check_block_level_meta(&block_metas, &cached); check_column_stats_and_meta(&block_metas, &cached, &[1, 2]); @@ -423,7 +456,7 @@ async fn test_segment_cache() -> Result<()> { read_column_oriented_segment(operator.clone(), &location, &projection, true).await?; let cached = cache.get(&location).unwrap(); // column 2 does not have stats - assert_eq!(cached.segment_schema.fields.len(), 13); + assert_eq!(cached.segment_schema.fields.len(), 15); check_summary(&block_metas, &cached); check_block_level_meta(&block_metas, &cached); check_column_stats_and_meta(&block_metas, &cached, &[1, 2]); diff --git a/src/query/service/tests/it/storages/fuse/operations/analyze.rs b/src/query/service/tests/it/storages/fuse/operations/analyze.rs index 2a23d26433b5b..0b5ee07035bc5 100644 --- a/src/query/service/tests/it/storages/fuse/operations/analyze.rs +++ 
b/src/query/service/tests/it/storages/fuse/operations/analyze.rs @@ -32,7 +32,7 @@ async fn test_fuse_snapshot_analyze() -> Result<()> { do_insertions(&fixture).await?; analyze_table(&fixture).await?; - check_data_dir(&fixture, case_name, 3, 1, 2, 2, 2, Some(()), None).await?; + check_data_dir(&fixture, case_name, 3, 1, 2, 2, 2, 2, Some(()), None).await?; // Purge will keep at least two snapshots. let table = fixture.latest_default_table().await?; @@ -42,7 +42,7 @@ async fn test_fuse_snapshot_analyze() -> Result<()> { fuse_table .do_purge(&table_ctx, snapshot_files, None, true, false) .await?; - check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, Some(()), Some(())).await?; + check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, 1, Some(()), Some(())).await?; Ok(()) } @@ -65,7 +65,7 @@ async fn test_fuse_snapshot_analyze_and_truncate() -> Result<()> { fixture.execute_command(&qry).await?; - check_data_dir(&fixture, case_name, 3, 1, 2, 2, 2, None, Some(())).await?; + check_data_dir(&fixture, case_name, 3, 1, 2, 2, 2, 2, None, Some(())).await?; } // truncate table @@ -103,7 +103,19 @@ async fn test_fuse_snapshot_analyze_purge() -> Result<()> { // optimize statistics three times for i in 0..3 { analyze_table(&fixture).await?; - check_data_dir(&fixture, case_name, 3 + i, 1 + i, 2, 2, 2, Some(()), None).await?; + check_data_dir( + &fixture, + case_name, + 3 + i, + 1 + i, + 2, + 2, + 2, + 2, + Some(()), + None, + ) + .await?; } // Purge will keep at least two snapshots. @@ -114,7 +126,7 @@ async fn test_fuse_snapshot_analyze_purge() -> Result<()> { fuse_table .do_purge(&table_ctx, snapshot_files, None, true, false) .await?; - check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, Some(()), Some(())).await?; + check_data_dir(&fixture, case_name, 1, 1, 1, 1, 1, 1, Some(()), Some(())).await?; Ok(()) } diff --git a/src/query/service/tests/it/storages/fuse/operations/gc.rs b/src/query/service/tests/it/storages/fuse/operations/gc.rs index 24202424686f6..bb60f0dbf240f 100644 --- a/src/query/service/tests/it/storages/fuse/operations/gc.rs +++ b/src/query/service/tests/it/storages/fuse/operations/gc.rs @@ -59,6 +59,7 @@ async fn test_fuse_purge_normal_case() -> Result<()> { 1, // 1 segments 1, // 1 blocks 1, // 1 index + 1, // 1 block statistic Some(()), None, ) @@ -118,9 +119,10 @@ async fn test_fuse_purge_normal_orphan_snapshot() -> Result<()> { "do_gc: there should be 1 snapshot, 0 segment/block", expected_num_of_snapshot, 0, // 0 snapshot statistic - 1, // 0 segments - 1, // 0 blocks - 1, // 0 index + 1, // 1 segments + 1, // 1 blocks + 1, // 1 index + 1, // 1 block statistic Some(()), None, ) @@ -250,6 +252,7 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> { let expected_num_of_segment = 3; let expected_num_of_blocks = 3; let expected_num_of_index = expected_num_of_blocks; + let expected_num_of_block_stats = expected_num_of_blocks; check_data_dir( &fixture, "do_gc: verify retention period", @@ -258,6 +261,7 @@ async fn test_fuse_purge_orphan_retention() -> Result<()> { expected_num_of_segment, expected_num_of_blocks, expected_num_of_index, + expected_num_of_block_stats, Some(()), None, ) @@ -296,6 +300,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { let expected_num_of_segment = 3; let expected_num_of_blocks = 6; let expected_num_of_index = expected_num_of_blocks; + let expected_num_of_block_stats = expected_num_of_blocks; check_data_dir( &fixture, "do_gc: navigate to time point", @@ -304,6 +309,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { expected_num_of_segment, 
expected_num_of_blocks, expected_num_of_index, + expected_num_of_block_stats, Some(()), None, ) @@ -317,7 +323,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { { let table = fixture.latest_default_table().await?; compact_segment(ctx.clone(), &table).await?; - check_data_dir(&fixture, "", 4, 0, 5, 7, 7, Some(()), None).await?; + check_data_dir(&fixture, "", 4, 0, 5, 7, 7, 7, Some(()), None).await?; } let table = fixture.latest_default_table().await?; @@ -333,6 +339,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { let expected_num_of_segment = 1; let expected_num_of_blocks = 7; let expected_num_of_index = expected_num_of_blocks; + let expected_num_of_block_stats = expected_num_of_blocks; check_data_dir( &fixture, "do_gc: with older version", @@ -341,6 +348,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { expected_num_of_segment, expected_num_of_blocks, expected_num_of_index, + expected_num_of_block_stats, Some(()), None, ) @@ -357,6 +365,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { let expected_num_of_segment = 0; let expected_num_of_blocks = 0; let expected_num_of_index = expected_num_of_blocks; + let expected_num_of_block_stats = expected_num_of_blocks; check_data_dir( &fixture, "do_gc: purge last snapshot", @@ -365,6 +374,7 @@ async fn test_fuse_purge_older_version() -> Result<()> { expected_num_of_segment, expected_num_of_blocks, expected_num_of_index, + expected_num_of_block_stats, Some(()), None, ) diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs index 97c3e9cf85f0d..8d0ec47d7f19e 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/recluster_mutator.rs @@ -81,6 +81,8 @@ async fn test_recluster_mutator_block_select() -> Result<()> { None, None, None, + None, + 0, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs index ebdcabfd13662..8387d3c7a2ace 100644 --- a/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs +++ b/src/query/service/tests/it/storages/fuse/operations/mutation/segments_compact_mutator.rs @@ -781,6 +781,8 @@ impl CompactSegmentTestFixture { None, None, None, + None, + 0, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/operations/optimize.rs b/src/query/service/tests/it/storages/fuse/operations/optimize.rs index 02f300cf3680d..5470aa4853b8d 100644 --- a/src/query/service/tests/it/storages/fuse/operations/optimize.rs +++ b/src/query/service/tests/it/storages/fuse/operations/optimize.rs @@ -24,12 +24,12 @@ use crate::storages::fuse::utils::do_purge_test; #[tokio::test(flavor = "multi_thread")] async fn test_fuse_snapshot_optimize_purge() -> Result<()> { - do_purge_test("test_fuse_snapshot_optimize_purge", 1, 0, 1, 1, 1).await + do_purge_test("test_fuse_snapshot_optimize_purge", 1, 0, 1, 1, 1, 1).await } #[tokio::test(flavor = "multi_thread")] async fn test_fuse_snapshot_optimize_all() -> Result<()> { - do_purge_test("test_fuse_snapshot_optimize_all", 1, 0, 1, 1, 1).await + do_purge_test("test_fuse_snapshot_optimize_all", 1, 0, 1, 1, 1, 1).await } #[tokio::test(flavor = "multi_thread")] diff --git 
a/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs b/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs index 75c8bb405a844..54b4a535c9946 100644 --- a/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs +++ b/src/query/service/tests/it/storages/fuse/operations/purge_drop.rs @@ -46,6 +46,20 @@ async fn test_fuse_snapshot_truncate_in_drop_all_stmt() -> Result<()> { // ingests some test data append_sample_data(1, &fixture).await?; + check_data_dir( + &fixture, + "drop table: there should be 1 snapshot, 1 segment/block", + 1, // 1 snapshot + 0, // 0 snapshot statistic + 1, // 1 segments + 1, // 1 blocks + 1, // 1 index + 1, // 1 block statistic + None, + None, + ) + .await?; + // let's Drop let qry = format!("drop table {}.{} all", db, tbl); fixture.execute_command(qry.as_str()).await?; @@ -58,6 +72,7 @@ async fn test_fuse_snapshot_truncate_in_drop_all_stmt() -> Result<()> { 0, // 0 segments 0, // 0 blocks 0, // 0 index + 0, // 0 block statistic None, None, ) diff --git a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs index 09dccdb11816e..eb2d64ad67465 100644 --- a/src/query/service/tests/it/storages/fuse/operations/read_plan.rs +++ b/src/query/service/tests/it/storages/fuse/operations/read_plan.rs @@ -107,6 +107,8 @@ fn test_to_partitions() -> Result<()> { None, None, None, + None, + 0, meta::Compression::Lz4Raw, Some(Utc::now()), )); diff --git a/src/query/service/tests/it/storages/fuse/statistics.rs b/src/query/service/tests/it/storages/fuse/statistics.rs index c87b524f9217f..4bcfc5bf9cf6d 100644 --- a/src/query/service/tests/it/storages/fuse/statistics.rs +++ b/src/query/service/tests/it/storages/fuse/statistics.rs @@ -637,6 +637,8 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> { None, None, None, + None, + 0, Compression::Lz4Raw, Some(Utc::now()), ); diff --git a/src/query/service/tests/it/storages/fuse/utils.rs b/src/query/service/tests/it/storages/fuse/utils.rs index 4e29ed6594705..2dbf9d36de9dc 100644 --- a/src/query/service/tests/it/storages/fuse/utils.rs +++ b/src/query/service/tests/it/storages/fuse/utils.rs @@ -54,6 +54,7 @@ pub async fn do_purge_test( segment_count: u32, block_count: u32, index_count: u32, + block_stat_count: u32, ) -> Result<()> { let fixture = TestFixture::setup().await?; fixture.create_default_database().await?; @@ -81,6 +82,7 @@ pub async fn do_purge_test( segment_count, block_count, index_count, + block_stat_count, Some(()), None, ) diff --git a/src/query/settings/src/settings_default.rs b/src/query/settings/src/settings_default.rs index 9b32bbbc233fd..cc029a648ce33 100644 --- a/src/query/settings/src/settings_default.rs +++ b/src/query/settings/src/settings_default.rs @@ -1320,7 +1320,7 @@ impl DefaultSettings { range: None, }), ("enable_block_stream_write", DefaultSettingValue { - value: UserSettingValue::UInt64(0), + value: UserSettingValue::UInt64(1), desc: "Enables block stream write", mode: SettingMode::Both, scope: SettingScope::Both, diff --git a/src/query/sql/src/planner/metadata/hll_columns.rs b/src/query/sql/src/planner/metadata/hll_columns.rs new file mode 100644 index 0000000000000..ec708a5744153 --- /dev/null +++ b/src/query/sql/src/planner/metadata/hll_columns.rs @@ -0,0 +1,148 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::str::FromStr; + +use databend_common_ast::parser::parse_comma_separated_idents; +use databend_common_ast::parser::tokenize_sql; +use databend_common_ast::parser::Dialect; +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::ComputedExpr; +use databend_common_expression::FieldIndex; +use databend_common_expression::TableDataType; +use databend_common_expression::TableField; +use databend_common_expression::TableSchemaRef; +use databend_common_meta_app::tenant::Tenant; +use databend_common_settings::Settings; + +use crate::normalize_identifier; +use crate::NameResolutionContext; + +#[derive(Clone)] +pub enum ApproxDistinctColumns { + /// Default, all columns that support distinct columns. + All, + /// Specify with column names. + Specify(Vec), + /// The column of distinct is empty. + None, +} + +impl FromStr for ApproxDistinctColumns { + type Err = ErrorCode; + + fn from_str(s: &str) -> std::result::Result { + let s = s.trim(); + if s.is_empty() { + return Ok(ApproxDistinctColumns::None); + } + + let sql_dialect = Dialect::default(); + let tokens = tokenize_sql(s)?; + let idents = parse_comma_separated_idents(&tokens, sql_dialect)?; + + let settings = Settings::create(Tenant::new_literal("dummy")); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + + let mut cols = Vec::with_capacity(idents.len()); + idents + .into_iter() + .for_each(|ident| cols.push(normalize_identifier(&ident, &name_resolution_ctx).name)); + + Ok(ApproxDistinctColumns::Specify(cols)) + } +} + +impl ApproxDistinctColumns { + /// Verify the definition based on schema. 
+ pub fn verify_definition( + definition: &str, + schema: TableSchemaRef, + verify_type: F, + ) -> Result<()> + where + F: Fn(&TableDataType) -> bool, + { + let definition = definition.trim(); + if definition.is_empty() { + return Ok(()); + } + + let settings = Settings::create(Tenant::new_literal("dummy")); + let name_resolution_ctx = NameResolutionContext::try_from(settings.as_ref())?; + + let sql_dialect = Dialect::default(); + let tokens = tokenize_sql(definition)?; + let idents = parse_comma_separated_idents(&tokens, sql_dialect)?; + for ident in idents.iter() { + let name = &normalize_identifier(ident, &name_resolution_ctx).name; + let field = schema.field_with_name(name)?; + + if matches!(field.computed_expr(), Some(ComputedExpr::Virtual(_))) { + return Err(ErrorCode::TableOptionInvalid(format!( + "The value specified for computed column '{}' is not allowed for distinct columns", + name + ))); + } + + let data_type = field.data_type(); + if !verify_type(data_type) { + return Err(ErrorCode::TableOptionInvalid(format!( + "Unsupported data type '{}' for distinct columns", + data_type + ))); + } + } + Ok(()) + } + + pub fn distinct_column_fields( + &self, + schema: TableSchemaRef, + verify_type: F, + ) -> Result> + where + F: Fn(&TableDataType) -> bool, + { + let source_schema = schema.remove_virtual_computed_fields(); + let mut fields_map = BTreeMap::new(); + match self { + ApproxDistinctColumns::All => { + for (i, field) in source_schema.fields.into_iter().enumerate() { + if verify_type(field.data_type()) { + fields_map.insert(i, field); + } + } + } + ApproxDistinctColumns::Specify(cols) => { + for col in cols { + let field_index = source_schema.index_of(col)?; + let field = source_schema.fields[field_index].clone(); + let data_type = field.data_type(); + if !verify_type(data_type) { + return Err(ErrorCode::BadArguments(format!( + "Unsupported data type for distinct columns: {:?}", + data_type + ))); + } + fields_map.insert(field_index, field); + } + } + ApproxDistinctColumns::None => (), + } + Ok(fields_map) + } +} diff --git a/src/query/sql/src/planner/metadata/mod.rs b/src/query/sql/src/planner/metadata/mod.rs index f3de756113350..e37aa7b078812 100644 --- a/src/query/sql/src/planner/metadata/mod.rs +++ b/src/query/sql/src/planner/metadata/mod.rs @@ -13,8 +13,10 @@ // limitations under the License. 
mod bloom_index; +mod hll_columns; #[allow(clippy::module_inception)] mod metadata; pub use bloom_index::BloomIndexColumns; +pub use hll_columns::ApproxDistinctColumns; pub use metadata::*; diff --git a/src/query/storages/common/cache/src/manager.rs b/src/query/storages/common/cache/src/manager.rs index 2e4aeb6b0fb6b..1b8432a52ce98 100644 --- a/src/query/storages/common/cache/src/manager.rs +++ b/src/query/storages/common/cache/src/manager.rs @@ -1033,6 +1033,8 @@ mod tests { vector_index_location: None, vector_index_size: None, virtual_block_meta: None, + block_stats_location: None, + block_stats_size: 0, compression: Compression::Lz4, create_on: None, }); diff --git a/src/query/storages/common/index/src/bloom_index.rs b/src/query/storages/common/index/src/bloom_index.rs index bf5ec35035e64..9af4d2a3734fa 100644 --- a/src/query/storages/common/index/src/bloom_index.rs +++ b/src/query/storages/common/index/src/bloom_index.rs @@ -14,6 +14,7 @@ use std::collections::BTreeMap; use std::collections::HashMap; +use std::hash::DefaultHasher; use std::hash::Hasher; use std::ops::ControlFlow; use std::ops::Deref; @@ -35,12 +36,18 @@ use databend_common_expression::types::BinaryType; use databend_common_expression::types::Bitmap; use databend_common_expression::types::Buffer; use databend_common_expression::types::DataType; +use databend_common_expression::types::DateType; use databend_common_expression::types::MapType; use databend_common_expression::types::NullableType; use databend_common_expression::types::Number; use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::NumberType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::TimestampType; use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::ValueType; use databend_common_expression::visit_expr; +use databend_common_expression::with_number_mapped_type; use databend_common_expression::BlockEntry; use databend_common_expression::Column; use databend_common_expression::ColumnBuilder; @@ -349,6 +356,68 @@ impl BloomIndex { Ok(column) } + pub fn calculate_digest_by_type(data_type: &DataType, column: &Column) -> Result> { + let inner_type = data_type.remove_nullable(); + with_number_mapped_type!(|NUM_TYPE| match inner_type { + DataType::Number(NumberDataType::NUM_TYPE) => { + Self::calculate_nullable_column_digests::>(column) + } + DataType::String => { + Self::calculate_nullable_column_digests::(column) + } + DataType::Date => { + Self::calculate_nullable_column_digests::(column) + } + DataType::Timestamp => { + Self::calculate_nullable_column_digests::(column) + } + _ => Err(ErrorCode::Internal(format!( + "Unsupported data type: {:?}", + data_type + ))), + }) + } + + #[inline(always)] + fn hash_one(v: &T) -> u64 { + let mut hasher = DefaultHasher::default(); + DFHash::hash(v, &mut hasher); + hasher.finish() + } + + fn calculate_nullable_column_digests(column: &Column) -> Result> + where for<'a> T::ScalarRef<'a>: DFHash { + let (column, validity) = if let Column::Nullable(box inner) = column { + let validity = if inner.validity.null_count() == 0 { + None + } else { + Some(&inner.validity) + }; + (&inner.column, validity) + } else { + (column, None) + }; + + let capacity = validity.map_or(column.len(), |v| v.true_count()); + let mut result = Vec::with_capacity(capacity); + let column = T::try_downcast_column(column).unwrap(); + if let Some(validity) = validity { + let column_iter = T::iter_column(&column); + let value_iter = 
column_iter + .zip(validity.iter()) + .filter(|(_, v)| *v) + .map(|(v, _)| v); + for value in value_iter { + result.push(Self::hash_one(&value)); + } + } else { + for value in T::iter_column(&column) { + result.push(Self::hash_one(&value)); + } + } + Ok(result) + } + /// calculate digest for column that may have null values /// /// returns (column, validity) where column is the digest of the column @@ -556,18 +625,6 @@ impl BloomIndex { let data_type = DataType::from(data_type); Xor8Filter::supported_type(&data_type) } - - /// Checks if the average length of a string column exceeds 256 bytes. - /// If it does, the bloom index for the column will not be established. - pub fn check_large_string(column: &Column) -> bool { - if let Column::String(v) = &column { - let bytes_per_row = v.total_bytes_len() / v.len().max(1); - if bytes_per_row > 256 { - return true; - } - } - false - } } pub struct BloomIndexBuilder { @@ -711,14 +768,14 @@ impl BloomIndexBuilder { builder.push_default(); } let str_column = builder.build(); - if BloomIndex::check_large_string(&str_column) { + if str_column.check_large_string() { bloom_keys_to_remove.push(index); continue; } let str_type = DataType::Nullable(Box::new(DataType::String)); (str_column, str_type) } else { - if BloomIndex::check_large_string(&column) { + if column.check_large_string() { bloom_keys_to_remove.push(index); continue; } @@ -726,7 +783,7 @@ impl BloomIndexBuilder { } } _ => { - if BloomIndex::check_large_string(&column) { + if column.check_large_string() { bloom_keys_to_remove.push(index); continue; } @@ -734,24 +791,8 @@ impl BloomIndexBuilder { } }; - let (column, validity) = - BloomIndex::calculate_nullable_column_digest(&self.func_ctx, &column, &data_type)?; - // create filter per column - if validity.as_ref().map(|v| v.null_count()).unwrap_or(0) > 0 { - let validity = validity.unwrap(); - let it = column.deref().iter().zip(validity.iter()).map( - |(v, b)| { - if !b { - &0 - } else { - v - } - }, - ); - index_column.builder.add_digests(it); - } else { - index_column.builder.add_digests(column.deref()); - } + let column = BloomIndex::calculate_digest_by_type(&data_type, &column)?; + index_column.builder.add_digests(column.deref()); } for index_column in self.ngram_columns.iter_mut() { let field_type = &block.data_type(index_column.index); @@ -771,6 +812,8 @@ impl BloomIndexBuilder { index_column.builder.add_digests(digests.iter()) } } + // reverse sorting. 
+ bloom_keys_to_remove.sort_by(|a, b| b.cmp(a)); for k in bloom_keys_to_remove { self.bloom_columns.remove(k); } diff --git a/src/query/storages/common/index/src/range_index.rs b/src/query/storages/common/index/src/range_index.rs index 4f23fcef61bee..60d4da9655cc4 100644 --- a/src/query/storages/common/index/src/range_index.rs +++ b/src/query/storages/common/index/src/range_index.rs @@ -42,6 +42,7 @@ use databend_common_expression::Domain; use databend_common_expression::Expr; use databend_common_expression::FunctionContext; use databend_common_expression::Scalar; +use databend_common_expression::TableDataType; use databend_common_expression::TableSchemaRef; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_storages_common_table_meta::meta::ColumnStatistics; @@ -169,6 +170,11 @@ impl RangeIndex { } .apply(stats, |_| false) } + + pub fn supported_table_type(data_type: &TableDataType) -> bool { + let data_type = DataType::from(data_type); + Self::supported_type(&data_type) + } } pub fn statistics_to_domain(mut stats: Vec<&ColumnStatistics>, data_type: &DataType) -> Domain { diff --git a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/mod.rs b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/mod.rs index 86453a71543fc..21787350577ee 100644 --- a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/mod.rs +++ b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/mod.rs @@ -26,6 +26,8 @@ pub use schema::meta_name; pub use schema::segment_schema; pub use schema::stat_name; pub use schema::BLOCK_SIZE; +pub use schema::BLOCK_STATS_LOCATION; +pub use schema::BLOCK_STATS_SIZE; pub use schema::BLOOM_FILTER_INDEX_LOCATION; pub use schema::BLOOM_FILTER_INDEX_SIZE; pub use schema::CLUSTER_STATS; diff --git a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/schema.rs b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/schema.rs index dbba08ce7c622..daea76a037897 100644 --- a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/schema.rs +++ b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/schema.rs @@ -34,6 +34,8 @@ pub const COMPRESSION: &str = "compression"; pub const CREATE_ON: &str = "create_on"; pub const LOCATION_PATH: &str = "path"; pub const LOCATION_FORMAT_VERSION: &str = "format_version"; +pub const BLOCK_STATS_LOCATION: &str = "block_stats_location"; +pub const BLOCK_STATS_SIZE: &str = "block_stats_size"; pub fn block_level_field_names() -> HashSet { let mut set = HashSet::new(); @@ -45,6 +47,8 @@ pub fn block_level_field_names() -> HashSet { set.insert(BLOOM_FILTER_INDEX_LOCATION.to_string()); set.insert(BLOOM_FILTER_INDEX_SIZE.to_string()); set.insert(INVERTED_INDEX_SIZE.to_string()); + set.insert(BLOCK_STATS_LOCATION.to_string()); + set.insert(BLOCK_STATS_SIZE.to_string()); set.insert(COMPRESSION.to_string()); set.insert(CREATE_ON.to_string()); set @@ -130,6 +134,8 @@ pub fn segment_schema(table_schema: &TableSchema) -> TableSchema { TableField::new(BLOOM_FILTER_INDEX_LOCATION, nullable_location_type()), TableField::new(BLOOM_FILTER_INDEX_SIZE, u64_t.clone()), TableField::new(INVERTED_INDEX_SIZE, nullable_u64_t.clone()), + TableField::new(BLOCK_STATS_LOCATION, nullable_location_type()), + TableField::new(BLOCK_STATS_SIZE, u64_t.clone()), TableField::new(COMPRESSION, u8_t.clone()), TableField::new(CREATE_ON, i64_t.clone()), ]; diff --git 
a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/segment_builder.rs b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/segment_builder.rs index eeb6c2305a8ca..4ce10f20ed867 100644 --- a/src/query/storages/common/table_meta/src/meta/column_oriented_segment/segment_builder.rs +++ b/src/query/storages/common/table_meta/src/meta/column_oriented_segment/segment_builder.rs @@ -45,6 +45,7 @@ use crate::meta::supported_stat_type; use crate::meta::BlockMeta; use crate::meta::ClusterStatistics; use crate::meta::ColumnStatistics; +use crate::meta::Location; use crate::meta::MetaEncoding; use crate::meta::Statistics; use crate::meta::VirtualBlockMeta; @@ -67,10 +68,12 @@ pub struct ColumnOrientedSegmentBuilder { file_size: Vec, cluster_stats: Vec>, location: (Vec, Vec), - bloom_filter_index_location: (Vec, Vec, MutableBitmap), + bloom_filter_index_location: LocationsWithOption, bloom_filter_index_size: Vec, inverted_index_size: Vec>, virtual_block_meta: Vec>, + block_stats_location: LocationsWithOption, + block_stats_size: Vec, compression: Vec, create_on: Vec>, column_stats: HashMap, @@ -127,28 +130,16 @@ impl SegmentBuilder for ColumnOrientedSegmentBuilder { self.cluster_stats.push(block_meta.cluster_stats); self.location.0.push(block_meta.location.0); self.location.1.push(block_meta.location.1); - self.bloom_filter_index_location.0.push( - block_meta - .bloom_filter_index_location - .as_ref() - .map(|l| l.0.clone()) - .unwrap_or_default(), - ); - self.bloom_filter_index_location.1.push( - block_meta - .bloom_filter_index_location - .as_ref() - .map(|l| l.1) - .unwrap_or_default(), - ); self.bloom_filter_index_location - .2 - .push(block_meta.bloom_filter_index_location.is_some()); + .add_location(block_meta.bloom_filter_index_location.as_ref()); self.bloom_filter_index_size .push(block_meta.bloom_filter_index_size); self.inverted_index_size .push(block_meta.inverted_index_size); self.virtual_block_meta.push(block_meta.virtual_block_meta); + self.block_stats_location + .add_location(block_meta.block_stats_location.as_ref()); + self.block_stats_size.push(block_meta.block_stats_size); self.compression.push(block_meta.compression.to_u8()); self.create_on .push(block_meta.create_on.map(|t| t.timestamp())); @@ -199,13 +190,21 @@ impl SegmentBuilder for ColumnOrientedSegmentBuilder { ]), Column::Nullable(Box::new(NullableColumn::new( Column::Tuple(vec![ - StringType::from_data(this.bloom_filter_index_location.0), - UInt64Type::from_data(this.bloom_filter_index_location.1), + StringType::from_data(this.bloom_filter_index_location.locations), + UInt64Type::from_data(this.bloom_filter_index_location.versions), ]), - this.bloom_filter_index_location.2.into(), + this.bloom_filter_index_location.validity.into(), ))), UInt64Type::from_data(this.bloom_filter_index_size), UInt64Type::from_opt_data(this.inverted_index_size), + Column::Nullable(Box::new(NullableColumn::new( + Column::Tuple(vec![ + StringType::from_data(this.block_stats_location.locations), + UInt64Type::from_data(this.block_stats_location.versions), + ]), + this.block_stats_location.validity.into(), + ))), + UInt64Type::from_data(this.block_stats_size), UInt8Type::from_data(this.compression), Int64Type::from_opt_data(this.create_on), ]; @@ -264,14 +263,12 @@ impl SegmentBuilder for ColumnOrientedSegmentBuilder { Vec::with_capacity(block_per_segment), Vec::with_capacity(block_per_segment), ), - bloom_filter_index_location: ( - Vec::with_capacity(block_per_segment), - Vec::with_capacity(block_per_segment), - 
MutableBitmap::with_capacity(block_per_segment), - ), + bloom_filter_index_location: LocationsWithOption::new_with_capacity(block_per_segment), bloom_filter_index_size: Vec::with_capacity(block_per_segment), inverted_index_size: Vec::with_capacity(block_per_segment), virtual_block_meta: Vec::with_capacity(block_per_segment), + block_stats_location: LocationsWithOption::new_with_capacity(block_per_segment), + block_stats_size: Vec::with_capacity(block_per_segment), compression: Vec::with_capacity(block_per_segment), create_on: Vec::with_capacity(block_per_segment), column_stats, @@ -434,3 +431,31 @@ fn cmp_with_null(v1: &Scalar, v2: &Scalar) -> Ordering { (false, false) => v1.cmp(v2), } } + +struct LocationsWithOption { + locations: Vec, + versions: Vec, + validity: MutableBitmap, +} + +impl LocationsWithOption { + fn new_with_capacity(capacity: usize) -> Self { + Self { + locations: Vec::with_capacity(capacity), + versions: Vec::with_capacity(capacity), + validity: MutableBitmap::with_capacity(capacity), + } + } + + fn add_location(&mut self, location: Option<&Location>) { + if let Some(location) = location { + self.locations.push(location.0.clone()); + self.versions.push(location.1); + self.validity.push(true); + } else { + self.locations.push(String::new()); + self.versions.push(0); + self.validity.push(false); + } + } +} diff --git a/src/query/storages/common/table_meta/src/meta/current/mod.rs b/src/query/storages/common/table_meta/src/meta/current/mod.rs index 2bb5544e0ace9..cc254a566e671 100644 --- a/src/query/storages/common/table_meta/src/meta/current/mod.rs +++ b/src/query/storages/common/table_meta/src/meta/current/mod.rs @@ -25,6 +25,7 @@ pub use v2::Statistics; pub use v2::VirtualBlockMeta; pub use v2::VirtualColumnMeta; pub use v3::TableSnapshotStatistics; +pub use v4::BlockStatistics; pub use v4::CompactSegmentInfo; pub use v4::RawBlockMeta; pub use v4::SegmentInfo; diff --git a/src/query/storages/common/table_meta/src/meta/mod.rs b/src/query/storages/common/table_meta/src/meta/mod.rs index 68e3bfd3e0bce..3c9133ec4fdca 100644 --- a/src/query/storages/common/table_meta/src/meta/mod.rs +++ b/src/query/storages/common/table_meta/src/meta/mod.rs @@ -49,6 +49,7 @@ pub use utils::VACUUM2_OBJECT_KEY_PREFIX; pub(crate) use utils::*; pub use v0::ColumnMeta as ColumnMetaV0; pub use versions::testify_version; +pub use versions::BlockStatisticsVersion; pub use versions::SegmentInfoVersion; pub use versions::SnapshotVersion; pub use versions::TableSnapshotStatisticsVersion; diff --git a/src/query/storages/common/table_meta/src/meta/statistics.rs b/src/query/storages/common/table_meta/src/meta/statistics.rs index b63d53cafa5af..0506f04df1e5e 100644 --- a/src/query/storages/common/table_meta/src/meta/statistics.rs +++ b/src/query/storages/common/table_meta/src/meta/statistics.rs @@ -25,7 +25,6 @@ pub type SnapshotId = Uuid; pub type Location = (String, FormatVersion); pub type ClusterKey = (u32, String); pub type StatisticsOfColumns = HashMap; -pub type ColumnDistinctHLL = simple_hll::HyperLogLog<10>; // Assigned to executors, describes that which blocks of given segment, an executor should take care of #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)] diff --git a/src/query/storages/common/table_meta/src/meta/v2/segment.rs b/src/query/storages/common/table_meta/src/meta/v2/segment.rs index 47c2127145d60..f9b0bcbd403bd 100644 --- a/src/query/storages/common/table_meta/src/meta/v2/segment.rs +++ b/src/query/storages/common/table_meta/src/meta/v2/segment.rs @@ -176,6 
+176,10 @@ pub struct BlockMeta { pub virtual_block_meta: Option, pub compression: Compression, + pub block_stats_location: Option, + #[serde(default)] + pub block_stats_size: u64, + // block create_on pub create_on: Option>, } @@ -197,6 +201,8 @@ impl BlockMeta { vector_index_size: Option, vector_index_location: Option, virtual_block_meta: Option, + block_stats_location: Option, + block_stats_size: u64, compression: Compression, create_on: Option>, ) -> Self { @@ -215,6 +221,8 @@ impl BlockMeta { vector_index_size, vector_index_location, virtual_block_meta, + block_stats_location, + block_stats_size, compression, create_on, } @@ -377,6 +385,8 @@ impl BlockMeta { vector_index_size: None, vector_index_location: None, virtual_block_meta: None, + block_stats_location: None, + block_stats_size: 0, create_on: None, ngram_filter_index_size: None, } @@ -405,6 +415,8 @@ impl BlockMeta { vector_index_size: None, vector_index_location: None, virtual_block_meta: None, + block_stats_location: None, + block_stats_size: 0, create_on: None, ngram_filter_index_size: None, } diff --git a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs index ccc22fdbdb600..a02b2761848f6 100644 --- a/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs +++ b/src/query/storages/common/table_meta/src/meta/v3/frozen/block_meta.rs @@ -66,6 +66,8 @@ impl From for crate::meta::BlockMeta { vector_index_size: None, vector_index_location: None, virtual_block_meta: None, + block_stats_location: None, + block_stats_size: 0, compression: value.compression.into(), create_on: None, } diff --git a/src/query/storages/common/table_meta/src/meta/v3/table_snapshot_statistics.rs b/src/query/storages/common/table_meta/src/meta/v3/table_snapshot_statistics.rs index 55841a886e03c..0689e4a687dfd 100644 --- a/src/query/storages/common/table_meta/src/meta/v3/table_snapshot_statistics.rs +++ b/src/query/storages/common/table_meta/src/meta/v3/table_snapshot_statistics.rs @@ -29,7 +29,7 @@ pub type MetaHLL = simple_hll::HyperLogLog<12>; #[derive(Serialize, Deserialize, Clone, Debug)] pub struct TableSnapshotStatistics { - /// format version of snapshot + /// format version of statistics pub format_version: FormatVersion, /// id of snapshot diff --git a/src/query/storages/common/table_meta/src/meta/v4/block_statistics.rs b/src/query/storages/common/table_meta/src/meta/v4/block_statistics.rs new file mode 100644 index 0000000000000..e453f47f12630 --- /dev/null +++ b/src/query/storages/common/table_meta/src/meta/v4/block_statistics.rs @@ -0,0 +1,84 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
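For reference, a minimal standalone sketch of the LocationsWithOption pattern introduced in segment_builder.rs above: three parallel buffers padded for absent locations, with a plain Vec<bool> standing in for MutableBitmap. Names and types here are illustrative only, not the crate's actual API.

// Standalone sketch (illustrative types only).
type Location = (String, u64);

struct OptionalLocations {
    locations: Vec<String>,
    versions: Vec<u64>,
    validity: Vec<bool>, // stand-in for MutableBitmap
}

impl OptionalLocations {
    fn with_capacity(cap: usize) -> Self {
        Self {
            locations: Vec::with_capacity(cap),
            versions: Vec::with_capacity(cap),
            validity: Vec::with_capacity(cap),
        }
    }

    // Absent locations are padded with defaults so all three buffers stay the same
    // length; the validity flags later become the null bitmap of the nullable tuple column.
    fn add_location(&mut self, location: Option<&Location>) {
        match location {
            Some((path, version)) => {
                self.locations.push(path.clone());
                self.versions.push(*version);
                self.validity.push(true);
            }
            None => {
                self.locations.push(String::new());
                self.versions.push(0);
                self.validity.push(false);
            }
        }
    }
}

fn main() {
    let mut locs = OptionalLocations::with_capacity(2);
    locs.add_location(Some(&("_bs/abc_v0.mpk".to_string(), 0)));
    locs.add_location(None);
    assert_eq!(locs.validity, vec![true, false]);
}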
+ +use std::collections::HashMap; +use std::io::Cursor; +use std::io::Read; + +use databend_common_exception::Result; +use databend_common_expression::ColumnId; +use databend_common_io::prelude::BinaryRead; +use serde::Deserialize; +use serde::Serialize; + +use crate::meta::format::compress; +use crate::meta::format::encode; +use crate::meta::format::read_and_deserialize; +use crate::meta::versions::Versioned; +use crate::meta::FormatVersion; +use crate::meta::MetaCompression; +use crate::meta::MetaEncoding; +use crate::meta::MetaHLL; + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct BlockStatistics { + pub format_version: FormatVersion, + + pub hll: HashMap, +} + +impl BlockStatistics { + pub fn new(hll: HashMap) -> Self { + Self { + format_version: BlockStatistics::VERSION, + hll, + } + } + + pub fn to_bytes(&self) -> Result> { + let encoding = MetaEncoding::MessagePack; + let compression = MetaCompression::default(); + + let data = encode(&encoding, &self)?; + let data_compress = compress(&compression, data)?; + + let data_size = self.format_version.to_le_bytes().len() + + 2 + + data_compress.len().to_le_bytes().len() + + data_compress.len(); + let mut buf = Vec::with_capacity(data_size); + + buf.extend_from_slice(&self.format_version.to_le_bytes()); + buf.push(encoding as u8); + buf.push(compression as u8); + buf.extend_from_slice(&data_compress.len().to_le_bytes()); + + buf.extend(data_compress); + + Ok(buf) + } + + pub fn from_slice(buffer: &[u8]) -> Result { + Self::from_read(Cursor::new(buffer)) + } + + pub fn from_read(mut r: impl Read) -> Result { + let version = r.read_scalar::()?; + assert_eq!(version, BlockStatistics::VERSION); + let encoding = MetaEncoding::try_from(r.read_scalar::()?)?; + let compression = MetaCompression::try_from(r.read_scalar::()?)?; + let statistics_size: u64 = r.read_scalar::()?; + read_and_deserialize(&mut r, statistics_size, &encoding, &compression) + } +} diff --git a/src/query/storages/common/table_meta/src/meta/v4/mod.rs b/src/query/storages/common/table_meta/src/meta/v4/mod.rs index 6a596b9ec8807..cf73718ea7c29 100644 --- a/src/query/storages/common/table_meta/src/meta/v4/mod.rs +++ b/src/query/storages/common/table_meta/src/meta/v4/mod.rs @@ -12,9 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
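For reference, a minimal standalone sketch of the byte framing that BlockStatistics::to_bytes writes and from_read expects: version, encoding, compression, payload length, payload. It assumes the length field round-trips as a little-endian u64 and uses an opaque byte payload in place of the compressed MessagePack-encoded HLL map.

// Standalone sketch of the on-disk framing (assumptions noted above).
use std::io::{Cursor, Read};

fn frame(format_version: u64, encoding: u8, compression: u8, payload: &[u8]) -> Vec<u8> {
    let mut buf = Vec::with_capacity(8 + 2 + 8 + payload.len());
    buf.extend_from_slice(&format_version.to_le_bytes());
    buf.push(encoding);
    buf.push(compression);
    buf.extend_from_slice(&(payload.len() as u64).to_le_bytes());
    buf.extend_from_slice(payload);
    buf
}

fn unframe(mut r: impl Read) -> std::io::Result<(u64, u8, u8, Vec<u8>)> {
    let mut u64_buf = [0u8; 8];
    let mut u8_buf = [0u8; 1];
    r.read_exact(&mut u64_buf)?;
    let version = u64::from_le_bytes(u64_buf);
    r.read_exact(&mut u8_buf)?;
    let encoding = u8_buf[0];
    r.read_exact(&mut u8_buf)?;
    let compression = u8_buf[0];
    r.read_exact(&mut u64_buf)?;
    let len = u64::from_le_bytes(u64_buf) as usize;
    let mut payload = vec![0u8; len];
    r.read_exact(&mut payload)?;
    Ok((version, encoding, compression, payload))
}

fn main() {
    let bytes = frame(0, 2, 1, b"hll-payload");
    let (version, _, _, payload) = unframe(Cursor::new(bytes)).unwrap();
    assert_eq!(version, 0);
    assert_eq!(payload, b"hll-payload".to_vec());
}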
+mod block_statistics; mod segment; mod snapshot; +pub use block_statistics::BlockStatistics; pub use segment::CompactSegmentInfo; pub use segment::RawBlockMeta; pub use segment::SegmentInfo; diff --git a/src/query/storages/common/table_meta/src/meta/versions.rs b/src/query/storages/common/table_meta/src/meta/versions.rs index 71493750481b3..670ce5c73805c 100644 --- a/src/query/storages/common/table_meta/src/meta/versions.rs +++ b/src/query/storages/common/table_meta/src/meta/versions.rs @@ -128,6 +128,24 @@ impl TableSnapshotStatisticsVersion { } } +impl Versioned<0> for v4::BlockStatistics {} + +pub enum BlockStatisticsVersion { + V0(PhantomData), +} + +impl BlockStatisticsVersion { + pub fn version(&self) -> u64 { + match self { + BlockStatisticsVersion::V0(a) => Self::ver(a), + } + } + + fn ver>(_v: &PhantomData) -> u64 { + V + } +} + /// Statically check that if T implements Versioned where U equals V #[inline] pub fn testify_version(t: PhantomData) -> PhantomData @@ -189,7 +207,21 @@ mod converters { PhantomData, ))), _ => Err(ErrorCode::Internal(format!( - "unknown table snapshot statistics version {value}, versions supported: 0" + "unknown table snapshot statistics version {value}, versions supported: 0, 2, 3" + ))), + } + } + } + + impl TryFrom for BlockStatisticsVersion { + type Error = ErrorCode; + fn try_from(value: u64) -> Result { + match value { + 0 => Ok(BlockStatisticsVersion::V0(testify_version::<_, 0>( + PhantomData, + ))), + _ => Err(ErrorCode::Internal(format!( + "unknown block statistics version {value}, versions supported: 0" ))), } } diff --git a/src/query/storages/common/table_meta/src/readers/versioned_reader.rs b/src/query/storages/common/table_meta/src/readers/versioned_reader.rs index a2ef7efec9558..a67c796d7eda1 100644 --- a/src/query/storages/common/table_meta/src/readers/versioned_reader.rs +++ b/src/query/storages/common/table_meta/src/readers/versioned_reader.rs @@ -17,6 +17,8 @@ use std::io::Read; use databend_common_exception::Result; use crate::meta::load_json; +use crate::meta::BlockStatistics; +use crate::meta::BlockStatisticsVersion; use crate::meta::TableSnapshotStatistics; use crate::meta::TableSnapshotStatisticsVersion; @@ -46,3 +48,15 @@ impl VersionedReader for TableSnapshotStatisticsVersion Ok(r) } } + +impl VersionedReader for BlockStatisticsVersion { + type TargetType = BlockStatistics; + + fn read(&self, reader: R) -> Result + where R: Read + Unpin + Send { + let r = match self { + BlockStatisticsVersion::V0(_) => BlockStatistics::from_read(reader)?, + }; + Ok(r) + } +} diff --git a/src/query/storages/common/table_meta/src/table/table_keys.rs b/src/query/storages/common/table_meta/src/table/table_keys.rs index 9a75763f47080..fddc120058b74 100644 --- a/src/query/storages/common/table_meta/src/table/table_keys.rs +++ b/src/query/storages/common/table_meta/src/table/table_keys.rs @@ -27,6 +27,7 @@ pub const OPT_KEY_TABLE_COMPRESSION: &str = "compression"; pub const OPT_KEY_COMMENT: &str = "comment"; pub const OPT_KEY_ENGINE: &str = "engine"; pub const OPT_KEY_BLOOM_INDEX_COLUMNS: &str = "bloom_index_columns"; +pub const OPT_KEY_APPROX_DISTINCT_COLUMNS: &str = "approx_distinct_columns"; pub const OPT_KEY_CHANGE_TRACKING: &str = "change_tracking"; pub const OPT_KEY_CHANGE_TRACKING_BEGIN_VER: &str = "begin_version"; diff --git a/src/query/storages/fuse/Cargo.toml b/src/query/storages/fuse/Cargo.toml index 811821ec8088d..0601548e6e4bf 100644 --- a/src/query/storages/fuse/Cargo.toml +++ b/src/query/storages/fuse/Cargo.toml @@ -50,6 +50,7 @@ backoff 
= { workspace = true, features = ["futures", "tokio"] } bytes = { workspace = true } chrono = { workspace = true } enum-as-inner = { workspace = true } +enum_dispatch = { workspace = true } fastrace = { workspace = true } futures = { workspace = true } futures-util = { workspace = true } @@ -61,6 +62,7 @@ match-template = { workspace = true } opendal = { workspace = true } parking_lot = { workspace = true } parquet = { workspace = true } +paste = { workspace = true } rand = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/src/query/storages/fuse/src/constants.rs b/src/query/storages/fuse/src/constants.rs index f16b4975939ad..bc95d3fe14a48 100644 --- a/src/query/storages/fuse/src/constants.rs +++ b/src/query/storages/fuse/src/constants.rs @@ -31,6 +31,7 @@ pub const FUSE_TBL_XOR_BLOOM_INDEX_PREFIX: &str = "_i_b_v2"; pub const FUSE_TBL_SEGMENT_PREFIX: &str = "_sg"; pub const FUSE_TBL_SNAPSHOT_PREFIX: &str = "_ss"; pub const FUSE_TBL_SNAPSHOT_STATISTICS_PREFIX: &str = "_ts"; +pub const FUSE_TBL_BLOCK_STATISTICS_PREFIX: &str = "_bs"; pub const FUSE_TBL_LAST_SNAPSHOT_HINT: &str = "last_snapshot_location_hint"; pub const FUSE_TBL_LAST_SNAPSHOT_HINT_V2: &str = "last_snapshot_location_hint_v2"; pub const FUSE_TBL_VIRTUAL_BLOCK_PREFIX: &str = "_vb"; diff --git a/src/query/storages/fuse/src/fuse_table.rs b/src/query/storages/fuse/src/fuse_table.rs index f832b97be638c..51a096d43c0d4 100644 --- a/src/query/storages/fuse/src/fuse_table.rs +++ b/src/query/storages/fuse/src/fuse_table.rs @@ -69,6 +69,7 @@ use databend_common_pipeline_core::Pipeline; use databend_common_sql::binder::STREAM_COLUMN_FACTORY; use databend_common_sql::parse_cluster_keys; use databend_common_sql::plans::TruncateMode; +use databend_common_sql::ApproxDistinctColumns; use databend_common_sql::BloomIndexColumns; use databend_common_storage::init_operator; use databend_common_storage::DataOperator; @@ -87,6 +88,7 @@ use databend_storages_common_table_meta::meta::Versioned; use databend_storages_common_table_meta::table::ChangeType; use databend_storages_common_table_meta::table::ClusterType; use databend_storages_common_table_meta::table::TableCompression; +use databend_storages_common_table_meta::table::OPT_KEY_APPROX_DISTINCT_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_BLOOM_INDEX_COLUMNS; use databend_storages_common_table_meta::table::OPT_KEY_CHANGE_TRACKING; use databend_storages_common_table_meta::table::OPT_KEY_CLUSTER_TYPE; @@ -141,6 +143,7 @@ pub struct FuseTable { pub(crate) segment_format: FuseSegmentFormat, pub(crate) table_compression: TableCompression, pub(crate) bloom_index_cols: BloomIndexColumns, + pub(crate) approx_distinct_cols: ApproxDistinctColumns, pub(crate) operator: Operator, pub(crate) data_metrics: Arc, @@ -234,6 +237,12 @@ impl FuseTable { .and_then(|s| s.parse::().ok()) .unwrap_or(BloomIndexColumns::All); + let approx_distinct_cols = table_info + .options() + .get(OPT_KEY_APPROX_DISTINCT_COLUMNS) + .and_then(|s| s.parse::().ok()) + .unwrap_or(ApproxDistinctColumns::All); + let meta_location_generator = TableMetaLocationGenerator::new(storage_prefix); if !table_info.meta.part_prefix.is_empty() { return Err(ErrorCode::StorageOther( @@ -246,6 +255,7 @@ impl FuseTable { meta_location_generator, cluster_key_meta, bloom_index_cols, + approx_distinct_cols, operator, data_metrics, storage_format: FuseStorageFormat::from_str(storage_format.as_str())?, @@ -460,6 +470,10 @@ impl FuseTable { self.bloom_index_cols.clone() } + pub fn 
approx_distinct_cols(&self) -> ApproxDistinctColumns { + self.approx_distinct_cols.clone() + } + // Check if table is attached. pub fn is_table_attached(table_meta_options: &BTreeMap) -> bool { table_meta_options @@ -745,6 +759,14 @@ impl FuseTable { ) }) } + + pub fn enable_stream_block_write(&self, ctx: Arc) -> Result { + Ok(ctx.get_settings().get_enable_block_stream_write()? + && matches!(self.storage_format, FuseStorageFormat::Parquet) + && self + .cluster_type() + .is_none_or(|v| matches!(v, ClusterType::Hilbert))) + } } #[async_trait::async_trait] diff --git a/src/query/storages/fuse/src/io/locations.rs b/src/query/storages/fuse/src/io/locations.rs index 13272ee77706c..95ce57493f5b0 100644 --- a/src/query/storages/fuse/src/io/locations.rs +++ b/src/query/storages/fuse/src/io/locations.rs @@ -18,6 +18,7 @@ use databend_common_exception::Result; use databend_common_expression::DataBlock; use databend_storages_common_table_meta::meta::trim_object_prefix; use databend_storages_common_table_meta::meta::uuid_from_date_time; +use databend_storages_common_table_meta::meta::BlockStatistics; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::SegmentInfo; use databend_storages_common_table_meta::meta::SnapshotVersion; @@ -36,6 +37,7 @@ use crate::constants::FUSE_TBL_VIRTUAL_BLOCK_PREFIX; use crate::index::filters::BlockFilter; use crate::index::InvertedIndexFile; use crate::FUSE_TBL_AGG_INDEX_PREFIX; +use crate::FUSE_TBL_BLOCK_STATISTICS_PREFIX; use crate::FUSE_TBL_INVERTED_INDEX_PREFIX; use crate::FUSE_TBL_LAST_SNAPSHOT_HINT_V2; use crate::FUSE_TBL_VECTOR_INDEX_PREFIX; @@ -65,6 +67,7 @@ pub struct TableMetaLocationGenerator { agg_index_location_prefix: String, inverted_index_location_prefix: String, vector_index_location_prefix: String, + block_statistics_location_prefix: String, } impl TableMetaLocationGenerator { @@ -78,6 +81,8 @@ impl TableMetaLocationGenerator { let inverted_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_INVERTED_INDEX_PREFIX); let vector_index_location_prefix = format!("{}/{}/", &prefix, FUSE_TBL_VECTOR_INDEX_PREFIX); + let block_statistics_location_prefix = + format!("{}/{}/", &prefix, FUSE_TBL_BLOCK_STATISTICS_PREFIX); Self { prefix, block_location_prefix, @@ -87,6 +92,7 @@ impl TableMetaLocationGenerator { agg_index_location_prefix, inverted_index_location_prefix, vector_index_location_prefix, + block_statistics_location_prefix, } } @@ -114,6 +120,10 @@ impl TableMetaLocationGenerator { &self.snapshot_location_prefix } + pub fn block_statistics_location_prefix(&self) -> &str { + &self.block_statistics_location_prefix + } + pub fn gen_block_location( &self, table_meta_timestamps: TableMetaTimestamps, @@ -142,6 +152,18 @@ impl TableMetaLocationGenerator { ) } + pub fn block_stats_location(&self, block_id: &Uuid) -> Location { + ( + format!( + "{}{}_v{}.mpk", + self.block_statistics_location_prefix(), + block_id.as_simple(), + BlockStatistics::VERSION, + ), + BlockStatistics::VERSION, + ) + } + pub fn block_vector_index_location(&self) -> Location { let uuid = Uuid::now_v7(); ( @@ -296,6 +318,21 @@ impl TableMetaLocationGenerator { BlockFilter::VERSION, ) } + + pub fn gen_block_stats_location_from_block_location(loc: &str) -> String { + let splits = loc.split('/').collect::>(); + let len = splits.len(); + let prefix = splits[..len - 2].join("/"); + let block_name = trim_object_prefix(splits[len - 1]); + let id: String = block_name.chars().take(32).collect(); + format!( + "{}/{}/{}_v{}.mpk", + prefix, + 
FUSE_TBL_BLOCK_STATISTICS_PREFIX, + id, + BlockStatistics::VERSION, + ) + } } trait SnapshotLocationCreator { diff --git a/src/query/storages/fuse/src/io/mod.rs b/src/query/storages/fuse/src/io/mod.rs index 94d22a40bbb90..24fc70297a0cd 100644 --- a/src/query/storages/fuse/src/io/mod.rs +++ b/src/query/storages/fuse/src/io/mod.rs @@ -43,6 +43,7 @@ pub use write::serialize_block; pub use write::write_data; pub use write::BlockBuilder; pub use write::BlockSerialization; +pub use write::BlockStatisticsState; pub use write::BlockWriter; pub use write::BloomIndexRebuilder; pub use write::BloomIndexState; diff --git a/src/query/storages/fuse/src/io/write/block_statistics_writer.rs b/src/query/storages/fuse/src/io/write/block_statistics_writer.rs new file mode 100644 index 0000000000000..ad54bbb972efa --- /dev/null +++ b/src/query/storages/fuse/src/io/write/block_statistics_writer.rs @@ -0,0 +1,127 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::collections::HashMap; + +use databend_common_exception::Result; +use databend_common_expression::BlockEntry; +use databend_common_expression::ColumnId; +use databend_common_expression::DataBlock; +use databend_common_expression::FieldIndex; +use databend_common_expression::TableField; +use databend_storages_common_table_meta::meta::BlockStatistics; +use databend_storages_common_table_meta::meta::Location; + +use crate::io::write::stream::create_column_ndv_estimator; +use crate::io::write::stream::ColumnNDVEstimator; +use crate::io::write::stream::ColumnNDVEstimatorOps; + +#[derive(Debug)] +pub struct BlockStatisticsState { + pub data: Vec, + pub location: Location, + pub column_distinct_count: HashMap, +} + +impl BlockStatisticsState { + pub fn from_data_block( + location: Location, + block: &DataBlock, + ndv_columns_map: &BTreeMap, + ) -> Result> { + let mut builder = BlockStatsBuilder::new(ndv_columns_map); + builder.add_block(block)?; + builder.finalize(location) + } + + pub fn block_stats_size(&self) -> u64 { + self.data.len() as u64 + } +} + +pub struct BlockStatsBuilder { + builders: Vec, +} + +pub struct ColumnNDVBuilder { + index: FieldIndex, + field: TableField, + builder: ColumnNDVEstimator, +} + +impl BlockStatsBuilder { + pub fn new(ndv_columns_map: &BTreeMap) -> BlockStatsBuilder { + let mut builders = Vec::with_capacity(ndv_columns_map.len()); + for (index, field) in ndv_columns_map { + let builder = create_column_ndv_estimator(&field.data_type().into()); + builders.push(ColumnNDVBuilder { + index: *index, + field: field.clone(), + builder, + }); + } + BlockStatsBuilder { builders } + } + + pub fn add_block(&mut self, block: &DataBlock) -> Result<()> { + let mut keys_to_remove = vec![]; + for (index, column_builder) in self.builders.iter_mut().enumerate() { + let entry = block.get_by_offset(column_builder.index); + match entry { + BlockEntry::Const(s, ..) 
=> { + column_builder.builder.update_scalar(&s.as_ref()); + } + BlockEntry::Column(col) => { + if col.check_large_string() { + keys_to_remove.push(index); + continue; + } + column_builder.builder.update_column(col); + } + } + } + + // reverse sorting. + keys_to_remove.sort_by(|a, b| b.cmp(a)); + for k in keys_to_remove { + self.builders.remove(k); + } + Ok(()) + } + + pub fn finalize(self, location: Location) -> Result> { + if self.builders.is_empty() { + return Ok(None); + } + + let mut hlls = HashMap::with_capacity(self.builders.len()); + let mut column_distinct_count = HashMap::with_capacity(self.builders.len()); + for column_builder in self.builders { + let column_id = column_builder.field.column_id(); + let distinct_count = column_builder.builder.finalize(); + let hll = column_builder.builder.hll(); + hlls.insert(column_id, hll); + column_distinct_count.insert(column_id, distinct_count); + } + + let block_stats = BlockStatistics::new(hlls); + let data = block_stats.to_bytes()?; + Ok(Some(BlockStatisticsState { + data, + location, + column_distinct_count, + })) + } +} diff --git a/src/query/storages/fuse/src/io/write/block_writer.rs b/src/query/storages/fuse/src/io/write/block_writer.rs index 1f1b6a5579e25..342c455204b36 100644 --- a/src/query/storages/fuse/src/io/write/block_writer.rs +++ b/src/query/storages/fuse/src/io/write/block_writer.rs @@ -20,6 +20,8 @@ use std::time::Instant; use chrono::Utc; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; +use databend_common_expression::local_block_meta_serde; +use databend_common_expression::BlockMetaInfo; use databend_common_expression::Column; use databend_common_expression::ColumnId; use databend_common_expression::DataBlock; @@ -32,6 +34,9 @@ use databend_common_metrics::storage::metrics_inc_block_index_write_nums; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_bytes; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_inverted_index_write_nums; +use databend_common_metrics::storage::metrics_inc_block_stats_write_bytes; +use databend_common_metrics::storage::metrics_inc_block_stats_write_milliseconds; +use databend_common_metrics::storage::metrics_inc_block_stats_write_nums; use databend_common_metrics::storage::metrics_inc_block_vector_index_write_bytes; use databend_common_metrics::storage::metrics_inc_block_vector_index_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_vector_index_write_nums; @@ -53,6 +58,7 @@ use opendal::Operator; use crate::io::write::virtual_column_builder::VirtualColumnBuilder; use crate::io::write::virtual_column_builder::VirtualColumnState; +use crate::io::write::BlockStatisticsState; use crate::io::write::InvertedIndexBuilder; use crate::io::write::InvertedIndexState; use crate::io::write::VectorIndexBuilder; @@ -129,6 +135,7 @@ pub async fn write_data(data: Vec, data_accessor: &Operator, location: &str) Ok(()) } +#[derive(Debug)] pub struct BlockSerialization { pub block_raw_data: Vec, pub block_meta: BlockMeta, @@ -136,8 +143,14 @@ pub struct BlockSerialization { pub inverted_index_states: Vec, pub virtual_column_state: Option, pub vector_index_state: Option, + pub block_stats_state: Option, } +local_block_meta_serde!(BlockSerialization); + +#[typetag::serde(name = "block_serialization_meta")] +impl BlockMetaInfo for BlockSerialization {} + #[derive(Clone)] pub struct BlockBuilder { pub ctx: Arc, @@ -146,6 
+159,7 @@ pub struct BlockBuilder { pub write_settings: WriteSettings, pub cluster_stats_gen: ClusterStatsGenerator, pub bloom_columns_map: BTreeMap, + pub ndv_columns_map: BTreeMap, pub ngram_args: Vec, pub inverted_index_builders: Vec, pub virtual_column_builder: Option, @@ -170,9 +184,22 @@ impl BlockBuilder { self.bloom_columns_map.clone(), &self.ngram_args, )?; - let column_distinct_count = bloom_index_state + let mut column_distinct_count = bloom_index_state .as_ref() - .map(|i| i.column_distinct_count.clone()); + .map(|i| i.column_distinct_count.clone()) + .unwrap_or_default(); + + let block_stats_location = self.meta_locations.block_stats_location(&block_id); + let block_stats_state = BlockStatisticsState::from_data_block( + block_stats_location, + &data_block, + &self.ndv_columns_map, + )?; + if let Some(block_stats_state) = &block_stats_state { + for (key, val) in &block_stats_state.column_distinct_count { + column_distinct_count.entry(*key).or_insert(*val); + } + } let mut inverted_index_states = Vec::with_capacity(self.inverted_index_builders.len()); for inverted_index_builder in &self.inverted_index_builders { @@ -206,8 +233,11 @@ impl BlockBuilder { }; let row_count = data_block.num_rows() as u64; - let col_stats = - gen_columns_statistics(&data_block, column_distinct_count, &self.source_schema)?; + let col_stats = gen_columns_statistics( + &data_block, + Some(column_distinct_count), + &self.source_schema, + )?; let mut buffer = Vec::with_capacity(DEFAULT_BLOCK_BUFFER_SIZE); let block_size = data_block.estimate_block_size() as u64; @@ -246,6 +276,10 @@ impl BlockBuilder { compression: self.write_settings.table_compression.into(), inverted_index_size, virtual_block_meta: None, + block_stats_location: block_stats_state.as_ref().map(|v| v.location.clone()), + block_stats_size: block_stats_state + .as_ref() + .map_or(0, |v| v.block_stats_size()), create_on: Some(Utc::now()), }; @@ -256,6 +290,7 @@ impl BlockBuilder { inverted_index_states, virtual_column_state, vector_index_state, + block_stats_state, }; Ok(serialized) } @@ -290,6 +325,7 @@ impl BlockWriter { Self::write_down_vector_index_state(dal, serialized.vector_index_state).await?; Self::write_down_inverted_index_state(dal, serialized.inverted_index_states).await?; Self::write_down_virtual_column_state(dal, serialized.virtual_column_state).await?; + Self::write_down_block_stats_state(dal, serialized.block_stats_state).await?; Ok(extended_block_meta) } @@ -391,4 +427,22 @@ impl BlockWriter { } Ok(()) } + + pub async fn write_down_block_stats_state( + dal: &Operator, + block_stats_state: Option, + ) -> Result<()> { + if let Some(block_stats_state) = block_stats_state { + let start = Instant::now(); + + let stats_size = block_stats_state.block_stats_size(); + let location = &block_stats_state.location.0; + write_data(block_stats_state.data, dal, location).await?; + + metrics_inc_block_stats_write_nums(1); + metrics_inc_block_stats_write_bytes(stats_size); + metrics_inc_block_stats_write_milliseconds(start.elapsed().as_millis() as u64); + } + Ok(()) + } } diff --git a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs index ec49070a6f08f..738c33ac2f2c3 100644 --- a/src/query/storages/fuse/src/io/write/bloom_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/bloom_index_writer.rs @@ -40,6 +40,7 @@ use opendal::Operator; use crate::io::BlockReader; use crate::FuseStorageFormat; +#[derive(Debug)] pub struct BloomIndexState { pub(crate) data: Vec, 
pub(crate) size: u64, diff --git a/src/query/storages/fuse/src/io/write/inverted_index_writer.rs b/src/query/storages/fuse/src/io/write/inverted_index_writer.rs index 74377a86108cb..8cf0b5f2355f0 100644 --- a/src/query/storages/fuse/src/io/write/inverted_index_writer.rs +++ b/src/query/storages/fuse/src/io/write/inverted_index_writer.rs @@ -121,6 +121,7 @@ pub fn create_inverted_index_builders(table_meta: &TableMeta) -> Vec, pub(crate) size: u64, diff --git a/src/query/storages/fuse/src/io/write/mod.rs b/src/query/storages/fuse/src/io/write/mod.rs index e7f3bfbe82c2f..544d32d988988 100644 --- a/src/query/storages/fuse/src/io/write/mod.rs +++ b/src/query/storages/fuse/src/io/write/mod.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod block_statistics_writer; mod block_writer; mod bloom_index_writer; mod inverted_index_writer; @@ -21,6 +22,8 @@ mod vector_index_writer; mod virtual_column_builder; mod write_settings; +pub use block_statistics_writer::BlockStatisticsState; +pub use block_statistics_writer::BlockStatsBuilder; pub use block_writer::serialize_block; pub use block_writer::write_data; pub use block_writer::BlockBuilder; diff --git a/src/query/storages/fuse/src/io/write/stream/block_builder.rs b/src/query/storages/fuse/src/io/write/stream/block_builder.rs index 73ee3c1ded29e..66114228a0082 100644 --- a/src/query/storages/fuse/src/io/write/stream/block_builder.rs +++ b/src/query/storages/fuse/src/io/write/stream/block_builder.rs @@ -32,10 +32,11 @@ use databend_common_expression::FieldIndex; use databend_common_expression::TableField; use databend_common_expression::TableSchema; use databend_common_expression::TableSchemaRef; -use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COLUMN_ID; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; use databend_common_meta_app::schema::TableIndex; use databend_common_native::write::NativeWriter; +use databend_common_native::write::WriteOptions; +use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_index::BloomIndex; use databend_storages_common_index::BloomIndexBuilder; use databend_storages_common_index::Index; @@ -53,7 +54,8 @@ use parquet::file::properties::WriterProperties; use crate::io::create_inverted_index_builders; use crate::io::write::stream::cluster_statistics::ClusterStatisticsBuilder; use crate::io::write::stream::cluster_statistics::ClusterStatisticsState; -use crate::io::write::stream::column_statistics::ColumnStatisticsState; +use crate::io::write::stream::ColumnStatisticsState; +use crate::io::write::BlockStatsBuilder; use crate::io::write::InvertedIndexState; use crate::io::BlockSerialization; use crate::io::BloomIndexState; @@ -69,6 +71,7 @@ use crate::FuseTable; pub enum BlockWriterImpl { Arrow(ArrowWriter>), + // Native format doesnot support stream write. 
Native(NativeWriter>), } @@ -153,6 +156,7 @@ pub struct StreamBlockBuilder { bloom_index_builder: BloomIndexBuilder, virtual_column_builder: Option, vector_index_builder: Option, + block_stats_builder: BlockStatsBuilder, cluster_stats_state: ClusterStatisticsState, column_stats_state: ColumnStatisticsState, @@ -191,7 +195,7 @@ impl StreamBlockBuilder { let writer = NativeWriter::new( buffer, properties.source_schema.as_ref().clone(), - databend_common_native::write::WriteOptions { + WriteOptions { default_compression: properties.write_settings.table_compression.into(), max_page_size: Some(properties.write_settings.max_page_size), default_compress_ratio, @@ -216,27 +220,13 @@ impl StreamBlockBuilder { &properties.ngram_args, )?; - let virtual_column_builder = if properties - .ctx - .get_settings() - .get_enable_refresh_virtual_column_after_write() - .unwrap_or_default() - && properties.support_virtual_columns - { - VirtualColumnBuilder::try_create( - properties.ctx.clone(), - properties.source_schema.clone(), - ) - .ok() - } else { - None - }; + let virtual_column_builder = properties.virtual_column_builder.clone(); let vector_index_builder = VectorIndexBuilder::try_create( properties.ctx.clone(), &properties.table_indexes, properties.source_schema.clone(), ); - + let block_stats_builder = BlockStatsBuilder::new(&properties.ndv_columns_map); let cluster_stats_state = ClusterStatisticsState::new(properties.cluster_stats_builder.clone()); let column_stats_state = @@ -249,6 +239,7 @@ impl StreamBlockBuilder { bloom_index_builder, virtual_column_builder, vector_index_builder, + block_stats_builder, row_count: 0, block_size: 0, column_stats_state, @@ -263,7 +254,7 @@ impl StreamBlockBuilder { pub fn need_flush(&self) -> bool { let file_size = self.block_writer.compressed_size(); self.row_count >= self.properties.block_thresholds.min_rows_per_block - || self.block_size >= self.properties.block_thresholds.max_bytes_per_block + || self.block_size >= self.properties.block_thresholds.min_bytes_per_block * 2 || (file_size >= self.properties.block_thresholds.min_compressed_per_block && self.block_size >= self.properties.block_thresholds.min_bytes_per_block) } @@ -281,6 +272,7 @@ impl StreamBlockBuilder { self.column_stats_state .add_block(&self.properties.source_schema, &block)?; self.bloom_index_builder.add_block(&block)?; + self.block_stats_builder.add_block(&block)?; for writer in self.inverted_index_writers.iter_mut() { writer.add_block(&self.properties.source_schema, &block)?; } @@ -316,10 +308,20 @@ impl StreamBlockBuilder { } else { None }; - let column_distinct_count = bloom_index_state + let mut column_distinct_count = bloom_index_state .as_ref() .map(|i| i.column_distinct_count.clone()) .unwrap_or_default(); + let block_stats_location = self + .properties + .meta_locations + .block_stats_location(&block_id); + let block_stats_state = self.block_stats_builder.finalize(block_stats_location)?; + if let Some(state) = &block_stats_state { + for (key, val) in &state.column_distinct_count { + column_distinct_count.entry(*key).or_insert(*val); + } + } let col_stats = self.column_stats_state.finalize(column_distinct_count)?; let mut inverted_index_states = Vec::with_capacity(self.inverted_index_writers.len()); @@ -386,8 +388,15 @@ impl StreamBlockBuilder { vector_index_size, vector_index_location, create_on: Some(Utc::now()), - ngram_filter_index_size: None, + ngram_filter_index_size: bloom_index_state + .as_ref() + .map(|v| v.ngram_size) + .unwrap_or_default(), virtual_block_meta: None, + 
block_stats_location: block_stats_state.as_ref().map(|v| v.location.clone()), + block_stats_size: block_stats_state + .as_ref() + .map_or(0, |v| v.block_stats_size()), }; let serialized = BlockSerialization { block_raw_data, @@ -396,6 +405,7 @@ impl StreamBlockBuilder { inverted_index_states, virtual_column_state, vector_index_state, + block_stats_state, }; Ok(serialized) } @@ -410,13 +420,14 @@ pub struct StreamBlockProperties { source_schema: TableSchemaRef, cluster_stats_builder: Arc, - stats_columns: Vec, - distinct_columns: Vec, + stats_columns: Vec<(ColumnId, DataType)>, + distinct_columns: Vec<(ColumnId, DataType)>, bloom_columns_map: BTreeMap, + ndv_columns_map: BTreeMap, ngram_args: Vec, inverted_index_builders: Vec, + virtual_column_builder: Option, table_meta_timestamps: TableMetaTimestamps, - support_virtual_columns: bool, table_indexes: BTreeMap, } @@ -424,16 +435,23 @@ impl StreamBlockProperties { pub fn try_create( ctx: Arc, table: &FuseTable, + kind: MutationKind, table_meta_timestamps: TableMetaTimestamps, ) -> Result> { // remove virtual computed fields. - let fields = table + let mut fields = table .schema() .fields() .iter() .filter(|f| !matches!(f.computed_expr(), Some(ComputedExpr::Virtual(_)))) .cloned() .collect::>(); + if !matches!(kind, MutationKind::Insert | MutationKind::Replace) { + // add stream fields. + for stream_column in table.stream_columns().iter() { + fields.push(stream_column.table_field()); + } + } let source_schema = Arc::new(TableSchema { fields, @@ -449,12 +467,26 @@ impl StreamBlockProperties { &table.table_info.meta, &table.table_info.meta.schema, )?; - let bloom_column_ids = bloom_columns_map + let ndv_columns_map = table + .approx_distinct_cols + .distinct_column_fields(source_schema.clone(), RangeIndex::supported_table_type)?; + let bloom_ndv_columns = bloom_columns_map .values() + .chain(ndv_columns_map.values()) .map(|v| v.column_id()) .collect::>(); let inverted_index_builders = create_inverted_index_builders(&table.table_info.meta); + let virtual_column_builder = if ctx + .get_settings() + .get_enable_refresh_virtual_column_after_write() + .unwrap_or_default() + && table.support_virtual_columns() + { + VirtualColumnBuilder::try_create(ctx.clone(), source_schema.clone()).ok() + } else { + None + }; let cluster_stats_builder = ClusterStatisticsBuilder::try_create(table, ctx.clone(), &source_schema)?; @@ -464,16 +496,14 @@ impl StreamBlockProperties { let leaf_fields = source_schema.leaf_fields(); for field in leaf_fields.iter() { let column_id = field.column_id(); - if RangeIndex::supported_type(&DataType::from(field.data_type())) - && column_id != ORIGIN_BLOCK_ROW_NUM_COLUMN_ID - { - stats_columns.push(column_id); - if !bloom_column_ids.contains(&column_id) { - distinct_columns.push(column_id); + let data_type = DataType::from(field.data_type()); + if RangeIndex::supported_type(&data_type) { + stats_columns.push((column_id, data_type.clone())); + if !bloom_ndv_columns.contains(&column_id) { + distinct_columns.push((column_id, data_type)); } } } - let support_virtual_columns = table.support_virtual_columns(); let table_indexes = table.table_info.meta.indexes.clone(); Ok(Arc::new(StreamBlockProperties { ctx, @@ -482,14 +512,15 @@ impl StreamBlockProperties { source_schema, write_settings, cluster_stats_builder, + virtual_column_builder, stats_columns, distinct_columns, bloom_columns_map, ngram_args, inverted_index_builders, table_meta_timestamps, - support_virtual_columns, table_indexes, + ndv_columns_map, })) } } diff --git 
a/src/query/storages/fuse/src/io/write/stream/column_ndv_estimator.rs b/src/query/storages/fuse/src/io/write/stream/column_ndv_estimator.rs new file mode 100644 index 0000000000000..23d2f2475546a --- /dev/null +++ b/src/query/storages/fuse/src/io/write/stream/column_ndv_estimator.rs @@ -0,0 +1,187 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::hash::Hash; +use std::marker::PhantomData; + +use databend_common_expression::types::boolean::TrueIdxIter; +use databend_common_expression::types::DataType; +use databend_common_expression::types::DateType; +use databend_common_expression::types::Decimal128Type; +use databend_common_expression::types::Decimal256Type; +use databend_common_expression::types::Decimal64Type; +use databend_common_expression::types::Float32Type; +use databend_common_expression::types::Float64Type; +use databend_common_expression::types::Int16Type; +use databend_common_expression::types::Int32Type; +use databend_common_expression::types::Int64Type; +use databend_common_expression::types::Int8Type; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::TimestampType; +use databend_common_expression::types::UInt16Type; +use databend_common_expression::types::UInt32Type; +use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::UInt8Type; +use databend_common_expression::types::ValueType; +use databend_common_expression::with_number_type; +use databend_common_expression::Column; +use databend_common_expression::ScalarRef; +use databend_common_expression::SELECTIVITY_THRESHOLD; +use databend_storages_common_table_meta::meta::MetaHLL; +use enum_dispatch::enum_dispatch; + +#[enum_dispatch] +pub trait ColumnNDVEstimatorOps: Send + Sync { + fn update_column(&mut self, column: &Column); + fn update_scalar(&mut self, scalar: &ScalarRef); + fn finalize(&self) -> usize; + fn hll(self) -> MetaHLL; +} + +#[enum_dispatch(ColumnNDVEstimatorOps)] +pub enum ColumnNDVEstimator { + Int8(ColumnNDVEstimatorImpl), + Int16(ColumnNDVEstimatorImpl), + Int32(ColumnNDVEstimatorImpl), + Int64(ColumnNDVEstimatorImpl), + UInt8(ColumnNDVEstimatorImpl), + UInt16(ColumnNDVEstimatorImpl), + UInt32(ColumnNDVEstimatorImpl), + UInt64(ColumnNDVEstimatorImpl), + Float32(ColumnNDVEstimatorImpl), + Float64(ColumnNDVEstimatorImpl), + String(ColumnNDVEstimatorImpl), + Date(ColumnNDVEstimatorImpl), + Timestamp(ColumnNDVEstimatorImpl), + Decimal64(ColumnNDVEstimatorImpl), + Decimal128(ColumnNDVEstimatorImpl), + Decimal256(ColumnNDVEstimatorImpl), +} + +pub fn create_column_ndv_estimator(data_type: &DataType) -> ColumnNDVEstimator { + macro_rules! match_number_type_create { + ($inner_type:expr) => {{ + with_number_type!(|NUM_TYPE| match $inner_type { + NumberDataType::NUM_TYPE => { + paste::paste! 
{ + ColumnNDVEstimator::NUM_TYPE(ColumnNDVEstimatorImpl::<[]>::new()) + } + } + }) + }}; + } + + let inner_type = data_type.remove_nullable(); + match inner_type { + DataType::Number(num_type) => { + match_number_type_create!(num_type) + } + DataType::String => ColumnNDVEstimator::String(ColumnNDVEstimatorImpl::::new()), + DataType::Date => ColumnNDVEstimator::Date(ColumnNDVEstimatorImpl::::new()), + DataType::Timestamp => { + ColumnNDVEstimator::Timestamp(ColumnNDVEstimatorImpl::::new()) + } + DataType::Decimal(size) => { + if size.can_carried_by_64() { + ColumnNDVEstimator::Decimal64(ColumnNDVEstimatorImpl::::new()) + } else if size.can_carried_by_128() { + ColumnNDVEstimator::Decimal128(ColumnNDVEstimatorImpl::::new()) + } else { + ColumnNDVEstimator::Decimal256(ColumnNDVEstimatorImpl::::new()) + } + } + _ => unreachable!("Unsupported data type: {:?}", data_type), + } +} + +pub struct ColumnNDVEstimatorImpl +where + T: ValueType + Send + Sync, + for<'a> T::ScalarRef<'a>: Hash, +{ + hll: MetaHLL, + _phantom: PhantomData, +} + +impl ColumnNDVEstimatorImpl +where + T: ValueType + Send + Sync, + for<'a> T::ScalarRef<'a>: Hash, +{ + pub fn new() -> Self { + Self { + hll: MetaHLL::new(), + _phantom: Default::default(), + } + } +} + +impl ColumnNDVEstimatorOps for ColumnNDVEstimatorImpl +where + T: ValueType + Send + Sync, + for<'a> T::ScalarRef<'a>: Hash, +{ + fn update_column(&mut self, column: &Column) { + let (column, validity) = match column { + Column::Nullable(box inner) => { + let validity = if inner.validity.null_count() == 0 { + None + } else { + Some(&inner.validity) + }; + (&inner.column, validity) + } + Column::Null { .. } => return, + column => (column, None), + }; + + let column = T::try_downcast_column(column).unwrap(); + if let Some(v) = validity { + if v.true_count() as f64 / v.len() as f64 >= SELECTIVITY_THRESHOLD { + for (data, valid) in T::iter_column(&column).zip(v.iter()) { + if valid { + self.hll.add_object(&data); + } + } + } else { + TrueIdxIter::new(v.len(), Some(v)).for_each(|idx| { + let val = unsafe { T::index_column_unchecked(&column, idx) }; + self.hll.add_object(&val); + }) + } + } else { + for value in T::iter_column(&column) { + self.hll.add_object(&value); + } + } + } + + fn update_scalar(&mut self, scalar: &ScalarRef) { + if matches!(scalar, ScalarRef::Null) { + return; + } + + let val = T::try_downcast_scalar(scalar).unwrap(); + self.hll.add_object(&val); + } + + fn finalize(&self) -> usize { + self.hll.count() + } + + fn hll(self) -> MetaHLL { + self.hll + } +} diff --git a/src/query/storages/fuse/src/io/write/stream/column_statistics.rs b/src/query/storages/fuse/src/io/write/stream/column_statistics.rs deleted file mode 100644 index 402504e3a207e..0000000000000 --- a/src/query/storages/fuse/src/io/write/stream/column_statistics.rs +++ /dev/null @@ -1,253 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
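For reference, a minimal standalone sketch of the per-column NDV estimation flow implemented by ColumnNDVEstimator above, with a HashSet standing in for MetaHLL (the real estimator trades exactness for bounded memory) and plain slices standing in for columns and validity bitmaps. Names here are illustrative only.

// Standalone sketch (HashSet stands in for the HLL sketch).
use std::collections::HashSet;
use std::hash::Hash;

struct NdvEstimator<T: Eq + Hash> {
    seen: HashSet<T>,
}

impl<T: Eq + Hash + Clone> NdvEstimator<T> {
    fn new() -> Self {
        Self { seen: HashSet::new() }
    }

    // Equivalent of update_column: fold a (values, validity) column into the sketch,
    // skipping rows marked invalid (nulls).
    fn update_column(&mut self, values: &[T], validity: Option<&[bool]>) {
        match validity {
            Some(v) => {
                for (val, ok) in values.iter().zip(v) {
                    if *ok {
                        self.seen.insert(val.clone());
                    }
                }
            }
            None => self.seen.extend(values.iter().cloned()),
        }
    }

    // Equivalent of finalize: report the estimated number of distinct values.
    fn finalize(&self) -> usize {
        self.seen.len()
    }
}

fn main() {
    let mut ndv = NdvEstimator::new();
    ndv.update_column(&[1i64, 2, 2, 3], Some(&[true, true, true, false][..]));
    assert_eq!(ndv.finalize(), 2);
}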
- -use std::collections::HashMap; - -use databend_common_exception::Result; -use databend_common_expression::types::AccessType; -use databend_common_expression::types::DataType; -use databend_common_expression::types::DateType; -use databend_common_expression::types::DecimalColumn; -use databend_common_expression::types::DecimalScalar; -use databend_common_expression::types::NumberDataType; -use databend_common_expression::types::NumberType; -use databend_common_expression::types::StringType; -use databend_common_expression::types::TimestampType; -use databend_common_expression::with_number_mapped_type; -use databend_common_expression::Column; -use databend_common_expression::ColumnId; -use databend_common_expression::DataBlock; -use databend_common_expression::Scalar; -use databend_common_expression::ScalarRef; -use databend_common_expression::TableSchemaRef; -use databend_common_expression::Value; -use databend_common_functions::aggregates::eval_aggr; -use databend_storages_common_table_meta::meta::ColumnDistinctHLL; -use databend_storages_common_table_meta::meta::ColumnStatistics; -use databend_storages_common_table_meta::meta::StatisticsOfColumns; - -use crate::statistics::reducers::reduce_column_statistics; -use crate::statistics::traverse_values_dfs; -use crate::statistics::Trim; - -pub struct ColumnStatisticsState { - col_stats: HashMap>, - distinct_columns: HashMap, -} - -impl ColumnStatisticsState { - pub fn new(stats_columns: &[ColumnId], distinct_columns: &[ColumnId]) -> Self { - let col_stats = stats_columns - .iter() - .map(|&col_id| (col_id, Vec::new())) - .collect(); - - let distinct_columns = distinct_columns - .iter() - .map(|&col_id| (col_id, ColumnDistinctHLL::default())) - .collect(); - - Self { - col_stats, - distinct_columns, - } - } - - pub fn add_block(&mut self, schema: &TableSchemaRef, data_block: &DataBlock) -> Result<()> { - let rows = data_block.num_rows(); - let leaves = traverse_values_dfs(data_block.columns(), schema.fields())?; - for (column_id, col, data_type) in leaves { - match col { - Value::Scalar(s) => { - let unset_bits = if s == Scalar::Null { rows } else { 0 }; - // when we read it back from parquet, it is a Column instead of Scalar - let in_memory_size = s.as_ref().estimated_scalar_repeat_size(rows, &data_type); - let col_stats = ColumnStatistics::new( - s.clone(), - s.clone(), - unset_bits as u64, - in_memory_size as u64, - None, - ); - if let Some(hll) = self.distinct_columns.get_mut(&column_id) { - scalar_update_hll_cardinality(&s.as_ref(), &data_type, hll); - } - self.col_stats.get_mut(&column_id).unwrap().push(col_stats); - } - Value::Column(col) => { - // later, during the evaluation of expressions, name of field does not matter - let mut min = Scalar::Null; - let mut max = Scalar::Null; - - let (mins, _) = eval_aggr("min", vec![], &[col.clone().into()], rows, vec![])?; - if mins.len() > 0 { - min = if let Some(v) = mins.index(0) { - // safe upwrap. 
- v.to_owned().trim_min().unwrap() - } else { - self.col_stats.remove(&column_id); - continue; - } - } - - let (maxs, _) = eval_aggr("max", vec![], &[col.clone().into()], rows, vec![])?; - if maxs.len() > 0 { - max = if let Some(v) = maxs.index(0) { - if let Some(v) = v.to_owned().trim_max() { - v - } else { - self.col_stats.remove(&column_id); - continue; - } - } else { - self.col_stats.remove(&column_id); - continue; - } - } - - let (is_all_null, bitmap) = col.validity(); - let unset_bits = match (is_all_null, bitmap) { - (true, _) => rows, - (false, Some(bitmap)) => bitmap.null_count(), - (false, None) => 0, - }; - let in_memory_size = col.memory_size() as u64; - let col_stats = - ColumnStatistics::new(min, max, unset_bits as u64, in_memory_size, None); - self.col_stats.get_mut(&column_id).unwrap().push(col_stats); - - // use distinct count calculated by the xor hash function to avoid repetitive operation. - if let Some(hll) = self.distinct_columns.get_mut(&column_id) { - column_update_hll_cardinality(&col, &data_type, hll); - } - } - } - } - Ok(()) - } - - pub fn finalize( - self, - column_distinct_count: HashMap, - ) -> Result { - let mut statistics = StatisticsOfColumns::with_capacity(self.col_stats.len()); - for (id, stats) in &self.col_stats { - let mut col_stats = reduce_column_statistics(stats); - if let Some(count) = column_distinct_count.get(id) { - col_stats.distinct_of_values = Some(*count as u64); - } else if let Some(hll) = self.distinct_columns.get(id) { - col_stats.distinct_of_values = Some(hll.count() as u64); - } - statistics.insert(*id, col_stats); - } - Ok(statistics) - } -} - -fn column_update_hll_cardinality(col: &Column, ty: &DataType, hll: &mut ColumnDistinctHLL) { - if let DataType::Nullable(inner) = ty { - let col = col.as_nullable().unwrap(); - for (i, v) in col.validity.iter().enumerate() { - if v { - let scalar = col.column.index(i).unwrap(); - scalar_update_hll_cardinality(&scalar, inner, hll); - } - } - return; - } - - with_number_mapped_type!(|NUM_TYPE| match ty { - DataType::Number(NumberDataType::NUM_TYPE) => { - let col = NumberType::::try_downcast_column(col).unwrap(); - for v in col.iter() { - hll.add_object(v); - } - } - DataType::String => { - let col = StringType::try_downcast_column(col).unwrap(); - for v in col.iter() { - hll.add_object(&v); - } - } - DataType::Date => { - let col = DateType::try_downcast_column(col).unwrap(); - for v in col.iter() { - hll.add_object(v); - } - } - DataType::Timestamp => { - let col = TimestampType::try_downcast_column(col).unwrap(); - for v in col.iter() { - hll.add_object(v); - } - } - DataType::Decimal(_) => { - match col { - Column::Decimal(DecimalColumn::Decimal64(col, _)) => { - for v in col.iter() { - hll.add_object(v); - } - } - Column::Decimal(DecimalColumn::Decimal128(col, _)) => { - for v in col.iter() { - hll.add_object(v); - } - } - Column::Decimal(DecimalColumn::Decimal256(col, _)) => { - for v in col.iter() { - hll.add_object(v); - } - } - _ => unreachable!(), - }; - } - _ => unreachable!("Unsupported data type: {:?}", ty), - }); -} - -fn scalar_update_hll_cardinality(scalar: &ScalarRef, ty: &DataType, hll: &mut ColumnDistinctHLL) { - if matches!(scalar, ScalarRef::Null) { - return; - } - - let ty = ty.remove_nullable(); - - with_number_mapped_type!(|NUM_TYPE| match ty { - DataType::Number(NumberDataType::NUM_TYPE) => { - let val = NumberType::::try_downcast_scalar(scalar).unwrap(); - hll.add_object(&val); - } - DataType::String => { - let val = StringType::try_downcast_scalar(scalar).unwrap(); - 
hll.add_object(&val); - } - DataType::Date => { - let val = DateType::try_downcast_scalar(scalar).unwrap(); - hll.add_object(&val); - } - DataType::Timestamp => { - let val = TimestampType::try_downcast_scalar(scalar).unwrap(); - hll.add_object(&val); - } - DataType::Decimal(_) => { - match scalar { - ScalarRef::Decimal(DecimalScalar::Decimal64(v, _)) => hll.add_object(&v), - ScalarRef::Decimal(DecimalScalar::Decimal128(v, _)) => hll.add_object(&v), - ScalarRef::Decimal(DecimalScalar::Decimal256(v, _)) => hll.add_object(&v), - _ => unreachable!(), - } - } - _ => unreachable!("Unsupported data type: {:?}", ty), - }); -} diff --git a/src/query/storages/fuse/src/io/write/stream/column_statistics_builder.rs b/src/query/storages/fuse/src/io/write/stream/column_statistics_builder.rs new file mode 100644 index 0000000000000..e9278b4b1e71a --- /dev/null +++ b/src/query/storages/fuse/src/io/write/stream/column_statistics_builder.rs @@ -0,0 +1,357 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::cmp::Ordering; +use std::marker::PhantomData; + +use databend_common_exception::ErrorCode; +use databend_common_exception::Result; +use databend_common_expression::types::boolean::TrueIdxIter; +use databend_common_expression::types::DataType; +use databend_common_expression::types::DateType; +use databend_common_expression::types::Decimal; +use databend_common_expression::types::Decimal128Type; +use databend_common_expression::types::Decimal256Type; +use databend_common_expression::types::Decimal64Type; +use databend_common_expression::types::Float32Type; +use databend_common_expression::types::Float64Type; +use databend_common_expression::types::Int16Type; +use databend_common_expression::types::Int32Type; +use databend_common_expression::types::Int64Type; +use databend_common_expression::types::Int8Type; +use databend_common_expression::types::NumberDataType; +use databend_common_expression::types::StringType; +use databend_common_expression::types::TimestampType; +use databend_common_expression::types::UInt16Type; +use databend_common_expression::types::UInt32Type; +use databend_common_expression::types::UInt64Type; +use databend_common_expression::types::UInt8Type; +use databend_common_expression::types::ValueType; +use databend_common_expression::with_number_type; +use databend_common_expression::Column; +use databend_common_expression::Scalar; +use databend_common_expression::ScalarRef; +use databend_common_expression::SELECTIVITY_THRESHOLD; +use databend_storages_common_table_meta::meta::ColumnStatistics; +use enum_dispatch::enum_dispatch; + +use crate::statistics::Trim; + +pub type CommonBuilder = GenericColumnStatisticsBuilder; +pub type DecimalBuilder = GenericColumnStatisticsBuilder; + +#[enum_dispatch(ColumnStatsOps)] +pub enum ColumnStatisticsBuilder { + Int8(CommonBuilder), + Int16(CommonBuilder), + Int32(CommonBuilder), + Int64(CommonBuilder), + UInt8(CommonBuilder), + UInt16(CommonBuilder), + UInt32(CommonBuilder), + 
UInt64(CommonBuilder), + Float32(CommonBuilder), + Float64(CommonBuilder), + String(CommonBuilder), + Date(CommonBuilder), + Timestamp(CommonBuilder), + Decimal64(DecimalBuilder), + Decimal128(DecimalBuilder), + Decimal256(DecimalBuilder), +} + +#[enum_dispatch] +pub trait ColumnStatsOps { + fn update_column(&mut self, column: &Column); + fn update_scalar(&mut self, scalar: &ScalarRef, num_rows: usize, data_type: &DataType); + fn finalize(self) -> Result; +} + +impl ColumnStatsOps for GenericColumnStatisticsBuilder +where + T: ValueType + Send + Sync, + T::Scalar: Send + Sync, + A: ColumnStatisticsAdapter + 'static, + for<'a, 'b> T::ScalarRef<'a>: PartialOrd>, +{ + fn update_column(&mut self, column: &Column) { + GenericColumnStatisticsBuilder::update_column(self, column); + } + + fn update_scalar(&mut self, scalar: &ScalarRef, num_rows: usize, data_type: &DataType) { + GenericColumnStatisticsBuilder::update_scalar(self, scalar, num_rows, data_type); + } + + fn finalize(self) -> Result { + GenericColumnStatisticsBuilder::finalize(self) + } +} + +pub fn create_column_stats_builder(data_type: &DataType) -> ColumnStatisticsBuilder { + let inner_type = data_type.remove_nullable(); + macro_rules! match_number_type_create { + ($inner_type:expr) => {{ + with_number_type!(|NUM_TYPE| match $inner_type { + NumberDataType::NUM_TYPE => { + paste::paste! { + ColumnStatisticsBuilder::NUM_TYPE(CommonBuilder::<[]>::create(inner_type)) + } + } + }) + }}; + } + + match inner_type { + DataType::Number(num_type) => { + match_number_type_create!(num_type) + } + DataType::String => { + ColumnStatisticsBuilder::String(CommonBuilder::::create(inner_type)) + } + DataType::Date => { + ColumnStatisticsBuilder::Date(CommonBuilder::::create(inner_type)) + } + DataType::Timestamp => { + ColumnStatisticsBuilder::Timestamp(CommonBuilder::::create(inner_type)) + } + DataType::Decimal(size) => { + if size.can_carried_by_64() { + ColumnStatisticsBuilder::Decimal64(DecimalBuilder::::create( + inner_type, + )) + } else if size.can_carried_by_128() { + ColumnStatisticsBuilder::Decimal128(DecimalBuilder::::create( + inner_type, + )) + } else { + ColumnStatisticsBuilder::Decimal256(DecimalBuilder::::create( + inner_type, + )) + } + } + _ => unreachable!("Unsupported data type: {:?}", data_type), + } +} + +pub trait ColumnStatisticsAdapter: Send + Sync { + type Value: Clone + Send + Sync; + + fn scalar_to_value(val: T::ScalarRef<'_>) -> Self::Value; + + fn value_to_scalar(val: Self::Value) -> T::Scalar; + + fn update_value(value: &mut Self::Value, scalar: T::ScalarRef<'_>, ordering: Ordering); +} + +pub struct CommonAdapter; + +impl ColumnStatisticsAdapter for CommonAdapter +where + T: ValueType, + T::Scalar: Send + Sync, + for<'a, 'b> T::ScalarRef<'a>: PartialOrd>, +{ + type Value = T::Scalar; + + fn scalar_to_value(val: T::ScalarRef<'_>) -> Self::Value { + T::to_owned_scalar(val) + } + + fn value_to_scalar(val: Self::Value) -> T::Scalar { + val + } + + fn update_value(value: &mut Self::Value, scalar: T::ScalarRef<'_>, ordering: Ordering) { + if scalar.partial_cmp(&T::to_scalar_ref(value)) == Some(ordering) { + *value = T::to_owned_scalar(scalar); + } + } +} + +pub struct DecimalAdapter; + +impl ColumnStatisticsAdapter for DecimalAdapter +where + T: ValueType, + T::Scalar: Decimal + Send + Sync, + for<'a, 'b> T::ScalarRef<'a>: PartialOrd>, +{ + type Value = ::U64Array; + + fn scalar_to_value(val: T::ScalarRef<'_>) -> Self::Value { + T::Scalar::to_u64_array(T::to_owned_scalar(val)) + } + + fn value_to_scalar(val: Self::Value) -> 
T::Scalar { + T::Scalar::from_u64_array(val) + } + + fn update_value(value: &mut Self::Value, scalar: T::ScalarRef<'_>, ordering: Ordering) { + let val = T::Scalar::from_u64_array(*value); + if scalar.partial_cmp(&T::to_scalar_ref(&val)) == Some(ordering) { + *value = T::Scalar::to_u64_array(T::to_owned_scalar(scalar)); + } + } +} + +pub struct GenericColumnStatisticsBuilder<T, A> +where + T: ValueType, + A: ColumnStatisticsAdapter<T>, +{ + min: Option<A::Value>, + max: Option<A::Value>, + null_count: usize, + in_memory_size: usize, + data_type: DataType, + + _phantom: PhantomData<(T, A)>, +} + +impl<T, A> GenericColumnStatisticsBuilder<T, A> +where + T: ValueType + Send + Sync, + T::Scalar: Send + Sync, + A: ColumnStatisticsAdapter<T> + 'static, + for<'a, 'b> T::ScalarRef<'a>: PartialOrd<T::ScalarRef<'b>>, +{ + fn create(data_type: DataType) -> Self { + Self { + min: None, + max: None, + null_count: 0, + in_memory_size: 0, + data_type, + _phantom: PhantomData, + } + } + + fn add_batch<'a, I>(&mut self, mut iter: I) + where I: Iterator<Item = T::ScalarRef<'a>> { + let first = iter.next().unwrap(); + let mut min = first.clone(); + let mut max = first; + for v in iter { + if matches!(min.partial_cmp(&v), Some(Ordering::Greater)) { + min = v; + continue; + } + + if matches!(max.partial_cmp(&v), Some(Ordering::Less)) { + max = v; + } + } + + self.add(min, max); + } + + fn add(&mut self, min: T::ScalarRef<'_>, max: T::ScalarRef<'_>) { + if let Some(val) = self.min.as_mut() { + A::update_value(val, min, Ordering::Less); + } else { + self.min = Some(A::scalar_to_value(min)); + } + + if let Some(val) = self.max.as_mut() { + A::update_value(val, max, Ordering::Greater); + } else { + self.max = Some(A::scalar_to_value(max)); + } + } + + fn update_column(&mut self, column: &Column) { + self.in_memory_size += column.memory_size(); + if column.len() == 0 { + return; + } + let (column, validity) = match column { + Column::Nullable(box inner) => { + let validity = if inner.validity.null_count() == 0 { + None + } else { + Some(&inner.validity) + }; + (&inner.column, validity) + } + Column::Null { len } => { + self.null_count += *len; + return; + } + col => (col, None), + }; + self.null_count += validity.map_or(0, |v| v.null_count()); + + let column = T::try_downcast_column(column).unwrap(); + if let Some(v) = validity { + if v.true_count() as f64 / v.len() as f64 >= SELECTIVITY_THRESHOLD { + let column_iter = T::iter_column(&column); + let value_iter = column_iter + .zip(v.iter()) + .filter(|(_, v)| *v) + .map(|(v, _)| v); + self.add_batch(value_iter); + } else { + for idx in TrueIdxIter::new(v.len(), Some(v)) { + let v = unsafe { T::index_column_unchecked(&column, idx) }; + self.add(v.clone(), v); + } + } + } else { + let column_iter = T::iter_column(&column); + self.add_batch(column_iter); + } + } + + fn update_scalar(&mut self, scalar: &ScalarRef, num_rows: usize, data_type: &DataType) { + // when we read it back from parquet, it is a Column instead of Scalar + self.in_memory_size += scalar.estimated_scalar_repeat_size(num_rows, data_type); + if scalar.is_null() { + self.null_count += num_rows; + return; + } + + let val = T::try_downcast_scalar(scalar).unwrap(); + self.add(val.clone(), val); + } + + fn finalize(self) -> Result<ColumnStatistics> { + let min = if let Some(v) = self.min { + let v = A::value_to_scalar(v); + // safe unwrap.
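// For intuition: the unwrap below is expected to be safe because trimming a *minimum* bound
// can only shorten the value, and a prefix is never greater than the original string, so it
// remains a valid lower bound; trim_max, by contrast, may have no valid shortened upper bound
// and therefore gets the explicit error path further down. A tiny sketch of that asymmetry
// with plain strings (hypothetical helper, not taken from this patch):
//
//     fn trim_min_sketch(s: &str, limit: usize) -> String {
//         // a prefix of `s` is always <= `s`, so it is still a correct lower bound
//         s.chars().take(limit).collect()
//     }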
+ T::upcast_scalar_with_type(v, &self.data_type) + .trim_min() + .unwrap() + } else { + Scalar::Null + }; + let max = if let Some(v) = self.max { + let v = A::value_to_scalar(v); + if let Some(v) = T::upcast_scalar_with_type(v, &self.data_type).trim_max() { + v + } else { + return Err(ErrorCode::Internal("Unable to trim string")); + } + } else { + Scalar::Null + }; + + Ok(ColumnStatistics::new( + min, + max, + self.null_count as u64, + self.in_memory_size as u64, + None, + )) + } +} diff --git a/src/query/storages/fuse/src/io/write/stream/column_statistics_state.rs b/src/query/storages/fuse/src/io/write/stream/column_statistics_state.rs new file mode 100644 index 0000000000000..ac65378b20d22 --- /dev/null +++ b/src/query/storages/fuse/src/io/write/stream/column_statistics_state.rs @@ -0,0 +1,184 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashMap; + +use databend_common_exception::Result; +use databend_common_expression::types::DataType; +use databend_common_expression::ColumnId; +use databend_common_expression::DataBlock; +use databend_common_expression::TableSchemaRef; +use databend_common_expression::Value; +use databend_storages_common_table_meta::meta::StatisticsOfColumns; + +use crate::io::write::stream::create_column_ndv_estimator; +use crate::io::write::stream::create_column_stats_builder; +use crate::io::write::stream::ColumnNDVEstimator; +use crate::io::write::stream::ColumnNDVEstimatorOps; +use crate::io::write::stream::ColumnStatisticsBuilder; +use crate::io::write::stream::ColumnStatsOps; +use crate::statistics::traverse_values_dfs; + +pub struct ColumnStatisticsState { + col_stats: HashMap, + distinct_columns: HashMap, +} + +impl ColumnStatisticsState { + pub fn new( + stats_columns: &[(ColumnId, DataType)], + distinct_columns: &[(ColumnId, DataType)], + ) -> Self { + let col_stats = stats_columns + .iter() + .map(|(col_id, data_type)| (*col_id, create_column_stats_builder(data_type))) + .collect(); + + let distinct_columns = distinct_columns + .iter() + .map(|(col_id, data_type)| (*col_id, create_column_ndv_estimator(data_type))) + .collect(); + + Self { + col_stats, + distinct_columns, + } + } + + pub fn add_block(&mut self, schema: &TableSchemaRef, data_block: &DataBlock) -> Result<()> { + let rows = data_block.num_rows(); + let leaves = traverse_values_dfs(data_block.columns(), schema.fields())?; + for (column_id, col, data_type) in leaves { + match col { + Value::Scalar(s) => { + self.col_stats.get_mut(&column_id).unwrap().update_scalar( + &s.as_ref(), + rows, + &data_type, + ); + if let Some(estimator) = self.distinct_columns.get_mut(&column_id) { + estimator.update_scalar(&s.as_ref()); + } + } + Value::Column(col) => { + self.col_stats + .get_mut(&column_id) + .unwrap() + .update_column(&col); + // use distinct count calculated by the xor hash function to avoid repetitive operation. 
+ if let Some(estimator) = self.distinct_columns.get_mut(&column_id) { + estimator.update_column(&col); + } + } + } + } + Ok(()) + } + + pub fn finalize( + self, + mut column_distinct_count: HashMap, + ) -> Result { + for (column_id, estimator) in &self.distinct_columns { + column_distinct_count.insert(*column_id, estimator.finalize()); + } + + let mut statistics = StatisticsOfColumns::with_capacity(self.col_stats.len()); + for (id, stats) in self.col_stats { + let mut col_stats = stats.finalize()?; + if let Some(count) = column_distinct_count.get(&id) { + col_stats.distinct_of_values = Some(*count as u64); + } else if col_stats.min == col_stats.max { + // Bloom index will skip the large string column, it also no need to calc distinct values. + if col_stats.min.is_null() { + col_stats.distinct_of_values = Some(0); + } else { + col_stats.distinct_of_values = Some(1); + } + } + statistics.insert(id, col_stats); + } + Ok(statistics) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use databend_common_expression::types::BinaryType; + use databend_common_expression::types::Int64Type; + use databend_common_expression::types::NumberDataType; + use databend_common_expression::types::StringType; + use databend_common_expression::types::UInt64Type; + use databend_common_expression::Column; + use databend_common_expression::FromData; + use databend_common_expression::TableDataType; + use databend_common_expression::TableField; + use databend_common_expression::TableSchema; + use databend_storages_common_index::Index; + use databend_storages_common_index::RangeIndex; + + use super::*; + use crate::statistics::gen_columns_statistics; + + #[test] + fn test_column_stats_state() -> Result<()> { + let field1 = TableField::new( + "a", + TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::Int64))), + ); + let field2 = TableField::new("b", TableDataType::String); + let field3 = TableField::new("c", TableDataType::Tuple { + fields_name: vec!["d".to_string(), "e".to_string()], + fields_type: vec![ + TableDataType::Number(NumberDataType::UInt64), + TableDataType::Binary, + ], + }); + let schema = Arc::new(TableSchema::new(vec![field1, field2, field3])); + let block = DataBlock::new_from_columns(vec![ + Int64Type::from_opt_data(vec![Some(1), Some(2), None, Some(4), Some(5)]), + StringType::from_data(vec!["a", "b", "c", "d", "e"]), + Column::Tuple(vec![ + UInt64Type::from_data(vec![11, 12, 13, 14, 15]), + BinaryType::from_data(vec![ + "hello".as_bytes().to_vec(), + "world".as_bytes().to_vec(), + "".as_bytes().to_vec(), + "foo".as_bytes().to_vec(), + "bar".as_bytes().to_vec(), + ]), + ]), + ]); + + let stats_0 = gen_columns_statistics(&block, None, &schema)?; + + let mut stats_columns = vec![]; + let leaf_fields = schema.leaf_fields(); + for field in leaf_fields.iter() { + let column_id = field.column_id(); + let data_type = DataType::from(field.data_type()); + if RangeIndex::supported_type(&data_type) { + stats_columns.push((column_id, data_type.clone())); + } + } + let mut column_stats_state = ColumnStatisticsState::new(&stats_columns, &stats_columns); + column_stats_state.add_block(&schema, &block)?; + let stats_1 = column_stats_state.finalize(HashMap::new())?; + + assert_eq!(stats_0, stats_1); + Ok(()) + } +} diff --git a/src/query/storages/fuse/src/io/write/stream/mod.rs b/src/query/storages/fuse/src/io/write/stream/mod.rs index 26d32ee679582..f0c7365b5ba01 100644 --- a/src/query/storages/fuse/src/io/write/stream/mod.rs +++ b/src/query/storages/fuse/src/io/write/stream/mod.rs @@ 
-14,7 +14,16 @@ mod block_builder; mod cluster_statistics; -mod column_statistics; +mod column_ndv_estimator; +mod column_statistics_builder; +mod column_statistics_state; pub(crate) use block_builder::StreamBlockBuilder; pub(crate) use block_builder::StreamBlockProperties; +pub(crate) use column_ndv_estimator::create_column_ndv_estimator; +pub(crate) use column_ndv_estimator::ColumnNDVEstimator; +pub(crate) use column_ndv_estimator::ColumnNDVEstimatorOps; +pub(crate) use column_statistics_builder::create_column_stats_builder; +pub(crate) use column_statistics_builder::ColumnStatisticsBuilder; +pub(crate) use column_statistics_builder::ColumnStatsOps; +pub(crate) use column_statistics_state::ColumnStatisticsState; diff --git a/src/query/storages/fuse/src/operations/append.rs b/src/query/storages/fuse/src/operations/append.rs index 9316374128528..b6a10fb38e06e 100644 --- a/src/query/storages/fuse/src/operations/append.rs +++ b/src/query/storages/fuse/src/operations/append.rs @@ -37,10 +37,11 @@ use databend_common_sql::executor::physical_plans::MutationKind; use databend_storages_common_table_meta::meta::TableMetaTimestamps; use databend_storages_common_table_meta::table::ClusterType; +use crate::io::StreamBlockProperties; +use crate::operations::TransformBlockBuilder; use crate::operations::TransformBlockWriter; use crate::operations::TransformSerializeBlock; use crate::statistics::ClusterStatsGenerator; -use crate::FuseStorageFormat; use crate::FuseTable; impl FuseTable { @@ -50,19 +51,22 @@ impl FuseTable { pipeline: &mut Pipeline, table_meta_timestamps: TableMetaTimestamps, ) -> Result<()> { - let enable_stream_block_write = ctx.get_settings().get_enable_block_stream_write()? - && matches!(self.storage_format, FuseStorageFormat::Parquet); + let enable_stream_block_write = self.enable_stream_block_write(ctx.clone())?; if enable_stream_block_write { + let properties = StreamBlockProperties::try_create( + ctx.clone(), + self, + MutationKind::Insert, + table_meta_timestamps, + )?; + pipeline.add_transform(|input, output| { - TransformBlockWriter::try_create( - ctx.clone(), - input, - output, - self, - table_meta_timestamps, - false, - ) + TransformBlockBuilder::try_create(input, output, properties.clone()) })?; + + pipeline.add_async_accumulating_transformer(|| { + TransformBlockWriter::create(ctx.clone(), MutationKind::Insert, self, false) + }); } else { let block_thresholds = self.get_block_thresholds(); build_compact_block_pipeline(pipeline, block_thresholds)?; diff --git a/src/query/storages/fuse/src/operations/common/processors/mod.rs b/src/query/storages/fuse/src/operations/common/processors/mod.rs index e0e3d3b25f25a..d43c569c14016 100644 --- a/src/query/storages/fuse/src/operations/common/processors/mod.rs +++ b/src/query/storages/fuse/src/operations/common/processors/mod.rs @@ -22,6 +22,7 @@ mod transform_serialize_segment; pub use multi_table_insert_commit::CommitMultiTableInsert; pub use sink_commit::CommitSink; +pub use transform_block_writer::TransformBlockBuilder; pub use transform_block_writer::TransformBlockWriter; pub use transform_merge_commit_meta::TransformMergeCommitMeta; pub use transform_mutation_aggregator::TableMutationAggregator; diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs index ce11a4834c7fb..895f5c7a0ebc5 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs +++ 
b/src/query/storages/fuse/src/operations/common/processors/transform_block_writer.rs @@ -22,35 +22,36 @@ use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::BlockMetaInfoDowncast; use databend_common_expression::DataBlock; -use databend_common_io::constants::DEFAULT_BLOCK_ROW_COUNT; +use databend_common_metrics::storage::metrics_inc_recluster_write_block_nums; use databend_common_pipeline_core::processors::Event; use databend_common_pipeline_core::processors::InputPort; use databend_common_pipeline_core::processors::OutputPort; use databend_common_pipeline_core::processors::Processor; use databend_common_pipeline_core::processors::ProcessorPtr; +use databend_common_pipeline_transforms::AsyncAccumulatingTransform; +use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_storage::MutationStatus; -use databend_storages_common_table_meta::meta::TableMetaTimestamps; use opendal::Operator; use crate::io::BlockSerialization; use crate::io::BlockWriter; use crate::io::StreamBlockBuilder; use crate::io::StreamBlockProperties; +use crate::operations::MutationLogEntry; +use crate::operations::MutationLogs; use crate::FuseTable; -use crate::FUSE_OPT_KEY_ROW_PER_BLOCK; -#[allow(clippy::large_enum_variant)] enum State { Consume, Collect(DataBlock), Serialize, Finalize, Flush, - Write(BlockSerialization), } -pub struct TransformBlockWriter { +pub struct TransformBlockBuilder { state: State, input: Arc, output: Arc, @@ -62,43 +63,27 @@ pub struct TransformBlockWriter { input_data_size: usize, input_num_rows: usize, - dal: Operator, - // Only used in multi table insert - table_id: Option, - - max_block_rows: usize, - input_data: VecDeque, + input_data: VecDeque<(usize, DataBlock)>, output_data: Option, } -impl TransformBlockWriter { +impl TransformBlockBuilder { pub fn try_create( - ctx: Arc, input: Arc, output: Arc, - table: &FuseTable, - table_meta_timestamps: TableMetaTimestamps, - with_tid: bool, + properties: Arc, ) -> Result { - let max_block_rows = std::cmp::min( - ctx.get_settings().get_max_block_size()? 
as usize, - table.get_option(FUSE_OPT_KEY_ROW_PER_BLOCK, DEFAULT_BLOCK_ROW_COUNT), - ); - let properties = StreamBlockProperties::try_create(ctx, table, table_meta_timestamps)?; - Ok(ProcessorPtr::create(Box::new(TransformBlockWriter { + Ok(ProcessorPtr::create(Box::new(TransformBlockBuilder { state: State::Consume, input, output, properties, builder: None, - dal: table.get_operator(), need_flush: false, - table_id: if with_tid { Some(table.get_id()) } else { None }, input_data: VecDeque::new(), input_data_size: 0, input_num_rows: 0, output_data: None, - max_block_rows, }))) } @@ -111,23 +96,24 @@ impl TransformBlockWriter { Ok(self.builder.as_mut().unwrap()) } - fn calc_max_block_rows(&self, block: &DataBlock) -> usize { - let min_bytes_per_block = self.properties.block_thresholds.min_bytes_per_block; - let block_size = block.estimate_block_size(); - if block_size < min_bytes_per_block { - return self.max_block_rows; - } - let num_rows = block.num_rows(); + fn split_input(&self, input: DataBlock) -> Vec { + let block_size = input.estimate_block_size(); + let num_rows = input.num_rows(); let average_row_size = block_size.div_ceil(num_rows); - let max_rows = min_bytes_per_block.div_ceil(average_row_size); - self.max_block_rows.min(max_rows) + let max_rows = self + .properties + .block_thresholds + .min_bytes_per_block + .div_ceil(average_row_size) + .min(self.properties.block_thresholds.max_rows_per_block); + input.split_by_rows_no_tail(max_rows) } } #[async_trait] -impl Processor for TransformBlockWriter { +impl Processor for TransformBlockBuilder { fn name(&self) -> String { - "TransformBlockWriter".to_string() + "TransformBlockBuilder".to_string() } fn as_any(&mut self) -> &mut dyn Any { @@ -135,15 +121,15 @@ impl Processor for TransformBlockWriter { } fn event(&mut self) -> Result { - match &self.state { - State::Collect(_) | State::Serialize | State::Flush | State::Finalize => { - return Ok(Event::Sync) - } - State::Write(_) => return Ok(Event::Async), - _ => {} + if matches!( + self.state, + State::Collect(_) | State::Serialize | State::Flush | State::Finalize + ) { + return Ok(Event::Sync); } if self.output.is_finished() { + self.input.finish(); return Ok(Event::Finished); } @@ -196,15 +182,16 @@ impl Processor for TransformBlockWriter { State::Collect(block) => { // Check if the datablock is valid, this is needed to ensure data is correct block.check_valid()?; - self.input_data_size += block.estimate_block_size(); self.input_num_rows += block.num_rows(); - let max_rows_per_block = self.calc_max_block_rows(&block); - let blocks = block.split_by_rows_no_tail(max_rows_per_block); - self.input_data.extend(blocks); + for block in self.split_input(block) { + let block_size = block.estimate_block_size(); + self.input_data_size += block_size; + self.input_data.push_back((block_size, block)); + } } State::Serialize => { - while let Some(b) = self.input_data.pop_front() { - self.input_data_size -= b.estimate_block_size(); + while let Some((block_size, b)) = self.input_data.pop_front() { + self.input_data_size -= block_size; self.input_num_rows -= b.num_rows(); let builder = self.get_or_create_builder()?; @@ -217,7 +204,7 @@ impl Processor for TransformBlockWriter { } } State::Finalize => { - while let Some(b) = self.input_data.pop_front() { + while let Some((_, b)) = self.input_data.pop_front() { let builder = self.get_or_create_builder()?; builder.write(b)?; } @@ -227,7 +214,7 @@ impl Processor for TransformBlockWriter { let builder = self.builder.take().unwrap(); if !builder.is_empty() { 
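// A note on the handoff at this point: after this refactor the builder no longer performs any
// I/O itself; the finished BlockSerialization is passed downstream as block meta on an empty
// DataBlock, and the async TransformBlockWriter (now an AsyncAccumulatingTransform, see
// append.rs above) downcasts it and writes the block. Minimal sketch of the pattern, assuming
// some hypothetical `MyMeta: BlockMetaInfo` type (not taken from this patch):
//
//     let carrier = DataBlock::empty_with_meta(Box::new(my_meta));
//     // ...in the downstream processor...
//     if let Some(meta) = carrier.get_owned_meta().and_then(MyMeta::downcast_from) {
//         // handle the typed meta here
//     }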
let serialized = builder.finish()?; - self.state = State::Write(serialized); + self.output_data = Some(DataBlock::empty_with_meta(Box::new(serialized))); } self.need_flush = false; } @@ -235,11 +222,41 @@ impl Processor for TransformBlockWriter { } Ok(()) } +} - #[async_backtrace::framed] - async fn async_process(&mut self) -> Result<()> { - match std::mem::replace(&mut self.state, State::Consume) { - State::Write(serialized) => { +pub struct TransformBlockWriter { + kind: MutationKind, + dal: Operator, + ctx: Arc, + // Only used in multi table insert + table_id: Option, +} + +impl TransformBlockWriter { + pub fn create( + ctx: Arc, + kind: MutationKind, + table: &FuseTable, + with_tid: bool, + ) -> Self { + Self { + ctx, + dal: table.get_operator(), + table_id: if with_tid { Some(table.get_id()) } else { None }, + kind, + } + } +} + +#[async_trait::async_trait] +impl AsyncAccumulatingTransform for TransformBlockWriter { + const NAME: &'static str = "TransformBlockWriter"; + + async fn transform(&mut self, data: DataBlock) -> Result> { + debug_assert!(data.is_empty()); + + if let Some(ptr) = data.get_owned_meta() { + if let Some(serialized) = BlockSerialization::downcast_from(ptr) { let extended_block_meta = BlockWriter::write_down(&self.dal, serialized).await?; let bytes = if let Some(draft_virtual_block_meta) = @@ -251,32 +268,45 @@ impl Processor for TransformBlockWriter { extended_block_meta.block_meta.block_size as usize }; - self.properties - .ctx - .get_write_progress() - .incr(&ProgressValues { - rows: extended_block_meta.block_meta.row_count as usize, - bytes, - }); + self.ctx.get_write_progress().incr(&ProgressValues { + rows: extended_block_meta.block_meta.row_count as usize, + bytes, + }); // appending new data block if let Some(tid) = self.table_id { - self.properties.ctx.update_multi_table_insert_status( + self.ctx.update_multi_table_insert_status( tid, extended_block_meta.block_meta.row_count, ); } else { - self.properties.ctx.add_mutation_status(MutationStatus { + self.ctx.add_mutation_status(MutationStatus { insert_rows: extended_block_meta.block_meta.row_count, update_rows: 0, deleted_rows: 0, }); } - self.output_data = Some(DataBlock::empty_with_meta(Box::new(extended_block_meta))); + let output = if matches!(self.kind, MutationKind::Insert) { + DataBlock::empty_with_meta(Box::new(extended_block_meta)) + } else { + if matches!(self.kind, MutationKind::Recluster) { + metrics_inc_recluster_write_block_nums(); + } + + DataBlock::empty_with_meta(Box::new(MutationLogs { + entries: vec![MutationLogEntry::AppendBlock { + block_meta: Arc::new(extended_block_meta), + }], + })) + }; + + return Ok(Some(output)); } - _ => return Err(ErrorCode::Internal("It's a bug.")), } - Ok(()) + + Err(ErrorCode::Internal( + "Cannot downcast meta to BlockSerialization", + )) } } diff --git a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs index f13ed6701482d..9997b7dd5a893 100644 --- a/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs +++ b/src/query/storages/fuse/src/operations/common/processors/transform_serialize_block.rs @@ -34,6 +34,7 @@ use databend_common_pipeline_core::PipeItem; use databend_common_sql::executor::physical_plans::MutationKind; use databend_common_storage::MutationStatus; use databend_storages_common_index::BloomIndex; +use databend_storages_common_index::RangeIndex; use 
databend_storages_common_table_meta::meta::TableMetaTimestamps; use opendal::Operator; @@ -152,6 +153,9 @@ impl TransformSerializeBlock { let bloom_columns_map = table .bloom_index_cols .bloom_index_fields(source_schema.clone(), BloomIndex::supported_type)?; + let ndv_columns_map = table + .approx_distinct_cols + .distinct_column_fields(source_schema.clone(), RangeIndex::supported_table_type)?; let ngram_args = FuseTable::create_ngram_index_args( &table.table_info.meta, &table.table_info.meta.schema, @@ -182,6 +186,7 @@ impl TransformSerializeBlock { write_settings: table.get_write_settings(), cluster_stats_gen, bloom_columns_map, + ndv_columns_map, ngram_args, inverted_index_builders, virtual_column_builder, diff --git a/src/query/storages/fuse/src/operations/gc.rs b/src/query/storages/fuse/src/operations/gc.rs index 9ddc1da9d1133..7e3bfee8e566e 100644 --- a/src/query/storages/fuse/src/operations/gc.rs +++ b/src/query/storages/fuse/src/operations/gc.rs @@ -32,6 +32,7 @@ use databend_storages_common_index::InvertedIndexFile; use databend_storages_common_index::InvertedIndexMeta; use databend_storages_common_io::Files; use databend_storages_common_table_meta::meta::column_oriented_segment::ColumnOrientedSegment; +use databend_storages_common_table_meta::meta::column_oriented_segment::BLOCK_STATS_LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::BLOOM_FILTER_INDEX_LOCATION; use databend_storages_common_table_meta::meta::column_oriented_segment::LOCATION; use databend_storages_common_table_meta::meta::CompactSegmentInfo; @@ -390,6 +391,13 @@ impl FuseTable { purge_files.push(loc.to_string()) } + for loc in &locations.stats_location { + if locations_referenced_by_root.stats_location.contains(loc) { + continue; + } + purge_files.push(loc.to_string()) + } + purge_files.extend(chunk.iter().map(|loc| loc.0.clone())); } purge_files.extend(ts_to_be_purged.iter().map(|loc| loc.to_string())); @@ -455,6 +463,14 @@ impl FuseTable { blooms_to_be_purged.insert(loc.to_string()); } + let mut stats_to_be_purged = HashSet::new(); + for loc in &locations.stats_location { + if locations_referenced_by_root.stats_location.contains(loc) { + continue; + } + stats_to_be_purged.insert(loc.to_string()); + } + let segment_locations_to_be_purged = HashSet::from_iter( chunk .iter() @@ -481,6 +497,7 @@ impl FuseTable { agg_indexes_to_be_purged, inverted_indexes_to_be_purged, blooms_to_be_purged, + stats_to_be_purged, segment_locations_to_be_purged, ) .await?; @@ -541,6 +558,7 @@ impl FuseTable { agg_indexes_to_be_purged, inverted_indexes_to_be_purged, root_location_tuple.bloom_location, + root_location_tuple.stats_location, segment_locations_to_be_purged, ) .await?; @@ -566,6 +584,7 @@ impl FuseTable { agg_indexes_to_be_purged: HashSet, inverted_indexes_to_be_purged: HashSet, blooms_to_be_purged: HashSet, + stats_to_be_purged: HashSet, segments_to_be_purged: HashSet, ) -> Result<()> { // 1. Try to purge block file chunks. @@ -616,7 +635,15 @@ impl FuseTable { .await?; } - // 3. Try to purge segment file chunks. + // 3. Try to purge block statistic file chunks. + let stats_count = stats_to_be_purged.len(); + if stats_count > 0 { + counter.block_stats += stats_count; + self.try_purge_location_files(ctx.clone(), stats_to_be_purged) + .await?; + } + + // 4. Try to purge segment file chunks. let segments_count = segments_to_be_purged.len(); if segments_count > 0 { counter.segments += segments_count; @@ -661,9 +688,10 @@ impl FuseTable { // 5. Refresh status. 
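// For reference: block-statistics files now follow the same purge path as bloom index files:
// locations are collected per segment into `stats_location`, compared against what the root
// snapshot still references, and removed via try_purge_location_files; the status line below
// reports them alongside the other purged artifacts. The comparison is equivalent to a plain
// set difference (standalone sketch, not taken from this patch):
//
//     use std::collections::HashSet;
//     let referenced: HashSet<String> = HashSet::from(["a_stats.bin".to_string()]);
//     let candidates: HashSet<String> =
//         HashSet::from(["a_stats.bin".to_string(), "b_stats.bin".to_string()]);
//     let to_purge: Vec<&String> = candidates.difference(&referenced).collect();
//     assert_eq!(to_purge, vec![&"b_stats.bin".to_string()]);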
{ let status = format!( - "gc: block files purged:{}, bloom files purged:{}, segment files purged:{}, table statistic files purged:{}, snapshots purged:{}, take:{:?}", + "gc: block files purged:{}, bloom files purged:{}, block stats files purged:{}, segment files purged:{}, table statistic files purged:{}, snapshots purged:{}, take:{:?}", counter.blocks, counter.blooms, + counter.block_stats, counter.segments, counter.table_statistics, counter.snapshots, @@ -714,6 +742,7 @@ impl FuseTable { ) -> Result { let mut blocks = HashSet::new(); let mut blooms = HashSet::new(); + let mut stats = HashSet::new(); let fuse_segments = SegmentsIO::create(ctx.clone(), self.operator.clone(), self.schema()); let chunk_size = ctx.get_settings().get_max_threads()? as usize * 4; @@ -779,12 +808,14 @@ impl FuseTable { }; blocks.extend(location_tuple.block_location.into_iter()); blooms.extend(location_tuple.bloom_location.into_iter()); + stats.extend(location_tuple.stats_location.into_iter()); } } Ok(LocationTuple { block_location: blocks, bloom_location: blooms, + stats_location: stats, }) } @@ -808,6 +839,7 @@ struct RootSnapshotInfo { pub struct LocationTuple { pub block_location: HashSet, pub bloom_location: HashSet, + pub stats_location: HashSet, } impl TryFrom> for LocationTuple { @@ -815,16 +847,21 @@ impl TryFrom> for LocationTuple { fn try_from(value: Arc) -> Result { let mut block_location = HashSet::new(); let mut bloom_location = HashSet::new(); + let mut stats_location = HashSet::new(); let block_metas = value.block_metas()?; for block_meta in block_metas.into_iter() { block_location.insert(block_meta.location.0.clone()); if let Some(bloom_loc) = &block_meta.bloom_filter_index_location { bloom_location.insert(bloom_loc.0.clone()); } + if let Some(stats_loc) = &block_meta.block_stats_location { + stats_location.insert(stats_loc.0.clone()); + } } Ok(Self { block_location, bloom_location, + stats_location, }) } } @@ -834,6 +871,7 @@ impl TryFrom> for LocationTuple { fn try_from(value: Arc) -> Result { let mut block_location = HashSet::new(); let mut bloom_location = HashSet::new(); + let mut stats_location = HashSet::new(); let location_path = value.location_path_col(); for path in location_path.iter() { @@ -846,19 +884,28 @@ impl TryFrom> for LocationTuple { .unwrap(); let column = value.block_metas.get_by_offset(index).to_column(); for value in column.iter() { - match value { - ScalarRef::Null => {} - ScalarRef::Tuple(values) => { - let path = values[0].as_string().unwrap(); - bloom_location.insert(path.to_string()); - } - _ => unreachable!(), + if let ScalarRef::Tuple(values) = value { + let path = values[0].as_string().unwrap(); + bloom_location.insert(path.to_string()); + } + } + + let (index, _) = value + .segment_schema + .column_with_name(BLOCK_STATS_LOCATION) + .unwrap(); + let column = value.block_metas.get_by_offset(index).to_column(); + for value in column.iter() { + if let ScalarRef::Tuple(values) = value { + let path = values[0].as_string().unwrap(); + stats_location.insert(path.to_string()); } } Ok(Self { block_location, bloom_location, + stats_location, }) } } @@ -870,6 +917,7 @@ struct PurgeCounter { agg_indexes: usize, inverted_indexes: usize, blooms: usize, + block_stats: usize, segments: usize, table_statistics: usize, snapshots: usize, @@ -883,6 +931,7 @@ impl PurgeCounter { agg_indexes: 0, inverted_indexes: 0, blooms: 0, + block_stats: 0, segments: 0, table_statistics: 0, snapshots: 0, diff --git a/src/query/storages/fuse/src/operations/merge.rs 
b/src/query/storages/fuse/src/operations/merge.rs index e149196075dcd..6ec54cd993899 100644 --- a/src/query/storages/fuse/src/operations/merge.rs +++ b/src/query/storages/fuse/src/operations/merge.rs @@ -21,6 +21,7 @@ use databend_common_exception::Result; use databend_common_expression::TableSchemaRef; use databend_common_pipeline_core::PipeItem; use databend_storages_common_index::BloomIndex; +use databend_storages_common_index::RangeIndex; use databend_storages_common_table_meta::meta::Location; use databend_storages_common_table_meta::meta::TableMetaTimestamps; @@ -93,6 +94,9 @@ impl FuseTable { let bloom_columns_map = self .bloom_index_cols() .bloom_index_fields(new_schema.clone(), BloomIndex::supported_type)?; + let ndv_columns_map = self + .approx_distinct_cols() + .distinct_column_fields(new_schema.clone(), RangeIndex::supported_table_type)?; let ngram_args = FuseTable::create_ngram_index_args( &self.table_info.meta, &self.table_info.meta.schema, @@ -111,6 +115,7 @@ impl FuseTable { write_settings: self.get_write_settings(), cluster_stats_gen, bloom_columns_map, + ndv_columns_map, ngram_args, inverted_index_builders, vector_index_builder, diff --git a/src/query/storages/fuse/src/statistics/column_statistic.rs b/src/query/storages/fuse/src/statistics/column_statistic.rs index 36737dd9e7a62..0f980a57a97de 100644 --- a/src/query/storages/fuse/src/statistics/column_statistic.rs +++ b/src/query/storages/fuse/src/statistics/column_statistic.rs @@ -83,51 +83,48 @@ pub fn gen_columns_statistics( let mut min = Scalar::Null; let mut max = Scalar::Null; - let (mins, _) = eval_aggr("min", vec![], &[col.clone().into()], rows, vec![])?; - let (maxs, _) = eval_aggr("max", vec![], &[col.clone().into()], rows, vec![])?; - - if mins.len() > 0 { - min = if let Some(v) = mins.index(0) { - if let Some(v) = v.to_owned().trim_min() { - v + if col.len() > 0 { + let (mins, _) = eval_aggr("min", vec![], &[col.clone().into()], rows, vec![])?; + let (maxs, _) = eval_aggr("max", vec![], &[col.clone().into()], rows, vec![])?; + + if mins.len() > 0 { + min = if let Some(v) = mins.index(0) { + if let Some(v) = v.to_owned().trim_min() { + v + } else { + continue; + } } else { continue; } - } else { - continue; } - } - if maxs.len() > 0 { - max = if let Some(v) = maxs.index(0) { - if let Some(v) = v.to_owned().trim_max() { - v + if maxs.len() > 0 { + max = if let Some(v) = maxs.index(0) { + if let Some(v) = v.to_owned().trim_max() { + v + } else { + continue; + } } else { continue; } - } else { - continue; } } let (is_all_null, bitmap) = col.validity(); let unset_bits = match (is_all_null, bitmap) { - (true, _) => rows, - (false, Some(bitmap)) => bitmap.null_count(), + (_, Some(bitmap)) => bitmap.null_count(), + (true, None) => rows, (false, None) => 0, }; // use distinct count calculated by the xor hash function to avoid repetitive operation. - let distinct_of_values = if let Some(value) = column_distinct_count + let distinct_of_values = if let Some(&value) = column_distinct_count .as_ref() .and_then(|v| v.get(&column_id)) { - // value calculated by xor hash function include NULL, need to subtract one. - if unset_bits > 0 { - *value as u64 - 1 - } else { - *value as u64 - } + value as u64 } else { calc_column_distinct_of_values(&col, rows)? 
}; diff --git a/src/query/storages/fuse/src/statistics/traverse.rs b/src/query/storages/fuse/src/statistics/traverse.rs index b42375eff5030..5681c281df8ae 100644 --- a/src/query/storages/fuse/src/statistics/traverse.rs +++ b/src/query/storages/fuse/src/statistics/traverse.rs @@ -23,7 +23,6 @@ use databend_common_expression::ColumnId; use databend_common_expression::Scalar; use databend_common_expression::TableField; use databend_common_expression::Value; -use databend_common_expression::ORIGIN_BLOCK_ROW_NUM_COLUMN_ID; use databend_storages_common_index::Index; use databend_storages_common_index::RangeIndex; @@ -107,9 +106,7 @@ fn traverse_column_recursive( _ => unreachable!(), }, _ => { - if RangeIndex::supported_type(data_type) - && *next_column_id != ORIGIN_BLOCK_ROW_NUM_COLUMN_ID - { + if RangeIndex::supported_type(data_type) { leaves.push(( *next_column_id, Value::Column(column.clone()), @@ -169,9 +166,7 @@ fn traverse_scalar_recursive( }, _ => { // Ignore the range index does not supported type. - if RangeIndex::supported_type(data_type) - && *next_column_id != ORIGIN_BLOCK_ROW_NUM_COLUMN_ID - { + if RangeIndex::supported_type(data_type) { leaves.push(( *next_column_id, Value::Scalar(scalar.clone()), diff --git a/src/query/storages/fuse/src/table_functions/fuse_block.rs b/src/query/storages/fuse/src/table_functions/fuse_block.rs index eaecb5425c809..11c31d1227e0b 100644 --- a/src/query/storages/fuse/src/table_functions/fuse_block.rs +++ b/src/query/storages/fuse/src/table_functions/fuse_block.rs @@ -75,6 +75,10 @@ impl TableMetaFunc for FuseBlock { "virtual_column_size", TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))), ), + TableField::new( + "block_stats_size", + TableDataType::Number(NumberDataType::UInt64), + ), ]) } @@ -99,6 +103,7 @@ impl TableMetaFunc for FuseBlock { let mut ngram_index_size = Vec::with_capacity(len); let mut vector_index_size = Vec::with_capacity(len); let mut virtual_column_size = Vec::with_capacity(len); + let mut block_stats_size = Vec::with_capacity(len); let segments_io = SegmentsIO::create(ctx.clone(), tbl.operator.clone(), tbl.schema()); @@ -134,6 +139,7 @@ impl TableMetaFunc for FuseBlock { .as_ref() .map(|m| m.virtual_column_size), ); + block_stats_size.push(block.block_stats_size); num_rows += 1; if num_rows >= limit { @@ -157,6 +163,7 @@ impl TableMetaFunc for FuseBlock { UInt64Type::from_opt_data(ngram_index_size).into(), UInt64Type::from_opt_data(vector_index_size).into(), UInt64Type::from_opt_data(virtual_column_size).into(), + UInt64Type::from_data(block_stats_size).into(), ], num_rows, )) diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0004_remote_insert_into_select.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0004_remote_insert_into_select.test index 5e399dc1b406f..5ad4e316896d0 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0004_remote_insert_into_select.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0004_remote_insert_into_select.test @@ -7,9 +7,6 @@ CREATE DATABASE db_09_004 statement ok USE db_09_004 -statement ok -set enable_block_stream_write = 1 - statement ok CREATE TABLE IF NOT EXISTS t1(a UInt8 not null, b UInt64 not null, c Int8 not null, d Int64 not null, e Date not null, f Date not null, g DateTime not null, h String not null) Engine = Fuse diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test index 6f55970c81103..cad96b3804842 
100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test @@ -122,12 +122,42 @@ create table t_in_memory(a uint64 not null) engine = Memory statement error 4000 select * from fuse_snapshot('db_09_0006', 't_in_memory') +statement ok +CREATE TABLE t2(a int, b string) approx_distinct_columns = 'a,b'; + +query I +insert into t2 values(1, 'a'), (2, 'b'); +---- +2 + +query I +select block_stats_size > 0 from fuse_block('db_09_0006', 't2'); +---- +1 + +statement ok +alter table t2 set options(approx_distinct_columns = ''); + +query I +insert into t2 values(3, 'c'), (4, 'd'); +---- +2 + +query I +select block_stats_size > 0 from fuse_block('db_09_0006', 't2') order by block_location; +---- +1 +0 + statement ok DROP TABLE t statement ok DROP TABLE t1 +statement ok +DROP TABLE t2 + statement ok DROP TABLE t_in_memory diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table.test index 43a2b262ca2f9..c19a27a9e8890 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0008_fuse_optimize_table.test @@ -7,9 +7,6 @@ CREATE DATABASE db_09_0008 statement ok USE db_09_0008 -statement ok -set enable_block_stream_write = 1 - statement ok create table t(a uint64 not null) diff --git a/tests/sqllogictests/suites/base/issues/issue_18275.test b/tests/sqllogictests/suites/base/issues/issue_18275.test index 36217cda4b3ca..ce895d228329e 100644 --- a/tests/sqllogictests/suites/base/issues/issue_18275.test +++ b/tests/sqllogictests/suites/base/issues/issue_18275.test @@ -14,9 +14,6 @@ CREATE OR REPLACE TABLE product_test ( stock INT ); -statement ok -set enable_block_stream_write = 1; - statement ok INSERT INTO product_test (id, name, category, price, stock) VALUES(6, 'Keyboard', 'Electronics', 79.99, 25), diff --git a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test index 0d86d3d55b737..cd693939daffa 100644 --- a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test +++ b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test @@ -613,9 +613,6 @@ S001 ST001 A Excellent Y S002 ST002 B Good Y S003 ST003 C Average N -statement ok -set enable_block_stream_write = 1 - statement ok CREATE OR REPLACE TABLE test_stream ( id INT, @@ -670,9 +667,6 @@ FROM test_stream; 9 "Richard" 33 "Austin" "hiking" "cycling" 10 "Lisa" 26 "Chicago" "gaming" "reading" -statement ok -set enable_block_stream_write = 0 - statement ok set enable_experimental_virtual_column = 0; diff --git a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh index 4d6cafcb184f9..862680eab447d 100755 --- a/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh +++ b/tests/suites/1_stateful/05_formats/05_01_compact/05_01_02_load_compact_copy_row_per_block.sh @@ -15,7 +15,7 @@ echo "drop table if exists t1 all" | $BENDSQL_CLIENT_CONNECT echo "CREATE TABLE t1 ( c0 string -) engine=fuse row_per_block=800; +) engine=fuse row_per_block=500; " | $BENDSQL_CLIENT_CONNECT diff --git a/tests/suites/1_stateful/09_http_handler/09_0007_session.py 
b/tests/suites/1_stateful/09_http_handler/09_0007_session.py index 3f3c96959d59a..5860548afa3ea 100755 --- a/tests/suites/1_stateful/09_http_handler/09_0007_session.py +++ b/tests/suites/1_stateful/09_http_handler/09_0007_session.py @@ -16,21 +16,31 @@ logout_url = "http://localhost:8000/v1/session/logout" auth = ("root", "") + def check(func): def wrapper(self, *args, **kwargs): print(f"---- {func.__name__}{args[:1]}") - resp : Response = func(self, *args, **kwargs) + resp: Response = func(self, *args, **kwargs) self.session_header = resp.headers.get(HEADER_SESSION) last = self.session_header_json - self.session_header_json = json.loads(base64.urlsafe_b64decode(self.session_header)) + self.session_header_json = json.loads( + base64.urlsafe_b64decode(self.session_header) + ) if last: if last["id"] != self.session_header_json["id"]: - print("error: session id should not change", last, self.session_header_json) + print( + "error: session id should not change", + last, + self.session_header_json, + ) if last["last_refresh_time"] < time.time() - 100: if last["last_refresh_time"] > time.time() - 2: print("error: last_refresh_time should not change") else: - if last["last_refresh_time"] != self.session_header_json["last_refresh_time"]: + if ( + last["last_refresh_time"] + != self.session_header_json["last_refresh_time"] + ): print("error: last_refresh_time should not change") # print("get header: ", self.session_header_json) @@ -44,11 +54,11 @@ def wrapper(self, *args, **kwargs): if err: pprint(err) return resp + return wrapper class Client(object): - def __init__(self): self.client = requests.session() self.session_header = "" @@ -82,7 +92,7 @@ def do_query(self, query, url=query_url): auth=auth, headers={ "Content-Type": "application/json", - HEADER_SESSION: self.session_header + HEADER_SESSION: self.session_header, }, json=query_payload, ) @@ -91,7 +101,9 @@ def do_query(self, query, url=query_url): def set_fake_last_refresh_time(self): j = self.session_header_json j["last_refresh_time"] = int(time.time()) - 10 * 60 - self.session_header = base64.urlsafe_b64encode(json.dumps(j).encode('utf-8')).decode('ascii') + self.session_header = base64.urlsafe_b64encode( + json.dumps(j).encode("utf-8") + ).decode("ascii") def main():