@@ -249,8 +249,21 @@ fn evaluate_comparison_with_stats(
249249 }
250250
251251 // String comparisons
252- TypeStatistics :: String { min, max, .. } => match value {
253- PredicateValue :: Utf8 ( Some ( v) ) => evaluate_string_comparison ( min, max, op, v) ,
252+ TypeStatistics :: String {
253+ lower_bound,
254+ upper_bound,
255+ is_exact_min,
256+ is_exact_max,
257+ ..
258+ } => match value {
259+ PredicateValue :: Utf8 ( Some ( v) ) => evaluate_string_comparison (
260+ lower_bound,
261+ upper_bound,
262+ op,
263+ v,
264+ * is_exact_min,
265+ * is_exact_max,
266+ ) ,
254267 _ => {
255268 return Err ( UnexpectedSnafu {
256269 msg : "Type mismatch: expected string value" . to_string ( ) ,
@@ -295,7 +308,7 @@ fn evaluate_comparison_with_stats(
295308 PredicateValue :: Utf8 ( Some ( v) ) => {
296309 // For decimal, we need to compare strings
297310 // This is a simplified implementation
298- evaluate_string_comparison ( min, max, op, v)
311+ evaluate_string_comparison ( min, max, op, v, true , true )
299312 }
300313 _ => {
301314 return Err ( UnexpectedSnafu {
@@ -461,31 +474,41 @@ fn evaluate_float_comparison(min: f64, max: f64, op: ComparisonOp, value: f64) -
461474 }
462475}
463476
464- fn evaluate_string_comparison ( min : & str , max : & str , op : ComparisonOp , value : & str ) -> bool {
477+ fn evaluate_string_comparison (
478+ lower_bound : & str ,
479+ upper_bound : & str ,
480+ op : ComparisonOp ,
481+ value : & str ,
482+ is_exact_min : bool ,
483+ is_exact_max : bool ,
484+ ) -> bool {
485+ // Check if the column's minimum is <= value
486+ let min_le_value = lower_bound < value || ( lower_bound == value && is_exact_min) ;
487+
488+ // Check if the column's maximum is >= value
489+ let max_ge_value = upper_bound > value || ( upper_bound == value && is_exact_max) ;
490+
465491 match op {
466- ComparisonOp :: Equal => {
467- // col = value: keep if value is within [min, max] lexicographically
468- min <= value && value <= max
469- }
492+ // Range intersection: The value must be reachable from both sides.
493+ ComparisonOp :: Equal => min_le_value && max_ge_value,
494+
495+ // One-sided inclusive checks reuse the logic above.
496+ ComparisonOp :: LessThanOrEqual => min_le_value,
497+ ComparisonOp :: GreaterThanOrEqual => max_ge_value,
498+
499+ // Strict checks are simple.
500+ // Note: We don't need to check exactness here.
501+ // e.g., for LessThan: if lower_bound == value, then actual_min >= value,
502+ // so NO row can be strictly less than value.
503+ ComparisonOp :: LessThan => lower_bound < value,
504+ ComparisonOp :: GreaterThan => upper_bound > value,
505+
506+ // Special case: Only prune != if we are certain the column contains ONLY `value`.
470507 ComparisonOp :: NotEqual => {
471- // col != value: keep if value is not the only value
472- !( min == value && max == value)
473- }
474- ComparisonOp :: LessThan => {
475- // col < value: keep if min < value
476- min < value
477- }
478- ComparisonOp :: LessThanOrEqual => {
479- // col <= value: keep if min <= value
480- min <= value
481- }
482- ComparisonOp :: GreaterThan => {
483- // col > value: keep if max > value
484- max > value
485- }
486- ComparisonOp :: GreaterThanOrEqual => {
487- // col >= value: keep if max >= value
488- max >= value
508+ let is_single_value_col = lower_bound == upper_bound && is_exact_min && is_exact_max;
509+
510+ // Keep unless it's a single-value column exactly matching the target
511+ !( is_single_value_col && lower_bound == value)
489512 }
490513 }
491514}
@@ -1324,4 +1347,145 @@ mod tests {
13241347 assert_eq ! ( result. len( ) , 1 ) ;
13251348 assert ! ( result[ 0 ] ) ;
13261349 }
1350+
1351+ #[ test]
1352+ fn test_evaluate_string_comparison ( ) {
1353+ use crate :: predicate:: ComparisonOp ;
1354+
1355+ // Helper to make the call shorter
1356+ let eval = |lower : & str ,
1357+ upper : & str ,
1358+ op : ComparisonOp ,
1359+ val : & str ,
1360+ exact_min : bool ,
1361+ exact_max : bool | {
1362+ super :: evaluate_string_comparison ( lower, upper, op, val, exact_min, exact_max)
1363+ } ;
1364+
1365+ // 1. EQUAL
1366+ // Range ["a", "c"], value "b" -> Keep
1367+ assert ! ( eval( "a" , "c" , ComparisonOp :: Equal , "b" , true , true ) ) ;
1368+ // Range ["a", "c"], value "d" -> Skip
1369+ assert ! ( !eval( "a" , "c" , ComparisonOp :: Equal , "d" , true , true ) ) ;
1370+ // Range ["a", "c"], value "a" -> Keep
1371+ assert ! ( eval( "a" , "c" , ComparisonOp :: Equal , "a" , true , true ) ) ;
1372+ // Range ["a", "c"], value "c" -> Keep
1373+ assert ! ( eval( "a" , "c" , ComparisonOp :: Equal , "c" , true , true ) ) ;
1374+
1375+ // Truncated stats (inexact)
1376+ // Range ["a", "c"] (min inexact), value "a" -> Skip (actual min > "a")
1377+ assert ! ( !eval( "a" , "c" , ComparisonOp :: Equal , "a" , false , true ) ) ;
1378+ // Range ["a", "c"] (max inexact), value "c" -> Skip (actual max < "c" max is rounded up)
1379+ assert ! ( !eval( "a" , "c" , ComparisonOp :: Equal , "c" , true , false ) ) ;
1380+
1381+ // 2. LESS THAN (< value)
1382+ // Range ["a", "c"], value "b". "a" < "b" -> Keep.
1383+ assert ! ( eval( "a" , "c" , ComparisonOp :: LessThan , "b" , true , true ) ) ;
1384+ // Range ["d", "e"], value "b". "d" >= "b" -> Skip.
1385+ assert ! ( !eval( "d" , "e" , ComparisonOp :: LessThan , "b" , true , true ) ) ;
1386+ // Range ["a", "c"], value "a". "a" < "a" is false.
1387+ // If exact, min="a", so no value < "a". Skip.
1388+ assert ! ( !eval( "a" , "c" , ComparisonOp :: LessThan , "a" , true , true ) ) ;
1389+ // If not exact, min="a" (truncated). Actual min >= "a".
1390+ // So actual min >= value. No value < "a". Skip.
1391+ assert ! ( !eval( "a" , "c" , ComparisonOp :: LessThan , "a" , false , true ) ) ;
1392+
1393+ // 3. GREATER THAN (> value)
1394+ // Range ["a", "c"], value "b". "c" > "b" -> Keep.
1395+ assert ! ( eval( "a" , "c" , ComparisonOp :: GreaterThan , "b" , true , true ) ) ;
1396+ // Range ["a", "b"], value "c". "b" <= "c" -> Skip.
1397+ assert ! ( !eval( "a" , "b" , ComparisonOp :: GreaterThan , "c" , true , true ) ) ;
1398+ // Range ["a", "c"], value "c". "c" > "c" is false.
1399+ // If exact, max="c", so no value > "c". Skip.
1400+ assert ! ( !eval( "a" , "c" , ComparisonOp :: GreaterThan , "c" , true , true ) ) ;
1401+ // If not exact, actual max < "c". Skip.
1402+ assert ! ( !eval( "a" , "c" , ComparisonOp :: GreaterThan , "c" , true , false ) ) ;
1403+
1404+ // 4. NOT EQUAL
1405+ // Range ["a", "c"], value "b". Keep.
1406+ assert ! ( eval( "a" , "c" , ComparisonOp :: NotEqual , "b" , true , true ) ) ;
1407+ // Range ["a", "a"], value "a".
1408+ // Exact: Skip.
1409+ assert ! ( !eval( "a" , "a" , ComparisonOp :: NotEqual , "a" , true , true ) ) ;
1410+
1411+ // 5. LESS THAN OR EQUAL (<= value)
1412+ // Range ["a", "c"], value "b". Keep.
1413+ assert ! ( eval(
1414+ "a" ,
1415+ "c" ,
1416+ ComparisonOp :: LessThanOrEqual ,
1417+ "b" ,
1418+ true ,
1419+ true
1420+ ) ) ;
1421+ // Range ["a", "c"], value "a". Keep.
1422+ assert ! ( eval(
1423+ "a" ,
1424+ "c" ,
1425+ ComparisonOp :: LessThanOrEqual ,
1426+ "a" ,
1427+ true ,
1428+ true
1429+ ) ) ;
1430+ // Range ["b", "c"], value "a". "b" > "a". Skip.
1431+ assert ! ( !eval(
1432+ "b" ,
1433+ "c" ,
1434+ ComparisonOp :: LessThanOrEqual ,
1435+ "a" ,
1436+ true ,
1437+ true
1438+ ) ) ;
1439+ // Inexact min:
1440+ // Range ["a", "c"] (min inexact), value "a".
1441+ // Actual_min > "a". Skip.
1442+ assert ! ( !eval(
1443+ "a" ,
1444+ "c" ,
1445+ ComparisonOp :: LessThanOrEqual ,
1446+ "a" ,
1447+ false ,
1448+ true
1449+ ) ) ;
1450+
1451+ // 6. GREATER THAN OR EQUAL (>= value)
1452+ // Range ["a", "c"], value "b". Keep.
1453+ assert ! ( eval(
1454+ "a" ,
1455+ "c" ,
1456+ ComparisonOp :: GreaterThanOrEqual ,
1457+ "b" ,
1458+ true ,
1459+ true
1460+ ) ) ;
1461+ // Range ["a", "c"], value "c". Keep.
1462+ assert ! ( eval(
1463+ "a" ,
1464+ "c" ,
1465+ ComparisonOp :: GreaterThanOrEqual ,
1466+ "c" ,
1467+ true ,
1468+ true
1469+ ) ) ;
1470+ // Range ["a", "b"], value "c". "b" < "c". Skip.
1471+ assert ! ( !eval(
1472+ "a" ,
1473+ "b" ,
1474+ ComparisonOp :: GreaterThanOrEqual ,
1475+ "c" ,
1476+ true ,
1477+ true
1478+ ) ) ;
1479+ // Inexact max:
1480+ // Range ["a", "b"] (max inexact), value "b".
1481+ // Actual_max < "b". Skip.
1482+ assert ! ( !eval(
1483+ "a" ,
1484+ "b" ,
1485+ ComparisonOp :: GreaterThanOrEqual ,
1486+ "b" ,
1487+ true ,
1488+ false
1489+ ) ) ;
1490+ }
13271491}
0 commit comments