196 changes: 190 additions & 6 deletions datafusion/functions/src/math/log.rs
@@ -116,13 +116,38 @@ fn log_decimal128(value: i128, scale: i8, base: f64) -> Result<f64, ArrowError>
)));
}

let unscaled_value = decimal128_to_i128(value, scale)?;
if unscaled_value > 0 {
let log_value: u32 = unscaled_value.ilog(base as i128);
Ok(log_value as f64)
// Handle negative scales using logarithmic property:
// log_base(value * 10^(-scale)) = log_base(value) + (-scale) * log_base(10)
if scale < 0 {
// For negative scale, the actual value is value * 10^(-scale) where -scale > 0
// Use property: log_base(a * 10^(-s)) = log_base(a) + (-s) * log_base(10)
if value > 0 {
let value_f64 = value as f64;

// Compute log_base(value) - use natural log and convert
// log_base(x) = ln(x) / ln(base)
let log_value = value_f64.ln() / base.ln();

// Add the adjustment: (-scale) * log_base(10)
// log_base(10) = ln(10) / ln(base)
let log_10_base = 10.0_f64.ln() / base.ln();
let adjustment = (-scale as f64) * log_10_base;
[Review comment from a contributor]
I do wonder if we're better off just casting to float and doing log there? There seem to be multiple log operations here, does it provide more accuracy?

Ok(log_value + adjustment)
} else {
// Reflect f64::log behaviour
Ok(f64::NAN)
}
} else {
// Reflect f64::log behaviour
Ok(f64::NAN)
// For non-negative scales, use existing logic
let unscaled_value = decimal128_to_i128(value, scale)?;
if unscaled_value > 0 {
let log_value: u32 = unscaled_value.ilog(base as i128);
Ok(log_value as f64)
} else {
// Reflect f64::log behaviour
Ok(f64::NAN)
}
}
}
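
Below is a minimal, self-contained sketch (not part of the patch) that checks the identity the new branch relies on, log_b(v * 10^(-s)) = log_b(v) + (-s) * log_b(10), against computing the log directly on the expanded float value, which is essentially the cast-to-float approach the review comment asks about. The literal inputs (unscaled value 8, scale -1, base 2) are illustrative assumptions only.

fn main() {
    // Hypothetical inputs: a Decimal128(38, -1) storing unscaled digits 8,
    // i.e. the logical value 8 * 10^1 = 80, with log base 2.
    let value: i128 = 8;
    let scale: i8 = -1;
    let base: f64 = 2.0;

    // Identity-based computation, mirroring the negative-scale branch above:
    // log_base(value) + (-scale) * log_base(10)
    let log_value = (value as f64).ln() / base.ln();
    let log_10_base = 10.0_f64.ln() / base.ln();
    let via_identity = log_value + (-scale as f64) * log_10_base;

    // Direct computation on the expanded f64 value (the cast-to-float route).
    let expanded = value as f64 * 10.0_f64.powi(-(scale as i32));
    let direct = expanded.log(base);

    // Both paths agree to within floating-point error (about 6.321928 for log2(80)).
    assert!((via_identity - direct).abs() < 1e-12);
    println!("identity = {via_identity}, direct = {direct}");
}

In f64 the two paths differ only by rounding error; whether the multi-step identity gains or loses accuracy relative to a single cast is exactly the question raised in the review comment above.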

@@ -291,6 +316,49 @@ impl ScalarUDFImpl for LogFunc {
}
let number = args.pop().unwrap();
let number_datatype = arg_types.pop().unwrap();

// Check if base has negative scale (if provided)
let base_has_negative_scale = if num_args == 2 {
if let Some(
DataType::Decimal32(_, scale)
| DataType::Decimal64(_, scale)
| DataType::Decimal128(_, scale)
| DataType::Decimal256(_, scale),
) = arg_types.last()
{
*scale < 0
} else {
false
}
} else {
false
};

// Skip simplification for negative scale decimals as ScalarValue doesn't support them yet
let has_negative_scale = match &number_datatype {
DataType::Decimal32(_, scale)
| DataType::Decimal64(_, scale)
| DataType::Decimal128(_, scale)
| DataType::Decimal256(_, scale) => *scale < 0,
_ => false,
};

if has_negative_scale || base_has_negative_scale {
let args = match num_args {
1 => vec![number],
2 => {
let base = args.pop().unwrap();
vec![base, number]
}
_ => {
return internal_err!(
"Unexpected number of arguments in log::simplify"
);
}
};
return Ok(ExprSimplifyResult::Original(args));
}

// default to base 10
let base = if let Some(base) = args.pop() {
base
@@ -1125,4 +1193,120 @@ mod tests {
"Arrow error: Not yet implemented: Log of Decimal256 larger than Decimal128 is not yet supported: 170141183460469231731687303715884106727"
);
}

#[test]
fn test_log_decimal128_negative_scale() {
// Test log with negative scale: value=100, scale=-2 means 100 * 10^2 = 10000
// log10(10000) = 4.0
let arg_field = Field::new("a", DataType::Decimal128(38, -2), false).into();
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Array(Arc::new(
Decimal128Array::from(vec![100, 1000, 10])
.with_precision_and_scale(38, -2)
.unwrap(),
)), // num: represents 10000, 100000, 1000
],
arg_fields: vec![arg_field],
number_rows: 3,
return_field: Field::new("f", DataType::Float64, true).into(),
config_options: Arc::new(ConfigOptions::default()),
};
let result = LogFunc::new()
.invoke_with_args(args)
.expect("failed to initialize function log");

match result {
ColumnarValue::Array(arr) => {
let floats = as_float64_array(&arr)
.expect("failed to convert result to a Float64Array");

assert_eq!(floats.len(), 3);
// log10(10000) = 4.0
assert!((floats.value(0) - 4.0).abs() < 1e-10);
// log10(100000) = 5.0
assert!((floats.value(1) - 5.0).abs() < 1e-10);
// log10(1000) = 3.0
assert!((floats.value(2) - 3.0).abs() < 1e-10);
}
ColumnarValue::Scalar(_) => {
panic!("Expected an array value")
}
}
}

#[test]
fn test_log_decimal128_negative_scale_base2() {
// Test log base 2 with negative scale: value=8, scale=-1 means 8 * 10^1 = 80
// log2(80) ≈ 6.321928
let arg_fields = vec![
Field::new("b", DataType::Float64, false).into(),
Field::new("x", DataType::Decimal128(38, -1), false).into(),
];
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Float64(Some(2.0))), // base
ColumnarValue::Array(Arc::new(
Decimal128Array::from(vec![8, 16])
.with_precision_and_scale(38, -1)
.unwrap(),
)), // num: represents 80, 160
],
arg_fields,
number_rows: 2,
return_field: Field::new("f", DataType::Float64, true).into(),
config_options: Arc::new(ConfigOptions::default()),
};
let result = LogFunc::new()
.invoke_with_args(args)
.expect("failed to initialize function log");

match result {
ColumnarValue::Array(arr) => {
let floats = as_float64_array(&arr)
.expect("failed to convert result to a Float64Array");

assert_eq!(floats.len(), 2);
// log2(80) ≈ 6.321928
assert!((floats.value(0) - 80.0_f64.log2()).abs() < 1e-10);
// log2(160) ≈ 7.321928
assert!((floats.value(1) - 160.0_f64.log2()).abs() < 1e-10);
}
ColumnarValue::Scalar(_) => {
panic!("Expected an array value")
}
}
}

#[test]
fn test_log_decimal128_negative_scale_scalar() {
// Test scalar with negative scale
let arg_field = Field::new("a", DataType::Decimal128(38, -3), false).into();
let args = ScalarFunctionArgs {
args: vec![
ColumnarValue::Scalar(ScalarValue::Decimal128(Some(5), 38, -3)), // num: represents 5000
],
arg_fields: vec![arg_field],
number_rows: 1,
return_field: Field::new("f", DataType::Float64, true).into(),
config_options: Arc::new(ConfigOptions::default()),
};
let result = LogFunc::new()
.invoke_with_args(args)
.expect("failed to initialize function log");

match result {
ColumnarValue::Array(arr) => {
let floats = as_float64_array(&arr)
.expect("failed to convert result to a Float64Array");

assert_eq!(floats.len(), 1);
// log10(5000) ≈ 3.69897
assert!((floats.value(0) - 5000.0_f64.log10()).abs() < 1e-10);
}
ColumnarValue::Scalar(_) => {
panic!("Expected an array value")
}
}
}
}
52 changes: 52 additions & 0 deletions datafusion/sqllogictest/test_files/decimal.slt
@@ -918,6 +918,58 @@ select log(2.0, null);
----
NULL

# log with negative scale decimals
# Using scientific notation to create decimals with negative scales
# 1e4 = 10000 with scale -4, log10(10000) = 4.0
query R
select log(1e4);
----
4

# log with negative scale and explicit base 10
query R
select log(10, 1e4);
----
4

# log with negative scale and base 2
# 8e1 = 80 with scale -1, log2(80) ≈ 6.321928
query R
select log(2.0, 8e1);
----
6.321928094887

# log with negative scale and base 2 (another value)
# 16e1 = 160 with scale -1, log2(160) ≈ 7.321928
query R
select log(2.0, 16e1);
----
7.321928094887

# log with negative scale -3
# 5e3 = 5000 with scale -3, log10(5000) ≈ 3.69897
query R
select log(5e3);
----
3.698970004336

# log with negative scale array values
query R rowsort
select log(value) from (values (1e3), (1e4), (1e5)) as t(value);
----
3
4
5

# log with negative scale and different bases
# Results are ordered by base (2, 3, 10)
query R
select log(base, 1e4) from (values (10.0), (2.0), (3.0)) as t(base) order by base;
----
13.287712379549
8.383613097158
4
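
For reference, a quick worked check of the expected values in the query above (editorial note, not part of the test file):

log_2(10^4) = 4 * log_2(10) ≈ 13.287712
log_3(10^4) = 4 * log_3(10) ≈ 8.383613
log_10(10^4) = 4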

# power with decimals

query RT