Skip to content

Commit 0022d8e

Browse files
authored
chore: Cleanup returning null arrays (apache#20423)
Cleanup a few places where the code returned a null array but it would be a bit cleaner and faster to return a typed scalar null instead. ## Which issue does this PR close? Does not close an issue; this cleanup was mentioned in the code review for apache#20361 ## Rationale for this change Returning a typed scalar null should be preferred to returning a null array: it still has type information, and avoids materializing an all-null array. The downstream consumer can always materialize the equivalent array if they want to. ## What changes are included in this PR? Cleanup five instances of this pattern. ## Are these changes tested? Yes. No new test cases possible/warranted. ## Are there any user-facing changes? No.
1 parent c3f0807 commit 0022d8e

File tree

4 files changed

+19
-30
lines changed

4 files changed

+19
-30
lines changed

datafusion/functions/src/math/gcd.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray, new_null_array};
18+
use arrow::array::{ArrayRef, AsArray, Int64Array, PrimitiveArray};
1919
use arrow::compute::try_binary;
2020
use arrow::datatypes::{DataType, Int64Type};
2121
use arrow::error::ArrowError;
@@ -144,10 +144,7 @@ fn compute_gcd_with_scalar(arr: &ArrayRef, scalar: Option<i64>) -> Result<Column
144144

145145
result.map(|arr| ColumnarValue::Array(Arc::new(arr) as ArrayRef))
146146
}
147-
None => Ok(ColumnarValue::Array(new_null_array(
148-
&DataType::Int64,
149-
arr.len(),
150-
))),
147+
None => Ok(ColumnarValue::Scalar(ScalarValue::Int64(None))),
151148
}
152149
}
153150

datafusion/functions/src/unicode/find_in_set.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use std::sync::Arc;
2020

2121
use arrow::array::{
2222
ArrayAccessor, ArrayIter, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait,
23-
PrimitiveArray, new_null_array,
23+
PrimitiveArray,
2424
};
2525
use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
2626

@@ -138,9 +138,11 @@ impl ScalarUDFImpl for FindInSetFunc {
138138
| ScalarValue::LargeUtf8(str_list_literal),
139139
),
140140
) => {
141-
let result_array = match str_list_literal {
141+
match str_list_literal {
142142
// find_in_set(column_a, null) = null
143-
None => new_null_array(return_field.data_type(), str_array.len()),
143+
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
144+
return_field.data_type(),
145+
)?)),
144146
Some(str_list_literal) => {
145147
let str_list = str_list_literal.split(',').collect::<Vec<&str>>();
146148
let result = match str_array.data_type() {
@@ -171,10 +173,9 @@ impl ScalarUDFImpl for FindInSetFunc {
171173
)
172174
}
173175
};
174-
Arc::new(result?)
176+
Ok(ColumnarValue::Array(Arc::new(result?)))
175177
}
176-
};
177-
Ok(ColumnarValue::Array(result_array))
178+
}
178179
}
179180

180181
// `string` is scalar, `str_list` is an array
@@ -186,11 +187,11 @@ impl ScalarUDFImpl for FindInSetFunc {
186187
),
187188
ColumnarValue::Array(str_list_array),
188189
) => {
189-
let res = match string_literal {
190+
match string_literal {
190191
// find_in_set(null, column_b) = null
191-
None => {
192-
new_null_array(return_field.data_type(), str_list_array.len())
193-
}
192+
None => Ok(ColumnarValue::Scalar(ScalarValue::try_new_null(
193+
return_field.data_type(),
194+
)?)),
194195
Some(string) => {
195196
let result = match str_list_array.data_type() {
196197
DataType::Utf8 => {
@@ -217,10 +218,9 @@ impl ScalarUDFImpl for FindInSetFunc {
217218
)
218219
}
219220
};
220-
Arc::new(result?)
221+
Ok(ColumnarValue::Array(Arc::new(result?)))
221222
}
222-
};
223-
Ok(ColumnarValue::Array(res))
223+
}
224224
}
225225

226226
// both inputs are arrays

datafusion/spark/src/function/datetime/next_day.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
use std::any::Any;
1919
use std::sync::Arc;
2020

21-
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType, new_null_array};
21+
use arrow::array::{ArrayRef, AsArray, Date32Array, StringArrayType};
2222
use arrow::datatypes::{DataType, Date32Type, Field, FieldRef};
2323
use chrono::{Datelike, Duration, Weekday};
2424
use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
@@ -129,10 +129,7 @@ impl ScalarUDFImpl for SparkNextDay {
129129
} else {
130130
// TODO: if spark.sql.ansi.enabled is false,
131131
// returns NULL instead of an error for a malformed dayOfWeek.
132-
Ok(ColumnarValue::Array(Arc::new(new_null_array(
133-
&DataType::Date32,
134-
date_array.len(),
135-
))))
132+
Ok(ColumnarValue::Scalar(ScalarValue::Date32(None)))
136133
}
137134
}
138135
_ => exec_err!(

datafusion/spark/src/function/hash/sha2.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{
19-
ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray, new_null_array,
20-
};
18+
use arrow::array::{ArrayRef, AsArray, BinaryArrayType, Int32Array, StringArray};
2119
use arrow::datatypes::{DataType, Int32Type};
2220
use datafusion_common::types::{
2321
NativeType, logical_binary, logical_int32, logical_string,
@@ -170,10 +168,7 @@ impl ScalarUDFImpl for SparkSha2 {
170168
(
171169
ColumnarValue::Array(_),
172170
ColumnarValue::Scalar(ScalarValue::Int32(None)),
173-
) => Ok(ColumnarValue::Array(new_null_array(
174-
&DataType::Utf8,
175-
args.number_rows,
176-
))),
171+
) => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
177172
_ => {
178173
// Fallback to existing behavior for any array/mixed cases
179174
make_scalar_function(sha2_impl, vec![])(&args.args)

0 commit comments

Comments
 (0)