1919
2020use crate :: utils:: make_scalar_function;
2121use arrow:: array:: {
22- Array , ArrayRef , Capacities , GenericListArray , Int64Array , MutableArrayData ,
23- NullBufferBuilder , OffsetSizeTrait , new_null_array ,
22+ new_null_array , Array , ArrayRef , Capacities , GenericListArray , Int64Array ,
23+ MutableArrayData , NullBufferBuilder , OffsetSizeTrait ,
2424} ;
2525use arrow:: buffer:: OffsetBuffer ;
2626use arrow:: datatypes:: DataType ;
@@ -31,7 +31,7 @@ use arrow::datatypes::{
3131} ;
3232use datafusion_common:: cast:: { as_int64_array, as_large_list_array, as_list_array} ;
3333use datafusion_common:: utils:: ListCoercion ;
34- use datafusion_common:: { Result , ScalarValue , exec_err , internal_datafusion_err } ;
34+ use datafusion_common:: { exec_err , internal_datafusion_err , Result , ScalarValue } ;
3535use datafusion_expr:: {
3636 ArrayFunctionArgument , ArrayFunctionSignature , ColumnarValue , Documentation ,
3737 ScalarUDFImpl , Signature , TypeSignature , Volatility ,
@@ -206,7 +206,120 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
206206 let values = array. values ( ) ;
207207 let original_data = values. to_data ( ) ;
208208
209- // create default element array
209+ // Track the largest per-row growth so the uniform-fill fast path can
210+ // materialize one reusable fill buffer of the required size.
211+ let mut max_extra: usize = 0 ;
212+ for ( row_index, offset_window) in array. offsets ( ) . windows ( 2 ) . enumerate ( ) {
213+ if array. is_null ( row_index) {
214+ continue ;
215+ }
216+ let target_count =
217+ count_array. value ( row_index) . to_usize ( ) . ok_or_else ( || {
218+ internal_datafusion_err ! (
219+ "array_resize: failed to convert size to usize"
220+ )
221+ } ) ?;
222+ let current_len = ( offset_window[ 1 ] - offset_window[ 0 ] ) . to_usize ( ) . unwrap ( ) ;
223+ if target_count > current_len {
224+ let extra = target_count - current_len;
225+ if extra > max_extra {
226+ max_extra = extra;
227+ }
228+ }
229+ }
230+
231+ // The fast path is valid when at least one row grows and every row would
232+ // use the same fill value.
233+ let is_uniform_fill = max_extra > 0
234+ && match & default_element {
235+ None => true ,
236+ Some ( fill_array) => {
237+ let len = fill_array. len ( ) ;
238+ let null_count = fill_array. logical_null_count ( ) ;
239+
240+ len <= 1
241+ || null_count == len
242+ || ( null_count == 0 && {
243+ let first = fill_array. slice ( 0 , 1 ) ;
244+ ( 1 ..len)
245+ . all ( |i| fill_array. slice ( i, 1 ) . as_ref ( ) == first. as_ref ( ) )
246+ } )
247+ }
248+ } ;
249+
250+ // Fast path: at least one row needs to grow and all rows share
251+ // the same fill value.
252+ if is_uniform_fill {
253+ let fill_scalar = match & default_element {
254+ None => ScalarValue :: try_from ( & data_type) ?,
255+ Some ( fill_array) if fill_array. logical_null_count ( ) == fill_array. len ( ) => {
256+ ScalarValue :: try_from ( & data_type) ?
257+ }
258+ Some ( fill_array) => {
259+ ScalarValue :: try_from_array ( fill_array. as_ref ( ) , 0 ) ?
260+ }
261+ } ;
262+ let default_element = fill_scalar. to_array_of_size ( max_extra) ?;
263+ let default_value_data = default_element. to_data ( ) ;
264+
265+ let capacity =
266+ Capacities :: Array ( original_data. len ( ) + default_value_data. len ( ) ) ;
267+ let mut offsets = vec ! [ O :: usize_as( 0 ) ] ;
268+ let mut mutable = MutableArrayData :: with_capacities (
269+ vec ! [ & original_data, & default_value_data] ,
270+ false ,
271+ capacity,
272+ ) ;
273+
274+ let mut null_builder = NullBufferBuilder :: new ( array. len ( ) ) ;
275+
276+ for ( row_index, offset_window) in array. offsets ( ) . windows ( 2 ) . enumerate ( ) {
277+ if array. is_null ( row_index) {
278+ null_builder. append_null ( ) ;
279+ offsets. push ( offsets[ row_index] ) ;
280+ continue ;
281+ }
282+ null_builder. append_non_null ( ) ;
283+
284+ let count = count_array. value ( row_index) . to_usize ( ) . ok_or_else ( || {
285+ internal_datafusion_err ! (
286+ "array_resize: failed to convert size to usize"
287+ )
288+ } ) ?;
289+ let count = O :: usize_as ( count) ;
290+ let start = offset_window[ 0 ] ;
291+ if start + count > offset_window[ 1 ] {
292+ let extra_count =
293+ ( start + count - offset_window[ 1 ] ) . to_usize ( ) . unwrap ( ) ;
294+ let end = offset_window[ 1 ] ;
295+ mutable. extend (
296+ 0 ,
297+ ( start) . to_usize ( ) . unwrap ( ) ,
298+ ( end) . to_usize ( ) . unwrap ( ) ,
299+ ) ;
300+ mutable. extend ( 1 , 0 , extra_count) ;
301+ } else {
302+ let end = start + count;
303+ mutable. extend (
304+ 0 ,
305+ ( start) . to_usize ( ) . unwrap ( ) ,
306+ ( end) . to_usize ( ) . unwrap ( ) ,
307+ ) ;
308+ } ;
309+ offsets. push ( offsets[ row_index] + count) ;
310+ }
311+
312+ let data = mutable. freeze ( ) ;
313+
314+ return Ok ( Arc :: new ( GenericListArray :: < O > :: try_new (
315+ Arc :: clone ( field) ,
316+ OffsetBuffer :: < O > :: new ( offsets. into ( ) ) ,
317+ arrow:: array:: make_array ( data) ,
318+ null_builder. finish ( ) ,
319+ ) ?) ) ;
320+ }
321+
322+ // Slow path: each row may have a different fill value.
210323 let default_element = if let Some ( default_element) = default_element {
211324 default_element
212325 } else {
@@ -268,3 +381,74 @@ fn general_list_resize<O: OffsetSizeTrait + TryInto<i64>>(
268381 null_builder. finish ( ) ,
269382 ) ?) )
270383}
384+
#[cfg(test)]
mod tests {
    use super::*;
    use arrow::array::{AsArray, ListArray};
    use arrow::datatypes::Int64Type;

    /// Nested `Option` representation of an `Int64` list array: the outer
    /// `Option` marks a null row, the inner `Option` marks a null element.
    type Rows = Vec<Option<Vec<Option<i64>>>>;

    /// Converts a `ListArray` of `Int64` values into plain nested vectors so
    /// the result can be compared with `assert_eq!`.
    fn list_values(array: &ListArray) -> Rows {
        let mut rows = Rows::with_capacity(array.len());
        for row in array.iter() {
            rows.push(row.map(|elements| {
                elements.as_primitive::<Int64Type>().iter().collect()
            }));
        }
        rows
    }

    /// Shared fixture: resizes the rows `[1]` and `[2]` to lengths 3 and 2,
    /// passing `fill` as the third (fill value) argument, and returns the
    /// resized rows.
    fn resize_single_element_rows(fill: Vec<i64>) -> Result<Rows> {
        let source_rows = vec![Some(vec![Some(1_i64)]), Some(vec![Some(2_i64)])];
        let list = ListArray::from_iter_primitive::<Int64Type, _, _>(source_rows);
        let new_len = Int64Array::from(vec![3_i64, 2]);
        let fill = Int64Array::from(fill);

        let args: Vec<ArrayRef> =
            vec![Arc::new(list), Arc::new(new_len), Arc::new(fill)];
        let resized = array_resize_inner(&args)?;

        Ok(list_values(resized.as_list::<i32>()))
    }

    /// Rows with differing fill values must each be padded with their own
    /// fill value rather than a single shared one.
    #[test]
    fn test_array_resize_preserves_row_fill_values() -> Result<()> {
        let actual = resize_single_element_rows(vec![9, 8])?;

        let expected = vec![
            Some(vec![Some(1), Some(9), Some(9)]),
            Some(vec![Some(2), Some(8)]),
        ];
        assert_eq!(expected, actual);
        Ok(())
    }

    /// Every row supplies the same fill value; this is intended to exercise
    /// the uniform-fill fast path, and the padded output must match.
    #[test]
    fn test_array_resize_uniform_fill_fast_path() -> Result<()> {
        let actual = resize_single_element_rows(vec![9, 9])?;

        let expected = vec![
            Some(vec![Some(1), Some(9), Some(9)]),
            Some(vec![Some(2), Some(9)]),
        ];
        assert_eq!(expected, actual);
        Ok(())
    }
}
0 commit comments