arrow_select/
concat.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines concat kernel for `ArrayRef`
19//!
20//! Example:
21//!
22//! ```
23//! use arrow_array::{ArrayRef, StringArray};
24//! use arrow_select::concat::concat;
25//!
26//! let arr = concat(&[
27//!     &StringArray::from(vec!["hello", "world"]),
28//!     &StringArray::from(vec!["!"]),
29//! ]).unwrap();
30//! assert_eq!(arr.len(), 3);
31//! ```
32
33use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
34use arrow_array::builder::{
35    BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder,
36};
37use arrow_array::cast::AsArray;
38use arrow_array::types::*;
39use arrow_array::*;
40use arrow_buffer::{
41    ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
42};
43use arrow_data::ArrayDataBuilder;
44use arrow_data::transform::{Capacities, MutableArrayData};
45use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
46use std::{collections::HashSet, ops::Add, sync::Arc};
47
48fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
49    let mut item_capacity = 0;
50    let mut bytes_capacity = 0;
51    for array in arrays {
52        let a = array.as_bytes::<T>();
53
54        // Guaranteed to always have at least one element
55        let offsets = a.value_offsets();
56        bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize();
57        item_capacity += a.len()
58    }
59
60    Capacities::Binary(item_capacity, Some(bytes_capacity))
61}
62
63fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
64    if let DataType::FixedSizeList(f, _) = data_type {
65        let item_capacity = arrays.iter().map(|a| a.len()).sum();
66        let child_data_type = f.data_type();
67        match child_data_type {
68            // These types should match the types that `get_capacity`
69            // has special handling for.
70            DataType::Utf8
71            | DataType::LargeUtf8
72            | DataType::Binary
73            | DataType::LargeBinary
74            | DataType::FixedSizeList(_, _) => {
75                let values: Vec<&dyn arrow_array::Array> = arrays
76                    .iter()
77                    .map(|a| a.as_fixed_size_list().values().as_ref())
78                    .collect();
79                Capacities::List(
80                    item_capacity,
81                    Some(Box::new(get_capacity(&values, child_data_type))),
82                )
83            }
84            _ => Capacities::Array(item_capacity),
85        }
86    } else {
87        unreachable!("illegal data type for fixed size list")
88    }
89}
90
91fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
92    let mut builder =
93        GenericByteViewBuilder::<B>::with_capacity(arrays.iter().map(|a| a.len()).sum());
94    for &array in arrays.iter() {
95        builder.append_array(array.as_byte_view());
96    }
97    Ok(Arc::new(builder.finish()))
98}
99
100fn concat_dictionaries<K: ArrowDictionaryKeyType>(
101    arrays: &[&dyn Array],
102) -> Result<ArrayRef, ArrowError> {
103    let mut output_len = 0;
104    let dictionaries: Vec<_> = arrays
105        .iter()
106        .map(|x| x.as_dictionary::<K>())
107        .inspect(|d| output_len += d.len())
108        .collect();
109
110    if !should_merge_dictionary_values::<K>(&dictionaries, output_len) {
111        return concat_fallback(arrays, Capacities::Array(output_len));
112    }
113
114    let merged = merge_dictionary_values(&dictionaries, None)?;
115
116    // Recompute keys
117    let mut key_values = Vec::with_capacity(output_len);
118
119    let mut has_nulls = false;
120    for (d, mapping) in dictionaries.iter().zip(merged.key_mappings) {
121        has_nulls |= d.null_count() != 0;
122        for key in d.keys().values() {
123            // Use get to safely handle nulls
124            key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default())
125        }
126    }
127
128    let nulls = has_nulls.then(|| {
129        let mut nulls = BooleanBufferBuilder::new(output_len);
130        for d in &dictionaries {
131            match d.nulls() {
132                Some(n) => nulls.append_buffer(n.inner()),
133                None => nulls.append_n(d.len(), true),
134            }
135        }
136        NullBuffer::new(nulls.finish())
137    });
138
139    let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
140    // Sanity check
141    assert_eq!(keys.len(), output_len);
142
143    let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) };
144    Ok(Arc::new(array))
145}
146
147fn concat_lists<OffsetSize: OffsetSizeTrait>(
148    arrays: &[&dyn Array],
149    field: &FieldRef,
150) -> Result<ArrayRef, ArrowError> {
151    let mut output_len = 0;
152    let mut list_has_nulls = false;
153    let mut list_has_slices = false;
154
155    let lists = arrays
156        .iter()
157        .map(|x| x.as_list::<OffsetSize>())
158        .inspect(|l| {
159            output_len += l.len();
160            list_has_nulls |= l.null_count() != 0;
161            list_has_slices |= l.offsets()[0] > OffsetSize::zero()
162                || l.offsets().last().unwrap().as_usize() < l.values().len();
163        })
164        .collect::<Vec<_>>();
165
166    let lists_nulls = list_has_nulls.then(|| {
167        let mut nulls = BooleanBufferBuilder::new(output_len);
168        for l in &lists {
169            match l.nulls() {
170                Some(n) => nulls.append_buffer(n.inner()),
171                None => nulls.append_n(l.len(), true),
172            }
173        }
174        NullBuffer::new(nulls.finish())
175    });
176
177    // If any of the lists have slices, we need to slice the values
178    // to ensure that the offsets are correct
179    let mut sliced_values;
180    let values: Vec<&dyn Array> = if list_has_slices {
181        sliced_values = Vec::with_capacity(lists.len());
182        for l in &lists {
183            // if the first offset is non-zero, we need to slice the values so when
184            // we concatenate them below only the relevant values are included
185            let offsets = l.offsets();
186            let start_offset = offsets[0].as_usize();
187            let end_offset = offsets.last().unwrap().as_usize();
188            sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
189        }
190        sliced_values.iter().map(|a| a.as_ref()).collect()
191    } else {
192        lists.iter().map(|x| x.values().as_ref()).collect()
193    };
194
195    let concatenated_values = concat(values.as_slice())?;
196
197    // Merge value offsets from the lists
198    let value_offset_buffer =
199        OffsetBuffer::<OffsetSize>::from_lengths(lists.iter().flat_map(|x| x.offsets().lengths()));
200
201    let array = GenericListArray::<OffsetSize>::try_new(
202        Arc::clone(field),
203        value_offset_buffer,
204        concatenated_values,
205        lists_nulls,
206    )?;
207
208    Ok(Arc::new(array))
209}
210
211fn concat_list_view<OffsetSize: OffsetSizeTrait>(
212    arrays: &[&dyn Array],
213    field: &FieldRef,
214) -> Result<ArrayRef, ArrowError> {
215    let mut output_len = 0;
216    let mut list_has_nulls = false;
217
218    let lists = arrays
219        .iter()
220        .map(|x| x.as_list_view::<OffsetSize>())
221        .inspect(|l| {
222            output_len += l.len();
223            list_has_nulls |= l.null_count() != 0;
224        })
225        .collect::<Vec<_>>();
226
227    let lists_nulls = list_has_nulls.then(|| {
228        let mut nulls = BooleanBufferBuilder::new(output_len);
229        for l in &lists {
230            match l.nulls() {
231                Some(n) => nulls.append_buffer(n.inner()),
232                None => nulls.append_n(l.len(), true),
233            }
234        }
235        NullBuffer::new(nulls.finish())
236    });
237
238    let values: Vec<&dyn Array> = lists.iter().map(|l| l.values().as_ref()).collect();
239
240    let concatenated_values = concat(values.as_slice())?;
241
242    let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| x.sizes()).copied().collect();
243
244    let mut offsets = MutableBuffer::with_capacity(lists.iter().map(|l| l.offsets().len()).sum());
245    let mut global_offset = OffsetSize::zero();
246    for l in lists.iter() {
247        for &offset in l.offsets() {
248            offsets.push(offset + global_offset);
249        }
250
251        // advance the offsets
252        global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
253    }
254
255    let offsets = ScalarBuffer::from(offsets);
256
257    let array = GenericListViewArray::try_new(
258        field.clone(),
259        offsets,
260        sizes,
261        concatenated_values,
262        lists_nulls,
263    )?;
264
265    Ok(Arc::new(array))
266}
267
268fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
269    let mut builder = PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
270        .with_data_type(arrays[0].data_type().clone());
271
272    for array in arrays {
273        builder.append_array(array.as_primitive());
274    }
275
276    Ok(Arc::new(builder.finish()))
277}
278
279fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
280    let mut builder = BooleanBuilder::with_capacity(arrays.iter().map(|a| a.len()).sum());
281
282    for array in arrays {
283        builder.append_array(array.as_boolean());
284    }
285
286    Ok(Arc::new(builder.finish()))
287}
288
289fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
290    let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) {
291        Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity),
292        _ => unreachable!(),
293    };
294
295    let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
296
297    for array in arrays {
298        builder.append_array(array.as_bytes::<T>())?;
299    }
300
301    Ok(Arc::new(builder.finish()))
302}
303
304fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> {
305    let mut len = 0;
306    let mut has_nulls = false;
307    let structs = arrays
308        .iter()
309        .map(|a| {
310            len += a.len();
311            has_nulls |= a.null_count() > 0;
312            a.as_struct()
313        })
314        .collect::<Vec<_>>();
315
316    let nulls = has_nulls.then(|| {
317        let mut b = BooleanBufferBuilder::new(len);
318        for s in &structs {
319            match s.nulls() {
320                Some(n) => b.append_buffer(n.inner()),
321                None => b.append_n(s.len(), true),
322            }
323        }
324        NullBuffer::new(b.finish())
325    });
326
327    let column_concat_result = (0..fields.len())
328        .map(|i| {
329            let extracted_cols = structs
330                .iter()
331                .map(|s| s.column(i).as_ref())
332                .collect::<Vec<_>>();
333            concat(&extracted_cols)
334        })
335        .collect::<Result<Vec<_>, ArrowError>>()?;
336
337    Ok(Arc::new(StructArray::try_new_with_length(
338        fields.clone(),
339        column_concat_result,
340        nulls,
341        len,
342    )?))
343}
344
345/// Concatenate multiple RunArray instances into a single RunArray.
346///
347/// This function handles the special case of concatenating RunArrays by:
348/// 1. Collecting all run ends and values from input arrays
349/// 2. Adjusting run ends to account for the length of previous arrays
350/// 3. Creating a new RunArray with the combined data
351fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError>
352where
353    R::Native: Add<Output = R::Native>,
354{
355    let run_arrays: Vec<_> = arrays
356        .iter()
357        .map(|x| x.as_run::<R>())
358        .filter(|x| !x.run_ends().is_empty())
359        .collect();
360
361    // The run ends need to be adjusted by the sum of the lengths of the previous arrays.
362    let needed_run_end_adjustments = std::iter::once(R::default_value())
363        .chain(
364            run_arrays
365                .iter()
366                .scan(R::default_value(), |acc, run_array| {
367                    *acc = *acc + *run_array.run_ends().values().last().unwrap();
368                    Some(*acc)
369                }),
370        )
371        .collect::<Vec<_>>();
372
373    // This works out nicely to be the total (logical) length of the resulting array.
374    let total_len = needed_run_end_adjustments.last().unwrap().as_usize();
375
376    let run_ends_array =
377        PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map(
378            move |(i, run_array)| {
379                let adjustment = needed_run_end_adjustments[i];
380                run_array
381                    .run_ends()
382                    .values()
383                    .iter()
384                    .map(move |run_end| *run_end + adjustment)
385            },
386        ));
387
388    let all_values = concat(
389        &run_arrays
390            .iter()
391            .map(|x| x.values().as_ref())
392            .collect::<Vec<_>>(),
393    )?;
394
395    let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
396        .len(total_len)
397        .child_data(vec![run_ends_array.into_data(), all_values.into_data()]);
398
399    // `build_unchecked` is used to avoid recursive validation of child arrays.
400    let array_data = unsafe { builder.build_unchecked() };
401    array_data.validate_data()?;
402
403    Ok(Arc::<RunArray<R>>::new(array_data.into()))
404}
405
// Dispatch helper for `downcast_integer!`: concatenates dictionary arrays whose
// key type is `$t` and returns the result from the enclosing function.
//
// `concat_dictionaries` already yields an `ArrayRef`, so its result is returned
// directly rather than being wrapped in a second `Arc`.
macro_rules! dict_helper {
    ($t:ty, $arrays:expr) => {
        return concat_dictionaries::<$t>($arrays)
    };
}
411
// Dispatch helper for `downcast_primitive!`: concatenates primitive arrays of
// type `$t` and returns the result from the enclosing function.
//
// `concat_primitives` already yields an `ArrayRef`, so its result is returned
// directly rather than being wrapped in a second `Arc`.
macro_rules! primitive_concat {
    ($t:ty, $arrays:expr) => {
        return concat_primitives::<$t>($arrays)
    };
}
417
418fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
419    match data_type {
420        DataType::Utf8 => binary_capacity::<Utf8Type>(arrays),
421        DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays),
422        DataType::Binary => binary_capacity::<BinaryType>(arrays),
423        DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays),
424        DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type),
425        _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
426    }
427}
428
429/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
430pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
431    if arrays.is_empty() {
432        return Err(ArrowError::ComputeError(
433            "concat requires input of at least one array".to_string(),
434        ));
435    } else if arrays.len() == 1 {
436        let array = arrays[0];
437        return Ok(array.slice(0, array.len()));
438    }
439
440    let d = arrays[0].data_type();
441    if arrays.iter().skip(1).any(|array| array.data_type() != d) {
442        // Create error message with up to 10 unique data types in the order they appear
443        let error_message = {
444            // 10 max unique data types to print and another 1 to know if there are more
445            let mut unique_data_types = HashSet::with_capacity(11);
446
447            let mut error_message =
448                format!("It is not possible to concatenate arrays of different data types ({d}");
449            unique_data_types.insert(d);
450
451            for array in arrays {
452                let is_unique = unique_data_types.insert(array.data_type());
453
454                if unique_data_types.len() == 11 {
455                    error_message.push_str(", ...");
456                    break;
457                }
458
459                if is_unique {
460                    error_message.push_str(", ");
461                    error_message.push_str(&array.data_type().to_string());
462                }
463            }
464
465            error_message.push_str(").");
466
467            error_message
468        };
469
470        return Err(ArrowError::InvalidArgumentError(error_message));
471    }
472
473    downcast_primitive! {
474        d => (primitive_concat, arrays),
475        DataType::Boolean => concat_boolean(arrays),
476        DataType::Dictionary(k, _) => {
477            downcast_integer! {
478                k.as_ref() => (dict_helper, arrays),
479                _ => unreachable!("illegal dictionary key type {k}")
480            }
481        }
482        DataType::List(field) => concat_lists::<i32>(arrays, field),
483        DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
484        DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
485        DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
486        DataType::Struct(fields) => concat_structs(arrays, fields),
487        DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
488        DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
489        DataType::Binary => concat_bytes::<BinaryType>(arrays),
490        DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays),
491        DataType::RunEndEncoded(r, _) => {
492            // Handle RunEndEncoded arrays with special concat function
493            // We need to downcast based on the run end type
494            match r.data_type() {
495                DataType::Int16 => concat_run_arrays::<Int16Type>(arrays),
496                DataType::Int32 => concat_run_arrays::<Int32Type>(arrays),
497                DataType::Int64 => concat_run_arrays::<Int64Type>(arrays),
498                _ => unreachable!("Unsupported run end index type: {r:?}"),
499            }
500        }
501        DataType::Utf8View => concat_byte_view::<StringViewType>(arrays),
502        DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays),
503        _ => {
504            let capacity = get_capacity(arrays, d);
505            concat_fallback(arrays, capacity)
506        }
507    }
508}
509
510/// Concatenates arrays using MutableArrayData
511///
512/// This will naively concatenate dictionaries
513fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> {
514    let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
515    let array_data = array_data.iter().collect();
516    let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
517
518    for (i, a) in arrays.iter().enumerate() {
519        mutable.extend(i, 0, a.len())
520    }
521
522    Ok(make_array(mutable.freeze()))
523}
524
525/// Concatenates `batches` together into a single [`RecordBatch`].
526///
527/// The output batch has the specified `schemas`; The schema of the
528/// input are ignored.
529///
530/// Returns an error if the types of underlying arrays are different.
531pub fn concat_batches<'a>(
532    schema: &SchemaRef,
533    input_batches: impl IntoIterator<Item = &'a RecordBatch>,
534) -> Result<RecordBatch, ArrowError> {
535    // When schema is empty, sum the number of the rows of all batches
536    if schema.fields().is_empty() {
537        let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum();
538        let mut options = RecordBatchOptions::default();
539        options.row_count = Some(num_rows);
540        return RecordBatch::try_new_with_options(schema.clone(), vec![], &options);
541    }
542
543    let batches: Vec<&RecordBatch> = input_batches.into_iter().collect();
544    if batches.is_empty() {
545        return Ok(RecordBatch::new_empty(schema.clone()));
546    }
547    let field_num = schema.fields().len();
548    let mut arrays = Vec::with_capacity(field_num);
549    for i in 0..field_num {
550        let array = concat(
551            &batches
552                .iter()
553                .map(|batch| batch.column(i).as_ref())
554                .collect::<Vec<_>>(),
555        )?;
556        arrays.push(array);
557    }
558    RecordBatch::try_new(schema.clone(), arrays)
559}
560
561#[cfg(test)]
562mod tests {
563    use super::*;
564    use arrow_array::builder::{
565        GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
566    };
567    use arrow_schema::{Field, Schema};
568    use std::fmt::Debug;
569
570    #[test]
571    fn test_concat_empty_vec() {
572        let re = concat(&[]);
573        assert!(re.is_err());
574    }
575
576    #[test]
577    fn test_concat_batches_no_columns() {
578        // Test concat using empty schema / batches without columns
579        let schema = Arc::new(Schema::empty());
580
581        let mut options = RecordBatchOptions::default();
582        options.row_count = Some(100);
583        let batch = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
584        // put in 2 batches of 100 rows each
585        let re = concat_batches(&schema, &[batch.clone(), batch]).unwrap();
586
587        assert_eq!(re.num_rows(), 200);
588    }
589
590    #[test]
591    fn test_concat_one_element_vec() {
592        let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
593            Some(-1),
594            Some(2),
595            None,
596        ])) as ArrayRef;
597        let result = concat(&[arr.as_ref()]).unwrap();
598        assert_eq!(
599            &arr, &result,
600            "concatenating single element array gives back the same result"
601        );
602    }
603
604    #[test]
605    fn test_concat_incompatible_datatypes() {
606        let re = concat(&[
607            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
608            // 2 string to make sure we only mention unique types
609            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
610            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
611            // Another type to make sure we are showing all the incompatible types
612            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
613        ]);
614
615        assert_eq!(
616            re.unwrap_err().to_string(),
617            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
618        );
619    }
620
621    #[test]
622    fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
623        let re = concat(&[
624            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
625            // 2 string to make sure we only mention unique types
626            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
627            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
628            // Another type to make sure we are showing all the incompatible types
629            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
630            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
631            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
632            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
633            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
634            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
635            // Non unique
636            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
637            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
638            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
639        ]);
640
641        assert_eq!(
642            re.unwrap_err().to_string(),
643            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
644        );
645    }
646
647    #[test]
648    fn test_concat_11_incompatible_datatypes_should_only_include_10() {
649        let re = concat(&[
650            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
651            // 2 string to make sure we only mention unique types
652            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
653            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
654            // Another type to make sure we are showing all the incompatible types
655            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
656            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
657            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
658            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
659            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
660            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
661            // Non unique
662            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
663            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
664            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
665            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
666        ]);
667
668        assert_eq!(
669            re.unwrap_err().to_string(),
670            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
671        );
672    }
673
674    #[test]
675    fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
676        let re = concat(&[
677            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
678            // 2 string to make sure we only mention unique types
679            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
680            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
681            // Another type to make sure we are showing all the incompatible types
682            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
683            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
684            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
685            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
686            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
687            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
688            // Non unique
689            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
690            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
691            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
692            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
693            &PrimitiveArray::<Float16Type>::new_null(3),
694            &BooleanArray::from(vec![Some(true), Some(false), None]),
695        ]);
696
697        assert_eq!(
698            re.unwrap_err().to_string(),
699            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
700        );
701    }
702
703    #[test]
704    fn test_concat_string_arrays() {
705        let arr = concat(&[
706            &StringArray::from(vec!["hello", "world"]),
707            &StringArray::from(vec!["2", "3", "4"]),
708            &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
709        ])
710        .unwrap();
711
712        let expected_output = Arc::new(StringArray::from(vec![
713            Some("hello"),
714            Some("world"),
715            Some("2"),
716            Some("3"),
717            Some("4"),
718            Some("foo"),
719            Some("bar"),
720            None,
721            Some("baz"),
722        ])) as ArrayRef;
723
724        assert_eq!(&arr, &expected_output);
725    }
726
727    #[test]
728    fn test_concat_string_view_arrays() {
729        let arr = concat(&[
730            &StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]),
731            &StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]),
732            &StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
733        ])
734        .unwrap();
735
736        let expected_output = Arc::new(StringViewArray::from(vec![
737            Some("helloxxxxxxxxxxa"),
738            Some("world____________"),
739            Some("helloxxxxxxxxxxy"),
740            Some("3"),
741            Some("4"),
742            Some("foo"),
743            Some("bar"),
744            None,
745            Some("baz"),
746        ])) as ArrayRef;
747
748        assert_eq!(&arr, &expected_output);
749    }
750
751    #[test]
752    fn test_concat_primitive_arrays() {
753        let arr = concat(&[
754            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None]),
755            &PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None]),
756            &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
757        ])
758        .unwrap();
759
760        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
761            Some(-1),
762            Some(-1),
763            Some(2),
764            None,
765            None,
766            Some(101),
767            Some(102),
768            Some(103),
769            None,
770            Some(256),
771            Some(512),
772            Some(1024),
773        ])) as ArrayRef;
774
775        assert_eq!(&arr, &expected_output);
776    }
777
778    #[test]
779    fn test_concat_primitive_array_slices() {
780        let input_1 =
781            PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None])
782                .slice(1, 3);
783
784        let input_2 =
785            PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None])
786                .slice(1, 3);
787        let arr = concat(&[&input_1, &input_2]).unwrap();
788
789        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
790            Some(-1),
791            Some(2),
792            None,
793            Some(102),
794            Some(103),
795            None,
796        ])) as ArrayRef;
797
798        assert_eq!(&arr, &expected_output);
799    }
800
801    #[test]
802    fn test_concat_boolean_primitive_arrays() {
803        let arr = concat(&[
804            &BooleanArray::from(vec![
805                Some(true),
806                Some(true),
807                Some(false),
808                None,
809                None,
810                Some(false),
811            ]),
812            &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
813        ])
814        .unwrap();
815
816        let expected_output = Arc::new(BooleanArray::from(vec![
817            Some(true),
818            Some(true),
819            Some(false),
820            None,
821            None,
822            Some(false),
823            None,
824            Some(false),
825            Some(true),
826            Some(false),
827        ])) as ArrayRef;
828
829        assert_eq!(&arr, &expected_output);
830    }
831
832    #[test]
833    fn test_concat_primitive_list_arrays() {
834        let list1 = [
835            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
836            Some(vec![]),
837            None,
838            Some(vec![Some(10)]),
839        ];
840        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
841
842        let list2 = [
843            None,
844            Some(vec![Some(100), None, Some(101)]),
845            Some(vec![Some(102)]),
846        ];
847        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
848
849        let list3 = [Some(vec![Some(1000), Some(1001)])];
850        let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
851
852        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
853
854        let expected = list1.into_iter().chain(list2).chain(list3);
855        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
856
857        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
858    }
859
860    #[test]
861    fn test_concat_primitive_list_arrays_slices() {
862        let list1 = [
863            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
864            Some(vec![]), // In slice
865            None,         // In slice
866            Some(vec![Some(10)]),
867        ];
868        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
869        let list1_array = list1_array.slice(1, 2);
870        let list1_values = list1.into_iter().skip(1).take(2);
871
872        let list2 = [
873            None,
874            Some(vec![Some(100), None, Some(101)]),
875            Some(vec![Some(102)]),
876        ];
877        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
878
879        // verify that this test covers the case when the first offset is non zero
880        assert!(list1_array.offsets()[0].as_usize() > 0);
881        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
882
883        let expected = list1_values.chain(list2);
884        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
885
886        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
887    }
888
889    #[test]
890    fn test_concat_primitive_list_arrays_sliced_lengths() {
891        let list1 = [
892            Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
893            Some(vec![]),                                        // In slice
894            None,                                                // In slice
895            Some(vec![Some(10)]),
896        ];
897        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
898        let list1_array = list1_array.slice(0, 3); // no offset, but not all values
899        let list1_values = list1.into_iter().take(3);
900
901        let list2 = [
902            None,
903            Some(vec![Some(100), None, Some(101)]),
904            Some(vec![Some(102)]),
905        ];
906        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
907
908        // verify that this test covers the case when the first offset is zero, but the
909        // last offset doesn't cover the entire array
910        assert_eq!(list1_array.offsets()[0].as_usize(), 0);
911        assert!(list1_array.offsets().last().unwrap().as_usize() < list1_array.values().len());
912        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
913
914        let expected = list1_values.chain(list2);
915        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
916
917        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
918    }
919
920    #[test]
921    fn test_concat_primitive_fixed_size_list_arrays() {
922        let list1 = [
923            Some(vec![Some(-1), None]),
924            None,
925            Some(vec![Some(10), Some(20)]),
926        ];
927        let list1_array =
928            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);
929
930        let list2 = [
931            None,
932            Some(vec![Some(100), None]),
933            Some(vec![Some(102), Some(103)]),
934        ];
935        let list2_array =
936            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);
937
938        let list3 = [Some(vec![Some(1000), Some(1001)])];
939        let list3_array =
940            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);
941
942        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
943
944        let expected = list1.into_iter().chain(list2).chain(list3);
945        let array_expected =
946            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(expected, 2);
947
948        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
949    }
950
951    #[test]
952    fn test_concat_list_view_arrays() {
953        let list1 = [
954            Some(vec![Some(-1), None]),
955            None,
956            Some(vec![Some(10), Some(20)]),
957        ];
958        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
959        for v in list1.iter() {
960            list1_array.append_option(v.clone());
961        }
962        let list1_array = list1_array.finish();
963
964        let list2 = [
965            None,
966            Some(vec![Some(100), None]),
967            Some(vec![Some(102), Some(103)]),
968        ];
969        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
970        for v in list2.iter() {
971            list2_array.append_option(v.clone());
972        }
973        let list2_array = list2_array.finish();
974
975        let list3 = [Some(vec![Some(1000), Some(1001)])];
976        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
977        for v in list3.iter() {
978            list3_array.append_option(v.clone());
979        }
980        let list3_array = list3_array.finish();
981
982        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
983
984        let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect();
985        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
986        for v in expected.iter() {
987            array_expected.append_option(v.clone());
988        }
989        let array_expected = array_expected.finish();
990
991        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
992    }
993
994    #[test]
995    fn test_concat_sliced_list_view_arrays() {
996        let list1 = [
997            Some(vec![Some(-1), None]),
998            None,
999            Some(vec![Some(10), Some(20)]),
1000        ];
1001        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
1002        for v in list1.iter() {
1003            list1_array.append_option(v.clone());
1004        }
1005        let list1_array = list1_array.finish();
1006
1007        let list2 = [
1008            None,
1009            Some(vec![Some(100), None]),
1010            Some(vec![Some(102), Some(103)]),
1011        ];
1012        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
1013        for v in list2.iter() {
1014            list2_array.append_option(v.clone());
1015        }
1016        let list2_array = list2_array.finish();
1017
1018        let list3 = [Some(vec![Some(1000), Some(1001)])];
1019        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
1020        for v in list3.iter() {
1021            list3_array.append_option(v.clone());
1022        }
1023        let list3_array = list3_array.finish();
1024
1025        // Concat sliced arrays.
1026        // ListView slicing will slice the offset/sizes but preserve the original values child.
1027        let array_result = concat(&[
1028            &list1_array.slice(1, 2),
1029            &list2_array.slice(1, 2),
1030            &list3_array.slice(0, 1),
1031        ])
1032        .unwrap();
1033
1034        let expected: Vec<_> = vec![
1035            None,
1036            Some(vec![Some(10), Some(20)]),
1037            Some(vec![Some(100), None]),
1038            Some(vec![Some(102), Some(103)]),
1039            Some(vec![Some(1000), Some(1001)]),
1040        ];
1041        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
1042        for v in expected.iter() {
1043            array_expected.append_option(v.clone());
1044        }
1045        let array_expected = array_expected.finish();
1046
1047        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
1048    }
1049
1050    #[test]
1051    fn test_concat_struct_arrays() {
1052        let field = Arc::new(Field::new("field", DataType::Int64, true));
1053        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1054            Some(-1),
1055            Some(-1),
1056            Some(2),
1057            None,
1058            None,
1059        ]));
1060        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1061
1062        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1063            Some(101),
1064            Some(102),
1065            Some(103),
1066            None,
1067        ]));
1068        let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);
1069
1070        let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1071            Some(256),
1072            Some(512),
1073            Some(1024),
1074        ]));
1075        let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
1076
1077        let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();
1078
1079        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1080            Some(-1),
1081            Some(-1),
1082            Some(2),
1083            None,
1084            None,
1085            Some(101),
1086            Some(102),
1087            Some(103),
1088            None,
1089            Some(256),
1090            Some(512),
1091            Some(1024),
1092        ])) as ArrayRef;
1093
1094        let actual_primitive = arr
1095            .as_any()
1096            .downcast_ref::<StructArray>()
1097            .unwrap()
1098            .column(0);
1099        assert_eq!(actual_primitive, &expected_primitive_output);
1100    }
1101
1102    #[test]
1103    fn test_concat_struct_array_slices() {
1104        let field = Arc::new(Field::new("field", DataType::Int64, true));
1105        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1106            Some(-1),
1107            Some(-1),
1108            Some(2),
1109            None,
1110            None,
1111        ]));
1112        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1113
1114        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1115            Some(101),
1116            Some(102),
1117            Some(103),
1118            None,
1119        ]));
1120        let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);
1121
1122        let arr = concat(&[&input_struct_1.slice(1, 3), &input_struct_2.slice(1, 2)]).unwrap();
1123
1124        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1125            Some(-1),
1126            Some(2),
1127            None,
1128            Some(102),
1129            Some(103),
1130        ])) as ArrayRef;
1131
1132        let actual_primitive = arr
1133            .as_any()
1134            .downcast_ref::<StructArray>()
1135            .unwrap()
1136            .column(0);
1137        assert_eq!(actual_primitive, &expected_primitive_output);
1138    }
1139
1140    #[test]
1141    fn test_concat_struct_arrays_no_nulls() {
1142        let input_1a = vec![1, 2, 3];
1143        let input_1b = vec!["one", "two", "three"];
1144        let input_2a = vec![4, 5, 6, 7];
1145        let input_2b = vec!["four", "five", "six", "seven"];
1146
1147        let struct_from_primitives = |ints: Vec<i64>, strings: Vec<&str>| {
1148            StructArray::try_from(vec![
1149                ("ints", Arc::new(Int64Array::from(ints)) as _),
1150                ("strings", Arc::new(StringArray::from(strings)) as _),
1151            ])
1152        };
1153
1154        let expected_output = struct_from_primitives(
1155            [input_1a.clone(), input_2a.clone()].concat(),
1156            [input_1b.clone(), input_2b.clone()].concat(),
1157        )
1158        .unwrap();
1159
1160        let input_1 = struct_from_primitives(input_1a, input_1b).unwrap();
1161        let input_2 = struct_from_primitives(input_2a, input_2b).unwrap();
1162
1163        let arr = concat(&[&input_1, &input_2]).unwrap();
1164        let struct_result = arr.as_struct();
1165
1166        assert_eq!(struct_result, &expected_output);
1167        assert_eq!(arr.null_count(), 0);
1168    }
1169
1170    #[test]
1171    fn test_concat_struct_no_fields() {
1172        let input_1 = StructArray::new_empty_fields(10, None);
1173        let input_2 = StructArray::new_empty_fields(10, None);
1174        let arr = concat(&[&input_1, &input_2]).unwrap();
1175
1176        assert_eq!(arr.len(), 20);
1177        assert_eq!(arr.null_count(), 0);
1178
1179        let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
1180        let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
1181        let arr = concat(&[&input1_valid, &input2_null]).unwrap();
1182
1183        assert_eq!(arr.len(), 20);
1184        assert_eq!(arr.null_count(), 10);
1185    }
1186
1187    #[test]
1188    fn test_string_array_slices() {
1189        let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
1190        let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
1191
1192        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1193
1194        let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
1195
1196        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1197        assert_eq!(actual_output, &expected_output);
1198    }
1199
1200    #[test]
1201    fn test_string_array_with_null_slices() {
1202        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
1203        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
1204
1205        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1206
1207        let expected_output =
1208            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
1209
1210        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1211        assert_eq!(actual_output, &expected_output);
1212    }
1213
1214    fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> {
1215        let concrete = array.downcast_dict::<StringArray>().unwrap();
1216        concrete.into_iter().collect()
1217    }
1218
1219    #[test]
1220    fn test_string_dictionary_array() {
1221        let input_1: DictionaryArray<Int32Type> = vec!["hello", "A", "B", "hello", "hello", "C"]
1222            .into_iter()
1223            .collect();
1224        let input_2: DictionaryArray<Int32Type> = vec!["hello", "E", "E", "hello", "F", "E"]
1225            .into_iter()
1226            .collect();
1227
1228        let expected: Vec<_> = vec![
1229            "hello", "A", "B", "hello", "hello", "C", "hello", "E", "E", "hello", "F", "E",
1230        ]
1231        .into_iter()
1232        .map(Some)
1233        .collect();
1234
1235        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1236        let dictionary = concat.as_dictionary::<Int32Type>();
1237        let actual = collect_string_dictionary(dictionary);
1238        assert_eq!(actual, expected);
1239
1240        // Should have concatenated inputs together
1241        assert_eq!(
1242            dictionary.values().len(),
1243            input_1.values().len() + input_2.values().len(),
1244        )
1245    }
1246
1247    #[test]
1248    fn test_string_dictionary_array_nulls() {
1249        let input_1: DictionaryArray<Int32Type> = vec![Some("foo"), Some("bar"), None, Some("fiz")]
1250            .into_iter()
1251            .collect();
1252        let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect();
1253        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1254
1255        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1256        let dictionary = concat.as_dictionary::<Int32Type>();
1257        let actual = collect_string_dictionary(dictionary);
1258        assert_eq!(actual, expected);
1259
1260        // Should have concatenated inputs together
1261        assert_eq!(
1262            dictionary.values().len(),
1263            input_1.values().len() + input_2.values().len(),
1264        )
1265    }
1266
1267    #[test]
1268    fn test_string_dictionary_array_nulls_in_values() {
1269        let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]);
1270        let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]);
1271        let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));
1272
1273        let input_2_keys = Int32Array::from_iter_values([0]);
1274        let input_2_values = StringArray::from(vec![None, Some("hello")]);
1275        let input_2 = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));
1276
1277        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1278
1279        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1280        let dictionary = concat.as_dictionary::<Int32Type>();
1281        let actual = collect_string_dictionary(dictionary);
1282        assert_eq!(actual, expected);
1283    }
1284
1285    #[test]
1286    fn test_string_dictionary_merge() {
1287        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1288        for i in 0..20 {
1289            builder.append(i.to_string()).unwrap();
1290        }
1291        let input_1 = builder.finish();
1292
1293        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1294        for i in 0..30 {
1295            builder.append(i.to_string()).unwrap();
1296        }
1297        let input_2 = builder.finish();
1298
1299        let expected: Vec<_> = (0..20).chain(0..30).map(|x| x.to_string()).collect();
1300        let expected: Vec<_> = expected.iter().map(|x| Some(x.as_str())).collect();
1301
1302        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1303        let dictionary = concat.as_dictionary::<Int32Type>();
1304        let actual = collect_string_dictionary(dictionary);
1305        assert_eq!(actual, expected);
1306
1307        // Should have merged inputs together
1308        // Not 30 as this is done on a best-effort basis
1309        let values_len = dictionary.values().len();
1310        assert!((30..40).contains(&values_len), "{values_len}")
1311    }
1312
1313    #[test]
1314    fn test_primitive_dictionary_merge() {
1315        // Same value repeated 5 times.
1316        let keys = vec![1; 5];
1317        let values = (10..20).collect::<Vec<_>>();
1318        let dict = DictionaryArray::new(
1319            Int8Array::from(keys.clone()),
1320            Arc::new(Int32Array::from(values.clone())),
1321        );
1322        let other = DictionaryArray::new(
1323            Int8Array::from(keys.clone()),
1324            Arc::new(Int32Array::from(values.clone())),
1325        );
1326
1327        let result_same_dictionary = concat(&[&dict, &dict]).unwrap();
1328        // Verify pointer equality check succeeds, and therefore the
1329        // dictionaries are not merged. A single values buffer should be reused
1330        // in this case.
1331        assert!(
1332            dict.values().to_data().ptr_eq(
1333                &result_same_dictionary
1334                    .as_dictionary::<Int8Type>()
1335                    .values()
1336                    .to_data()
1337            )
1338        );
1339        assert_eq!(
1340            result_same_dictionary
1341                .as_dictionary::<Int8Type>()
1342                .values()
1343                .len(),
1344            values.len(),
1345        );
1346
1347        let result_cloned_dictionary = concat(&[&dict, &other]).unwrap();
1348        // Should have only 1 underlying value since all keys reference it.
1349        assert_eq!(
1350            result_cloned_dictionary
1351                .as_dictionary::<Int8Type>()
1352                .values()
1353                .len(),
1354            1
1355        );
1356    }
1357
1358    #[test]
1359    fn test_concat_string_sizes() {
1360        let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1361        let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1362        let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
1363        // 150 * 3 = 450
1364        // 150 * 3 = 450
1365        // 3 * 3   = 9
1366        // ------------+
1367        // 909
1368
1369        let arr = concat(&[&a, &b, &c]).unwrap();
1370        assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
1371    }
1372
1373    #[test]
1374    fn test_dictionary_concat_reuse() {
1375        let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
1376        let copy: DictionaryArray<Int8Type> = array.clone();
1377
1378        // dictionary is "a", "b", "c"
1379        assert_eq!(
1380            array.values(),
1381            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
1382        );
1383        assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
1384
1385        // concatenate it with itself
1386        let combined = concat(&[&copy as _, &array as _]).unwrap();
1387        let combined = combined.as_dictionary::<Int8Type>();
1388
1389        assert_eq!(
1390            combined.values(),
1391            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
1392            "Actual: {combined:#?}"
1393        );
1394
1395        assert_eq!(
1396            combined.keys(),
1397            &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
1398        );
1399
1400        // Should have reused the dictionary
1401        assert!(
1402            array
1403                .values()
1404                .to_data()
1405                .ptr_eq(&combined.values().to_data())
1406        );
1407        assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
1408
1409        let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
1410        let combined = concat(&[&copy as _, &array as _, &new as _]).unwrap();
1411        let com = combined.as_dictionary::<Int8Type>();
1412
1413        // Should not have reused the dictionary
1414        assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
1415        assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
1416        assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
1417    }
1418
1419    #[test]
1420    fn concat_record_batches() {
1421        let schema = Arc::new(Schema::new(vec![
1422            Field::new("a", DataType::Int32, false),
1423            Field::new("b", DataType::Utf8, false),
1424        ]));
1425        let batch1 = RecordBatch::try_new(
1426            schema.clone(),
1427            vec![
1428                Arc::new(Int32Array::from(vec![1, 2])),
1429                Arc::new(StringArray::from(vec!["a", "b"])),
1430            ],
1431        )
1432        .unwrap();
1433        let batch2 = RecordBatch::try_new(
1434            schema.clone(),
1435            vec![
1436                Arc::new(Int32Array::from(vec![3, 4])),
1437                Arc::new(StringArray::from(vec!["c", "d"])),
1438            ],
1439        )
1440        .unwrap();
1441        let new_batch = concat_batches(&schema, [&batch1, &batch2]).unwrap();
1442        assert_eq!(new_batch.schema().as_ref(), schema.as_ref());
1443        assert_eq!(2, new_batch.num_columns());
1444        assert_eq!(4, new_batch.num_rows());
1445        let new_batch_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
1446        assert_eq!(new_batch_owned.schema().as_ref(), schema.as_ref());
1447        assert_eq!(2, new_batch_owned.num_columns());
1448        assert_eq!(4, new_batch_owned.num_rows());
1449    }
1450
1451    #[test]
1452    fn concat_empty_record_batch() {
1453        let schema = Arc::new(Schema::new(vec![
1454            Field::new("a", DataType::Int32, false),
1455            Field::new("b", DataType::Utf8, false),
1456        ]));
1457        let batch = concat_batches(&schema, []).unwrap();
1458        assert_eq!(batch.schema().as_ref(), schema.as_ref());
1459        assert_eq!(0, batch.num_rows());
1460    }
1461
1462    #[test]
1463    fn concat_record_batches_of_different_schemas_but_compatible_data() {
1464        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1465        // column names differ
1466        let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));
1467        let batch1 = RecordBatch::try_new(
1468            schema1.clone(),
1469            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1470        )
1471        .unwrap();
1472        let batch2 =
1473            RecordBatch::try_new(schema2, vec![Arc::new(Int32Array::from(vec![3, 4]))]).unwrap();
1474        // concat_batches simply uses the schema provided
1475        let batch = concat_batches(&schema1, [&batch1, &batch2]).unwrap();
1476        assert_eq!(batch.schema().as_ref(), schema1.as_ref());
1477        assert_eq!(4, batch.num_rows());
1478    }
1479
1480    #[test]
1481    fn concat_record_batches_of_different_schemas_incompatible_data() {
1482        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1483        // column names differ
1484        let schema2 = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
1485        let batch1 = RecordBatch::try_new(
1486            schema1.clone(),
1487            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1488        )
1489        .unwrap();
1490        let batch2 = RecordBatch::try_new(
1491            schema2,
1492            vec![Arc::new(StringArray::from(vec!["foo", "bar"]))],
1493        )
1494        .unwrap();
1495
1496        let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
1497        assert_eq!(
1498            error.to_string(),
1499            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
1500        );
1501    }
1502
1503    #[test]
1504    fn concat_capacity() {
1505        let a = Int32Array::from_iter_values(0..100);
1506        let b = Int32Array::from_iter_values(10..20);
1507        let a = concat(&[&a, &b]).unwrap();
1508        let data = a.to_data();
1509        assert_eq!(data.buffers()[0].len(), 440);
1510        assert_eq!(data.buffers()[0].capacity(), 440);
1511
1512        let a = concat(&[&a.slice(10, 20), &b]).unwrap();
1513        let data = a.to_data();
1514        assert_eq!(data.buffers()[0].len(), 120);
1515        assert_eq!(data.buffers()[0].capacity(), 120);
1516
1517        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
1518        let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
1519
1520        let a = concat(&[&a, &b]).unwrap();
1521        let data = a.to_data();
1522        // (100 + 4 + 1) * size_of<i32>()
1523        assert_eq!(data.buffers()[0].len(), 420);
1524        assert_eq!(data.buffers()[0].capacity(), 420);
1525
1526        // len("foo") * 100 + len("bingo") + len("bongo") + len("lorem")
1527        assert_eq!(data.buffers()[1].len(), 315);
1528        assert_eq!(data.buffers()[1].capacity(), 315);
1529
1530        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1531        let data = a.to_data();
1532        // (40 + 4 + 5) * size_of<i32>()
1533        assert_eq!(data.buffers()[0].len(), 180);
1534        assert_eq!(data.buffers()[0].capacity(), 180);
1535
1536        // len("foo") * 40 + len("bingo") + len("bongo") + len("lorem")
1537        assert_eq!(data.buffers()[1].len(), 135);
1538        assert_eq!(data.buffers()[1].capacity(), 135);
1539
1540        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
1541        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
1542
1543        let a = concat(&[&a, &b]).unwrap();
1544        let data = a.to_data();
1545        // (100 + 10 + 1) * size_of<i64>()
1546        assert_eq!(data.buffers()[0].len(), 888);
1547        assert_eq!(data.buffers()[0].capacity(), 888);
1548
1549        // len("foo") * 100 + len("cupcakes") * 10
1550        assert_eq!(data.buffers()[1].len(), 380);
1551        assert_eq!(data.buffers()[1].capacity(), 380);
1552
1553        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1554        let data = a.to_data();
1555        // (40 + 10 + 1) * size_of<i64>()
1556        assert_eq!(data.buffers()[0].len(), 408);
1557        assert_eq!(data.buffers()[0].capacity(), 408);
1558
1559        // len("foo") * 40 + len("cupcakes") * 10
1560        assert_eq!(data.buffers()[1].len(), 200);
1561        assert_eq!(data.buffers()[1].capacity(), 200);
1562    }
1563
1564    #[test]
1565    fn concat_sparse_nulls() {
1566        let values = StringArray::from_iter_values((0..100).map(|x| x.to_string()));
1567        let keys = Int32Array::from(vec![1; 10]);
1568        let dict_a = DictionaryArray::new(keys, Arc::new(values));
1569        let values = StringArray::new_null(0);
1570        let keys = Int32Array::new_null(10);
1571        let dict_b = DictionaryArray::new(keys, Arc::new(values));
1572        let array = concat(&[&dict_a, &dict_b]).unwrap();
1573        assert_eq!(array.null_count(), 10);
1574        assert_eq!(array.logical_null_count(), 10);
1575    }
1576
// Concatenates three single-row lists of dictionary-encoded strings and checks
// the resulting rows as well as the uniqueness of the resulting dictionary values.
#[test]
fn concat_dictionary_list_array_simple() {
    let scalars = [
        create_single_row_list_of_dict(vec![Some("a")]),
        create_single_row_list_of_dict(vec![Some("a")]),
        create_single_row_list_of_dict(vec![Some("b")]),
    ];

    let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
    let concat_res = concat(arrays.as_slice()).unwrap();

    // One expected row per input array, in input order
    let expected_list = create_list_of_dict(vec![
        Some(vec![Some("a")]),
        Some(vec![Some("a")]),
        Some(vec![Some("b")]),
    ]);

    let list = concat_res.as_list::<i32>();

    // `zip` stops at the shorter side, so a truncated result would slip through
    // the row-by-row comparison below unless the row count is pinned first.
    assert_eq!(list.len(), expected_list.len());

    // Assert that the list is equal to the expected list
    for (actual_row, expected_row) in list.iter().zip(expected_list.iter()) {
        assert_eq!(actual_row, expected_row);
    }

    assert_dictionary_has_unique_values::<_, StringArray>(
        list.values().as_dictionary::<Int32Type>(),
    );
}
1606
// Concatenates a large number of single-row lists that cycle through a small
// set of distinct strings, then checks the rows and the uniqueness of the
// resulting dictionary values.
#[test]
fn concat_many_dictionary_list_arrays() {
    // Shared by the inputs and the expectation so the two cannot drift apart.
    let number_of_rows = 80000;
    let number_of_unique_values = 8;

    let scalars = (0..number_of_rows)
        .map(|i| {
            create_single_row_list_of_dict(vec![Some(
                (i % number_of_unique_values).to_string(),
            )])
        })
        .collect::<Vec<_>>();

    let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
    let concat_res = concat(arrays.as_slice()).unwrap();

    let expected_list = create_list_of_dict(
        (0..number_of_rows)
            .map(|i| Some(vec![Some((i % number_of_unique_values).to_string())]))
            .collect::<Vec<_>>(),
    );

    let list = concat_res.as_list::<i32>();

    // `zip` stops at the shorter side, so a truncated result would slip through
    // the row-by-row comparison below unless the row count is pinned first.
    assert_eq!(list.len(), expected_list.len());

    // Assert that the list is equal to the expected list
    for (actual_row, expected_row) in list.iter().zip(expected_list.iter()) {
        assert_eq!(actual_row, expected_row);
    }

    assert_dictionary_has_unique_values::<_, StringArray>(
        list.values().as_dictionary::<Int32Type>(),
    );
}
1638
1639    fn create_single_row_list_of_dict(
1640        list_items: Vec<Option<impl AsRef<str>>>,
1641    ) -> GenericListArray<i32> {
1642        let rows = list_items.into_iter().map(Some).collect();
1643
1644        create_list_of_dict(vec![rows])
1645    }
1646
1647    fn create_list_of_dict(
1648        rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
1649    ) -> GenericListArray<i32> {
1650        let mut builder =
1651            GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());
1652
1653        for row in rows {
1654            builder.append_option(row);
1655        }
1656
1657        builder.finish()
1658    }
1659
1660    fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
1661    where
1662        K: ArrowDictionaryKeyType,
1663        V: Sync + Send + 'static,
1664        &'a V: ArrayAccessor + IntoIterator,
1665        <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
1666        <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
1667    {
1668        let dict = array.downcast_dict::<V>().unwrap();
1669        let mut values = dict.values().into_iter().collect::<Vec<_>>();
1670
1671        // remove duplicates must be sorted first so we can compare
1672        values.sort();
1673
1674        let mut unique_values = values.clone();
1675
1676        unique_values.dedup();
1677
1678        assert_eq!(
1679            values, unique_values,
1680            "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
1681        );
1682    }
1683
// Simple case: concatenating two RunArrays. The expected run ends are the first
// input's run ends followed by the second input's run ends offset by 4.
#[test]
fn test_concat_run_array() {
    let first_run_ends = Int32Array::from(vec![2, 4]);
    let first_values = Int32Array::from(vec![10, 20]);
    let first = RunArray::try_new(&first_run_ends, &first_values).unwrap();

    let second_run_ends = Int32Array::from(vec![1, 4]);
    let second_values = Int32Array::from(vec![30, 40]);
    let second = RunArray::try_new(&second_run_ends, &second_values).unwrap();

    let result = concat(&[&first, &second]).unwrap();
    let combined = result.as_run::<Int32Type>();

    // Logical length: 4 + 4
    assert_eq!(combined.len(), 8);

    // Run ends
    let run_ends = combined.run_ends().values();
    assert_eq!(run_ends.len(), 4);
    assert_eq!(&[2, 4, 5, 8], run_ends);

    // Values
    let values_ref = combined.values();
    let values = values_ref.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(values.len(), 4);
    assert_eq!(&[10, 20, 30, 40], values.values());
}
1717
// The first input ends with value 30 and the second input starts with value 30.
// The expectations below keep those as two separate runs in the result.
#[test]
fn test_concat_run_array_matching_first_last_value() {
    // Run ends [2, 4, 7] with values [10, 20, 30]
    let first_run_ends = Int32Array::from(vec![2, 4, 7]);
    let first_values = Int32Array::from(vec![10, 20, 30]);
    let first = RunArray::try_new(&first_run_ends, &first_values).unwrap();

    // Run ends [3, 5] with values [30, 40]
    let second_run_ends = Int32Array::from(vec![3, 5]);
    let second_values = Int32Array::from(vec![30, 40]);
    let second = RunArray::try_new(&second_run_ends, &second_values).unwrap();

    let result = concat(&[&first, &second]).unwrap();
    let combined = result.as_run::<Int32Type>();

    // Logical length: 7 + 5
    assert_eq!(combined.len(), 12);

    // Run ends
    let run_ends = combined.run_ends().values();
    assert_eq!(&[2, 4, 7, 10, 12], run_ends);

    // Values
    let values_ref = combined.values();
    let values = values_ref.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&[10, 20, 30, 30, 40], values.values());
}
1752
// Concatenates two RunArrays whose values arrays contain nulls and checks the
// logical length, the offset run ends, and the concatenated values (including
// which slots are null).
#[test]
fn test_concat_run_array_with_nulls() {
    // Run ends [2, 4, 7] with values [10, null, 30]
    let values1 = Int32Array::from(vec![Some(10), None, Some(30)]);
    let run_ends1 = Int32Array::from(vec![2, 4, 7]);
    let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();

    // Run ends [3, 5] with values [30, null]
    let values2 = Int32Array::from(vec![Some(30), None]);
    let run_ends2 = Int32Array::from(vec![3, 5]);
    let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();

    // Concatenate the two arrays
    let result = concat(&[&array1, &array2]).unwrap();
    let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();

    // The result should have length 12 (7 + 5)
    assert_eq!(result_run_array.len(), 12);

    // Check that the run ends are correct
    let run_ends_values = result_run_array.run_ends().values();
    assert_eq!(&[2, 4, 7, 10, 12], run_ends_values);

    // Check that the values are correct
    let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]);
    let actual = result_run_array
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();

    // Compare the arrays logically rather than via the raw value buffer: this
    // pins the position of every null (a bare null count would not) and does
    // not depend on whatever happens to be stored underneath a null slot.
    assert_eq!(actual, &expected);
}
1793
// Concatenating a single RunArray: the expected run ends and values are the
// same as the input's.
#[test]
fn test_concat_run_array_single() {
    // Run ends [2, 4] with values [10, 20]
    let input_run_ends = Int32Array::from(vec![2, 4]);
    let input_values = Int32Array::from(vec![10, 20]);
    let input = RunArray::try_new(&input_run_ends, &input_values).unwrap();

    let result = concat(&[&input]).unwrap();
    let combined = result.as_run::<Int32Type>();

    // Logical length
    assert_eq!(combined.len(), 4);

    // Run ends
    let run_ends = combined.run_ends().values();
    assert_eq!(&[2, 4], run_ends);

    // Values
    let values_ref = combined.values();
    let values = values_ref.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&[10, 20], values.values());
}
1823
// Concatenating three RunArrays: each later input's run ends are expected to be
// offset by the combined length of the inputs before it (4, then 8).
#[test]
fn test_concat_run_array_with_3_arrays() {
    let first_run_ends = Int32Array::from(vec![2, 4]);
    let first_values = Int32Array::from(vec![10, 20]);
    let first = RunArray::try_new(&first_run_ends, &first_values).unwrap();

    let second_run_ends = Int32Array::from(vec![1, 4]);
    let second_values = Int32Array::from(vec![30, 40]);
    let second = RunArray::try_new(&second_run_ends, &second_values).unwrap();

    let third_run_ends = Int32Array::from(vec![1, 4]);
    let third_values = Int32Array::from(vec![50, 60]);
    let third = RunArray::try_new(&third_run_ends, &third_values).unwrap();

    let result = concat(&[&first, &second, &third]).unwrap();
    let combined = result.as_run::<Int32Type>();

    // Logical length: 4 + 4 + 4
    assert_eq!(combined.len(), 12);

    // Run ends
    let run_ends = combined.run_ends().values();
    assert_eq!(run_ends.len(), 6);
    assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends);

    // Values
    let values_ref = combined.values();
    let values = values_ref.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(values.len(), 6);
    assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
}
1857}