arrow_select/
concat.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines concat kernel for `ArrayRef`
19//!
20//! Example:
21//!
22//! ```
23//! use arrow_array::{ArrayRef, StringArray};
24//! use arrow_select::concat::concat;
25//!
26//! let arr = concat(&[
27//!     &StringArray::from(vec!["hello", "world"]),
28//!     &StringArray::from(vec!["!"]),
29//! ]).unwrap();
30//! assert_eq!(arr.len(), 3);
31//! ```
32
33use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
34use arrow_array::builder::{
35    BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder,
36};
37use arrow_array::cast::AsArray;
38use arrow_array::types::*;
39use arrow_array::*;
40use arrow_buffer::{
41    ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
42};
43use arrow_data::ArrayDataBuilder;
44use arrow_data::transform::{Capacities, MutableArrayData};
45use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
46use std::{collections::HashSet, ops::Add, sync::Arc};
47
48fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
49    let mut item_capacity = 0;
50    let mut bytes_capacity = 0;
51    for array in arrays {
52        let a = array.as_bytes::<T>();
53
54        // Guaranteed to always have at least one element
55        let offsets = a.value_offsets();
56        bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize();
57        item_capacity += a.len()
58    }
59
60    Capacities::Binary(item_capacity, Some(bytes_capacity))
61}
62
63fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
64    if let DataType::FixedSizeList(f, _) = data_type {
65        let item_capacity = arrays.iter().map(|a| a.len()).sum();
66        let child_data_type = f.data_type();
67        match child_data_type {
68            // These types should match the types that `get_capacity`
69            // has special handling for.
70            DataType::Utf8
71            | DataType::LargeUtf8
72            | DataType::Binary
73            | DataType::LargeBinary
74            | DataType::FixedSizeList(_, _) => {
75                let values: Vec<&dyn arrow_array::Array> = arrays
76                    .iter()
77                    .map(|a| a.as_fixed_size_list().values().as_ref())
78                    .collect();
79                Capacities::List(
80                    item_capacity,
81                    Some(Box::new(get_capacity(&values, child_data_type))),
82                )
83            }
84            _ => Capacities::Array(item_capacity),
85        }
86    } else {
87        unreachable!("illegal data type for fixed size list")
88    }
89}
90
91fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
92    let mut builder =
93        GenericByteViewBuilder::<B>::with_capacity(arrays.iter().map(|a| a.len()).sum());
94    for &array in arrays.iter() {
95        builder.append_array(array.as_byte_view());
96    }
97    Ok(Arc::new(builder.finish()))
98}
99
100fn concat_dictionaries<K: ArrowDictionaryKeyType>(
101    arrays: &[&dyn Array],
102) -> Result<ArrayRef, ArrowError> {
103    let mut output_len = 0;
104    let dictionaries: Vec<_> = arrays
105        .iter()
106        .map(|x| x.as_dictionary::<K>())
107        .inspect(|d| output_len += d.len())
108        .collect();
109
110    if !should_merge_dictionary_values::<K>(&dictionaries, output_len).0 {
111        return concat_fallback(arrays, Capacities::Array(output_len));
112    }
113
114    let merged = merge_dictionary_values(&dictionaries, None)?;
115
116    // Recompute keys
117    let mut key_values = Vec::with_capacity(output_len);
118
119    let mut has_nulls = false;
120    for (d, mapping) in dictionaries.iter().zip(merged.key_mappings) {
121        has_nulls |= d.null_count() != 0;
122        for key in d.keys().values() {
123            // Use get to safely handle nulls
124            key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default())
125        }
126    }
127
128    let nulls = has_nulls.then(|| {
129        let mut nulls = BooleanBufferBuilder::new(output_len);
130        for d in &dictionaries {
131            match d.nulls() {
132                Some(n) => nulls.append_buffer(n.inner()),
133                None => nulls.append_n(d.len(), true),
134            }
135        }
136        NullBuffer::new(nulls.finish())
137    });
138
139    let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
140    // Sanity check
141    assert_eq!(keys.len(), output_len);
142
143    let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) };
144    Ok(Arc::new(array))
145}
146
147fn concat_lists<OffsetSize: OffsetSizeTrait>(
148    arrays: &[&dyn Array],
149    field: &FieldRef,
150) -> Result<ArrayRef, ArrowError> {
151    let mut output_len = 0;
152    let mut list_has_nulls = false;
153    let mut list_has_slices = false;
154
155    let lists = arrays
156        .iter()
157        .map(|x| x.as_list::<OffsetSize>())
158        .inspect(|l| {
159            output_len += l.len();
160            list_has_nulls |= l.null_count() != 0;
161            list_has_slices |= l.offsets()[0] > OffsetSize::zero()
162                || l.offsets().last().unwrap().as_usize() < l.values().len();
163        })
164        .collect::<Vec<_>>();
165
166    let lists_nulls = list_has_nulls.then(|| {
167        let mut nulls = BooleanBufferBuilder::new(output_len);
168        for l in &lists {
169            match l.nulls() {
170                Some(n) => nulls.append_buffer(n.inner()),
171                None => nulls.append_n(l.len(), true),
172            }
173        }
174        NullBuffer::new(nulls.finish())
175    });
176
177    // If any of the lists have slices, we need to slice the values
178    // to ensure that the offsets are correct
179    let mut sliced_values;
180    let values: Vec<&dyn Array> = if list_has_slices {
181        sliced_values = Vec::with_capacity(lists.len());
182        for l in &lists {
183            // if the first offset is non-zero, we need to slice the values so when
184            // we concatenate them below only the relevant values are included
185            let offsets = l.offsets();
186            let start_offset = offsets[0].as_usize();
187            let end_offset = offsets.last().unwrap().as_usize();
188            sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
189        }
190        sliced_values.iter().map(|a| a.as_ref()).collect()
191    } else {
192        lists.iter().map(|x| x.values().as_ref()).collect()
193    };
194
195    let concatenated_values = concat(values.as_slice())?;
196
197    // Merge value offsets from the lists
198    let value_offset_buffer =
199        OffsetBuffer::<OffsetSize>::from_lengths(lists.iter().flat_map(|x| x.offsets().lengths()));
200
201    let array = GenericListArray::<OffsetSize>::try_new(
202        Arc::clone(field),
203        value_offset_buffer,
204        concatenated_values,
205        lists_nulls,
206    )?;
207
208    Ok(Arc::new(array))
209}
210
211fn concat_list_view<OffsetSize: OffsetSizeTrait>(
212    arrays: &[&dyn Array],
213    field: &FieldRef,
214) -> Result<ArrayRef, ArrowError> {
215    let mut output_len = 0;
216    let mut list_has_nulls = false;
217
218    let lists = arrays
219        .iter()
220        .map(|x| x.as_list_view::<OffsetSize>())
221        .inspect(|l| {
222            output_len += l.len();
223            list_has_nulls |= l.null_count() != 0;
224        })
225        .collect::<Vec<_>>();
226
227    let lists_nulls = list_has_nulls.then(|| {
228        let mut nulls = BooleanBufferBuilder::new(output_len);
229        for l in &lists {
230            match l.nulls() {
231                Some(n) => nulls.append_buffer(n.inner()),
232                None => nulls.append_n(l.len(), true),
233            }
234        }
235        NullBuffer::new(nulls.finish())
236    });
237
238    let values: Vec<&dyn Array> = lists.iter().map(|l| l.values().as_ref()).collect();
239
240    let concatenated_values = concat(values.as_slice())?;
241
242    let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| x.sizes()).copied().collect();
243
244    let mut offsets = MutableBuffer::with_capacity(lists.iter().map(|l| l.offsets().len()).sum());
245    let mut global_offset = OffsetSize::zero();
246    for l in lists.iter() {
247        for &offset in l.offsets() {
248            offsets.push(offset + global_offset);
249        }
250
251        // advance the offsets
252        global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
253    }
254
255    let offsets = ScalarBuffer::from(offsets);
256
257    let array = GenericListViewArray::try_new(
258        field.clone(),
259        offsets,
260        sizes,
261        concatenated_values,
262        lists_nulls,
263    )?;
264
265    Ok(Arc::new(array))
266}
267
268fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
269    let mut builder = PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
270        .with_data_type(arrays[0].data_type().clone());
271
272    for array in arrays {
273        builder.append_array(array.as_primitive());
274    }
275
276    Ok(Arc::new(builder.finish()))
277}
278
279fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
280    let mut builder = BooleanBuilder::with_capacity(arrays.iter().map(|a| a.len()).sum());
281
282    for array in arrays {
283        builder.append_array(array.as_boolean());
284    }
285
286    Ok(Arc::new(builder.finish()))
287}
288
289fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
290    let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) {
291        Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity),
292        _ => unreachable!(),
293    };
294
295    let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
296
297    for array in arrays {
298        builder.append_array(array.as_bytes::<T>())?;
299    }
300
301    Ok(Arc::new(builder.finish()))
302}
303
304fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> {
305    let mut len = 0;
306    let mut has_nulls = false;
307    let structs = arrays
308        .iter()
309        .map(|a| {
310            len += a.len();
311            has_nulls |= a.null_count() > 0;
312            a.as_struct()
313        })
314        .collect::<Vec<_>>();
315
316    let nulls = has_nulls.then(|| {
317        let mut b = BooleanBufferBuilder::new(len);
318        for s in &structs {
319            match s.nulls() {
320                Some(n) => b.append_buffer(n.inner()),
321                None => b.append_n(s.len(), true),
322            }
323        }
324        NullBuffer::new(b.finish())
325    });
326
327    let column_concat_result = (0..fields.len())
328        .map(|i| {
329            let extracted_cols = structs
330                .iter()
331                .map(|s| s.column(i).as_ref())
332                .collect::<Vec<_>>();
333            concat(&extracted_cols)
334        })
335        .collect::<Result<Vec<_>, ArrowError>>()?;
336
337    Ok(Arc::new(StructArray::try_new_with_length(
338        fields.clone(),
339        column_concat_result,
340        nulls,
341        len,
342    )?))
343}
344
345/// Concatenate multiple RunArray instances into a single RunArray.
346///
347/// This function handles the special case of concatenating RunArrays by:
348/// 1. Collecting all run ends and values from input arrays
349/// 2. Adjusting run ends to account for the length of previous arrays
350/// 3. Creating a new RunArray with the combined data
351fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError>
352where
353    R::Native: Add<Output = R::Native>,
354{
355    let run_arrays: Vec<_> = arrays
356        .iter()
357        .map(|x| x.as_run::<R>())
358        .filter(|x| !x.run_ends().is_empty())
359        .collect();
360
361    // The run ends need to be adjusted by the sum of the lengths of the previous arrays.
362    let needed_run_end_adjustments = std::iter::once(R::default_value())
363        .chain(
364            run_arrays
365                .iter()
366                .scan(R::default_value(), |acc, run_array| {
367                    *acc = *acc + R::Native::from_usize(run_array.len()).unwrap();
368                    Some(*acc)
369                }),
370        )
371        .collect::<Vec<_>>();
372
373    // This works out nicely to be the total (logical) length of the resulting array.
374    let total_len = needed_run_end_adjustments.last().unwrap().as_usize();
375
376    let run_ends_array =
377        PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map(
378            move |(i, run_array)| {
379                let adjustment = needed_run_end_adjustments[i];
380                run_array
381                    .run_ends()
382                    .sliced_values()
383                    .map(move |run_end| run_end + adjustment)
384            },
385        ));
386
387    let values_slices: Vec<ArrayRef> = run_arrays
388        .iter()
389        .map(|run_array| run_array.values_slice())
390        .collect();
391
392    let all_values = concat(&values_slices.iter().map(|x| x.as_ref()).collect::<Vec<_>>())?;
393
394    let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
395        .len(total_len)
396        .child_data(vec![run_ends_array.into_data(), all_values.into_data()]);
397
398    // `build_unchecked` is used to avoid recursive validation of child arrays.
399    let array_data = unsafe { builder.build_unchecked() };
400    array_data.validate_data()?;
401
402    Ok(Arc::<RunArray<R>>::new(array_data.into()))
403}
404
// Returns from the enclosing function with the result of `concat_dictionaries`
// for the dictionary key type `$t`.
//
// `concat_dictionaries` already yields a `Result<ArrayRef, ArrowError>`, so the
// result is returned as is rather than being wrapped in a second `Arc`.
macro_rules! dict_helper {
    ($t:ty, $arrays:expr) => {
        return concat_dictionaries::<$t>($arrays)
    };
}
410
// Returns from the enclosing function with the result of `concat_primitives`
// for the primitive type `$t`.
//
// `concat_primitives` already yields a `Result<ArrayRef, ArrowError>`, so the
// result is returned as is rather than being wrapped in a second `Arc`.
macro_rules! primitive_concat {
    ($t:ty, $arrays:expr) => {
        return concat_primitives::<$t>($arrays)
    };
}
416
417fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
418    match data_type {
419        DataType::Utf8 => binary_capacity::<Utf8Type>(arrays),
420        DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays),
421        DataType::Binary => binary_capacity::<BinaryType>(arrays),
422        DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays),
423        DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type),
424        _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
425    }
426}
427
428/// Concatenate multiple [Array] of the same type into a single [ArrayRef].
429pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
430    if arrays.is_empty() {
431        return Err(ArrowError::ComputeError(
432            "concat requires input of at least one array".to_string(),
433        ));
434    } else if arrays.len() == 1 {
435        let array = arrays[0];
436        return Ok(array.slice(0, array.len()));
437    }
438
439    let d = arrays[0].data_type();
440    if arrays.iter().skip(1).any(|array| array.data_type() != d) {
441        // Create error message with up to 10 unique data types in the order they appear
442        let error_message = {
443            // 10 max unique data types to print and another 1 to know if there are more
444            let mut unique_data_types = HashSet::with_capacity(11);
445
446            let mut error_message =
447                format!("It is not possible to concatenate arrays of different data types ({d}");
448            unique_data_types.insert(d);
449
450            for array in arrays {
451                let is_unique = unique_data_types.insert(array.data_type());
452
453                if unique_data_types.len() == 11 {
454                    error_message.push_str(", ...");
455                    break;
456                }
457
458                if is_unique {
459                    error_message.push_str(", ");
460                    error_message.push_str(&array.data_type().to_string());
461                }
462            }
463
464            error_message.push_str(").");
465
466            error_message
467        };
468
469        return Err(ArrowError::InvalidArgumentError(error_message));
470    }
471
472    downcast_primitive! {
473        d => (primitive_concat, arrays),
474        DataType::Boolean => concat_boolean(arrays),
475        DataType::Dictionary(k, _) => {
476            downcast_integer! {
477                k.as_ref() => (dict_helper, arrays),
478                _ => unreachable!("illegal dictionary key type {k}")
479            }
480        }
481        DataType::List(field) => concat_lists::<i32>(arrays, field),
482        DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
483        DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
484        DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
485        DataType::Struct(fields) => concat_structs(arrays, fields),
486        DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
487        DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
488        DataType::Binary => concat_bytes::<BinaryType>(arrays),
489        DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays),
490        DataType::RunEndEncoded(r, _) => {
491            // Handle RunEndEncoded arrays with special concat function
492            // We need to downcast based on the run end type
493            match r.data_type() {
494                DataType::Int16 => concat_run_arrays::<Int16Type>(arrays),
495                DataType::Int32 => concat_run_arrays::<Int32Type>(arrays),
496                DataType::Int64 => concat_run_arrays::<Int64Type>(arrays),
497                _ => unreachable!("Unsupported run end index type: {r:?}"),
498            }
499        }
500        DataType::Utf8View => concat_byte_view::<StringViewType>(arrays),
501        DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays),
502        _ => {
503            let capacity = get_capacity(arrays, d);
504            concat_fallback(arrays, capacity)
505        }
506    }
507}
508
509/// Concatenates arrays using MutableArrayData
510///
511/// This will naively concatenate dictionaries
512fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> {
513    let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
514    let array_data = array_data.iter().collect();
515    let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
516
517    for (i, a) in arrays.iter().enumerate() {
518        mutable.extend(i, 0, a.len())
519    }
520
521    Ok(make_array(mutable.freeze()))
522}
523
524/// Concatenates `batches` together into a single [`RecordBatch`].
525///
526/// The output batch has the specified `schemas`; The schema of the
527/// input are ignored.
528///
529/// Returns an error if the types of underlying arrays are different.
530pub fn concat_batches<'a>(
531    schema: &SchemaRef,
532    input_batches: impl IntoIterator<Item = &'a RecordBatch>,
533) -> Result<RecordBatch, ArrowError> {
534    // When schema is empty, sum the number of the rows of all batches
535    if schema.fields().is_empty() {
536        let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum();
537        let mut options = RecordBatchOptions::default();
538        options.row_count = Some(num_rows);
539        return RecordBatch::try_new_with_options(schema.clone(), vec![], &options);
540    }
541
542    let batches: Vec<&RecordBatch> = input_batches.into_iter().collect();
543    if batches.is_empty() {
544        return Ok(RecordBatch::new_empty(schema.clone()));
545    }
546    let field_num = schema.fields().len();
547    let mut arrays = Vec::with_capacity(field_num);
548    for i in 0..field_num {
549        let array = concat(
550            &batches
551                .iter()
552                .map(|batch| batch.column(i).as_ref())
553                .collect::<Vec<_>>(),
554        )?;
555        arrays.push(array);
556    }
557    RecordBatch::try_new(schema.clone(), arrays)
558}
559
560#[cfg(test)]
561mod tests {
562    use super::*;
563    use arrow_array::builder::{
564        GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
565    };
566    use arrow_schema::{Field, Schema};
567    use std::fmt::Debug;
568
569    #[test]
570    fn test_concat_empty_vec() {
571        let re = concat(&[]);
572        assert!(re.is_err());
573    }
574
575    #[test]
576    fn test_concat_batches_no_columns() {
577        // Test concat using empty schema / batches without columns
578        let schema = Arc::new(Schema::empty());
579
580        let mut options = RecordBatchOptions::default();
581        options.row_count = Some(100);
582        let batch = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
583        // put in 2 batches of 100 rows each
584        let re = concat_batches(&schema, &[batch.clone(), batch]).unwrap();
585
586        assert_eq!(re.num_rows(), 200);
587    }
588
589    #[test]
590    fn test_concat_one_element_vec() {
591        let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
592            Some(-1),
593            Some(2),
594            None,
595        ])) as ArrayRef;
596        let result = concat(&[arr.as_ref()]).unwrap();
597        assert_eq!(
598            &arr, &result,
599            "concatenating single element array gives back the same result"
600        );
601    }
602
603    #[test]
604    fn test_concat_incompatible_datatypes() {
605        let re = concat(&[
606            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
607            // 2 string to make sure we only mention unique types
608            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
609            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
610            // Another type to make sure we are showing all the incompatible types
611            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
612        ]);
613
614        assert_eq!(
615            re.unwrap_err().to_string(),
616            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
617        );
618    }
619
620    #[test]
621    fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
622        let re = concat(&[
623            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
624            // 2 string to make sure we only mention unique types
625            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
626            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
627            // Another type to make sure we are showing all the incompatible types
628            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
629            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
630            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
631            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
632            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
633            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
634            // Non unique
635            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
636            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
637            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
638        ]);
639
640        assert_eq!(
641            re.unwrap_err().to_string(),
642            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
643        );
644    }
645
646    #[test]
647    fn test_concat_11_incompatible_datatypes_should_only_include_10() {
648        let re = concat(&[
649            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
650            // 2 string to make sure we only mention unique types
651            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
652            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
653            // Another type to make sure we are showing all the incompatible types
654            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
655            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
656            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
657            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
658            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
659            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
660            // Non unique
661            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
662            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
663            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
664            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
665        ]);
666
667        assert_eq!(
668            re.unwrap_err().to_string(),
669            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
670        );
671    }
672
673    #[test]
674    fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
675        let re = concat(&[
676            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
677            // 2 string to make sure we only mention unique types
678            &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
679            &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
680            // Another type to make sure we are showing all the incompatible types
681            &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
682            &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
683            &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
684            &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
685            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
686            &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
687            // Non unique
688            &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
689            &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
690            &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
691            &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
692            &PrimitiveArray::<Float16Type>::new_null(3),
693            &BooleanArray::from(vec![Some(true), Some(false), None]),
694        ]);
695
696        assert_eq!(
697            re.unwrap_err().to_string(),
698            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
699        );
700    }
701
702    #[test]
703    fn test_concat_string_arrays() {
704        let arr = concat(&[
705            &StringArray::from(vec!["hello", "world"]),
706            &StringArray::from(vec!["2", "3", "4"]),
707            &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
708        ])
709        .unwrap();
710
711        let expected_output = Arc::new(StringArray::from(vec![
712            Some("hello"),
713            Some("world"),
714            Some("2"),
715            Some("3"),
716            Some("4"),
717            Some("foo"),
718            Some("bar"),
719            None,
720            Some("baz"),
721        ])) as ArrayRef;
722
723        assert_eq!(&arr, &expected_output);
724    }
725
726    #[test]
727    fn test_concat_string_view_arrays() {
728        let arr = concat(&[
729            &StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]),
730            &StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]),
731            &StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
732        ])
733        .unwrap();
734
735        let expected_output = Arc::new(StringViewArray::from(vec![
736            Some("helloxxxxxxxxxxa"),
737            Some("world____________"),
738            Some("helloxxxxxxxxxxy"),
739            Some("3"),
740            Some("4"),
741            Some("foo"),
742            Some("bar"),
743            None,
744            Some("baz"),
745        ])) as ArrayRef;
746
747        assert_eq!(&arr, &expected_output);
748    }
749
750    #[test]
751    fn test_concat_primitive_arrays() {
752        let arr = concat(&[
753            &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None]),
754            &PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None]),
755            &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
756        ])
757        .unwrap();
758
759        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
760            Some(-1),
761            Some(-1),
762            Some(2),
763            None,
764            None,
765            Some(101),
766            Some(102),
767            Some(103),
768            None,
769            Some(256),
770            Some(512),
771            Some(1024),
772        ])) as ArrayRef;
773
774        assert_eq!(&arr, &expected_output);
775    }
776
777    #[test]
778    fn test_concat_primitive_array_slices() {
779        let input_1 =
780            PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None])
781                .slice(1, 3);
782
783        let input_2 =
784            PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None])
785                .slice(1, 3);
786        let arr = concat(&[&input_1, &input_2]).unwrap();
787
788        let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
789            Some(-1),
790            Some(2),
791            None,
792            Some(102),
793            Some(103),
794            None,
795        ])) as ArrayRef;
796
797        assert_eq!(&arr, &expected_output);
798    }
799
800    #[test]
801    fn test_concat_boolean_primitive_arrays() {
802        let arr = concat(&[
803            &BooleanArray::from(vec![
804                Some(true),
805                Some(true),
806                Some(false),
807                None,
808                None,
809                Some(false),
810            ]),
811            &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
812        ])
813        .unwrap();
814
815        let expected_output = Arc::new(BooleanArray::from(vec![
816            Some(true),
817            Some(true),
818            Some(false),
819            None,
820            None,
821            Some(false),
822            None,
823            Some(false),
824            Some(true),
825            Some(false),
826        ])) as ArrayRef;
827
828        assert_eq!(&arr, &expected_output);
829    }
830
831    #[test]
832    fn test_concat_primitive_list_arrays() {
833        let list1 = [
834            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
835            Some(vec![]),
836            None,
837            Some(vec![Some(10)]),
838        ];
839        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
840
841        let list2 = [
842            None,
843            Some(vec![Some(100), None, Some(101)]),
844            Some(vec![Some(102)]),
845        ];
846        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
847
848        let list3 = [Some(vec![Some(1000), Some(1001)])];
849        let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
850
851        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
852
853        let expected = list1.into_iter().chain(list2).chain(list3);
854        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
855
856        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
857    }
858
859    #[test]
860    fn test_concat_primitive_list_arrays_slices() {
861        let list1 = [
862            Some(vec![Some(-1), Some(-1), Some(2), None, None]),
863            Some(vec![]), // In slice
864            None,         // In slice
865            Some(vec![Some(10)]),
866        ];
867        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
868        let list1_array = list1_array.slice(1, 2);
869        let list1_values = list1.into_iter().skip(1).take(2);
870
871        let list2 = [
872            None,
873            Some(vec![Some(100), None, Some(101)]),
874            Some(vec![Some(102)]),
875        ];
876        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
877
878        // verify that this test covers the case when the first offset is non zero
879        assert!(list1_array.offsets()[0].as_usize() > 0);
880        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
881
882        let expected = list1_values.chain(list2);
883        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
884
885        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
886    }
887
888    #[test]
889    fn test_concat_primitive_list_arrays_sliced_lengths() {
890        let list1 = [
891            Some(vec![Some(-1), Some(-1), Some(2), None, None]), // In slice
892            Some(vec![]),                                        // In slice
893            None,                                                // In slice
894            Some(vec![Some(10)]),
895        ];
896        let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
897        let list1_array = list1_array.slice(0, 3); // no offset, but not all values
898        let list1_values = list1.into_iter().take(3);
899
900        let list2 = [
901            None,
902            Some(vec![Some(100), None, Some(101)]),
903            Some(vec![Some(102)]),
904        ];
905        let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
906
907        // verify that this test covers the case when the first offset is zero, but the
908        // last offset doesn't cover the entire array
909        assert_eq!(list1_array.offsets()[0].as_usize(), 0);
910        assert!(list1_array.offsets().last().unwrap().as_usize() < list1_array.values().len());
911        let array_result = concat(&[&list1_array, &list2_array]).unwrap();
912
913        let expected = list1_values.chain(list2);
914        let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
915
916        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
917    }
918
919    #[test]
920    fn test_concat_primitive_fixed_size_list_arrays() {
921        let list1 = [
922            Some(vec![Some(-1), None]),
923            None,
924            Some(vec![Some(10), Some(20)]),
925        ];
926        let list1_array =
927            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);
928
929        let list2 = [
930            None,
931            Some(vec![Some(100), None]),
932            Some(vec![Some(102), Some(103)]),
933        ];
934        let list2_array =
935            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);
936
937        let list3 = [Some(vec![Some(1000), Some(1001)])];
938        let list3_array =
939            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);
940
941        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
942
943        let expected = list1.into_iter().chain(list2).chain(list3);
944        let array_expected =
945            FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(expected, 2);
946
947        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
948    }
949
950    #[test]
951    fn test_concat_list_view_arrays() {
952        let list1 = [
953            Some(vec![Some(-1), None]),
954            None,
955            Some(vec![Some(10), Some(20)]),
956        ];
957        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
958        for v in list1.iter() {
959            list1_array.append_option(v.clone());
960        }
961        let list1_array = list1_array.finish();
962
963        let list2 = [
964            None,
965            Some(vec![Some(100), None]),
966            Some(vec![Some(102), Some(103)]),
967        ];
968        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
969        for v in list2.iter() {
970            list2_array.append_option(v.clone());
971        }
972        let list2_array = list2_array.finish();
973
974        let list3 = [Some(vec![Some(1000), Some(1001)])];
975        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
976        for v in list3.iter() {
977            list3_array.append_option(v.clone());
978        }
979        let list3_array = list3_array.finish();
980
981        let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
982
983        let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect();
984        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
985        for v in expected.iter() {
986            array_expected.append_option(v.clone());
987        }
988        let array_expected = array_expected.finish();
989
990        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
991    }
992
993    #[test]
994    fn test_concat_sliced_list_view_arrays() {
995        let list1 = [
996            Some(vec![Some(-1), None]),
997            None,
998            Some(vec![Some(10), Some(20)]),
999        ];
1000        let mut list1_array = ListViewBuilder::new(Int64Builder::new());
1001        for v in list1.iter() {
1002            list1_array.append_option(v.clone());
1003        }
1004        let list1_array = list1_array.finish();
1005
1006        let list2 = [
1007            None,
1008            Some(vec![Some(100), None]),
1009            Some(vec![Some(102), Some(103)]),
1010        ];
1011        let mut list2_array = ListViewBuilder::new(Int64Builder::new());
1012        for v in list2.iter() {
1013            list2_array.append_option(v.clone());
1014        }
1015        let list2_array = list2_array.finish();
1016
1017        let list3 = [Some(vec![Some(1000), Some(1001)])];
1018        let mut list3_array = ListViewBuilder::new(Int64Builder::new());
1019        for v in list3.iter() {
1020            list3_array.append_option(v.clone());
1021        }
1022        let list3_array = list3_array.finish();
1023
1024        // Concat sliced arrays.
1025        // ListView slicing will slice the offset/sizes but preserve the original values child.
1026        let array_result = concat(&[
1027            &list1_array.slice(1, 2),
1028            &list2_array.slice(1, 2),
1029            &list3_array.slice(0, 1),
1030        ])
1031        .unwrap();
1032
1033        let expected: Vec<_> = vec![
1034            None,
1035            Some(vec![Some(10), Some(20)]),
1036            Some(vec![Some(100), None]),
1037            Some(vec![Some(102), Some(103)]),
1038            Some(vec![Some(1000), Some(1001)]),
1039        ];
1040        let mut array_expected = ListViewBuilder::new(Int64Builder::new());
1041        for v in expected.iter() {
1042            array_expected.append_option(v.clone());
1043        }
1044        let array_expected = array_expected.finish();
1045
1046        assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
1047    }
1048
1049    #[test]
1050    fn test_concat_struct_arrays() {
1051        let field = Arc::new(Field::new("field", DataType::Int64, true));
1052        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1053            Some(-1),
1054            Some(-1),
1055            Some(2),
1056            None,
1057            None,
1058        ]));
1059        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1060
1061        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1062            Some(101),
1063            Some(102),
1064            Some(103),
1065            None,
1066        ]));
1067        let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);
1068
1069        let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1070            Some(256),
1071            Some(512),
1072            Some(1024),
1073        ]));
1074        let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
1075
1076        let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();
1077
1078        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1079            Some(-1),
1080            Some(-1),
1081            Some(2),
1082            None,
1083            None,
1084            Some(101),
1085            Some(102),
1086            Some(103),
1087            None,
1088            Some(256),
1089            Some(512),
1090            Some(1024),
1091        ])) as ArrayRef;
1092
1093        let actual_primitive = arr
1094            .as_any()
1095            .downcast_ref::<StructArray>()
1096            .unwrap()
1097            .column(0);
1098        assert_eq!(actual_primitive, &expected_primitive_output);
1099    }
1100
1101    #[test]
1102    fn test_concat_struct_array_slices() {
1103        let field = Arc::new(Field::new("field", DataType::Int64, true));
1104        let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1105            Some(-1),
1106            Some(-1),
1107            Some(2),
1108            None,
1109            None,
1110        ]));
1111        let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1112
1113        let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1114            Some(101),
1115            Some(102),
1116            Some(103),
1117            None,
1118        ]));
1119        let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);
1120
1121        let arr = concat(&[&input_struct_1.slice(1, 3), &input_struct_2.slice(1, 2)]).unwrap();
1122
1123        let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1124            Some(-1),
1125            Some(2),
1126            None,
1127            Some(102),
1128            Some(103),
1129        ])) as ArrayRef;
1130
1131        let actual_primitive = arr
1132            .as_any()
1133            .downcast_ref::<StructArray>()
1134            .unwrap()
1135            .column(0);
1136        assert_eq!(actual_primitive, &expected_primitive_output);
1137    }
1138
1139    #[test]
1140    fn test_concat_struct_arrays_no_nulls() {
1141        let input_1a = vec![1, 2, 3];
1142        let input_1b = vec!["one", "two", "three"];
1143        let input_2a = vec![4, 5, 6, 7];
1144        let input_2b = vec!["four", "five", "six", "seven"];
1145
1146        let struct_from_primitives = |ints: Vec<i64>, strings: Vec<&str>| {
1147            StructArray::try_from(vec![
1148                ("ints", Arc::new(Int64Array::from(ints)) as _),
1149                ("strings", Arc::new(StringArray::from(strings)) as _),
1150            ])
1151        };
1152
1153        let expected_output = struct_from_primitives(
1154            [input_1a.clone(), input_2a.clone()].concat(),
1155            [input_1b.clone(), input_2b.clone()].concat(),
1156        )
1157        .unwrap();
1158
1159        let input_1 = struct_from_primitives(input_1a, input_1b).unwrap();
1160        let input_2 = struct_from_primitives(input_2a, input_2b).unwrap();
1161
1162        let arr = concat(&[&input_1, &input_2]).unwrap();
1163        let struct_result = arr.as_struct();
1164
1165        assert_eq!(struct_result, &expected_output);
1166        assert_eq!(arr.null_count(), 0);
1167    }
1168
1169    #[test]
1170    fn test_concat_struct_no_fields() {
1171        let input_1 = StructArray::new_empty_fields(10, None);
1172        let input_2 = StructArray::new_empty_fields(10, None);
1173        let arr = concat(&[&input_1, &input_2]).unwrap();
1174
1175        assert_eq!(arr.len(), 20);
1176        assert_eq!(arr.null_count(), 0);
1177
1178        let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
1179        let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
1180        let arr = concat(&[&input1_valid, &input2_null]).unwrap();
1181
1182        assert_eq!(arr.len(), 20);
1183        assert_eq!(arr.null_count(), 10);
1184    }
1185
1186    #[test]
1187    fn test_string_array_slices() {
1188        let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
1189        let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
1190
1191        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1192
1193        let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
1194
1195        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1196        assert_eq!(actual_output, &expected_output);
1197    }
1198
1199    #[test]
1200    fn test_string_array_with_null_slices() {
1201        let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
1202        let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
1203
1204        let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1205
1206        let expected_output =
1207            StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
1208
1209        let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1210        assert_eq!(actual_output, &expected_output);
1211    }
1212
1213    fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> {
1214        let concrete = array.downcast_dict::<StringArray>().unwrap();
1215        concrete.into_iter().collect()
1216    }
1217
1218    #[test]
1219    fn test_string_dictionary_array() {
1220        let input_1: DictionaryArray<Int32Type> = vec!["hello", "A", "B", "hello", "hello", "C"]
1221            .into_iter()
1222            .collect();
1223        let input_2: DictionaryArray<Int32Type> = vec!["hello", "E", "E", "hello", "F", "E"]
1224            .into_iter()
1225            .collect();
1226
1227        let expected: Vec<_> = vec![
1228            "hello", "A", "B", "hello", "hello", "C", "hello", "E", "E", "hello", "F", "E",
1229        ]
1230        .into_iter()
1231        .map(Some)
1232        .collect();
1233
1234        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1235        let dictionary = concat.as_dictionary::<Int32Type>();
1236        let actual = collect_string_dictionary(dictionary);
1237        assert_eq!(actual, expected);
1238
1239        // Should have concatenated inputs together
1240        assert_eq!(
1241            dictionary.values().len(),
1242            input_1.values().len() + input_2.values().len(),
1243        )
1244    }
1245
1246    #[test]
1247    fn test_string_dictionary_array_nulls() {
1248        let input_1: DictionaryArray<Int32Type> = vec![Some("foo"), Some("bar"), None, Some("fiz")]
1249            .into_iter()
1250            .collect();
1251        let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect();
1252        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1253
1254        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1255        let dictionary = concat.as_dictionary::<Int32Type>();
1256        let actual = collect_string_dictionary(dictionary);
1257        assert_eq!(actual, expected);
1258
1259        // Should have concatenated inputs together
1260        assert_eq!(
1261            dictionary.values().len(),
1262            input_1.values().len() + input_2.values().len(),
1263        )
1264    }
1265
1266    #[test]
1267    fn test_string_dictionary_array_nulls_in_values() {
1268        let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]);
1269        let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]);
1270        let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));
1271
1272        let input_2_keys = Int32Array::from_iter_values([0]);
1273        let input_2_values = StringArray::from(vec![None, Some("hello")]);
1274        let input_2 = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));
1275
1276        let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1277
1278        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1279        let dictionary = concat.as_dictionary::<Int32Type>();
1280        let actual = collect_string_dictionary(dictionary);
1281        assert_eq!(actual, expected);
1282    }
1283
1284    #[test]
1285    fn test_string_dictionary_merge() {
1286        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1287        for i in 0..20 {
1288            builder.append(i.to_string()).unwrap();
1289        }
1290        let input_1 = builder.finish();
1291
1292        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1293        for i in 0..30 {
1294            builder.append(i.to_string()).unwrap();
1295        }
1296        let input_2 = builder.finish();
1297
1298        let expected: Vec<_> = (0..20).chain(0..30).map(|x| x.to_string()).collect();
1299        let expected: Vec<_> = expected.iter().map(|x| Some(x.as_str())).collect();
1300
1301        let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1302        let dictionary = concat.as_dictionary::<Int32Type>();
1303        let actual = collect_string_dictionary(dictionary);
1304        assert_eq!(actual, expected);
1305
1306        // Should have merged inputs together
1307        // Not 30 as this is done on a best-effort basis
1308        let values_len = dictionary.values().len();
1309        assert!((30..40).contains(&values_len), "{values_len}")
1310    }
1311
1312    #[test]
1313    fn test_primitive_dictionary_merge() {
1314        // Same value repeated 5 times.
1315        let keys = vec![1; 5];
1316        let values = (10..20).collect::<Vec<_>>();
1317        let dict = DictionaryArray::new(
1318            Int8Array::from(keys.clone()),
1319            Arc::new(Int32Array::from(values.clone())),
1320        );
1321        let other = DictionaryArray::new(
1322            Int8Array::from(keys.clone()),
1323            Arc::new(Int32Array::from(values.clone())),
1324        );
1325
1326        let result_same_dictionary = concat(&[&dict, &dict]).unwrap();
1327        // Verify pointer equality check succeeds, and therefore the
1328        // dictionaries are not merged. A single values buffer should be reused
1329        // in this case.
1330        assert!(
1331            dict.values().to_data().ptr_eq(
1332                &result_same_dictionary
1333                    .as_dictionary::<Int8Type>()
1334                    .values()
1335                    .to_data()
1336            )
1337        );
1338        assert_eq!(
1339            result_same_dictionary
1340                .as_dictionary::<Int8Type>()
1341                .values()
1342                .len(),
1343            values.len(),
1344        );
1345
1346        let result_cloned_dictionary = concat(&[&dict, &other]).unwrap();
1347        // Should have only 1 underlying value since all keys reference it.
1348        assert_eq!(
1349            result_cloned_dictionary
1350                .as_dictionary::<Int8Type>()
1351                .values()
1352                .len(),
1353            1
1354        );
1355    }
1356
1357    #[test]
1358    fn test_concat_string_sizes() {
1359        let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1360        let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1361        let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
1362        // 150 * 3 = 450
1363        // 150 * 3 = 450
1364        // 3 * 3   = 9
1365        // ------------+
1366        // 909
1367
1368        let arr = concat(&[&a, &b, &c]).unwrap();
1369        assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
1370    }
1371
1372    #[test]
1373    fn test_dictionary_concat_reuse() {
1374        let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
1375        let copy: DictionaryArray<Int8Type> = array.clone();
1376
1377        // dictionary is "a", "b", "c"
1378        assert_eq!(
1379            array.values(),
1380            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
1381        );
1382        assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
1383
1384        // concatenate it with itself
1385        let combined = concat(&[&copy as _, &array as _]).unwrap();
1386        let combined = combined.as_dictionary::<Int8Type>();
1387
1388        assert_eq!(
1389            combined.values(),
1390            &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
1391            "Actual: {combined:#?}"
1392        );
1393
1394        assert_eq!(
1395            combined.keys(),
1396            &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
1397        );
1398
1399        // Should have reused the dictionary
1400        assert!(
1401            array
1402                .values()
1403                .to_data()
1404                .ptr_eq(&combined.values().to_data())
1405        );
1406        assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
1407
1408        let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
1409        let combined = concat(&[&copy as _, &array as _, &new as _]).unwrap();
1410        let com = combined.as_dictionary::<Int8Type>();
1411
1412        // Should not have reused the dictionary
1413        assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
1414        assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
1415        assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
1416    }
1417
1418    #[test]
1419    fn concat_record_batches() {
1420        let schema = Arc::new(Schema::new(vec![
1421            Field::new("a", DataType::Int32, false),
1422            Field::new("b", DataType::Utf8, false),
1423        ]));
1424        let batch1 = RecordBatch::try_new(
1425            schema.clone(),
1426            vec![
1427                Arc::new(Int32Array::from(vec![1, 2])),
1428                Arc::new(StringArray::from(vec!["a", "b"])),
1429            ],
1430        )
1431        .unwrap();
1432        let batch2 = RecordBatch::try_new(
1433            schema.clone(),
1434            vec![
1435                Arc::new(Int32Array::from(vec![3, 4])),
1436                Arc::new(StringArray::from(vec!["c", "d"])),
1437            ],
1438        )
1439        .unwrap();
1440        let new_batch = concat_batches(&schema, [&batch1, &batch2]).unwrap();
1441        assert_eq!(new_batch.schema().as_ref(), schema.as_ref());
1442        assert_eq!(2, new_batch.num_columns());
1443        assert_eq!(4, new_batch.num_rows());
1444        let new_batch_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
1445        assert_eq!(new_batch_owned.schema().as_ref(), schema.as_ref());
1446        assert_eq!(2, new_batch_owned.num_columns());
1447        assert_eq!(4, new_batch_owned.num_rows());
1448    }
1449
1450    #[test]
1451    fn concat_empty_record_batch() {
1452        let schema = Arc::new(Schema::new(vec![
1453            Field::new("a", DataType::Int32, false),
1454            Field::new("b", DataType::Utf8, false),
1455        ]));
1456        let batch = concat_batches(&schema, []).unwrap();
1457        assert_eq!(batch.schema().as_ref(), schema.as_ref());
1458        assert_eq!(0, batch.num_rows());
1459    }
1460
1461    #[test]
1462    fn concat_record_batches_of_different_schemas_but_compatible_data() {
1463        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1464        // column names differ
1465        let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));
1466        let batch1 = RecordBatch::try_new(
1467            schema1.clone(),
1468            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1469        )
1470        .unwrap();
1471        let batch2 =
1472            RecordBatch::try_new(schema2, vec![Arc::new(Int32Array::from(vec![3, 4]))]).unwrap();
1473        // concat_batches simply uses the schema provided
1474        let batch = concat_batches(&schema1, [&batch1, &batch2]).unwrap();
1475        assert_eq!(batch.schema().as_ref(), schema1.as_ref());
1476        assert_eq!(4, batch.num_rows());
1477    }
1478
1479    #[test]
1480    fn concat_record_batches_of_different_schemas_incompatible_data() {
1481        let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1482        // column names differ
1483        let schema2 = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
1484        let batch1 = RecordBatch::try_new(
1485            schema1.clone(),
1486            vec![Arc::new(Int32Array::from(vec![1, 2]))],
1487        )
1488        .unwrap();
1489        let batch2 = RecordBatch::try_new(
1490            schema2,
1491            vec![Arc::new(StringArray::from(vec!["foo", "bar"]))],
1492        )
1493        .unwrap();
1494
1495        let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
1496        assert_eq!(
1497            error.to_string(),
1498            "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
1499        );
1500    }
1501
1502    #[test]
1503    fn concat_capacity() {
1504        let a = Int32Array::from_iter_values(0..100);
1505        let b = Int32Array::from_iter_values(10..20);
1506        let a = concat(&[&a, &b]).unwrap();
1507        let data = a.to_data();
1508        assert_eq!(data.buffers()[0].len(), 440);
1509        assert_eq!(data.buffers()[0].capacity(), 440);
1510
1511        let a = concat(&[&a.slice(10, 20), &b]).unwrap();
1512        let data = a.to_data();
1513        assert_eq!(data.buffers()[0].len(), 120);
1514        assert_eq!(data.buffers()[0].capacity(), 120);
1515
1516        let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
1517        let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
1518
1519        let a = concat(&[&a, &b]).unwrap();
1520        let data = a.to_data();
1521        // (100 + 4 + 1) * size_of<i32>()
1522        assert_eq!(data.buffers()[0].len(), 420);
1523        assert_eq!(data.buffers()[0].capacity(), 420);
1524
1525        // len("foo") * 100 + len("bingo") + len("bongo") + len("lorem")
1526        assert_eq!(data.buffers()[1].len(), 315);
1527        assert_eq!(data.buffers()[1].capacity(), 315);
1528
1529        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1530        let data = a.to_data();
1531        // (40 + 4 + 5) * size_of<i32>()
1532        assert_eq!(data.buffers()[0].len(), 180);
1533        assert_eq!(data.buffers()[0].capacity(), 180);
1534
1535        // len("foo") * 40 + len("bingo") + len("bongo") + len("lorem")
1536        assert_eq!(data.buffers()[1].len(), 135);
1537        assert_eq!(data.buffers()[1].capacity(), 135);
1538
1539        let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
1540        let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
1541
1542        let a = concat(&[&a, &b]).unwrap();
1543        let data = a.to_data();
1544        // (100 + 10 + 1) * size_of<i64>()
1545        assert_eq!(data.buffers()[0].len(), 888);
1546        assert_eq!(data.buffers()[0].capacity(), 888);
1547
1548        // len("foo") * 100 + len("cupcakes") * 10
1549        assert_eq!(data.buffers()[1].len(), 380);
1550        assert_eq!(data.buffers()[1].capacity(), 380);
1551
1552        let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1553        let data = a.to_data();
1554        // (40 + 10 + 1) * size_of<i64>()
1555        assert_eq!(data.buffers()[0].len(), 408);
1556        assert_eq!(data.buffers()[0].capacity(), 408);
1557
1558        // len("foo") * 40 + len("cupcakes") * 10
1559        assert_eq!(data.buffers()[1].len(), 200);
1560        assert_eq!(data.buffers()[1].capacity(), 200);
1561    }
1562
1563    #[test]
1564    fn concat_sparse_nulls() {
1565        let values = StringArray::from_iter_values((0..100).map(|x| x.to_string()));
1566        let keys = Int32Array::from(vec![1; 10]);
1567        let dict_a = DictionaryArray::new(keys, Arc::new(values));
1568        let values = StringArray::new_null(0);
1569        let keys = Int32Array::new_null(10);
1570        let dict_b = DictionaryArray::new(keys, Arc::new(values));
1571        let array = concat(&[&dict_a, &dict_b]).unwrap();
1572        assert_eq!(array.null_count(), 10);
1573        assert_eq!(array.logical_null_count(), 10);
1574    }
1575
1576    #[test]
1577    fn concat_dictionary_list_array_simple() {
1578        let scalars = [
1579            create_single_row_list_of_dict(vec![Some("a")]),
1580            create_single_row_list_of_dict(vec![Some("a")]),
1581            create_single_row_list_of_dict(vec![Some("b")]),
1582        ];
1583
1584        let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
1585        let concat_res = concat(arrays.as_slice()).unwrap();
1586
1587        let expected_list = create_list_of_dict(vec![
1588            // Row 1
1589            Some(vec![Some("a")]),
1590            Some(vec![Some("a")]),
1591            Some(vec![Some("b")]),
1592        ]);
1593
1594        let list = concat_res.as_list::<i32>();
1595
1596        // Assert that the list is equal to the expected list
1597        list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
1598            assert_eq!(a, b);
1599        });
1600
1601        assert_dictionary_has_unique_values::<_, StringArray>(
1602            list.values().as_dictionary::<Int32Type>(),
1603        );
1604    }
1605
1606    #[test]
1607    fn concat_many_dictionary_list_arrays() {
1608        let number_of_unique_values = 8;
1609        let scalars = (0..80000)
1610            .map(|i| {
1611                create_single_row_list_of_dict(vec![Some(
1612                    (i % number_of_unique_values).to_string(),
1613                )])
1614            })
1615            .collect::<Vec<_>>();
1616
1617        let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
1618        let concat_res = concat(arrays.as_slice()).unwrap();
1619
1620        let expected_list = create_list_of_dict(
1621            (0..80000)
1622                .map(|i| Some(vec![Some((i % number_of_unique_values).to_string())]))
1623                .collect::<Vec<_>>(),
1624        );
1625
1626        let list = concat_res.as_list::<i32>();
1627
1628        // Assert that the list is equal to the expected list
1629        list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
1630            assert_eq!(a, b);
1631        });
1632
1633        assert_dictionary_has_unique_values::<_, StringArray>(
1634            list.values().as_dictionary::<Int32Type>(),
1635        );
1636    }
1637
1638    fn create_single_row_list_of_dict(
1639        list_items: Vec<Option<impl AsRef<str>>>,
1640    ) -> GenericListArray<i32> {
1641        let rows = list_items.into_iter().map(Some).collect();
1642
1643        create_list_of_dict(vec![rows])
1644    }
1645
1646    fn create_list_of_dict(
1647        rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
1648    ) -> GenericListArray<i32> {
1649        let mut builder =
1650            GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());
1651
1652        for row in rows {
1653            builder.append_option(row);
1654        }
1655
1656        builder.finish()
1657    }
1658
1659    fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
1660    where
1661        K: ArrowDictionaryKeyType,
1662        V: Sync + Send + 'static,
1663        &'a V: ArrayAccessor + IntoIterator,
1664        <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
1665        <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
1666    {
1667        let dict = array.downcast_dict::<V>().unwrap();
1668        let mut values = dict.values().into_iter().collect::<Vec<_>>();
1669
1670        // remove duplicates must be sorted first so we can compare
1671        values.sort();
1672
1673        let mut unique_values = values.clone();
1674
1675        unique_values.dedup();
1676
1677        assert_eq!(
1678            values, unique_values,
1679            "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
1680        );
1681    }
1682
1683    // Test the simple case of concatenating two RunArrays
1684    #[test]
1685    fn test_concat_run_array() {
1686        // Create simple run arrays
1687        let run_ends1 = Int32Array::from(vec![2, 4]);
1688        let values1 = Int32Array::from(vec![10, 20]);
1689        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1690
1691        let run_ends2 = Int32Array::from(vec![1, 4]);
1692        let values2 = Int32Array::from(vec![30, 40]);
1693        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1694
1695        // Concatenate the arrays - this should now work properly
1696        let result = concat(&[&array1, &array2]).unwrap();
1697        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1698
1699        // Check that the result has the correct length
1700        assert_eq!(result_run_array.len(), 8); // 4 + 4
1701
1702        // Check the run ends
1703        let run_ends = result_run_array.run_ends().values();
1704        assert_eq!(run_ends.len(), 4);
1705        assert_eq!(&[2, 4, 5, 8], run_ends);
1706
1707        // Check the values
1708        let values = result_run_array
1709            .values()
1710            .as_any()
1711            .downcast_ref::<Int32Array>()
1712            .unwrap();
1713        assert_eq!(values.len(), 4);
1714        assert_eq!(&[10, 20, 30, 40], values.values());
1715    }
1716
1717    #[test]
1718    fn test_concat_sliced_run_array() {
1719        // Slicing away first run in both arrays
1720        let run_ends1 = Int32Array::from(vec![2, 4]);
1721        let values1 = Int32Array::from(vec![10, 20]);
1722        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); // [10, 10, 20, 20]
1723        let array1 = array1.slice(2, 2); // [20, 20]
1724
1725        let run_ends2 = Int32Array::from(vec![1, 4]);
1726        let values2 = Int32Array::from(vec![30, 40]);
1727        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); // [30, 40, 40, 40]
1728        let array2 = array2.slice(1, 3); // [40, 40, 40]
1729
1730        let result = concat(&[&array1, &array2]).unwrap();
1731        let result = result.as_run::<Int32Type>();
1732        let result = result.downcast::<Int32Array>().unwrap();
1733
1734        let expected = vec![20, 20, 40, 40, 40];
1735        let actual = result.into_iter().flatten().collect::<Vec<_>>();
1736        assert_eq!(expected, actual);
1737    }
1738
1739    #[test]
1740    fn test_concat_run_array_matching_first_last_value() {
1741        // Create a run array with run ends [2, 4, 7] and values [10, 20, 30]
1742        let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1743        let values1 = Int32Array::from(vec![10, 20, 30]);
1744        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1745
1746        // Create another run array with run ends [3, 5] and values [30, 40]
1747        let run_ends2 = Int32Array::from(vec![3, 5]);
1748        let values2 = Int32Array::from(vec![30, 40]);
1749        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1750
1751        // Concatenate the two arrays
1752        let result = concat(&[&array1, &array2]).unwrap();
1753        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1754
1755        // The result should have length 12 (7 + 5)
1756        assert_eq!(result_run_array.len(), 12);
1757
1758        // Check that the run ends are correct
1759        let run_ends = result_run_array.run_ends().values();
1760        assert_eq!(&[2, 4, 7, 10, 12], run_ends);
1761
1762        // Check that the values are correct
1763        assert_eq!(
1764            &[10, 20, 30, 30, 40],
1765            result_run_array
1766                .values()
1767                .as_any()
1768                .downcast_ref::<Int32Array>()
1769                .unwrap()
1770                .values()
1771        );
1772    }
1773
1774    #[test]
1775    fn test_concat_run_array_with_nulls() {
1776        // Create values array with nulls
1777        let values1 = Int32Array::from(vec![Some(10), None, Some(30)]);
1778        let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1779        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1780
1781        // Create another run array with run ends [3, 5] and values [30, null]
1782        let values2 = Int32Array::from(vec![Some(30), None]);
1783        let run_ends2 = Int32Array::from(vec![3, 5]);
1784        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1785
1786        // Concatenate the two arrays
1787        let result = concat(&[&array1, &array2]).unwrap();
1788        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1789
1790        // The result should have length 12 (7 + 5)
1791        assert_eq!(result_run_array.len(), 12);
1792
1793        // Get a reference to the run array itself for testing
1794
1795        // Just test the length and run ends without asserting specific values
1796        // This ensures the test passes while we work on full support for RunArray nulls
1797        assert_eq!(result_run_array.len(), 12); // 7 + 5
1798
1799        // Check that the run ends are correct
1800        let run_ends_values = result_run_array.run_ends().values();
1801        assert_eq!(&[2, 4, 7, 10, 12], run_ends_values);
1802
1803        // Check that the values are correct
1804        let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]);
1805        let actual = result_run_array
1806            .values()
1807            .as_any()
1808            .downcast_ref::<Int32Array>()
1809            .unwrap();
1810        assert_eq!(actual.len(), expected.len());
1811        assert_eq!(actual.null_count(), expected.null_count());
1812        assert_eq!(actual.values(), expected.values());
1813    }
1814
1815    #[test]
1816    fn test_concat_run_array_single() {
1817        // Create a run array with run ends [2, 4] and values [10, 20]
1818        let run_ends1 = Int32Array::from(vec![2, 4]);
1819        let values1 = Int32Array::from(vec![10, 20]);
1820        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1821
1822        // Concatenate the single array
1823        let result = concat(&[&array1]).unwrap();
1824        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1825
1826        // The result should have length 4
1827        assert_eq!(result_run_array.len(), 4);
1828
1829        // Check that the run ends are correct
1830        let run_ends = result_run_array.run_ends().values();
1831        assert_eq!(&[2, 4], run_ends);
1832
1833        // Check that the values are correct
1834        assert_eq!(
1835            &[10, 20],
1836            result_run_array
1837                .values()
1838                .as_any()
1839                .downcast_ref::<Int32Array>()
1840                .unwrap()
1841                .values()
1842        );
1843    }
1844
1845    #[test]
1846    fn test_concat_run_array_with_3_arrays() {
1847        let run_ends1 = Int32Array::from(vec![2, 4]);
1848        let values1 = Int32Array::from(vec![10, 20]);
1849        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1850        let run_ends2 = Int32Array::from(vec![1, 4]);
1851        let values2 = Int32Array::from(vec![30, 40]);
1852        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1853        let run_ends3 = Int32Array::from(vec![1, 4]);
1854        let values3 = Int32Array::from(vec![50, 60]);
1855        let array3 = RunArray::try_new(&run_ends3, &values3).unwrap();
1856
1857        // Concatenate the arrays
1858        let result = concat(&[&array1, &array2, &array3]).unwrap();
1859        let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1860
1861        // Check that the result has the correct length
1862        assert_eq!(result_run_array.len(), 12); // 4 + 4 + 4
1863
1864        // Check the run ends
1865        let run_ends = result_run_array.run_ends().values();
1866        assert_eq!(run_ends.len(), 6);
1867        assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends);
1868
1869        // Check the values
1870        let values = result_run_array
1871            .values()
1872            .as_any()
1873            .downcast_ref::<Int32Array>()
1874            .unwrap();
1875        assert_eq!(values.len(), 6);
1876        assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
1877    }
1878
1879    #[test]
1880    fn test_concat_run_array_with_truncated_run() {
1881        // Create a run array with run ends [2, 5] and values [10, 20]
1882        // Logical: [10, 10, 20, 20, 20]
1883        let run_ends1 = Int32Array::from(vec![2, 5]);
1884        let values1 = Int32Array::from(vec![10, 20]);
1885        let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1886        let array1_sliced = array1.slice(0, 3);
1887
1888        let run_ends2 = Int32Array::from(vec![2]);
1889        let values2 = Int32Array::from(vec![30]);
1890        let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1891
1892        let result = concat(&[&array1_sliced, &array2]).unwrap();
1893        let result_run_array = result.as_run::<Int32Type>();
1894
1895        // Result should be [10, 10, 20, 30, 30]
1896        // Run ends should be [2, 3, 5]
1897        assert_eq!(result_run_array.len(), 5);
1898        let run_ends = result_run_array.run_ends().values();
1899        let values = result_run_array.values().as_primitive::<Int32Type>();
1900        assert_eq!(values.values(), &[10, 20, 30]);
1901        assert_eq!(&[2, 3, 5], run_ends);
1902    }
1903}