1use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
34use arrow_array::builder::{
35 BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder,
36};
37use arrow_array::cast::AsArray;
38use arrow_array::types::*;
39use arrow_array::*;
40use arrow_buffer::{
41 ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
42};
43use arrow_data::ArrayDataBuilder;
44use arrow_data::transform::{Capacities, MutableArrayData};
45use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
46use std::{collections::HashSet, ops::Add, sync::Arc};
47
48fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
49 let mut item_capacity = 0;
50 let mut bytes_capacity = 0;
51 for array in arrays {
52 let a = array.as_bytes::<T>();
53
54 let offsets = a.value_offsets();
56 bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize();
57 item_capacity += a.len()
58 }
59
60 Capacities::Binary(item_capacity, Some(bytes_capacity))
61}
62
63fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
64 if let DataType::FixedSizeList(f, _) = data_type {
65 let item_capacity = arrays.iter().map(|a| a.len()).sum();
66 let child_data_type = f.data_type();
67 match child_data_type {
68 DataType::Utf8
71 | DataType::LargeUtf8
72 | DataType::Binary
73 | DataType::LargeBinary
74 | DataType::FixedSizeList(_, _) => {
75 let values: Vec<&dyn arrow_array::Array> = arrays
76 .iter()
77 .map(|a| a.as_fixed_size_list().values().as_ref())
78 .collect();
79 Capacities::List(
80 item_capacity,
81 Some(Box::new(get_capacity(&values, child_data_type))),
82 )
83 }
84 _ => Capacities::Array(item_capacity),
85 }
86 } else {
87 unreachable!("illegal data type for fixed size list")
88 }
89}
90
91fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
92 let mut builder =
93 GenericByteViewBuilder::<B>::with_capacity(arrays.iter().map(|a| a.len()).sum());
94 for &array in arrays.iter() {
95 builder.append_array(array.as_byte_view());
96 }
97 Ok(Arc::new(builder.finish()))
98}
99
100fn concat_dictionaries<K: ArrowDictionaryKeyType>(
101 arrays: &[&dyn Array],
102) -> Result<ArrayRef, ArrowError> {
103 let mut output_len = 0;
104 let dictionaries: Vec<_> = arrays
105 .iter()
106 .map(|x| x.as_dictionary::<K>())
107 .inspect(|d| output_len += d.len())
108 .collect();
109
110 if !should_merge_dictionary_values::<K>(&dictionaries, output_len) {
111 return concat_fallback(arrays, Capacities::Array(output_len));
112 }
113
114 let merged = merge_dictionary_values(&dictionaries, None)?;
115
116 let mut key_values = Vec::with_capacity(output_len);
118
119 let mut has_nulls = false;
120 for (d, mapping) in dictionaries.iter().zip(merged.key_mappings) {
121 has_nulls |= d.null_count() != 0;
122 for key in d.keys().values() {
123 key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default())
125 }
126 }
127
128 let nulls = has_nulls.then(|| {
129 let mut nulls = BooleanBufferBuilder::new(output_len);
130 for d in &dictionaries {
131 match d.nulls() {
132 Some(n) => nulls.append_buffer(n.inner()),
133 None => nulls.append_n(d.len(), true),
134 }
135 }
136 NullBuffer::new(nulls.finish())
137 });
138
139 let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
140 assert_eq!(keys.len(), output_len);
142
143 let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) };
144 Ok(Arc::new(array))
145}
146
147fn concat_lists<OffsetSize: OffsetSizeTrait>(
148 arrays: &[&dyn Array],
149 field: &FieldRef,
150) -> Result<ArrayRef, ArrowError> {
151 let mut output_len = 0;
152 let mut list_has_nulls = false;
153 let mut list_has_slices = false;
154
155 let lists = arrays
156 .iter()
157 .map(|x| x.as_list::<OffsetSize>())
158 .inspect(|l| {
159 output_len += l.len();
160 list_has_nulls |= l.null_count() != 0;
161 list_has_slices |= l.offsets()[0] > OffsetSize::zero()
162 || l.offsets().last().unwrap().as_usize() < l.values().len();
163 })
164 .collect::<Vec<_>>();
165
166 let lists_nulls = list_has_nulls.then(|| {
167 let mut nulls = BooleanBufferBuilder::new(output_len);
168 for l in &lists {
169 match l.nulls() {
170 Some(n) => nulls.append_buffer(n.inner()),
171 None => nulls.append_n(l.len(), true),
172 }
173 }
174 NullBuffer::new(nulls.finish())
175 });
176
177 let mut sliced_values;
180 let values: Vec<&dyn Array> = if list_has_slices {
181 sliced_values = Vec::with_capacity(lists.len());
182 for l in &lists {
183 let offsets = l.offsets();
186 let start_offset = offsets[0].as_usize();
187 let end_offset = offsets.last().unwrap().as_usize();
188 sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
189 }
190 sliced_values.iter().map(|a| a.as_ref()).collect()
191 } else {
192 lists.iter().map(|x| x.values().as_ref()).collect()
193 };
194
195 let concatenated_values = concat(values.as_slice())?;
196
197 let value_offset_buffer =
199 OffsetBuffer::<OffsetSize>::from_lengths(lists.iter().flat_map(|x| x.offsets().lengths()));
200
201 let array = GenericListArray::<OffsetSize>::try_new(
202 Arc::clone(field),
203 value_offset_buffer,
204 concatenated_values,
205 lists_nulls,
206 )?;
207
208 Ok(Arc::new(array))
209}
210
211fn concat_list_view<OffsetSize: OffsetSizeTrait>(
212 arrays: &[&dyn Array],
213 field: &FieldRef,
214) -> Result<ArrayRef, ArrowError> {
215 let mut output_len = 0;
216 let mut list_has_nulls = false;
217
218 let lists = arrays
219 .iter()
220 .map(|x| x.as_list_view::<OffsetSize>())
221 .inspect(|l| {
222 output_len += l.len();
223 list_has_nulls |= l.null_count() != 0;
224 })
225 .collect::<Vec<_>>();
226
227 let lists_nulls = list_has_nulls.then(|| {
228 let mut nulls = BooleanBufferBuilder::new(output_len);
229 for l in &lists {
230 match l.nulls() {
231 Some(n) => nulls.append_buffer(n.inner()),
232 None => nulls.append_n(l.len(), true),
233 }
234 }
235 NullBuffer::new(nulls.finish())
236 });
237
238 let values: Vec<&dyn Array> = lists.iter().map(|l| l.values().as_ref()).collect();
239
240 let concatenated_values = concat(values.as_slice())?;
241
242 let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| x.sizes()).copied().collect();
243
244 let mut offsets = MutableBuffer::with_capacity(lists.iter().map(|l| l.offsets().len()).sum());
245 let mut global_offset = OffsetSize::zero();
246 for l in lists.iter() {
247 for &offset in l.offsets() {
248 offsets.push(offset + global_offset);
249 }
250
251 global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
253 }
254
255 let offsets = ScalarBuffer::from(offsets);
256
257 let array = GenericListViewArray::try_new(
258 field.clone(),
259 offsets,
260 sizes,
261 concatenated_values,
262 lists_nulls,
263 )?;
264
265 Ok(Arc::new(array))
266}
267
268fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
269 let mut builder = PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
270 .with_data_type(arrays[0].data_type().clone());
271
272 for array in arrays {
273 builder.append_array(array.as_primitive());
274 }
275
276 Ok(Arc::new(builder.finish()))
277}
278
279fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
280 let mut builder = BooleanBuilder::with_capacity(arrays.iter().map(|a| a.len()).sum());
281
282 for array in arrays {
283 builder.append_array(array.as_boolean());
284 }
285
286 Ok(Arc::new(builder.finish()))
287}
288
289fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
290 let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) {
291 Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity),
292 _ => unreachable!(),
293 };
294
295 let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
296
297 for array in arrays {
298 builder.append_array(array.as_bytes::<T>())?;
299 }
300
301 Ok(Arc::new(builder.finish()))
302}
303
304fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> {
305 let mut len = 0;
306 let mut has_nulls = false;
307 let structs = arrays
308 .iter()
309 .map(|a| {
310 len += a.len();
311 has_nulls |= a.null_count() > 0;
312 a.as_struct()
313 })
314 .collect::<Vec<_>>();
315
316 let nulls = has_nulls.then(|| {
317 let mut b = BooleanBufferBuilder::new(len);
318 for s in &structs {
319 match s.nulls() {
320 Some(n) => b.append_buffer(n.inner()),
321 None => b.append_n(s.len(), true),
322 }
323 }
324 NullBuffer::new(b.finish())
325 });
326
327 let column_concat_result = (0..fields.len())
328 .map(|i| {
329 let extracted_cols = structs
330 .iter()
331 .map(|s| s.column(i).as_ref())
332 .collect::<Vec<_>>();
333 concat(&extracted_cols)
334 })
335 .collect::<Result<Vec<_>, ArrowError>>()?;
336
337 Ok(Arc::new(StructArray::try_new_with_length(
338 fields.clone(),
339 column_concat_result,
340 nulls,
341 len,
342 )?))
343}
344
345fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError>
352where
353 R::Native: Add<Output = R::Native>,
354{
355 let run_arrays: Vec<_> = arrays
356 .iter()
357 .map(|x| x.as_run::<R>())
358 .filter(|x| !x.run_ends().is_empty())
359 .collect();
360
361 let needed_run_end_adjustments = std::iter::once(R::default_value())
363 .chain(
364 run_arrays
365 .iter()
366 .scan(R::default_value(), |acc, run_array| {
367 *acc = *acc + *run_array.run_ends().values().last().unwrap();
368 Some(*acc)
369 }),
370 )
371 .collect::<Vec<_>>();
372
373 let total_len = needed_run_end_adjustments.last().unwrap().as_usize();
375
376 let run_ends_array =
377 PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map(
378 move |(i, run_array)| {
379 let adjustment = needed_run_end_adjustments[i];
380 run_array
381 .run_ends()
382 .values()
383 .iter()
384 .map(move |run_end| *run_end + adjustment)
385 },
386 ));
387
388 let all_values = concat(
389 &run_arrays
390 .iter()
391 .map(|x| x.values().as_ref())
392 .collect::<Vec<_>>(),
393 )?;
394
395 let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
396 .len(total_len)
397 .child_data(vec![run_ends_array.into_data(), all_values.into_data()]);
398
399 let array_data = unsafe { builder.build_unchecked() };
401 array_data.validate_data()?;
402
403 Ok(Arc::<RunArray<R>>::new(array_data.into()))
404}
405
// Dispatch target for `downcast_integer!`: returns from the enclosing function with the
// result of concatenating dictionaries keyed by `$t`.
//
// `concat_dictionaries` already yields an `ArrayRef`, so its result is returned
// directly instead of being wrapped in a second `Arc`.
macro_rules! dict_helper {
    ($t:ty, $arrays:expr) => {
        return concat_dictionaries::<$t>($arrays)
    };
}
411
// Dispatch target for `downcast_primitive!`: returns from the enclosing function with
// the result of concatenating primitive arrays of type `$t`.
//
// `concat_primitives` already yields an `ArrayRef`, so its result is returned directly
// instead of being wrapped in a second `Arc`.
macro_rules! primitive_concat {
    ($t:ty, $arrays:expr) => {
        return concat_primitives::<$t>($arrays)
    };
}
417
418fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
419 match data_type {
420 DataType::Utf8 => binary_capacity::<Utf8Type>(arrays),
421 DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays),
422 DataType::Binary => binary_capacity::<BinaryType>(arrays),
423 DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays),
424 DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type),
425 _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
426 }
427}
428
429pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
431 if arrays.is_empty() {
432 return Err(ArrowError::ComputeError(
433 "concat requires input of at least one array".to_string(),
434 ));
435 } else if arrays.len() == 1 {
436 let array = arrays[0];
437 return Ok(array.slice(0, array.len()));
438 }
439
440 let d = arrays[0].data_type();
441 if arrays.iter().skip(1).any(|array| array.data_type() != d) {
442 let error_message = {
444 let mut unique_data_types = HashSet::with_capacity(11);
446
447 let mut error_message =
448 format!("It is not possible to concatenate arrays of different data types ({d}");
449 unique_data_types.insert(d);
450
451 for array in arrays {
452 let is_unique = unique_data_types.insert(array.data_type());
453
454 if unique_data_types.len() == 11 {
455 error_message.push_str(", ...");
456 break;
457 }
458
459 if is_unique {
460 error_message.push_str(", ");
461 error_message.push_str(&array.data_type().to_string());
462 }
463 }
464
465 error_message.push_str(").");
466
467 error_message
468 };
469
470 return Err(ArrowError::InvalidArgumentError(error_message));
471 }
472
473 downcast_primitive! {
474 d => (primitive_concat, arrays),
475 DataType::Boolean => concat_boolean(arrays),
476 DataType::Dictionary(k, _) => {
477 downcast_integer! {
478 k.as_ref() => (dict_helper, arrays),
479 _ => unreachable!("illegal dictionary key type {k}")
480 }
481 }
482 DataType::List(field) => concat_lists::<i32>(arrays, field),
483 DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
484 DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
485 DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
486 DataType::Struct(fields) => concat_structs(arrays, fields),
487 DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
488 DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
489 DataType::Binary => concat_bytes::<BinaryType>(arrays),
490 DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays),
491 DataType::RunEndEncoded(r, _) => {
492 match r.data_type() {
495 DataType::Int16 => concat_run_arrays::<Int16Type>(arrays),
496 DataType::Int32 => concat_run_arrays::<Int32Type>(arrays),
497 DataType::Int64 => concat_run_arrays::<Int64Type>(arrays),
498 _ => unreachable!("Unsupported run end index type: {r:?}"),
499 }
500 }
501 DataType::Utf8View => concat_byte_view::<StringViewType>(arrays),
502 DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays),
503 _ => {
504 let capacity = get_capacity(arrays, d);
505 concat_fallback(arrays, capacity)
506 }
507 }
508}
509
510fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> {
514 let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
515 let array_data = array_data.iter().collect();
516 let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
517
518 for (i, a) in arrays.iter().enumerate() {
519 mutable.extend(i, 0, a.len())
520 }
521
522 Ok(make_array(mutable.freeze()))
523}
524
525pub fn concat_batches<'a>(
532 schema: &SchemaRef,
533 input_batches: impl IntoIterator<Item = &'a RecordBatch>,
534) -> Result<RecordBatch, ArrowError> {
535 if schema.fields().is_empty() {
537 let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum();
538 let mut options = RecordBatchOptions::default();
539 options.row_count = Some(num_rows);
540 return RecordBatch::try_new_with_options(schema.clone(), vec![], &options);
541 }
542
543 let batches: Vec<&RecordBatch> = input_batches.into_iter().collect();
544 if batches.is_empty() {
545 return Ok(RecordBatch::new_empty(schema.clone()));
546 }
547 let field_num = schema.fields().len();
548 let mut arrays = Vec::with_capacity(field_num);
549 for i in 0..field_num {
550 let array = concat(
551 &batches
552 .iter()
553 .map(|batch| batch.column(i).as_ref())
554 .collect::<Vec<_>>(),
555 )?;
556 arrays.push(array);
557 }
558 RecordBatch::try_new(schema.clone(), arrays)
559}
560
561#[cfg(test)]
562mod tests {
563 use super::*;
564 use arrow_array::builder::{
565 GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
566 };
567 use arrow_schema::{Field, Schema};
568 use std::fmt::Debug;
569
#[test]
/// An empty input slice is rejected with an error.
fn test_concat_empty_vec() {
    assert!(concat(&[]).is_err());
}
575
#[test]
/// Batches without columns still add up their row counts.
fn test_concat_batches_no_columns() {
    let schema = Arc::new(Schema::empty());

    let mut row_count_only = RecordBatchOptions::default();
    row_count_only.row_count = Some(100);
    let batch =
        RecordBatch::try_new_with_options(schema.clone(), vec![], &row_count_only).unwrap();

    let inputs = [batch.clone(), batch];
    let combined = concat_batches(&schema, &inputs).unwrap();

    assert_eq!(combined.num_rows(), 200);
}
589
#[test]
/// A single input comes back equal to itself.
fn test_concat_one_element_vec() {
    let input: ArrayRef = Arc::new(Int64Array::from(vec![Some(-1), Some(2), None]));

    let output = concat(&[input.as_ref()]).unwrap();

    assert_eq!(
        &input, &output,
        "concatenating single element array gives back the same result"
    );
}
603
#[test]
/// Mixed data types are rejected, and each distinct type is listed once in the message.
fn test_concat_incompatible_datatypes() {
    let ints_64 = Int64Array::from(vec![Some(-1), Some(2), None]);
    let strings_a = StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]);
    // Same type as the previous array: must not be listed a second time.
    let strings_b = StringArray::from(vec![Some("hey"), Some(""), Some("you")]);
    let ints_32 = Int32Array::from(vec![Some(-1), Some(2), None]);

    let err = concat(&[&ints_64, &strings_a, &strings_b, &ints_32]).unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
    );
}
620
#[test]
/// With exactly ten distinct data types, all of them appear in the message.
fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
    let err = concat(&[
        &Int64Array::from(vec![Some(-1), Some(2), None]),
        &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
        // Repeated type: not listed again.
        &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
        &Int32Array::from(vec![Some(-1), Some(2), None]),
        &Int8Array::from(vec![Some(-1), Some(2), None]),
        &Int16Array::from(vec![Some(-1), Some(2), None]),
        &UInt8Array::from(vec![Some(1), Some(2), None]),
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt32Array::from(vec![Some(1), Some(2), None]),
        // Repeated type: not listed again.
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt64Array::from(vec![Some(1), Some(2), None]),
        &Float32Array::from(vec![Some(1.0), Some(2.0), None]),
    ])
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
    );
}
646
#[test]
/// With eleven distinct data types, the first ten are listed, followed by `...`.
fn test_concat_11_incompatible_datatypes_should_only_include_10() {
    let err = concat(&[
        &Int64Array::from(vec![Some(-1), Some(2), None]),
        &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
        // Repeated type: not listed again.
        &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
        &Int32Array::from(vec![Some(-1), Some(2), None]),
        &Int8Array::from(vec![Some(-1), Some(2), None]),
        &Int16Array::from(vec![Some(-1), Some(2), None]),
        &UInt8Array::from(vec![Some(1), Some(2), None]),
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt32Array::from(vec![Some(1), Some(2), None]),
        // Repeated type: not listed again.
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt64Array::from(vec![Some(1), Some(2), None]),
        &Float32Array::from(vec![Some(1.0), Some(2.0), None]),
        &Float64Array::from(vec![Some(1.0), Some(2.0), None]),
    ])
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
    );
}
673
#[test]
/// With thirteen distinct data types, only the first ten are listed, followed by `...`.
fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
    let err = concat(&[
        &Int64Array::from(vec![Some(-1), Some(2), None]),
        &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
        // Repeated type: not listed again.
        &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
        &Int32Array::from(vec![Some(-1), Some(2), None]),
        &Int8Array::from(vec![Some(-1), Some(2), None]),
        &Int16Array::from(vec![Some(-1), Some(2), None]),
        &UInt8Array::from(vec![Some(1), Some(2), None]),
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt32Array::from(vec![Some(1), Some(2), None]),
        // Repeated type: not listed again.
        &UInt16Array::from(vec![Some(1), Some(2), None]),
        &UInt64Array::from(vec![Some(1), Some(2), None]),
        &Float32Array::from(vec![Some(1.0), Some(2.0), None]),
        &Float64Array::from(vec![Some(1.0), Some(2.0), None]),
        &Float16Array::new_null(3),
        &BooleanArray::from(vec![Some(true), Some(false), None]),
    ])
    .unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
    );
}
702
#[test]
/// String arrays are joined in order, keeping nulls.
fn test_concat_string_arrays() {
    let first = StringArray::from(vec!["hello", "world"]);
    let second = StringArray::from(vec!["2", "3", "4"]);
    let third = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);

    let actual = concat(&[&first, &second, &third]).unwrap();

    let expected: ArrayRef = Arc::new(StringArray::from(vec![
        Some("hello"),
        Some("world"),
        Some("2"),
        Some("3"),
        Some("4"),
        Some("foo"),
        Some("bar"),
        None,
        Some("baz"),
    ]));

    assert_eq!(&actual, &expected);
}
726
#[test]
/// String view arrays with a mix of long and short strings are joined in order,
/// keeping nulls.
fn test_concat_string_view_arrays() {
    let first = StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]);
    let second = StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]);
    let third = StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);

    let actual = concat(&[&first, &second, &third]).unwrap();

    let expected: ArrayRef = Arc::new(StringViewArray::from(vec![
        Some("helloxxxxxxxxxxa"),
        Some("world____________"),
        Some("helloxxxxxxxxxxy"),
        Some("3"),
        Some("4"),
        Some("foo"),
        Some("bar"),
        None,
        Some("baz"),
    ]));

    assert_eq!(&actual, &expected);
}
750
#[test]
/// Primitive arrays are joined in order, keeping nulls.
fn test_concat_primitive_arrays() {
    let first = Int64Array::from(vec![Some(-1), Some(-1), Some(2), None, None]);
    let second = Int64Array::from(vec![Some(101), Some(102), Some(103), None]);
    let third = Int64Array::from(vec![Some(256), Some(512), Some(1024)]);

    let actual = concat(&[&first, &second, &third]).unwrap();

    let expected: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(-1),
        Some(-1),
        Some(2),
        None,
        None,
        Some(101),
        Some(102),
        Some(103),
        None,
        Some(256),
        Some(512),
        Some(1024),
    ]));

    assert_eq!(&actual, &expected);
}
777
#[test]
/// Sliced primitive arrays contribute only the elements inside their slice.
fn test_concat_primitive_array_slices() {
    let first_full = Int64Array::from(vec![Some(-1), Some(-1), Some(2), None, None]);
    let first = first_full.slice(1, 3);

    let second_full = Int64Array::from(vec![Some(101), Some(102), Some(103), None]);
    let second = second_full.slice(1, 3);

    let actual = concat(&[&first, &second]).unwrap();

    let expected: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(-1),
        Some(2),
        None,
        Some(102),
        Some(103),
        None,
    ]));

    assert_eq!(&actual, &expected);
}
800
#[test]
/// Boolean arrays are joined in order, keeping nulls.
fn test_concat_boolean_primitive_arrays() {
    let first = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        None,
        None,
        Some(false),
    ]);
    let second = BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]);

    let actual = concat(&[&first, &second]).unwrap();

    let expected: ArrayRef = Arc::new(BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        None,
        None,
        Some(false),
        None,
        Some(false),
        Some(true),
        Some(false),
    ]));

    assert_eq!(&actual, &expected);
}
831
#[test]
/// List arrays are joined in order, including empty and null lists.
fn test_concat_primitive_list_arrays() {
    let rows_a = [
        Some(vec![Some(-1), Some(-1), Some(2), None, None]),
        Some(vec![]),
        None,
        Some(vec![Some(10)]),
    ];
    let rows_b = [
        None,
        Some(vec![Some(100), None, Some(101)]),
        Some(vec![Some(102)]),
    ];
    let rows_c = [Some(vec![Some(1000), Some(1001)])];

    let array_a = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_a.clone());
    let array_b = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_b.clone());
    let array_c = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_c.clone());

    let actual = concat(&[&array_a, &array_b, &array_c]).unwrap();

    let all_rows = rows_a.into_iter().chain(rows_b).chain(rows_c);
    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(all_rows);

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
859
#[test]
/// A list array sliced so that its first offset is non-zero is concatenated correctly.
fn test_concat_primitive_list_arrays_slices() {
    let rows_a = [
        Some(vec![Some(-1), Some(-1), Some(2), None, None]),
        Some(vec![]),
        None,
        Some(vec![Some(10)]),
    ];
    let rows_b = [
        None,
        Some(vec![Some(100), None, Some(101)]),
        Some(vec![Some(102)]),
    ];

    // Keep only rows 1 and 2 of the first array.
    let array_a = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_a.clone()).slice(1, 2);
    let kept_rows_a = rows_a.into_iter().skip(1).take(2);

    let array_b = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_b.clone());

    // Guard: the slice really starts past the beginning of the values.
    assert!(array_a.offsets()[0].as_usize() > 0);
    let actual = concat(&[&array_a, &array_b]).unwrap();

    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(kept_rows_a.chain(rows_b));

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
888
#[test]
/// A list array sliced at the end (first offset zero, last offset short of the values
/// length) is concatenated correctly.
fn test_concat_primitive_list_arrays_sliced_lengths() {
    let rows_a = [
        Some(vec![Some(-1), Some(-1), Some(2), None, None]),
        Some(vec![]),
        None,
        Some(vec![Some(10)]),
    ];
    let rows_b = [
        None,
        Some(vec![Some(100), None, Some(101)]),
        Some(vec![Some(102)]),
    ];

    // Drop the last row of the first array.
    let array_a = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_a.clone()).slice(0, 3);
    let kept_rows_a = rows_a.into_iter().take(3);

    let array_b = ListArray::from_iter_primitive::<Int64Type, _, _>(rows_b.clone());

    // Guards: the slice starts at zero but ends before the end of the values.
    assert_eq!(array_a.offsets()[0].as_usize(), 0);
    assert!(array_a.offsets().last().unwrap().as_usize() < array_a.values().len());
    let actual = concat(&[&array_a, &array_b]).unwrap();

    let expected = ListArray::from_iter_primitive::<Int64Type, _, _>(kept_rows_a.chain(rows_b));

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
919
#[test]
/// Fixed-size list arrays (list size 2) are joined in order, keeping nulls.
fn test_concat_primitive_fixed_size_list_arrays() {
    let rows_a = [
        Some(vec![Some(-1), None]),
        None,
        Some(vec![Some(10), Some(20)]),
    ];
    let rows_b = [
        None,
        Some(vec![Some(100), None]),
        Some(vec![Some(102), Some(103)]),
    ];
    let rows_c = [Some(vec![Some(1000), Some(1001)])];

    let array_a = FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(rows_a.clone(), 2);
    let array_b = FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(rows_b.clone(), 2);
    let array_c = FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(rows_c.clone(), 2);

    let actual = concat(&[&array_a, &array_b, &array_c]).unwrap();

    let all_rows = rows_a.into_iter().chain(rows_b).chain(rows_c);
    let expected = FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(all_rows, 2);

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
950
#[test]
/// List view arrays are joined in order, keeping nulls.
fn test_concat_list_view_arrays() {
    /// Builds a list view array by appending `rows` one by one.
    fn build(rows: &[Option<Vec<Option<i64>>>]) -> ListViewArray {
        let mut builder = ListViewBuilder::new(Int64Builder::new());
        for row in rows {
            builder.append_option(row.clone());
        }
        builder.finish()
    }

    let rows_a = [
        Some(vec![Some(-1), None]),
        None,
        Some(vec![Some(10), Some(20)]),
    ];
    let rows_b = [
        None,
        Some(vec![Some(100), None]),
        Some(vec![Some(102), Some(103)]),
    ];
    let rows_c = [Some(vec![Some(1000), Some(1001)])];

    let array_a = build(&rows_a);
    let array_b = build(&rows_b);
    let array_c = build(&rows_c);

    let actual = concat(&[&array_a, &array_b, &array_c]).unwrap();

    let all_rows: Vec<_> = rows_a.into_iter().chain(rows_b).chain(rows_c).collect();
    let expected = build(&all_rows);

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
993
#[test]
/// Sliced list view arrays contribute only the rows inside their slice.
fn test_concat_sliced_list_view_arrays() {
    /// Builds a list view array by appending `rows` one by one.
    fn build(rows: &[Option<Vec<Option<i64>>>]) -> ListViewArray {
        let mut builder = ListViewBuilder::new(Int64Builder::new());
        for row in rows {
            builder.append_option(row.clone());
        }
        builder.finish()
    }

    let array_a = build(&[
        Some(vec![Some(-1), None]),
        None,
        Some(vec![Some(10), Some(20)]),
    ]);
    let array_b = build(&[
        None,
        Some(vec![Some(100), None]),
        Some(vec![Some(102), Some(103)]),
    ]);
    let array_c = build(&[Some(vec![Some(1000), Some(1001)])]);

    // Drop the first row of the first two arrays; take the third whole.
    let slice_a = array_a.slice(1, 2);
    let slice_b = array_b.slice(1, 2);
    let slice_c = array_c.slice(0, 1);

    let actual = concat(&[&slice_a, &slice_b, &slice_c]).unwrap();

    let expected = build(&[
        None,
        Some(vec![Some(10), Some(20)]),
        Some(vec![Some(100), None]),
        Some(vec![Some(102), Some(103)]),
        Some(vec![Some(1000), Some(1001)]),
    ]);

    assert_eq!(actual.as_ref(), &expected as &dyn Array);
}
1049
#[test]
/// Struct arrays with one Int64 column are joined; the column is checked against the
/// concatenation of the input columns.
fn test_concat_struct_arrays() {
    let field = Arc::new(Field::new("field", DataType::Int64, true));

    let column_a: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(-1),
        Some(-1),
        Some(2),
        None,
        None,
    ]));
    let column_b: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(101),
        Some(102),
        Some(103),
        None,
    ]));
    let column_c: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(256),
        Some(512),
        Some(1024),
    ]));

    let struct_a = StructArray::from(vec![(field.clone(), column_a)]);
    let struct_b = StructArray::from(vec![(field.clone(), column_b)]);
    let struct_c = StructArray::from(vec![(field, column_c)]);

    let combined = concat(&[&struct_a, &struct_b, &struct_c]).unwrap();

    let expected_column: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(-1),
        Some(-1),
        Some(2),
        None,
        None,
        Some(101),
        Some(102),
        Some(103),
        None,
        Some(256),
        Some(512),
        Some(1024),
    ]));

    let actual_column = combined.as_struct().column(0);
    assert_eq!(actual_column, &expected_column);
}
1101
/// Concatenating *slices* of struct arrays must only pick up the rows inside
/// each slice window, not the rows of the backing arrays around it.
#[test]
fn test_concat_struct_array_slices() {
    let field = Arc::new(Field::new("field", DataType::Int64, true));
    // Wraps one nullable Int64 column into a single-field struct array.
    let struct_of = |values: Vec<Option<i64>>| {
        let column: ArrayRef = Arc::new(Int64Array::from(values));
        StructArray::from(vec![(field.clone(), column)])
    };

    let left = struct_of(vec![Some(-1), Some(-1), Some(2), None, None]);
    let right = struct_of(vec![Some(101), Some(102), Some(103), None]);

    // Rows 1..4 of `left` followed by rows 1..3 of `right`.
    let merged = concat(&[&left.slice(1, 3), &right.slice(1, 2)]).unwrap();

    let expected_column: ArrayRef = Arc::new(Int64Array::from(vec![
        Some(-1),
        Some(2),
        None,
        Some(102),
        Some(103),
    ]));

    assert_eq!(merged.as_struct().column(0), &expected_column);
}
1139
/// Two-column struct arrays without any nulls concatenate into a struct array
/// equal to one built directly from the combined column data, with a null
/// count of zero.
#[test]
fn test_concat_struct_arrays_no_nulls() {
    let ints_1: Vec<i64> = vec![1, 2, 3];
    let strings_1: Vec<&str> = vec!["one", "two", "three"];
    let ints_2: Vec<i64> = vec![4, 5, 6, 7];
    let strings_2: Vec<&str> = vec!["four", "five", "six", "seven"];

    // Builds an ("ints", "strings") struct array from plain column data.
    let build_struct = |ints: Vec<i64>, strings: Vec<&str>| -> StructArray {
        StructArray::try_from(vec![
            ("ints", Arc::new(Int64Array::from(ints)) as ArrayRef),
            ("strings", Arc::new(StringArray::from(strings)) as ArrayRef),
        ])
        .unwrap()
    };

    let all_ints: Vec<i64> = ints_1.iter().chain(&ints_2).copied().collect();
    let all_strings: Vec<&str> = strings_1.iter().chain(&strings_2).copied().collect();
    let expected = build_struct(all_ints, all_strings);

    let first = build_struct(ints_1, strings_1);
    let second = build_struct(ints_2, strings_2);

    let merged = concat(&[&first, &second]).unwrap();

    assert_eq!(merged.as_struct(), &expected);
    assert_eq!(merged.null_count(), 0);
}
1169
/// Struct arrays with zero fields still carry a length and optional validity;
/// both must survive concatenation.
#[test]
fn test_concat_struct_no_fields() {
    // Case 1: neither input has a null buffer.
    let plain_a = StructArray::new_empty_fields(10, None);
    let plain_b = StructArray::new_empty_fields(10, None);
    let merged = concat(&[&plain_a, &plain_b]).unwrap();

    assert_eq!(merged.len(), 20);
    assert_eq!(merged.null_count(), 0);

    // Case 2: one all-valid input followed by one all-null input.
    let all_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
    let all_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
    let merged = concat(&[&all_valid, &all_null]).unwrap();

    assert_eq!(merged.len(), 20);
    assert_eq!(merged.null_count(), 10);
}
1186
/// Concatenating slices of string arrays yields only the strings inside each
/// slice window.
#[test]
fn test_string_array_slices() {
    let left = StringArray::from(vec!["hello", "A", "B", "C"]);
    let right = StringArray::from(vec!["world", "D", "E", "Z"]);

    // Drops "hello" from the left input and "world" / "Z" from the right one.
    let merged = concat(&[&left.slice(1, 3), &right.slice(1, 2)]).unwrap();
    let actual = merged.as_string::<i32>();

    let expected = StringArray::from(vec!["A", "B", "C", "D", "E"]);
    assert_eq!(actual, &expected);
}
1199
/// Same as the plain string slice test, but the slice windows contain nulls
/// that must be carried over at the right positions.
#[test]
fn test_string_array_with_null_slices() {
    let left = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
    let right = StringArray::from(vec![None, Some("world"), Some("D"), None]);

    let merged = concat(&[&left.slice(1, 3), &right.slice(1, 2)]).unwrap();
    let actual = merged.as_string::<i32>();

    let expected = StringArray::from(vec![
        None,
        Some("A"),
        Some("C"),
        Some("world"),
        Some("D"),
    ]);
    assert_eq!(actual, &expected);
}
1213
1214 fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> {
1215 let concrete = array.downcast_dict::<StringArray>().unwrap();
1216 concrete.into_iter().collect()
1217 }
1218
/// Two small string dictionaries concatenate to the expected logical values,
/// and the output dictionary holds as many values as both inputs combined.
#[test]
fn test_string_dictionary_array() {
    let words_1 = ["hello", "A", "B", "hello", "hello", "C"];
    let words_2 = ["hello", "E", "E", "hello", "F", "E"];

    let input_1: DictionaryArray<Int32Type> = words_1.iter().copied().collect();
    let input_2: DictionaryArray<Int32Type> = words_2.iter().copied().collect();

    let expected: Vec<Option<&str>> = words_1
        .iter()
        .chain(&words_2)
        .copied()
        .map(Some)
        .collect();

    let merged = concat(&[&input_1 as _, &input_2 as _]).unwrap();
    let dictionary = merged.as_dictionary::<Int32Type>();
    assert_eq!(collect_string_dictionary(dictionary), expected);

    // The value count equals the sum of the inputs' value counts, i.e. the
    // shared "hello" entry is expected to appear once per input here.
    let combined_values = input_1.values().len() + input_2.values().len();
    assert_eq!(dictionary.values().len(), combined_values);
}
1246
/// Null keys in string dictionaries are preserved by concatenation, and the
/// output dictionary holds as many values as both inputs combined.
#[test]
fn test_string_dictionary_array_nulls() {
    let entries_1 = [Some("foo"), Some("bar"), None, Some("fiz")];
    let input_1: DictionaryArray<Int32Type> = entries_1.iter().copied().collect();
    // Second input consists of a single null entry.
    let input_2: DictionaryArray<Int32Type> = std::iter::once(None::<&str>).collect();

    let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];

    let merged = concat(&[&input_1 as _, &input_2 as _]).unwrap();
    let dictionary = merged.as_dictionary::<Int32Type>();
    assert_eq!(collect_string_dictionary(dictionary), expected);

    let combined_values = input_1.values().len() + input_2.values().len();
    assert_eq!(dictionary.values().len(), combined_values);
}
1266
/// Dictionaries may encode nulls inside the *values* array (valid key pointing
/// at a null value); those must still read back as null after concatenation.
#[test]
fn test_string_dictionary_array_nulls_in_values() {
    // Keys 0, 2, 1, 3 resolve to "foo", "bar", <null>, "fiz".
    let first = DictionaryArray::new(
        Int32Array::from_iter_values([0, 2, 1, 3]),
        Arc::new(StringArray::from(vec![
            Some("foo"),
            None,
            Some("bar"),
            Some("fiz"),
        ])),
    );

    // Key 0 resolves to the null value; "hello" is never referenced.
    let second = DictionaryArray::new(
        Int32Array::from_iter_values([0]),
        Arc::new(StringArray::from(vec![None, Some("hello")])),
    );

    let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];

    let merged = concat(&[&first as _, &second as _]).unwrap();
    let dictionary = merged.as_dictionary::<Int32Type>();
    assert_eq!(collect_string_dictionary(dictionary), expected);
}
1284
/// Larger dictionaries with heavily overlapping values: the logical result
/// must be exact, and the output dictionary is expected to be smaller than the
/// 50 values a plain append of both value arrays would produce.
#[test]
fn test_string_dictionary_merge() {
    // Builds a dictionary containing the decimal strings "0", "1", ..., count - 1.
    let numbered = |count: i32| {
        let mut builder = StringDictionaryBuilder::<Int32Type>::new();
        for i in 0..count {
            builder.append(i.to_string()).unwrap();
        }
        builder.finish()
    };
    let input_1 = numbered(20);
    let input_2 = numbered(30);

    let expected_strings: Vec<String> = (0..20).chain(0..30).map(|x| x.to_string()).collect();
    let expected: Vec<Option<&str>> = expected_strings
        .iter()
        .map(|s| Some(s.as_str()))
        .collect();

    let merged = concat(&[&input_1 as _, &input_2 as _]).unwrap();
    let dictionary = merged.as_dictionary::<Int32Type>();
    assert_eq!(collect_string_dictionary(dictionary), expected);

    // The test accepts anything from the 30 distinct values up to 39, so
    // deduplication does not have to be perfect.
    let values_len = dictionary.values().len();
    assert!((30..40).contains(&values_len), "{values_len}")
}
1312
/// Primitive-valued dictionaries: concatenating an array with itself is
/// expected to keep the very same values array, while concatenating with an
/// equal-but-separate dictionary is expected to keep only the referenced value.
#[test]
fn test_primitive_dictionary_merge() {
    // Every key is 1, so only a single value is ever referenced.
    let keys = vec![1; 5];
    let values = (10..20).collect::<Vec<_>>();
    let new_dictionary = || {
        DictionaryArray::new(
            Int8Array::from(keys.clone()),
            Arc::new(Int32Array::from(values.clone())),
        )
    };
    let dict = new_dictionary();
    let other = new_dictionary();

    // Same dictionary twice: per `ptr_eq`, the output values are the input values.
    let same = concat(&[&dict, &dict]).unwrap();
    let same_values = same.as_dictionary::<Int8Type>().values();
    assert!(dict.values().to_data().ptr_eq(&same_values.to_data()));
    assert_eq!(same_values.len(), values.len());

    // Distinct allocations with equal content: only one value is expected to remain.
    let cloned = concat(&[&dict, &other]).unwrap();
    let cloned_values = cloned.as_dictionary::<Int8Type>().values();
    assert_eq!(cloned_values.len(), 1);
}
1357
/// Checks the capacity of the second buffer of a concatenated large-string
/// array against the exact amount of string data in the inputs.
#[test]
fn test_concat_string_sizes() {
    let a: LargeStringArray = std::iter::repeat_n(Some("foo"), 150).collect();
    let b: LargeStringArray = std::iter::repeat_n(Some("foo"), 150).collect();
    let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);

    let merged = concat(&[&a, &b, &c]).unwrap();

    // 300 * "foo" plus "foo", "bar", "baz" is 909 bytes of string data;
    // buffer 1 is presumably the value bytes of the string array.
    let second_buffer_capacity = merged.to_data().buffers()[1].capacity();
    assert_eq!(second_buffer_capacity, 909);
}
1372
/// Concatenating dictionary arrays that share one values array is expected to
/// reuse that values array, while adding an input with a different dictionary
/// is expected to produce a fresh one.
#[test]
fn test_dictionary_concat_reuse() {
    let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
    let copy: DictionaryArray<Int8Type> = array.clone();

    // Sanity check of the input: values are "a", "b", "c" with keys 0, 0, 1, 2.
    assert_eq!(
        array.values(),
        &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
    );
    assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));

    // Concatenate the array with its clone.
    let combined = concat(&[&copy as _, &array as _]).unwrap();
    let combined = combined.as_dictionary::<Int8Type>();

    assert_eq!(
        combined.values(),
        &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
        "Actual: {combined:#?}"
    );

    assert_eq!(
        combined.keys(),
        &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
    );

    // Per `ptr_eq`, the output is expected to reuse the inputs' values array.
    assert!(
        array
            .values()
            .to_data()
            .ptr_eq(&combined.values().to_data())
    );
    assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));

    // A third input with its own dictionary ("d") prevents plain reuse.
    let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
    let combined = concat(&[&copy as _, &array as _, &new as _]).unwrap();
    let com = combined.as_dictionary::<Int8Type>();

    // The output values must not be any of the inputs' values arrays.
    assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
    assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
    assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
}
1418
/// `concat_batches` accepts both an iterator of batch references and a
/// reference to an array of owned batches, and produces the same shape for both.
#[test]
fn concat_record_batches() {
    let schema = Arc::new(Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ]));

    // Builds a two-column batch matching `schema`.
    let make_batch = |ints: Vec<i32>, strings: Vec<&str>| {
        let columns: Vec<ArrayRef> = vec![
            Arc::new(Int32Array::from(ints)),
            Arc::new(StringArray::from(strings)),
        ];
        RecordBatch::try_new(schema.clone(), columns).unwrap()
    };
    let batch1 = make_batch(vec![1, 2], vec!["a", "b"]);
    let batch2 = make_batch(vec![3, 4], vec!["c", "d"]);

    // Shared expectations: original schema, two columns, four rows.
    let check = |combined: &RecordBatch| {
        assert_eq!(combined.schema().as_ref(), schema.as_ref());
        assert_eq!(2, combined.num_columns());
        assert_eq!(4, combined.num_rows());
    };

    let from_refs = concat_batches(&schema, [&batch1, &batch2]).unwrap();
    check(&from_refs);

    let from_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
    check(&from_owned);
}
1450
/// Concatenating zero batches yields an empty batch carrying the given schema.
#[test]
fn concat_empty_record_batch() {
    let fields = vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, false),
    ];
    let schema = Arc::new(Schema::new(fields));

    let empty = concat_batches(&schema, []).unwrap();

    assert_eq!(empty.schema().as_ref(), schema.as_ref());
    assert_eq!(0, empty.num_rows());
}
1461
/// Batches whose schemas differ only in the column name ("a" vs "c") still
/// concatenate; the result carries the schema passed to `concat_batches`.
#[test]
fn concat_record_batches_of_different_schemas_but_compatible_data() {
    // Builds a one-column Int32 batch under the given schema.
    let int_batch = |schema: Arc<Schema>, values: Vec<i32>| {
        let columns: Vec<ArrayRef> = vec![Arc::new(Int32Array::from(values))];
        RecordBatch::try_new(schema, columns).unwrap()
    };

    let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));

    let batch1 = int_batch(schema1.clone(), vec![1, 2]);
    let batch2 = int_batch(schema2, vec![3, 4]);

    let combined = concat_batches(&schema1, [&batch1, &batch2]).unwrap();

    assert_eq!(combined.schema().as_ref(), schema1.as_ref());
    assert_eq!(4, combined.num_rows());
}
1479
/// Batches whose same-named column has different data types (Int32 vs Utf8)
/// cannot be concatenated; the error message names both types.
#[test]
fn concat_record_batches_of_different_schemas_incompatible_data() {
    let int_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
    let utf8_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));

    let int_column: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
    let int_batch = RecordBatch::try_new(int_schema.clone(), vec![int_column]).unwrap();

    let utf8_column: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"]));
    let utf8_batch = RecordBatch::try_new(utf8_schema, vec![utf8_column]).unwrap();

    let message = concat_batches(&int_schema, [&int_batch, &utf8_batch])
        .unwrap_err()
        .to_string();
    assert_eq!(
        message,
        "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
    );
}
1502
/// The buffers of a concatenated array are expected to be sized exactly:
/// for every checked buffer, length and capacity must both equal the given
/// byte count.
#[test]
fn concat_capacity() {
    // Returns `(len, capacity)` in bytes of the `index`-th buffer of `array`.
    let buffer_sizes = |array: &ArrayRef, index: usize| {
        let data = array.to_data();
        let buffer = &data.buffers()[index];
        (buffer.len(), buffer.capacity())
    };

    // Int32: 110 values * 4 bytes.
    let a = Int32Array::from_iter_values(0..100);
    let b = Int32Array::from_iter_values(10..20);
    let a = concat(&[&a, &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (440, 440));

    // Int32 slice: (20 + 10) values * 4 bytes.
    let a = concat(&[&a.slice(10, 20), &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (120, 120));

    let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
    let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);

    // Utf8: the numbers match (104 + 1) i32 offsets and 100 * 3 + 15 value bytes.
    let a = concat(&[&a, &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (420, 420));
    assert_eq!(buffer_sizes(&a, 1), (315, 315));

    // Utf8 slice: (44 + 1) i32 offsets and 40 * 3 + 15 value bytes.
    let a = concat(&[&a.slice(10, 40), &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (180, 180));
    assert_eq!(buffer_sizes(&a, 1), (135, 135));

    let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
    let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));

    // LargeBinary: (110 + 1) i64 offsets and 100 * 3 + 10 * 8 value bytes.
    let a = concat(&[&a, &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (888, 888));
    assert_eq!(buffer_sizes(&a, 1), (380, 380));

    // LargeBinary slice: (50 + 1) i64 offsets and 40 * 3 + 10 * 8 value bytes.
    let a = concat(&[&a.slice(10, 40), &b]).unwrap();
    assert_eq!(buffer_sizes(&a, 0), (408, 408));
    assert_eq!(buffer_sizes(&a, 1), (200, 200));
}
1563
/// A dictionary with 100 values but only valid keys, concatenated with a
/// dictionary that has no values and ten null keys, reports ten nulls both
/// physically and logically.
#[test]
fn concat_sparse_nulls() {
    let populated = DictionaryArray::new(
        Int32Array::from(vec![1; 10]),
        Arc::new(StringArray::from_iter_values(
            (0..100).map(|x| x.to_string()),
        )),
    );
    let all_null = DictionaryArray::new(
        Int32Array::new_null(10),
        Arc::new(StringArray::new_null(0)),
    );

    let merged = concat(&[&populated, &all_null]).unwrap();

    assert_eq!(merged.null_count(), 10);
    assert_eq!(merged.logical_null_count(), 10);
}
1576
/// Concatenating single-row lists of dictionary-encoded strings yields the
/// expected rows, and the dictionary of the resulting child array contains no
/// duplicate values.
#[test]
fn concat_dictionary_list_array_simple() {
    let scalars = ["a", "a", "b"].map(|item| create_single_row_list_of_dict(vec![Some(item)]));

    let arrays: Vec<&dyn Array> = scalars.iter().map(|a| a as &dyn Array).collect();
    let concat_res = concat(&arrays).unwrap();

    let expected_list = create_list_of_dict(vec![
        Some(vec![Some("a")]),
        Some(vec![Some("a")]),
        Some(vec![Some("b")]),
    ]);

    let list = concat_res.as_list::<i32>();

    // Compare row by row.
    for (actual, expected) in list.iter().zip(expected_list.iter()) {
        assert_eq!(actual, expected);
    }

    assert_dictionary_has_unique_values::<_, StringArray>(
        list.values().as_dictionary::<Int32Type>(),
    );
}
1606
/// Same as the simple dictionary-list test, but with 80000 single-row inputs
/// cycling through eight distinct strings.
#[test]
fn concat_many_dictionary_list_arrays() {
    let number_of_unique_values = 8;
    // The string stored in row `i`: "0" through "7", repeating.
    let value_for = |i: i32| (i % number_of_unique_values).to_string();

    let scalars: Vec<_> = (0..80000)
        .map(|i| create_single_row_list_of_dict(vec![Some(value_for(i))]))
        .collect();

    let arrays: Vec<&dyn Array> = scalars.iter().map(|a| a as &dyn Array).collect();
    let concat_res = concat(&arrays).unwrap();

    let expected_rows: Vec<_> = (0..80000)
        .map(|i| Some(vec![Some(value_for(i))]))
        .collect();
    let expected_list = create_list_of_dict(expected_rows);

    let list = concat_res.as_list::<i32>();

    // Compare row by row.
    for (actual, expected) in list.iter().zip(expected_list.iter()) {
        assert_eq!(actual, expected);
    }

    assert_dictionary_has_unique_values::<_, StringArray>(
        list.values().as_dictionary::<Int32Type>(),
    );
}
1638
1639 fn create_single_row_list_of_dict(
1640 list_items: Vec<Option<impl AsRef<str>>>,
1641 ) -> GenericListArray<i32> {
1642 let rows = list_items.into_iter().map(Some).collect();
1643
1644 create_list_of_dict(vec![rows])
1645 }
1646
1647 fn create_list_of_dict(
1648 rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
1649 ) -> GenericListArray<i32> {
1650 let mut builder =
1651 GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());
1652
1653 for row in rows {
1654 builder.append_option(row);
1655 }
1656
1657 builder.finish()
1658 }
1659
1660 fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
1661 where
1662 K: ArrowDictionaryKeyType,
1663 V: Sync + Send + 'static,
1664 &'a V: ArrayAccessor + IntoIterator,
1665 <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
1666 <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
1667 {
1668 let dict = array.downcast_dict::<V>().unwrap();
1669 let mut values = dict.values().into_iter().collect::<Vec<_>>();
1670
1671 values.sort();
1673
1674 let mut unique_values = values.clone();
1675
1676 unique_values.dedup();
1677
1678 assert_eq!(
1679 values, unique_values,
1680 "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
1681 );
1682 }
1683
/// Concatenating two run-end encoded arrays appends the runs of the second
/// input, with its run ends shifted by the length of the first input.
#[test]
fn test_concat_run_array() {
    // Run ends [2, 4] over values [10, 20]: four logical elements.
    let array1 = RunArray::try_new(
        &Int32Array::from(vec![2, 4]),
        &Int32Array::from(vec![10, 20]),
    )
    .unwrap();

    // Run ends [1, 4] over values [30, 40]: four logical elements.
    let array2 = RunArray::try_new(
        &Int32Array::from(vec![1, 4]),
        &Int32Array::from(vec![30, 40]),
    )
    .unwrap();

    let result = concat(&[&array1, &array2]).unwrap();
    let result_run_array = result.as_run::<Int32Type>();

    assert_eq!(result_run_array.len(), 8);

    // Second input's run ends are offset by 4.
    let run_ends = result_run_array.run_ends().values();
    assert_eq!(run_ends.len(), 4);
    assert_eq!(&[2, 4, 5, 8], run_ends);

    let values = result_run_array.values().as_primitive::<Int32Type>();
    assert_eq!(values.len(), 4);
    assert_eq!(&[10, 20, 30, 40], values.values());
}
1717
/// When the last run of the first input and the first run of the second input
/// hold the same value (30), the test expects them to stay separate runs.
#[test]
fn test_concat_run_array_matching_first_last_value() {
    // Seven logical elements, ending in a run of 30s.
    let array1 = RunArray::try_new(
        &Int32Array::from(vec![2, 4, 7]),
        &Int32Array::from(vec![10, 20, 30]),
    )
    .unwrap();

    // Five logical elements, starting with a run of 30s.
    let array2 = RunArray::try_new(
        &Int32Array::from(vec![3, 5]),
        &Int32Array::from(vec![30, 40]),
    )
    .unwrap();

    let result = concat(&[&array1, &array2]).unwrap();
    let result_run_array = result.as_run::<Int32Type>();

    assert_eq!(result_run_array.len(), 12);

    // Five runs, not four: the two runs of 30 are not merged.
    let run_ends = result_run_array.run_ends().values();
    assert_eq!(&[2, 4, 7, 10, 12], run_ends);

    let values = result_run_array.values().as_primitive::<Int32Type>();
    assert_eq!(&[10, 20, 30, 30, 40], values.values());
}
1752
/// Run-end encoded arrays whose values contain nulls: concatenation must keep
/// the null runs at the right positions.
#[test]
fn test_concat_run_array_with_nulls() {
    // Seven logical elements; the middle run (positions 2..4) is null.
    let values1 = Int32Array::from(vec![Some(10), None, Some(30)]);
    let run_ends1 = Int32Array::from(vec![2, 4, 7]);
    let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();

    // Five logical elements; the trailing run (positions 3..5) is null.
    let values2 = Int32Array::from(vec![Some(30), None]);
    let run_ends2 = Int32Array::from(vec![3, 5]);
    let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();

    let result = concat(&[&array1, &array2]).unwrap();
    let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();

    // 7 + 5 logical elements.
    assert_eq!(result_run_array.len(), 12);

    // Second input's run ends are offset by 7.
    let run_ends_values = result_run_array.run_ends().values();
    assert_eq!(&[2, 4, 7, 10, 12], run_ends_values);

    let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]);
    let actual = result_run_array
        .values()
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    assert_eq!(actual.len(), expected.len());
    assert_eq!(actual.null_count(), expected.null_count());
    // Compare the arrays rather than their raw value buffers: this checks
    // where the nulls are and does not depend on the unspecified contents of
    // the value slots underneath them.
    assert_eq!(actual, &expected);
}
1793
/// Concatenating a single run-end encoded array returns an array with the same
/// length, run ends and values.
#[test]
fn test_concat_run_array_single() {
    let array1 = RunArray::try_new(
        &Int32Array::from(vec![2, 4]),
        &Int32Array::from(vec![10, 20]),
    )
    .unwrap();

    let result = concat(&[&array1]).unwrap();
    let result_run_array = result.as_run::<Int32Type>();

    assert_eq!(result_run_array.len(), 4);

    let run_ends = result_run_array.run_ends().values();
    assert_eq!(&[2, 4], run_ends);

    let values = result_run_array.values().as_primitive::<Int32Type>();
    assert_eq!(&[10, 20], values.values());
}
1823
/// Three run-end encoded inputs: run ends of each later input are shifted by
/// the combined length of all inputs before it.
#[test]
fn test_concat_run_array_with_3_arrays() {
    // Builds an Int32 run array from plain run ends and values.
    let run_array = |run_ends: Vec<i32>, values: Vec<i32>| {
        RunArray::try_new(&Int32Array::from(run_ends), &Int32Array::from(values)).unwrap()
    };
    let array1 = run_array(vec![2, 4], vec![10, 20]);
    let array2 = run_array(vec![1, 4], vec![30, 40]);
    let array3 = run_array(vec![1, 4], vec![50, 60]);

    let result = concat(&[&array1, &array2, &array3]).unwrap();
    let result_run_array = result.as_run::<Int32Type>();

    // 4 + 4 + 4 logical elements.
    assert_eq!(result_run_array.len(), 12);

    // Offsets applied: +0, +4, +8.
    let run_ends = result_run_array.run_ends().values();
    assert_eq!(run_ends.len(), 6);
    assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends);

    let values = result_run_array.values().as_primitive::<Int32Type>();
    assert_eq!(values.len(), 6);
    assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
}
1857}