1use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
34use arrow_array::builder::{
35 BooleanBuilder, GenericByteBuilder, GenericByteViewBuilder, PrimitiveBuilder,
36};
37use arrow_array::cast::AsArray;
38use arrow_array::types::*;
39use arrow_array::*;
40use arrow_buffer::{
41 ArrowNativeType, BooleanBufferBuilder, MutableBuffer, NullBuffer, OffsetBuffer, ScalarBuffer,
42};
43use arrow_data::ArrayDataBuilder;
44use arrow_data::transform::{Capacities, MutableArrayData};
45use arrow_schema::{ArrowError, DataType, FieldRef, Fields, SchemaRef};
46use std::{collections::HashSet, ops::Add, sync::Arc};
47
48fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
49 let mut item_capacity = 0;
50 let mut bytes_capacity = 0;
51 for array in arrays {
52 let a = array.as_bytes::<T>();
53
54 let offsets = a.value_offsets();
56 bytes_capacity += offsets[offsets.len() - 1].as_usize() - offsets[0].as_usize();
57 item_capacity += a.len()
58 }
59
60 Capacities::Binary(item_capacity, Some(bytes_capacity))
61}
62
63fn fixed_size_list_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
64 if let DataType::FixedSizeList(f, _) = data_type {
65 let item_capacity = arrays.iter().map(|a| a.len()).sum();
66 let child_data_type = f.data_type();
67 match child_data_type {
68 DataType::Utf8
71 | DataType::LargeUtf8
72 | DataType::Binary
73 | DataType::LargeBinary
74 | DataType::FixedSizeList(_, _) => {
75 let values: Vec<&dyn arrow_array::Array> = arrays
76 .iter()
77 .map(|a| a.as_fixed_size_list().values().as_ref())
78 .collect();
79 Capacities::List(
80 item_capacity,
81 Some(Box::new(get_capacity(&values, child_data_type))),
82 )
83 }
84 _ => Capacities::Array(item_capacity),
85 }
86 } else {
87 unreachable!("illegal data type for fixed size list")
88 }
89}
90
91fn concat_byte_view<B: ByteViewType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
92 let mut builder =
93 GenericByteViewBuilder::<B>::with_capacity(arrays.iter().map(|a| a.len()).sum());
94 for &array in arrays.iter() {
95 builder.append_array(array.as_byte_view());
96 }
97 Ok(Arc::new(builder.finish()))
98}
99
100fn concat_dictionaries<K: ArrowDictionaryKeyType>(
101 arrays: &[&dyn Array],
102) -> Result<ArrayRef, ArrowError> {
103 let mut output_len = 0;
104 let dictionaries: Vec<_> = arrays
105 .iter()
106 .map(|x| x.as_dictionary::<K>())
107 .inspect(|d| output_len += d.len())
108 .collect();
109
110 if !should_merge_dictionary_values::<K>(&dictionaries, output_len).0 {
111 return concat_fallback(arrays, Capacities::Array(output_len));
112 }
113
114 let merged = merge_dictionary_values(&dictionaries, None)?;
115
116 let mut key_values = Vec::with_capacity(output_len);
118
119 let mut has_nulls = false;
120 for (d, mapping) in dictionaries.iter().zip(merged.key_mappings) {
121 has_nulls |= d.null_count() != 0;
122 for key in d.keys().values() {
123 key_values.push(mapping.get(key.as_usize()).copied().unwrap_or_default())
125 }
126 }
127
128 let nulls = has_nulls.then(|| {
129 let mut nulls = BooleanBufferBuilder::new(output_len);
130 for d in &dictionaries {
131 match d.nulls() {
132 Some(n) => nulls.append_buffer(n.inner()),
133 None => nulls.append_n(d.len(), true),
134 }
135 }
136 NullBuffer::new(nulls.finish())
137 });
138
139 let keys = PrimitiveArray::<K>::try_new(key_values.into(), nulls)?;
140 assert_eq!(keys.len(), output_len);
142
143 let array = unsafe { DictionaryArray::new_unchecked(keys, merged.values) };
144 Ok(Arc::new(array))
145}
146
147fn concat_lists<OffsetSize: OffsetSizeTrait>(
148 arrays: &[&dyn Array],
149 field: &FieldRef,
150) -> Result<ArrayRef, ArrowError> {
151 let mut output_len = 0;
152 let mut list_has_nulls = false;
153 let mut list_has_slices = false;
154
155 let lists = arrays
156 .iter()
157 .map(|x| x.as_list::<OffsetSize>())
158 .inspect(|l| {
159 output_len += l.len();
160 list_has_nulls |= l.null_count() != 0;
161 list_has_slices |= l.offsets()[0] > OffsetSize::zero()
162 || l.offsets().last().unwrap().as_usize() < l.values().len();
163 })
164 .collect::<Vec<_>>();
165
166 let lists_nulls = list_has_nulls.then(|| {
167 let mut nulls = BooleanBufferBuilder::new(output_len);
168 for l in &lists {
169 match l.nulls() {
170 Some(n) => nulls.append_buffer(n.inner()),
171 None => nulls.append_n(l.len(), true),
172 }
173 }
174 NullBuffer::new(nulls.finish())
175 });
176
177 let mut sliced_values;
180 let values: Vec<&dyn Array> = if list_has_slices {
181 sliced_values = Vec::with_capacity(lists.len());
182 for l in &lists {
183 let offsets = l.offsets();
186 let start_offset = offsets[0].as_usize();
187 let end_offset = offsets.last().unwrap().as_usize();
188 sliced_values.push(l.values().slice(start_offset, end_offset - start_offset));
189 }
190 sliced_values.iter().map(|a| a.as_ref()).collect()
191 } else {
192 lists.iter().map(|x| x.values().as_ref()).collect()
193 };
194
195 let concatenated_values = concat(values.as_slice())?;
196
197 let value_offset_buffer =
199 OffsetBuffer::<OffsetSize>::from_lengths(lists.iter().flat_map(|x| x.offsets().lengths()));
200
201 let array = GenericListArray::<OffsetSize>::try_new(
202 Arc::clone(field),
203 value_offset_buffer,
204 concatenated_values,
205 lists_nulls,
206 )?;
207
208 Ok(Arc::new(array))
209}
210
211fn concat_list_view<OffsetSize: OffsetSizeTrait>(
212 arrays: &[&dyn Array],
213 field: &FieldRef,
214) -> Result<ArrayRef, ArrowError> {
215 let mut output_len = 0;
216 let mut list_has_nulls = false;
217
218 let lists = arrays
219 .iter()
220 .map(|x| x.as_list_view::<OffsetSize>())
221 .inspect(|l| {
222 output_len += l.len();
223 list_has_nulls |= l.null_count() != 0;
224 })
225 .collect::<Vec<_>>();
226
227 let lists_nulls = list_has_nulls.then(|| {
228 let mut nulls = BooleanBufferBuilder::new(output_len);
229 for l in &lists {
230 match l.nulls() {
231 Some(n) => nulls.append_buffer(n.inner()),
232 None => nulls.append_n(l.len(), true),
233 }
234 }
235 NullBuffer::new(nulls.finish())
236 });
237
238 let values: Vec<&dyn Array> = lists.iter().map(|l| l.values().as_ref()).collect();
239
240 let concatenated_values = concat(values.as_slice())?;
241
242 let sizes: ScalarBuffer<OffsetSize> = lists.iter().flat_map(|x| x.sizes()).copied().collect();
243
244 let mut offsets = MutableBuffer::with_capacity(lists.iter().map(|l| l.offsets().len()).sum());
245 let mut global_offset = OffsetSize::zero();
246 for l in lists.iter() {
247 for &offset in l.offsets() {
248 offsets.push(offset + global_offset);
249 }
250
251 global_offset += OffsetSize::from_usize(l.values().len()).unwrap();
253 }
254
255 let offsets = ScalarBuffer::from(offsets);
256
257 let array = GenericListViewArray::try_new(
258 field.clone(),
259 offsets,
260 sizes,
261 concatenated_values,
262 lists_nulls,
263 )?;
264
265 Ok(Arc::new(array))
266}
267
268fn concat_primitives<T: ArrowPrimitiveType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
269 let mut builder = PrimitiveBuilder::<T>::with_capacity(arrays.iter().map(|a| a.len()).sum())
270 .with_data_type(arrays[0].data_type().clone());
271
272 for array in arrays {
273 builder.append_array(array.as_primitive());
274 }
275
276 Ok(Arc::new(builder.finish()))
277}
278
279fn concat_boolean(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
280 let mut builder = BooleanBuilder::with_capacity(arrays.iter().map(|a| a.len()).sum());
281
282 for array in arrays {
283 builder.append_array(array.as_boolean());
284 }
285
286 Ok(Arc::new(builder.finish()))
287}
288
289fn concat_bytes<T: ByteArrayType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
290 let (item_capacity, bytes_capacity) = match binary_capacity::<T>(arrays) {
291 Capacities::Binary(item_capacity, Some(bytes_capacity)) => (item_capacity, bytes_capacity),
292 _ => unreachable!(),
293 };
294
295 let mut builder = GenericByteBuilder::<T>::with_capacity(item_capacity, bytes_capacity);
296
297 for array in arrays {
298 builder.append_array(array.as_bytes::<T>())?;
299 }
300
301 Ok(Arc::new(builder.finish()))
302}
303
304fn concat_structs(arrays: &[&dyn Array], fields: &Fields) -> Result<ArrayRef, ArrowError> {
305 let mut len = 0;
306 let mut has_nulls = false;
307 let structs = arrays
308 .iter()
309 .map(|a| {
310 len += a.len();
311 has_nulls |= a.null_count() > 0;
312 a.as_struct()
313 })
314 .collect::<Vec<_>>();
315
316 let nulls = has_nulls.then(|| {
317 let mut b = BooleanBufferBuilder::new(len);
318 for s in &structs {
319 match s.nulls() {
320 Some(n) => b.append_buffer(n.inner()),
321 None => b.append_n(s.len(), true),
322 }
323 }
324 NullBuffer::new(b.finish())
325 });
326
327 let column_concat_result = (0..fields.len())
328 .map(|i| {
329 let extracted_cols = structs
330 .iter()
331 .map(|s| s.column(i).as_ref())
332 .collect::<Vec<_>>();
333 concat(&extracted_cols)
334 })
335 .collect::<Result<Vec<_>, ArrowError>>()?;
336
337 Ok(Arc::new(StructArray::try_new_with_length(
338 fields.clone(),
339 column_concat_result,
340 nulls,
341 len,
342 )?))
343}
344
345fn concat_run_arrays<R: RunEndIndexType>(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError>
352where
353 R::Native: Add<Output = R::Native>,
354{
355 let run_arrays: Vec<_> = arrays
356 .iter()
357 .map(|x| x.as_run::<R>())
358 .filter(|x| !x.run_ends().is_empty())
359 .collect();
360
361 let needed_run_end_adjustments = std::iter::once(R::default_value())
363 .chain(
364 run_arrays
365 .iter()
366 .scan(R::default_value(), |acc, run_array| {
367 *acc = *acc + R::Native::from_usize(run_array.len()).unwrap();
368 Some(*acc)
369 }),
370 )
371 .collect::<Vec<_>>();
372
373 let total_len = needed_run_end_adjustments.last().unwrap().as_usize();
375
376 let run_ends_array =
377 PrimitiveArray::<R>::from_iter_values(run_arrays.iter().enumerate().flat_map(
378 move |(i, run_array)| {
379 let adjustment = needed_run_end_adjustments[i];
380 run_array
381 .run_ends()
382 .sliced_values()
383 .map(move |run_end| run_end + adjustment)
384 },
385 ));
386
387 let values_slices: Vec<ArrayRef> = run_arrays
388 .iter()
389 .map(|run_array| run_array.values_slice())
390 .collect();
391
392 let all_values = concat(&values_slices.iter().map(|x| x.as_ref()).collect::<Vec<_>>())?;
393
394 let builder = ArrayDataBuilder::new(run_arrays[0].data_type().clone())
395 .len(total_len)
396 .child_data(vec![run_ends_array.into_data(), all_values.into_data()]);
397
398 let array_data = unsafe { builder.build_unchecked() };
400 array_data.validate_data()?;
401
402 Ok(Arc::<RunArray<R>>::new(array_data.into()))
403}
404
/// Dispatch helper for `downcast_integer!`: concatenates dictionary arrays with key type
/// `$t` and returns the result from the enclosing function.
macro_rules! dict_helper {
    ($t:ty, $arrays:expr) => {
        // `concat_dictionaries` already produces an `ArrayRef`; returning it directly
        // avoids wrapping it in a second `Arc`.
        return concat_dictionaries::<$t>($arrays)
    };
}
410
/// Dispatch helper for `downcast_primitive!`: concatenates primitive arrays of type `$t`
/// and returns the result from the enclosing function.
macro_rules! primitive_concat {
    ($t:ty, $arrays:expr) => {
        // `concat_primitives` already produces an `ArrayRef`; returning it directly
        // avoids wrapping it in a second `Arc`.
        return concat_primitives::<$t>($arrays)
    };
}
416
417fn get_capacity(arrays: &[&dyn Array], data_type: &DataType) -> Capacities {
418 match data_type {
419 DataType::Utf8 => binary_capacity::<Utf8Type>(arrays),
420 DataType::LargeUtf8 => binary_capacity::<LargeUtf8Type>(arrays),
421 DataType::Binary => binary_capacity::<BinaryType>(arrays),
422 DataType::LargeBinary => binary_capacity::<LargeBinaryType>(arrays),
423 DataType::FixedSizeList(_, _) => fixed_size_list_capacity(arrays, data_type),
424 _ => Capacities::Array(arrays.iter().map(|a| a.len()).sum()),
425 }
426}
427
428pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
430 if arrays.is_empty() {
431 return Err(ArrowError::ComputeError(
432 "concat requires input of at least one array".to_string(),
433 ));
434 } else if arrays.len() == 1 {
435 let array = arrays[0];
436 return Ok(array.slice(0, array.len()));
437 }
438
439 let d = arrays[0].data_type();
440 if arrays.iter().skip(1).any(|array| array.data_type() != d) {
441 let error_message = {
443 let mut unique_data_types = HashSet::with_capacity(11);
445
446 let mut error_message =
447 format!("It is not possible to concatenate arrays of different data types ({d}");
448 unique_data_types.insert(d);
449
450 for array in arrays {
451 let is_unique = unique_data_types.insert(array.data_type());
452
453 if unique_data_types.len() == 11 {
454 error_message.push_str(", ...");
455 break;
456 }
457
458 if is_unique {
459 error_message.push_str(", ");
460 error_message.push_str(&array.data_type().to_string());
461 }
462 }
463
464 error_message.push_str(").");
465
466 error_message
467 };
468
469 return Err(ArrowError::InvalidArgumentError(error_message));
470 }
471
472 downcast_primitive! {
473 d => (primitive_concat, arrays),
474 DataType::Boolean => concat_boolean(arrays),
475 DataType::Dictionary(k, _) => {
476 downcast_integer! {
477 k.as_ref() => (dict_helper, arrays),
478 _ => unreachable!("illegal dictionary key type {k}")
479 }
480 }
481 DataType::List(field) => concat_lists::<i32>(arrays, field),
482 DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
483 DataType::ListView(field) => concat_list_view::<i32>(arrays, field),
484 DataType::LargeListView(field) => concat_list_view::<i64>(arrays, field),
485 DataType::Struct(fields) => concat_structs(arrays, fields),
486 DataType::Utf8 => concat_bytes::<Utf8Type>(arrays),
487 DataType::LargeUtf8 => concat_bytes::<LargeUtf8Type>(arrays),
488 DataType::Binary => concat_bytes::<BinaryType>(arrays),
489 DataType::LargeBinary => concat_bytes::<LargeBinaryType>(arrays),
490 DataType::RunEndEncoded(r, _) => {
491 match r.data_type() {
494 DataType::Int16 => concat_run_arrays::<Int16Type>(arrays),
495 DataType::Int32 => concat_run_arrays::<Int32Type>(arrays),
496 DataType::Int64 => concat_run_arrays::<Int64Type>(arrays),
497 _ => unreachable!("Unsupported run end index type: {r:?}"),
498 }
499 }
500 DataType::Utf8View => concat_byte_view::<StringViewType>(arrays),
501 DataType::BinaryView => concat_byte_view::<BinaryViewType>(arrays),
502 _ => {
503 let capacity = get_capacity(arrays, d);
504 concat_fallback(arrays, capacity)
505 }
506 }
507}
508
509fn concat_fallback(arrays: &[&dyn Array], capacity: Capacities) -> Result<ArrayRef, ArrowError> {
513 let array_data: Vec<_> = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
514 let array_data = array_data.iter().collect();
515 let mut mutable = MutableArrayData::with_capacities(array_data, false, capacity);
516
517 for (i, a) in arrays.iter().enumerate() {
518 mutable.extend(i, 0, a.len())
519 }
520
521 Ok(make_array(mutable.freeze()))
522}
523
524pub fn concat_batches<'a>(
531 schema: &SchemaRef,
532 input_batches: impl IntoIterator<Item = &'a RecordBatch>,
533) -> Result<RecordBatch, ArrowError> {
534 if schema.fields().is_empty() {
536 let num_rows: usize = input_batches.into_iter().map(RecordBatch::num_rows).sum();
537 let mut options = RecordBatchOptions::default();
538 options.row_count = Some(num_rows);
539 return RecordBatch::try_new_with_options(schema.clone(), vec![], &options);
540 }
541
542 let batches: Vec<&RecordBatch> = input_batches.into_iter().collect();
543 if batches.is_empty() {
544 return Ok(RecordBatch::new_empty(schema.clone()));
545 }
546 let field_num = schema.fields().len();
547 let mut arrays = Vec::with_capacity(field_num);
548 for i in 0..field_num {
549 let array = concat(
550 &batches
551 .iter()
552 .map(|batch| batch.column(i).as_ref())
553 .collect::<Vec<_>>(),
554 )?;
555 arrays.push(array);
556 }
557 RecordBatch::try_new(schema.clone(), arrays)
558}
559
560#[cfg(test)]
561mod tests {
562 use super::*;
563 use arrow_array::builder::{
564 GenericListBuilder, Int64Builder, ListViewBuilder, StringDictionaryBuilder,
565 };
566 use arrow_schema::{Field, Schema};
567 use std::fmt::Debug;
568
569 #[test]
570 fn test_concat_empty_vec() {
571 let re = concat(&[]);
572 assert!(re.is_err());
573 }
574
575 #[test]
576 fn test_concat_batches_no_columns() {
577 let schema = Arc::new(Schema::empty());
579
580 let mut options = RecordBatchOptions::default();
581 options.row_count = Some(100);
582 let batch = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap();
583 let re = concat_batches(&schema, &[batch.clone(), batch]).unwrap();
585
586 assert_eq!(re.num_rows(), 200);
587 }
588
589 #[test]
590 fn test_concat_one_element_vec() {
591 let arr = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
592 Some(-1),
593 Some(2),
594 None,
595 ])) as ArrayRef;
596 let result = concat(&[arr.as_ref()]).unwrap();
597 assert_eq!(
598 &arr, &result,
599 "concatenating single element array gives back the same result"
600 );
601 }
602
603 #[test]
604 fn test_concat_incompatible_datatypes() {
605 let re = concat(&[
606 &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
607 &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
609 &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
610 &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
612 ]);
613
614 assert_eq!(
615 re.unwrap_err().to_string(),
616 "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32)."
617 );
618 }
619
620 #[test]
621 fn test_concat_10_incompatible_datatypes_should_include_all_of_them() {
622 let re = concat(&[
623 &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
624 &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
626 &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
627 &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
629 &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
630 &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
631 &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
632 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
633 &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
634 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
636 &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
637 &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
638 ]);
639
640 assert_eq!(
641 re.unwrap_err().to_string(),
642 "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32)."
643 );
644 }
645
646 #[test]
647 fn test_concat_11_incompatible_datatypes_should_only_include_10() {
648 let re = concat(&[
649 &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
650 &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
652 &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
653 &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
655 &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
656 &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
657 &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
658 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
659 &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
660 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
662 &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
663 &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
664 &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
665 ]);
666
667 assert_eq!(
668 re.unwrap_err().to_string(),
669 "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
670 );
671 }
672
673 #[test]
674 fn test_concat_13_incompatible_datatypes_should_not_include_all_of_them() {
675 let re = concat(&[
676 &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(2), None]),
677 &StringArray::from(vec![Some("hello"), Some("bar"), Some("world")]),
679 &StringArray::from(vec![Some("hey"), Some(""), Some("you")]),
680 &PrimitiveArray::<Int32Type>::from(vec![Some(-1), Some(2), None]),
682 &PrimitiveArray::<Int8Type>::from(vec![Some(-1), Some(2), None]),
683 &PrimitiveArray::<Int16Type>::from(vec![Some(-1), Some(2), None]),
684 &PrimitiveArray::<UInt8Type>::from(vec![Some(1), Some(2), None]),
685 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
686 &PrimitiveArray::<UInt32Type>::from(vec![Some(1), Some(2), None]),
687 &PrimitiveArray::<UInt16Type>::from(vec![Some(1), Some(2), None]),
689 &PrimitiveArray::<UInt64Type>::from(vec![Some(1), Some(2), None]),
690 &PrimitiveArray::<Float32Type>::from(vec![Some(1.0), Some(2.0), None]),
691 &PrimitiveArray::<Float64Type>::from(vec![Some(1.0), Some(2.0), None]),
692 &PrimitiveArray::<Float16Type>::new_null(3),
693 &BooleanArray::from(vec![Some(true), Some(false), None]),
694 ]);
695
696 assert_eq!(
697 re.unwrap_err().to_string(),
698 "Invalid argument error: It is not possible to concatenate arrays of different data types (Int64, Utf8, Int32, Int8, Int16, UInt8, UInt16, UInt32, UInt64, Float32, ...)."
699 );
700 }
701
702 #[test]
703 fn test_concat_string_arrays() {
704 let arr = concat(&[
705 &StringArray::from(vec!["hello", "world"]),
706 &StringArray::from(vec!["2", "3", "4"]),
707 &StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
708 ])
709 .unwrap();
710
711 let expected_output = Arc::new(StringArray::from(vec![
712 Some("hello"),
713 Some("world"),
714 Some("2"),
715 Some("3"),
716 Some("4"),
717 Some("foo"),
718 Some("bar"),
719 None,
720 Some("baz"),
721 ])) as ArrayRef;
722
723 assert_eq!(&arr, &expected_output);
724 }
725
726 #[test]
727 fn test_concat_string_view_arrays() {
728 let arr = concat(&[
729 &StringViewArray::from(vec!["helloxxxxxxxxxxa", "world____________"]),
730 &StringViewArray::from(vec!["helloxxxxxxxxxxy", "3", "4"]),
731 &StringViewArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]),
732 ])
733 .unwrap();
734
735 let expected_output = Arc::new(StringViewArray::from(vec![
736 Some("helloxxxxxxxxxxa"),
737 Some("world____________"),
738 Some("helloxxxxxxxxxxy"),
739 Some("3"),
740 Some("4"),
741 Some("foo"),
742 Some("bar"),
743 None,
744 Some("baz"),
745 ])) as ArrayRef;
746
747 assert_eq!(&arr, &expected_output);
748 }
749
750 #[test]
751 fn test_concat_primitive_arrays() {
752 let arr = concat(&[
753 &PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None]),
754 &PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None]),
755 &PrimitiveArray::<Int64Type>::from(vec![Some(256), Some(512), Some(1024)]),
756 ])
757 .unwrap();
758
759 let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
760 Some(-1),
761 Some(-1),
762 Some(2),
763 None,
764 None,
765 Some(101),
766 Some(102),
767 Some(103),
768 None,
769 Some(256),
770 Some(512),
771 Some(1024),
772 ])) as ArrayRef;
773
774 assert_eq!(&arr, &expected_output);
775 }
776
777 #[test]
778 fn test_concat_primitive_array_slices() {
779 let input_1 =
780 PrimitiveArray::<Int64Type>::from(vec![Some(-1), Some(-1), Some(2), None, None])
781 .slice(1, 3);
782
783 let input_2 =
784 PrimitiveArray::<Int64Type>::from(vec![Some(101), Some(102), Some(103), None])
785 .slice(1, 3);
786 let arr = concat(&[&input_1, &input_2]).unwrap();
787
788 let expected_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
789 Some(-1),
790 Some(2),
791 None,
792 Some(102),
793 Some(103),
794 None,
795 ])) as ArrayRef;
796
797 assert_eq!(&arr, &expected_output);
798 }
799
800 #[test]
801 fn test_concat_boolean_primitive_arrays() {
802 let arr = concat(&[
803 &BooleanArray::from(vec![
804 Some(true),
805 Some(true),
806 Some(false),
807 None,
808 None,
809 Some(false),
810 ]),
811 &BooleanArray::from(vec![None, Some(false), Some(true), Some(false)]),
812 ])
813 .unwrap();
814
815 let expected_output = Arc::new(BooleanArray::from(vec![
816 Some(true),
817 Some(true),
818 Some(false),
819 None,
820 None,
821 Some(false),
822 None,
823 Some(false),
824 Some(true),
825 Some(false),
826 ])) as ArrayRef;
827
828 assert_eq!(&arr, &expected_output);
829 }
830
831 #[test]
832 fn test_concat_primitive_list_arrays() {
833 let list1 = [
834 Some(vec![Some(-1), Some(-1), Some(2), None, None]),
835 Some(vec![]),
836 None,
837 Some(vec![Some(10)]),
838 ];
839 let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
840
841 let list2 = [
842 None,
843 Some(vec![Some(100), None, Some(101)]),
844 Some(vec![Some(102)]),
845 ];
846 let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
847
848 let list3 = [Some(vec![Some(1000), Some(1001)])];
849 let list3_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone());
850
851 let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
852
853 let expected = list1.into_iter().chain(list2).chain(list3);
854 let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
855
856 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
857 }
858
859 #[test]
860 fn test_concat_primitive_list_arrays_slices() {
861 let list1 = [
862 Some(vec![Some(-1), Some(-1), Some(2), None, None]),
863 Some(vec![]), None, Some(vec![Some(10)]),
866 ];
867 let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
868 let list1_array = list1_array.slice(1, 2);
869 let list1_values = list1.into_iter().skip(1).take(2);
870
871 let list2 = [
872 None,
873 Some(vec![Some(100), None, Some(101)]),
874 Some(vec![Some(102)]),
875 ];
876 let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
877
878 assert!(list1_array.offsets()[0].as_usize() > 0);
880 let array_result = concat(&[&list1_array, &list2_array]).unwrap();
881
882 let expected = list1_values.chain(list2);
883 let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
884
885 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
886 }
887
888 #[test]
889 fn test_concat_primitive_list_arrays_sliced_lengths() {
890 let list1 = [
891 Some(vec![Some(-1), Some(-1), Some(2), None, None]), Some(vec![]), None, Some(vec![Some(10)]),
895 ];
896 let list1_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone());
897 let list1_array = list1_array.slice(0, 3); let list1_values = list1.into_iter().take(3);
899
900 let list2 = [
901 None,
902 Some(vec![Some(100), None, Some(101)]),
903 Some(vec![Some(102)]),
904 ];
905 let list2_array = ListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone());
906
907 assert_eq!(list1_array.offsets()[0].as_usize(), 0);
910 assert!(list1_array.offsets().last().unwrap().as_usize() < list1_array.values().len());
911 let array_result = concat(&[&list1_array, &list2_array]).unwrap();
912
913 let expected = list1_values.chain(list2);
914 let array_expected = ListArray::from_iter_primitive::<Int64Type, _, _>(expected);
915
916 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
917 }
918
919 #[test]
920 fn test_concat_primitive_fixed_size_list_arrays() {
921 let list1 = [
922 Some(vec![Some(-1), None]),
923 None,
924 Some(vec![Some(10), Some(20)]),
925 ];
926 let list1_array =
927 FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list1.clone(), 2);
928
929 let list2 = [
930 None,
931 Some(vec![Some(100), None]),
932 Some(vec![Some(102), Some(103)]),
933 ];
934 let list2_array =
935 FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list2.clone(), 2);
936
937 let list3 = [Some(vec![Some(1000), Some(1001)])];
938 let list3_array =
939 FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(list3.clone(), 2);
940
941 let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
942
943 let expected = list1.into_iter().chain(list2).chain(list3);
944 let array_expected =
945 FixedSizeListArray::from_iter_primitive::<Int64Type, _, _>(expected, 2);
946
947 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
948 }
949
950 #[test]
951 fn test_concat_list_view_arrays() {
952 let list1 = [
953 Some(vec![Some(-1), None]),
954 None,
955 Some(vec![Some(10), Some(20)]),
956 ];
957 let mut list1_array = ListViewBuilder::new(Int64Builder::new());
958 for v in list1.iter() {
959 list1_array.append_option(v.clone());
960 }
961 let list1_array = list1_array.finish();
962
963 let list2 = [
964 None,
965 Some(vec![Some(100), None]),
966 Some(vec![Some(102), Some(103)]),
967 ];
968 let mut list2_array = ListViewBuilder::new(Int64Builder::new());
969 for v in list2.iter() {
970 list2_array.append_option(v.clone());
971 }
972 let list2_array = list2_array.finish();
973
974 let list3 = [Some(vec![Some(1000), Some(1001)])];
975 let mut list3_array = ListViewBuilder::new(Int64Builder::new());
976 for v in list3.iter() {
977 list3_array.append_option(v.clone());
978 }
979 let list3_array = list3_array.finish();
980
981 let array_result = concat(&[&list1_array, &list2_array, &list3_array]).unwrap();
982
983 let expected: Vec<_> = list1.into_iter().chain(list2).chain(list3).collect();
984 let mut array_expected = ListViewBuilder::new(Int64Builder::new());
985 for v in expected.iter() {
986 array_expected.append_option(v.clone());
987 }
988 let array_expected = array_expected.finish();
989
990 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
991 }
992
993 #[test]
994 fn test_concat_sliced_list_view_arrays() {
995 let list1 = [
996 Some(vec![Some(-1), None]),
997 None,
998 Some(vec![Some(10), Some(20)]),
999 ];
1000 let mut list1_array = ListViewBuilder::new(Int64Builder::new());
1001 for v in list1.iter() {
1002 list1_array.append_option(v.clone());
1003 }
1004 let list1_array = list1_array.finish();
1005
1006 let list2 = [
1007 None,
1008 Some(vec![Some(100), None]),
1009 Some(vec![Some(102), Some(103)]),
1010 ];
1011 let mut list2_array = ListViewBuilder::new(Int64Builder::new());
1012 for v in list2.iter() {
1013 list2_array.append_option(v.clone());
1014 }
1015 let list2_array = list2_array.finish();
1016
1017 let list3 = [Some(vec![Some(1000), Some(1001)])];
1018 let mut list3_array = ListViewBuilder::new(Int64Builder::new());
1019 for v in list3.iter() {
1020 list3_array.append_option(v.clone());
1021 }
1022 let list3_array = list3_array.finish();
1023
1024 let array_result = concat(&[
1027 &list1_array.slice(1, 2),
1028 &list2_array.slice(1, 2),
1029 &list3_array.slice(0, 1),
1030 ])
1031 .unwrap();
1032
1033 let expected: Vec<_> = vec![
1034 None,
1035 Some(vec![Some(10), Some(20)]),
1036 Some(vec![Some(100), None]),
1037 Some(vec![Some(102), Some(103)]),
1038 Some(vec![Some(1000), Some(1001)]),
1039 ];
1040 let mut array_expected = ListViewBuilder::new(Int64Builder::new());
1041 for v in expected.iter() {
1042 array_expected.append_option(v.clone());
1043 }
1044 let array_expected = array_expected.finish();
1045
1046 assert_eq!(array_result.as_ref(), &array_expected as &dyn Array);
1047 }
1048
1049 #[test]
1050 fn test_concat_struct_arrays() {
1051 let field = Arc::new(Field::new("field", DataType::Int64, true));
1052 let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1053 Some(-1),
1054 Some(-1),
1055 Some(2),
1056 None,
1057 None,
1058 ]));
1059 let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1060
1061 let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1062 Some(101),
1063 Some(102),
1064 Some(103),
1065 None,
1066 ]));
1067 let input_struct_2 = StructArray::from(vec![(field.clone(), input_primitive_2)]);
1068
1069 let input_primitive_3: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1070 Some(256),
1071 Some(512),
1072 Some(1024),
1073 ]));
1074 let input_struct_3 = StructArray::from(vec![(field, input_primitive_3)]);
1075
1076 let arr = concat(&[&input_struct_1, &input_struct_2, &input_struct_3]).unwrap();
1077
1078 let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1079 Some(-1),
1080 Some(-1),
1081 Some(2),
1082 None,
1083 None,
1084 Some(101),
1085 Some(102),
1086 Some(103),
1087 None,
1088 Some(256),
1089 Some(512),
1090 Some(1024),
1091 ])) as ArrayRef;
1092
1093 let actual_primitive = arr
1094 .as_any()
1095 .downcast_ref::<StructArray>()
1096 .unwrap()
1097 .column(0);
1098 assert_eq!(actual_primitive, &expected_primitive_output);
1099 }
1100
1101 #[test]
1102 fn test_concat_struct_array_slices() {
1103 let field = Arc::new(Field::new("field", DataType::Int64, true));
1104 let input_primitive_1: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1105 Some(-1),
1106 Some(-1),
1107 Some(2),
1108 None,
1109 None,
1110 ]));
1111 let input_struct_1 = StructArray::from(vec![(field.clone(), input_primitive_1)]);
1112
1113 let input_primitive_2: ArrayRef = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1114 Some(101),
1115 Some(102),
1116 Some(103),
1117 None,
1118 ]));
1119 let input_struct_2 = StructArray::from(vec![(field, input_primitive_2)]);
1120
1121 let arr = concat(&[&input_struct_1.slice(1, 3), &input_struct_2.slice(1, 2)]).unwrap();
1122
1123 let expected_primitive_output = Arc::new(PrimitiveArray::<Int64Type>::from(vec![
1124 Some(-1),
1125 Some(2),
1126 None,
1127 Some(102),
1128 Some(103),
1129 ])) as ArrayRef;
1130
1131 let actual_primitive = arr
1132 .as_any()
1133 .downcast_ref::<StructArray>()
1134 .unwrap()
1135 .column(0);
1136 assert_eq!(actual_primitive, &expected_primitive_output);
1137 }
1138
1139 #[test]
1140 fn test_concat_struct_arrays_no_nulls() {
1141 let input_1a = vec![1, 2, 3];
1142 let input_1b = vec!["one", "two", "three"];
1143 let input_2a = vec![4, 5, 6, 7];
1144 let input_2b = vec!["four", "five", "six", "seven"];
1145
1146 let struct_from_primitives = |ints: Vec<i64>, strings: Vec<&str>| {
1147 StructArray::try_from(vec![
1148 ("ints", Arc::new(Int64Array::from(ints)) as _),
1149 ("strings", Arc::new(StringArray::from(strings)) as _),
1150 ])
1151 };
1152
1153 let expected_output = struct_from_primitives(
1154 [input_1a.clone(), input_2a.clone()].concat(),
1155 [input_1b.clone(), input_2b.clone()].concat(),
1156 )
1157 .unwrap();
1158
1159 let input_1 = struct_from_primitives(input_1a, input_1b).unwrap();
1160 let input_2 = struct_from_primitives(input_2a, input_2b).unwrap();
1161
1162 let arr = concat(&[&input_1, &input_2]).unwrap();
1163 let struct_result = arr.as_struct();
1164
1165 assert_eq!(struct_result, &expected_output);
1166 assert_eq!(arr.null_count(), 0);
1167 }
1168
1169 #[test]
1170 fn test_concat_struct_no_fields() {
1171 let input_1 = StructArray::new_empty_fields(10, None);
1172 let input_2 = StructArray::new_empty_fields(10, None);
1173 let arr = concat(&[&input_1, &input_2]).unwrap();
1174
1175 assert_eq!(arr.len(), 20);
1176 assert_eq!(arr.null_count(), 0);
1177
1178 let input1_valid = StructArray::new_empty_fields(10, Some(NullBuffer::new_valid(10)));
1179 let input2_null = StructArray::new_empty_fields(10, Some(NullBuffer::new_null(10)));
1180 let arr = concat(&[&input1_valid, &input2_null]).unwrap();
1181
1182 assert_eq!(arr.len(), 20);
1183 assert_eq!(arr.null_count(), 10);
1184 }
1185
1186 #[test]
1187 fn test_string_array_slices() {
1188 let input_1 = StringArray::from(vec!["hello", "A", "B", "C"]);
1189 let input_2 = StringArray::from(vec!["world", "D", "E", "Z"]);
1190
1191 let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1192
1193 let expected_output = StringArray::from(vec!["A", "B", "C", "D", "E"]);
1194
1195 let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1196 assert_eq!(actual_output, &expected_output);
1197 }
1198
1199 #[test]
1200 fn test_string_array_with_null_slices() {
1201 let input_1 = StringArray::from(vec![Some("hello"), None, Some("A"), Some("C")]);
1202 let input_2 = StringArray::from(vec![None, Some("world"), Some("D"), None]);
1203
1204 let arr = concat(&[&input_1.slice(1, 3), &input_2.slice(1, 2)]).unwrap();
1205
1206 let expected_output =
1207 StringArray::from(vec![None, Some("A"), Some("C"), Some("world"), Some("D")]);
1208
1209 let actual_output = arr.as_any().downcast_ref::<StringArray>().unwrap();
1210 assert_eq!(actual_output, &expected_output);
1211 }
1212
1213 fn collect_string_dictionary(array: &DictionaryArray<Int32Type>) -> Vec<Option<&str>> {
1214 let concrete = array.downcast_dict::<StringArray>().unwrap();
1215 concrete.into_iter().collect()
1216 }
1217
1218 #[test]
1219 fn test_string_dictionary_array() {
1220 let input_1: DictionaryArray<Int32Type> = vec!["hello", "A", "B", "hello", "hello", "C"]
1221 .into_iter()
1222 .collect();
1223 let input_2: DictionaryArray<Int32Type> = vec!["hello", "E", "E", "hello", "F", "E"]
1224 .into_iter()
1225 .collect();
1226
1227 let expected: Vec<_> = vec![
1228 "hello", "A", "B", "hello", "hello", "C", "hello", "E", "E", "hello", "F", "E",
1229 ]
1230 .into_iter()
1231 .map(Some)
1232 .collect();
1233
1234 let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1235 let dictionary = concat.as_dictionary::<Int32Type>();
1236 let actual = collect_string_dictionary(dictionary);
1237 assert_eq!(actual, expected);
1238
1239 assert_eq!(
1241 dictionary.values().len(),
1242 input_1.values().len() + input_2.values().len(),
1243 )
1244 }
1245
1246 #[test]
1247 fn test_string_dictionary_array_nulls() {
1248 let input_1: DictionaryArray<Int32Type> = vec![Some("foo"), Some("bar"), None, Some("fiz")]
1249 .into_iter()
1250 .collect();
1251 let input_2: DictionaryArray<Int32Type> = vec![None].into_iter().collect();
1252 let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1253
1254 let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1255 let dictionary = concat.as_dictionary::<Int32Type>();
1256 let actual = collect_string_dictionary(dictionary);
1257 assert_eq!(actual, expected);
1258
1259 assert_eq!(
1261 dictionary.values().len(),
1262 input_1.values().len() + input_2.values().len(),
1263 )
1264 }
1265
1266 #[test]
1267 fn test_string_dictionary_array_nulls_in_values() {
1268 let input_1_keys = Int32Array::from_iter_values([0, 2, 1, 3]);
1269 let input_1_values = StringArray::from(vec![Some("foo"), None, Some("bar"), Some("fiz")]);
1270 let input_1 = DictionaryArray::new(input_1_keys, Arc::new(input_1_values));
1271
1272 let input_2_keys = Int32Array::from_iter_values([0]);
1273 let input_2_values = StringArray::from(vec![None, Some("hello")]);
1274 let input_2 = DictionaryArray::new(input_2_keys, Arc::new(input_2_values));
1275
1276 let expected = vec![Some("foo"), Some("bar"), None, Some("fiz"), None];
1277
1278 let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1279 let dictionary = concat.as_dictionary::<Int32Type>();
1280 let actual = collect_string_dictionary(dictionary);
1281 assert_eq!(actual, expected);
1282 }
1283
1284 #[test]
1285 fn test_string_dictionary_merge() {
1286 let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1287 for i in 0..20 {
1288 builder.append(i.to_string()).unwrap();
1289 }
1290 let input_1 = builder.finish();
1291
1292 let mut builder = StringDictionaryBuilder::<Int32Type>::new();
1293 for i in 0..30 {
1294 builder.append(i.to_string()).unwrap();
1295 }
1296 let input_2 = builder.finish();
1297
1298 let expected: Vec<_> = (0..20).chain(0..30).map(|x| x.to_string()).collect();
1299 let expected: Vec<_> = expected.iter().map(|x| Some(x.as_str())).collect();
1300
1301 let concat = concat(&[&input_1 as _, &input_2 as _]).unwrap();
1302 let dictionary = concat.as_dictionary::<Int32Type>();
1303 let actual = collect_string_dictionary(dictionary);
1304 assert_eq!(actual, expected);
1305
1306 let values_len = dictionary.values().len();
1309 assert!((30..40).contains(&values_len), "{values_len}")
1310 }
1311
1312 #[test]
1313 fn test_primitive_dictionary_merge() {
1314 let keys = vec![1; 5];
1316 let values = (10..20).collect::<Vec<_>>();
1317 let dict = DictionaryArray::new(
1318 Int8Array::from(keys.clone()),
1319 Arc::new(Int32Array::from(values.clone())),
1320 );
1321 let other = DictionaryArray::new(
1322 Int8Array::from(keys.clone()),
1323 Arc::new(Int32Array::from(values.clone())),
1324 );
1325
1326 let result_same_dictionary = concat(&[&dict, &dict]).unwrap();
1327 assert!(
1331 dict.values().to_data().ptr_eq(
1332 &result_same_dictionary
1333 .as_dictionary::<Int8Type>()
1334 .values()
1335 .to_data()
1336 )
1337 );
1338 assert_eq!(
1339 result_same_dictionary
1340 .as_dictionary::<Int8Type>()
1341 .values()
1342 .len(),
1343 values.len(),
1344 );
1345
1346 let result_cloned_dictionary = concat(&[&dict, &other]).unwrap();
1347 assert_eq!(
1349 result_cloned_dictionary
1350 .as_dictionary::<Int8Type>()
1351 .values()
1352 .len(),
1353 1
1354 );
1355 }
1356
1357 #[test]
1358 fn test_concat_string_sizes() {
1359 let a: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1360 let b: LargeStringArray = ((0..150).map(|_| Some("foo"))).collect();
1361 let c = LargeStringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]);
1362 let arr = concat(&[&a, &b, &c]).unwrap();
1369 assert_eq!(arr.to_data().buffers()[1].capacity(), 909);
1370 }
1371
1372 #[test]
1373 fn test_dictionary_concat_reuse() {
1374 let array: DictionaryArray<Int8Type> = vec!["a", "a", "b", "c"].into_iter().collect();
1375 let copy: DictionaryArray<Int8Type> = array.clone();
1376
1377 assert_eq!(
1379 array.values(),
1380 &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef)
1381 );
1382 assert_eq!(array.keys(), &Int8Array::from(vec![0, 0, 1, 2]));
1383
1384 let combined = concat(&[© as _, &array as _]).unwrap();
1386 let combined = combined.as_dictionary::<Int8Type>();
1387
1388 assert_eq!(
1389 combined.values(),
1390 &(Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef),
1391 "Actual: {combined:#?}"
1392 );
1393
1394 assert_eq!(
1395 combined.keys(),
1396 &Int8Array::from(vec![0, 0, 1, 2, 0, 0, 1, 2])
1397 );
1398
1399 assert!(
1401 array
1402 .values()
1403 .to_data()
1404 .ptr_eq(&combined.values().to_data())
1405 );
1406 assert!(copy.values().to_data().ptr_eq(&combined.values().to_data()));
1407
1408 let new: DictionaryArray<Int8Type> = vec!["d"].into_iter().collect();
1409 let combined = concat(&[© as _, &array as _, &new as _]).unwrap();
1410 let com = combined.as_dictionary::<Int8Type>();
1411
1412 assert!(!array.values().to_data().ptr_eq(&com.values().to_data()));
1414 assert!(!copy.values().to_data().ptr_eq(&com.values().to_data()));
1415 assert!(!new.values().to_data().ptr_eq(&com.values().to_data()));
1416 }
1417
1418 #[test]
1419 fn concat_record_batches() {
1420 let schema = Arc::new(Schema::new(vec![
1421 Field::new("a", DataType::Int32, false),
1422 Field::new("b", DataType::Utf8, false),
1423 ]));
1424 let batch1 = RecordBatch::try_new(
1425 schema.clone(),
1426 vec![
1427 Arc::new(Int32Array::from(vec![1, 2])),
1428 Arc::new(StringArray::from(vec!["a", "b"])),
1429 ],
1430 )
1431 .unwrap();
1432 let batch2 = RecordBatch::try_new(
1433 schema.clone(),
1434 vec![
1435 Arc::new(Int32Array::from(vec![3, 4])),
1436 Arc::new(StringArray::from(vec!["c", "d"])),
1437 ],
1438 )
1439 .unwrap();
1440 let new_batch = concat_batches(&schema, [&batch1, &batch2]).unwrap();
1441 assert_eq!(new_batch.schema().as_ref(), schema.as_ref());
1442 assert_eq!(2, new_batch.num_columns());
1443 assert_eq!(4, new_batch.num_rows());
1444 let new_batch_owned = concat_batches(&schema, &[batch1, batch2]).unwrap();
1445 assert_eq!(new_batch_owned.schema().as_ref(), schema.as_ref());
1446 assert_eq!(2, new_batch_owned.num_columns());
1447 assert_eq!(4, new_batch_owned.num_rows());
1448 }
1449
1450 #[test]
1451 fn concat_empty_record_batch() {
1452 let schema = Arc::new(Schema::new(vec![
1453 Field::new("a", DataType::Int32, false),
1454 Field::new("b", DataType::Utf8, false),
1455 ]));
1456 let batch = concat_batches(&schema, []).unwrap();
1457 assert_eq!(batch.schema().as_ref(), schema.as_ref());
1458 assert_eq!(0, batch.num_rows());
1459 }
1460
1461 #[test]
1462 fn concat_record_batches_of_different_schemas_but_compatible_data() {
1463 let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1464 let schema2 = Arc::new(Schema::new(vec![Field::new("c", DataType::Int32, false)]));
1466 let batch1 = RecordBatch::try_new(
1467 schema1.clone(),
1468 vec![Arc::new(Int32Array::from(vec![1, 2]))],
1469 )
1470 .unwrap();
1471 let batch2 =
1472 RecordBatch::try_new(schema2, vec![Arc::new(Int32Array::from(vec![3, 4]))]).unwrap();
1473 let batch = concat_batches(&schema1, [&batch1, &batch2]).unwrap();
1475 assert_eq!(batch.schema().as_ref(), schema1.as_ref());
1476 assert_eq!(4, batch.num_rows());
1477 }
1478
1479 #[test]
1480 fn concat_record_batches_of_different_schemas_incompatible_data() {
1481 let schema1 = Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)]));
1482 let schema2 = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
1484 let batch1 = RecordBatch::try_new(
1485 schema1.clone(),
1486 vec![Arc::new(Int32Array::from(vec![1, 2]))],
1487 )
1488 .unwrap();
1489 let batch2 = RecordBatch::try_new(
1490 schema2,
1491 vec![Arc::new(StringArray::from(vec!["foo", "bar"]))],
1492 )
1493 .unwrap();
1494
1495 let error = concat_batches(&schema1, [&batch1, &batch2]).unwrap_err();
1496 assert_eq!(
1497 error.to_string(),
1498 "Invalid argument error: It is not possible to concatenate arrays of different data types (Int32, Utf8)."
1499 );
1500 }
1501
1502 #[test]
1503 fn concat_capacity() {
1504 let a = Int32Array::from_iter_values(0..100);
1505 let b = Int32Array::from_iter_values(10..20);
1506 let a = concat(&[&a, &b]).unwrap();
1507 let data = a.to_data();
1508 assert_eq!(data.buffers()[0].len(), 440);
1509 assert_eq!(data.buffers()[0].capacity(), 440);
1510
1511 let a = concat(&[&a.slice(10, 20), &b]).unwrap();
1512 let data = a.to_data();
1513 assert_eq!(data.buffers()[0].len(), 120);
1514 assert_eq!(data.buffers()[0].capacity(), 120);
1515
1516 let a = StringArray::from_iter_values(std::iter::repeat_n("foo", 100));
1517 let b = StringArray::from(vec!["bingo", "bongo", "lorem", ""]);
1518
1519 let a = concat(&[&a, &b]).unwrap();
1520 let data = a.to_data();
1521 assert_eq!(data.buffers()[0].len(), 420);
1523 assert_eq!(data.buffers()[0].capacity(), 420);
1524
1525 assert_eq!(data.buffers()[1].len(), 315);
1527 assert_eq!(data.buffers()[1].capacity(), 315);
1528
1529 let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1530 let data = a.to_data();
1531 assert_eq!(data.buffers()[0].len(), 180);
1533 assert_eq!(data.buffers()[0].capacity(), 180);
1534
1535 assert_eq!(data.buffers()[1].len(), 135);
1537 assert_eq!(data.buffers()[1].capacity(), 135);
1538
1539 let a = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"foo", 100));
1540 let b = LargeBinaryArray::from_iter_values(std::iter::repeat_n(b"cupcakes", 10));
1541
1542 let a = concat(&[&a, &b]).unwrap();
1543 let data = a.to_data();
1544 assert_eq!(data.buffers()[0].len(), 888);
1546 assert_eq!(data.buffers()[0].capacity(), 888);
1547
1548 assert_eq!(data.buffers()[1].len(), 380);
1550 assert_eq!(data.buffers()[1].capacity(), 380);
1551
1552 let a = concat(&[&a.slice(10, 40), &b]).unwrap();
1553 let data = a.to_data();
1554 assert_eq!(data.buffers()[0].len(), 408);
1556 assert_eq!(data.buffers()[0].capacity(), 408);
1557
1558 assert_eq!(data.buffers()[1].len(), 200);
1560 assert_eq!(data.buffers()[1].capacity(), 200);
1561 }
1562
1563 #[test]
1564 fn concat_sparse_nulls() {
1565 let values = StringArray::from_iter_values((0..100).map(|x| x.to_string()));
1566 let keys = Int32Array::from(vec![1; 10]);
1567 let dict_a = DictionaryArray::new(keys, Arc::new(values));
1568 let values = StringArray::new_null(0);
1569 let keys = Int32Array::new_null(10);
1570 let dict_b = DictionaryArray::new(keys, Arc::new(values));
1571 let array = concat(&[&dict_a, &dict_b]).unwrap();
1572 assert_eq!(array.null_count(), 10);
1573 assert_eq!(array.logical_null_count(), 10);
1574 }
1575
1576 #[test]
1577 fn concat_dictionary_list_array_simple() {
1578 let scalars = [
1579 create_single_row_list_of_dict(vec![Some("a")]),
1580 create_single_row_list_of_dict(vec![Some("a")]),
1581 create_single_row_list_of_dict(vec![Some("b")]),
1582 ];
1583
1584 let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
1585 let concat_res = concat(arrays.as_slice()).unwrap();
1586
1587 let expected_list = create_list_of_dict(vec![
1588 Some(vec![Some("a")]),
1590 Some(vec![Some("a")]),
1591 Some(vec![Some("b")]),
1592 ]);
1593
1594 let list = concat_res.as_list::<i32>();
1595
1596 list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
1598 assert_eq!(a, b);
1599 });
1600
1601 assert_dictionary_has_unique_values::<_, StringArray>(
1602 list.values().as_dictionary::<Int32Type>(),
1603 );
1604 }
1605
1606 #[test]
1607 fn concat_many_dictionary_list_arrays() {
1608 let number_of_unique_values = 8;
1609 let scalars = (0..80000)
1610 .map(|i| {
1611 create_single_row_list_of_dict(vec![Some(
1612 (i % number_of_unique_values).to_string(),
1613 )])
1614 })
1615 .collect::<Vec<_>>();
1616
1617 let arrays = scalars.iter().map(|a| a as &dyn Array).collect::<Vec<_>>();
1618 let concat_res = concat(arrays.as_slice()).unwrap();
1619
1620 let expected_list = create_list_of_dict(
1621 (0..80000)
1622 .map(|i| Some(vec![Some((i % number_of_unique_values).to_string())]))
1623 .collect::<Vec<_>>(),
1624 );
1625
1626 let list = concat_res.as_list::<i32>();
1627
1628 list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
1630 assert_eq!(a, b);
1631 });
1632
1633 assert_dictionary_has_unique_values::<_, StringArray>(
1634 list.values().as_dictionary::<Int32Type>(),
1635 );
1636 }
1637
1638 fn create_single_row_list_of_dict(
1639 list_items: Vec<Option<impl AsRef<str>>>,
1640 ) -> GenericListArray<i32> {
1641 let rows = list_items.into_iter().map(Some).collect();
1642
1643 create_list_of_dict(vec![rows])
1644 }
1645
1646 fn create_list_of_dict(
1647 rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
1648 ) -> GenericListArray<i32> {
1649 let mut builder =
1650 GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());
1651
1652 for row in rows {
1653 builder.append_option(row);
1654 }
1655
1656 builder.finish()
1657 }
1658
1659 fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
1660 where
1661 K: ArrowDictionaryKeyType,
1662 V: Sync + Send + 'static,
1663 &'a V: ArrayAccessor + IntoIterator,
1664 <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
1665 <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
1666 {
1667 let dict = array.downcast_dict::<V>().unwrap();
1668 let mut values = dict.values().into_iter().collect::<Vec<_>>();
1669
1670 values.sort();
1672
1673 let mut unique_values = values.clone();
1674
1675 unique_values.dedup();
1676
1677 assert_eq!(
1678 values, unique_values,
1679 "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
1680 );
1681 }
1682
1683 #[test]
1685 fn test_concat_run_array() {
1686 let run_ends1 = Int32Array::from(vec![2, 4]);
1688 let values1 = Int32Array::from(vec![10, 20]);
1689 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1690
1691 let run_ends2 = Int32Array::from(vec![1, 4]);
1692 let values2 = Int32Array::from(vec![30, 40]);
1693 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1694
1695 let result = concat(&[&array1, &array2]).unwrap();
1697 let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1698
1699 assert_eq!(result_run_array.len(), 8); let run_ends = result_run_array.run_ends().values();
1704 assert_eq!(run_ends.len(), 4);
1705 assert_eq!(&[2, 4, 5, 8], run_ends);
1706
1707 let values = result_run_array
1709 .values()
1710 .as_any()
1711 .downcast_ref::<Int32Array>()
1712 .unwrap();
1713 assert_eq!(values.len(), 4);
1714 assert_eq!(&[10, 20, 30, 40], values.values());
1715 }
1716
1717 #[test]
1718 fn test_concat_sliced_run_array() {
1719 let run_ends1 = Int32Array::from(vec![2, 4]);
1721 let values1 = Int32Array::from(vec![10, 20]);
1722 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap(); let array1 = array1.slice(2, 2); let run_ends2 = Int32Array::from(vec![1, 4]);
1726 let values2 = Int32Array::from(vec![30, 40]);
1727 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap(); let array2 = array2.slice(1, 3); let result = concat(&[&array1, &array2]).unwrap();
1731 let result = result.as_run::<Int32Type>();
1732 let result = result.downcast::<Int32Array>().unwrap();
1733
1734 let expected = vec![20, 20, 40, 40, 40];
1735 let actual = result.into_iter().flatten().collect::<Vec<_>>();
1736 assert_eq!(expected, actual);
1737 }
1738
1739 #[test]
1740 fn test_concat_run_array_matching_first_last_value() {
1741 let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1743 let values1 = Int32Array::from(vec![10, 20, 30]);
1744 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1745
1746 let run_ends2 = Int32Array::from(vec![3, 5]);
1748 let values2 = Int32Array::from(vec![30, 40]);
1749 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1750
1751 let result = concat(&[&array1, &array2]).unwrap();
1753 let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1754
1755 assert_eq!(result_run_array.len(), 12);
1757
1758 let run_ends = result_run_array.run_ends().values();
1760 assert_eq!(&[2, 4, 7, 10, 12], run_ends);
1761
1762 assert_eq!(
1764 &[10, 20, 30, 30, 40],
1765 result_run_array
1766 .values()
1767 .as_any()
1768 .downcast_ref::<Int32Array>()
1769 .unwrap()
1770 .values()
1771 );
1772 }
1773
1774 #[test]
1775 fn test_concat_run_array_with_nulls() {
1776 let values1 = Int32Array::from(vec![Some(10), None, Some(30)]);
1778 let run_ends1 = Int32Array::from(vec![2, 4, 7]);
1779 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1780
1781 let values2 = Int32Array::from(vec![Some(30), None]);
1783 let run_ends2 = Int32Array::from(vec![3, 5]);
1784 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1785
1786 let result = concat(&[&array1, &array2]).unwrap();
1788 let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1789
1790 assert_eq!(result_run_array.len(), 12);
1792
1793 assert_eq!(result_run_array.len(), 12); let run_ends_values = result_run_array.run_ends().values();
1801 assert_eq!(&[2, 4, 7, 10, 12], run_ends_values);
1802
1803 let expected = Int32Array::from(vec![Some(10), None, Some(30), Some(30), None]);
1805 let actual = result_run_array
1806 .values()
1807 .as_any()
1808 .downcast_ref::<Int32Array>()
1809 .unwrap();
1810 assert_eq!(actual.len(), expected.len());
1811 assert_eq!(actual.null_count(), expected.null_count());
1812 assert_eq!(actual.values(), expected.values());
1813 }
1814
1815 #[test]
1816 fn test_concat_run_array_single() {
1817 let run_ends1 = Int32Array::from(vec![2, 4]);
1819 let values1 = Int32Array::from(vec![10, 20]);
1820 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1821
1822 let result = concat(&[&array1]).unwrap();
1824 let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1825
1826 assert_eq!(result_run_array.len(), 4);
1828
1829 let run_ends = result_run_array.run_ends().values();
1831 assert_eq!(&[2, 4], run_ends);
1832
1833 assert_eq!(
1835 &[10, 20],
1836 result_run_array
1837 .values()
1838 .as_any()
1839 .downcast_ref::<Int32Array>()
1840 .unwrap()
1841 .values()
1842 );
1843 }
1844
1845 #[test]
1846 fn test_concat_run_array_with_3_arrays() {
1847 let run_ends1 = Int32Array::from(vec![2, 4]);
1848 let values1 = Int32Array::from(vec![10, 20]);
1849 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1850 let run_ends2 = Int32Array::from(vec![1, 4]);
1851 let values2 = Int32Array::from(vec![30, 40]);
1852 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1853 let run_ends3 = Int32Array::from(vec![1, 4]);
1854 let values3 = Int32Array::from(vec![50, 60]);
1855 let array3 = RunArray::try_new(&run_ends3, &values3).unwrap();
1856
1857 let result = concat(&[&array1, &array2, &array3]).unwrap();
1859 let result_run_array: &arrow_array::RunArray<Int32Type> = result.as_run();
1860
1861 assert_eq!(result_run_array.len(), 12); let run_ends = result_run_array.run_ends().values();
1866 assert_eq!(run_ends.len(), 6);
1867 assert_eq!(&[2, 4, 5, 8, 9, 12], run_ends);
1868
1869 let values = result_run_array
1871 .values()
1872 .as_any()
1873 .downcast_ref::<Int32Array>()
1874 .unwrap();
1875 assert_eq!(values.len(), 6);
1876 assert_eq!(&[10, 20, 30, 40, 50, 60], values.values());
1877 }
1878
1879 #[test]
1880 fn test_concat_run_array_with_truncated_run() {
1881 let run_ends1 = Int32Array::from(vec![2, 5]);
1884 let values1 = Int32Array::from(vec![10, 20]);
1885 let array1 = RunArray::try_new(&run_ends1, &values1).unwrap();
1886 let array1_sliced = array1.slice(0, 3);
1887
1888 let run_ends2 = Int32Array::from(vec![2]);
1889 let values2 = Int32Array::from(vec![30]);
1890 let array2 = RunArray::try_new(&run_ends2, &values2).unwrap();
1891
1892 let result = concat(&[&array1_sliced, &array2]).unwrap();
1893 let result_run_array = result.as_run::<Int32Type>();
1894
1895 assert_eq!(result_run_array.len(), 5);
1898 let run_ends = result_run_array.run_ends().values();
1899 let values = result_run_array.values().as_primitive::<Int32Type>();
1900 assert_eq!(values.values(), &[10, 20, 30]);
1901 assert_eq!(&[2, 3, 5], run_ends);
1902 }
1903}