1use std::sync::Arc;
20
21use arrow_array::builder::{
22 BinaryViewBuilder, BufferBuilder, FixedSizeBinaryBuilder, StringViewBuilder,
23};
24use arrow_array::types::ByteArrayType;
25use arrow_array::*;
26use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer};
27use arrow_data::ArrayDataBuilder;
28use arrow_schema::{ArrowError, DataType};
29
30pub fn concat_elements_bytes<T: ByteArrayType>(
32 left: &GenericByteArray<T>,
33 right: &GenericByteArray<T>,
34) -> Result<GenericByteArray<T>, ArrowError> {
35 if left.len() != right.len() {
36 return Err(ArrowError::ComputeError(format!(
37 "Arrays must have the same length: {} != {}",
38 left.len(),
39 right.len()
40 )));
41 }
42
43 let nulls = NullBuffer::union(left.nulls(), right.nulls());
44
45 let left_offsets = left.value_offsets();
46 let right_offsets = right.value_offsets();
47
48 let left_values = left.value_data();
49 let right_values = right.value_data();
50
51 let mut output_values = BufferBuilder::<u8>::new(
52 left_values.len() + right_values.len()
53 - left_offsets[0].as_usize()
54 - right_offsets[0].as_usize(),
55 );
56
57 let mut output_offsets = BufferBuilder::<T::Offset>::new(left_offsets.len());
58 output_offsets.append(T::Offset::usize_as(0));
59 for (left_idx, right_idx) in left_offsets.windows(2).zip(right_offsets.windows(2)) {
60 output_values.append_slice(&left_values[left_idx[0].as_usize()..left_idx[1].as_usize()]);
61 output_values.append_slice(&right_values[right_idx[0].as_usize()..right_idx[1].as_usize()]);
62 output_offsets.append(T::Offset::from_usize(output_values.len()).unwrap());
63 }
64
65 let builder = ArrayDataBuilder::new(T::DATA_TYPE)
66 .len(left.len())
67 .add_buffer(output_offsets.finish())
68 .add_buffer(output_values.finish())
69 .nulls(nulls);
70
71 Ok(unsafe { builder.build_unchecked() }.into())
73}
74
75pub fn concat_elements_utf8<Offset: OffsetSizeTrait>(
90 left: &GenericStringArray<Offset>,
91 right: &GenericStringArray<Offset>,
92) -> Result<GenericStringArray<Offset>, ArrowError> {
93 concat_elements_bytes(left, right)
94}
95
96pub fn concat_element_binary<Offset: OffsetSizeTrait>(
98 left: &GenericBinaryArray<Offset>,
99 right: &GenericBinaryArray<Offset>,
100) -> Result<GenericBinaryArray<Offset>, ArrowError> {
101 concat_elements_bytes(left, right)
102}
103
104pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
112 arrays: &[&GenericStringArray<Offset>],
113) -> Result<GenericStringArray<Offset>, ArrowError> {
114 if arrays.is_empty() {
115 return Err(ArrowError::ComputeError(
116 "concat requires input of at least one array".to_string(),
117 ));
118 }
119
120 let size = arrays[0].len();
121 if !arrays.iter().all(|array| array.len() == size) {
122 return Err(ArrowError::ComputeError(format!(
123 "Arrays must have the same length of {size}",
124 )));
125 }
126
127 let nulls = arrays
128 .iter()
129 .fold(None, |acc, a| NullBuffer::union(acc.as_ref(), a.nulls()));
130
131 let data_values = arrays
132 .iter()
133 .map(|array| array.value_data())
134 .collect::<Vec<_>>();
135
136 let mut offsets = arrays
137 .iter()
138 .map(|a| a.value_offsets().iter().peekable())
139 .collect::<Vec<_>>();
140
141 let mut output_values = BufferBuilder::<u8>::new(
142 data_values
143 .iter()
144 .zip(offsets.iter_mut())
145 .map(|(data, offset)| data.len() - offset.peek().unwrap().as_usize())
146 .sum(),
147 );
148
149 let mut output_offsets = BufferBuilder::<Offset>::new(size + 1);
150 output_offsets.append(Offset::zero());
151 for _ in 0..size {
152 data_values
153 .iter()
154 .zip(offsets.iter_mut())
155 .for_each(|(values, offset)| {
156 let index_start = offset.next().unwrap().as_usize();
157 let index_end = offset.peek().unwrap().as_usize();
158 output_values.append_slice(&values[index_start..index_end]);
159 });
160 output_offsets.append(Offset::from_usize(output_values.len()).unwrap());
161 }
162
163 let builder = ArrayDataBuilder::new(GenericStringArray::<Offset>::DATA_TYPE)
164 .len(size)
165 .add_buffer(output_offsets.finish())
166 .add_buffer(output_values.finish())
167 .nulls(nulls);
168
169 Ok(unsafe { builder.build_unchecked() }.into())
171}
172
173pub fn concat_elements_fixed_size_binary(
180 left: &FixedSizeBinaryArray,
181 right: &FixedSizeBinaryArray,
182) -> Result<FixedSizeBinaryArray, ArrowError> {
183 if left.len() != right.len() {
184 return Err(ArrowError::ComputeError(format!(
185 "Arrays must have the same length: {} != {}",
186 left.len(),
187 right.len()
188 )));
189 }
190
191 let left_size: usize = left.value_length().try_into().map_err(|_| {
192 ArrowError::InvalidArgumentError(format!(
193 "Invalid size of FixedSizeBinaryArray({})",
194 left.value_length()
195 ))
196 })?;
197 let right_size: usize = right.value_length().try_into().map_err(|_| {
198 ArrowError::InvalidArgumentError(format!(
199 "Invalid size of FixedSizeBinaryArray({})",
200 right.value_length()
201 ))
202 })?;
203 let output_size = left_size + right_size;
204
205 let nulls = NullBuffer::union(left.nulls(), right.nulls());
207
208 let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), output_size as i32);
209 let mut buffer = MutableBuffer::with_capacity(output_size);
210 for i in 0..left.len() {
211 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
212 result.append_null();
213 } else {
214 buffer.clear();
215 buffer.extend_from_slice(left.value(i));
216 buffer.extend_from_slice(right.value(i));
217 result.append_value(&buffer)?;
218 }
219 }
220
221 Ok(result.finish())
222}
223
224pub fn concat_elements_binary_view_array(
231 left: &BinaryViewArray,
232 right: &BinaryViewArray,
233) -> Result<BinaryViewArray, ArrowError> {
234 if left.len() != right.len() {
235 return Err(ArrowError::ComputeError(format!(
236 "Arrays must have the same length: {} != {}",
237 left.len(),
238 right.len()
239 )));
240 }
241 let mut result = BinaryViewBuilder::with_capacity(left.len());
242
243 let mut buffer = MutableBuffer::new(0);
245
246 let nulls = NullBuffer::union(left.nulls(), right.nulls());
248
249 for i in 0..left.len() {
250 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
251 result.append_null();
252 } else {
253 buffer.clear();
254 buffer.extend_from_slice(left.value(i));
255 buffer.extend_from_slice(right.value(i));
256 result.try_append_value(&buffer)?;
257 }
258 }
259 Ok(result.finish())
260}
261
262pub fn concat_elements_string_view_array(
272 left: &StringViewArray,
273 right: &StringViewArray,
274) -> Result<StringViewArray, ArrowError> {
275 if left.len() != right.len() {
276 return Err(ArrowError::ComputeError(format!(
277 "Arrays must have the same length: {} != {}",
278 left.len(),
279 right.len()
280 )));
281 }
282
283 let mut result = StringViewBuilder::with_capacity(left.len());
284
285 let mut buffer: Vec<u8> = Vec::new();
287
288 let nulls = NullBuffer::union(left.nulls(), right.nulls());
289
290 for i in 0..left.len() {
291 if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
292 result.append_null();
293 } else {
294 buffer.clear();
295 buffer.extend_from_slice(left.value(i).as_bytes());
296 buffer.extend_from_slice(right.value(i).as_bytes());
297 let s = std::str::from_utf8(&buffer).map_err(|_| {
298 ArrowError::ComputeError("Concatenated values are not valid UTF-8".into())
299 })?;
300 result.try_append_value(s)?;
301 }
302 }
303 Ok(result.finish())
304}
305
306pub fn concat_elements_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef, ArrowError> {
312 if left.data_type() != right.data_type() {
313 return Err(ArrowError::ComputeError(format!(
314 "Cannot concat arrays of different types: {} != {}",
315 left.data_type(),
316 right.data_type()
317 )));
318 }
319 match (left.data_type(), right.data_type()) {
320 (DataType::Utf8, DataType::Utf8) => {
321 let left = left.as_any().downcast_ref::<StringArray>().unwrap();
322 let right = right.as_any().downcast_ref::<StringArray>().unwrap();
323 Ok(Arc::new(concat_elements_utf8(left, right)?))
324 }
325 (DataType::LargeUtf8, DataType::LargeUtf8) => {
326 let left = left.as_any().downcast_ref::<LargeStringArray>().unwrap();
327 let right = right.as_any().downcast_ref::<LargeStringArray>().unwrap();
328 Ok(Arc::new(concat_elements_utf8(left, right)?))
329 }
330 (DataType::Binary, DataType::Binary) => {
331 let left = left.as_any().downcast_ref::<BinaryArray>().unwrap();
332 let right = right.as_any().downcast_ref::<BinaryArray>().unwrap();
333 Ok(Arc::new(concat_element_binary(left, right)?))
334 }
335 (DataType::LargeBinary, DataType::LargeBinary) => {
336 let left = left.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
337 let right = right.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
338 Ok(Arc::new(concat_element_binary(left, right)?))
339 }
340 (DataType::BinaryView, DataType::BinaryView) => {
341 let left = left.as_any().downcast_ref::<BinaryViewArray>().unwrap();
342 let right = right.as_any().downcast_ref::<BinaryViewArray>().unwrap();
343 Ok(Arc::new(concat_elements_binary_view_array(left, right)?))
344 }
345 (DataType::Utf8View, DataType::Utf8View) => {
346 let left = left.as_any().downcast_ref::<StringViewArray>().unwrap();
347 let right = right.as_any().downcast_ref::<StringViewArray>().unwrap();
348 Ok(Arc::new(concat_elements_string_view_array(left, right)?))
349 }
350 (DataType::FixedSizeBinary(_), DataType::FixedSizeBinary(_)) => {
351 let left = left
352 .as_any()
353 .downcast_ref::<FixedSizeBinaryArray>()
354 .unwrap();
355 let right = right
356 .as_any()
357 .downcast_ref::<FixedSizeBinaryArray>()
358 .unwrap();
359 Ok(Arc::new(concat_elements_fixed_size_binary(left, right)?))
360 }
361 _ => Err(ArrowError::NotYetImplemented(format!(
363 "concat not supported for {}",
364 left.data_type()
365 ))),
366 }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use arrow_buffer::Buffer;

    /// Error text expected when a two-row array is paired with a one-row array.
    const LENGTH_MISMATCH: &str = "Compute error: Arrays must have the same length: 2 != 1";

    // A row is null in the output when either side is null.
    #[test]
    fn test_string_concat() {
        let lhs = [Some("foo"), Some("bar"), None]
            .into_iter()
            .collect::<StringArray>();
        let rhs = [None, Some("yyy"), Some("zzz")]
            .into_iter()
            .collect::<StringArray>();

        let actual = concat_elements_utf8(&lhs, &rhs).unwrap();

        let want = [None, Some("baryyy"), None]
            .into_iter()
            .collect::<StringArray>();
        assert_eq!(actual, want);
    }

    // Empty strings on either side contribute nothing to the output row.
    #[test]
    fn test_string_concat_empty_string() {
        let lhs = [Some("foo"), Some(""), Some("bar")]
            .into_iter()
            .collect::<StringArray>();
        let rhs = [Some("baz"), Some(""), Some("")]
            .into_iter()
            .collect::<StringArray>();

        let actual = concat_elements_utf8(&lhs, &rhs).unwrap();

        let want = [Some("foobaz"), Some(""), Some("bar")]
            .into_iter()
            .collect::<StringArray>();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_string_concat_no_null() {
        let lhs = StringArray::from(vec!["foo", "bar"]);
        let rhs = StringArray::from(vec!["bar", "baz"]);

        let actual = concat_elements_utf8(&lhs, &rhs).unwrap();

        assert_eq!(actual, StringArray::from(vec!["foobar", "barbaz"]));
    }

    #[test]
    fn test_string_concat_error() {
        let lhs = StringArray::from(vec!["foo", "bar"]);
        let rhs = StringArray::from(vec!["baz"]);

        let err = concat_elements_utf8(&lhs, &rhs).unwrap_err();

        assert_eq!(err.to_string(), LENGTH_MISMATCH);
    }

    // Sliced inputs must be read relative to their own offsets.
    #[test]
    fn test_string_concat_slice() {
        let left = StringArray::from(vec![None, Some("foo"), Some("bar"), Some("baz")]);
        let right = StringArray::from(vec![Some("boo"), None, Some("far"), Some("faz")]);

        // First pair of slices: rows 0..3 of `left` against rows 1..4 of `right`.
        let lhs = left.slice(0, 3);
        let rhs = right.slice(1, 3);
        let actual = concat_elements_utf8(
            lhs.as_any().downcast_ref::<StringArray>().unwrap(),
            rhs.as_any().downcast_ref::<StringArray>().unwrap(),
        )
        .unwrap();

        let want = [None, Some("foofar"), Some("barfaz")]
            .into_iter()
            .collect::<StringArray>();
        assert_eq!(actual, want);

        // Second pair of slices: rows 2..4 of `left` against rows 1..3 of `right`.
        let lhs = left.slice(2, 2);
        let rhs = right.slice(1, 2);
        let actual = concat_elements_utf8(
            lhs.as_any().downcast_ref::<StringArray>().unwrap(),
            rhs.as_any().downcast_ref::<StringArray>().unwrap(),
        )
        .unwrap();

        let want = [None, Some("bazfar")].into_iter().collect::<StringArray>();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_string_concat_error_empty() {
        let err = concat_elements_utf8_many::<i32>(&[]).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Compute error: concat requires input of at least one array"
        );
    }

    // A single input array is returned unchanged.
    #[test]
    fn test_string_concat_one() {
        let only = [None, Some("baryyy"), None]
            .into_iter()
            .collect::<StringArray>();

        let actual = concat_elements_utf8_many(&[&only]).unwrap();

        assert_eq!(actual, only);
    }

    #[test]
    fn test_string_concat_many() {
        let foo = StringArray::from(vec![Some("f"), Some("o"), Some("o"), None]);
        let bar = StringArray::from(vec![None, Some("b"), Some("a"), Some("r")]);
        let baz = StringArray::from(vec![Some("b"), None, Some("a"), Some("z")]);

        let actual = concat_elements_utf8_many(&[&foo, &bar, &baz]).unwrap();

        let want = [None, None, Some("oaa"), None]
            .into_iter()
            .collect::<StringArray>();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_fixed_size_binary_concat() {
        let lhs = FixedSizeBinaryArray::try_from(vec![Some(b"foo" as &[u8]), Some(b"bar"), None])
            .unwrap();
        let rhs = FixedSizeBinaryArray::try_from(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")])
            .unwrap();

        let actual = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();

        let want =
            FixedSizeBinaryArray::try_from(vec![None, Some(b"baryyy" as &[u8]), None]).unwrap();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_fixed_size_binary_concat_no_null() {
        let lhs = FixedSizeBinaryArray::try_from(vec![b"ab" as &[u8], b"cd"]).unwrap();
        let rhs = FixedSizeBinaryArray::try_from(vec![b"12" as &[u8], b"34"]).unwrap();

        let actual = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();

        let want = FixedSizeBinaryArray::try_from(vec![b"ab12" as &[u8], b"cd34"]).unwrap();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_fixed_size_binary_concat_error() {
        let lhs = FixedSizeBinaryArray::try_from(vec![b"ab" as &[u8], b"cd"]).unwrap();
        let rhs = FixedSizeBinaryArray::try_from(vec![b"12" as &[u8]]).unwrap();

        let err = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap_err();

        assert_eq!(err.to_string(), LENGTH_MISMATCH);
    }

    #[test]
    fn test_fixed_size_binary_concat_empty() {
        let lhs = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);
        let rhs = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);

        let actual = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();

        let want = FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);
        assert_eq!(actual, want);
    }

    #[test]
    fn test_binary_view_concat() {
        let lhs = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let rhs = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);

        let actual = concat_elements_binary_view_array(&lhs, &rhs).unwrap();

        let want = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(actual, want);
    }

    #[test]
    fn test_string_view_concat() {
        let lhs = StringViewArray::from_iter(vec![Some("foo"), Some("bar"), None]);
        let rhs = StringViewArray::from_iter(vec![None, Some("yyy"), Some("zzz")]);

        let actual = concat_elements_string_view_array(&lhs, &rhs).unwrap();

        let want = StringViewArray::from_iter(vec![None, Some("baryyy"), None]);
        assert_eq!(actual, want);
    }

    #[test]
    fn test_binary_view_concat_no_null() {
        let lhs = BinaryViewArray::from_iter(vec![
            Some(b"foo" as &[u8]),
            Some(b"bar"),
            Some(b""),
            Some(b"baz"),
        ]);
        let rhs = BinaryViewArray::from_iter(vec![
            Some(b"bar" as &[u8]),
            Some(b"baz"),
            Some(b""),
            Some(b""),
        ]);

        let actual = concat_elements_binary_view_array(&lhs, &rhs).unwrap();

        let want = BinaryViewArray::from_iter(vec![
            Some(b"foobar" as &[u8]),
            Some(b"barbaz"),
            Some(b""),
            Some(b"baz"),
        ]);
        assert_eq!(actual, want);
    }

    #[test]
    fn test_binary_view_concat_error() {
        let lhs = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar")]);
        let rhs = BinaryViewArray::from_iter(vec![Some(b"baz" as &[u8])]);

        let err = concat_elements_binary_view_array(&lhs, &rhs).unwrap_err();

        assert_eq!(err.to_string(), LENGTH_MISMATCH);
    }

    #[test]
    fn test_binary_view_concat_empty() {
        let lhs = BinaryViewArray::from_iter(Vec::<Option<&[u8]>>::new());
        let rhs = BinaryViewArray::from_iter(Vec::<Option<&[u8]>>::new());

        let actual = concat_elements_binary_view_array(&lhs, &rhs).unwrap();

        let want = BinaryViewArray::from_iter(Vec::<Option<&[u8]>>::new());
        assert_eq!(actual, want);
    }

    // Exercises the dynamic entry point once per array type listed below.
    #[test]
    fn test_concat_dyn_same_type() {
        // Utf8
        let lhs = StringArray::from(vec![Some("foo"), Some("bar"), None]);
        let rhs = StringArray::from(vec![None, Some("yyy"), Some("zzz")]);
        let actual = StringArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want = StringArray::from(vec![None, Some("baryyy"), None]);
        assert_eq!(actual, want);

        // LargeUtf8
        let lhs = LargeStringArray::from(vec![Some("foo"), Some("bar"), None]);
        let rhs = LargeStringArray::from(vec![None, Some("yyy"), Some("zzz")]);
        let actual = LargeStringArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want = LargeStringArray::from(vec![None, Some("baryyy"), None]);
        assert_eq!(actual, want);

        // Binary
        let lhs = BinaryArray::from_opt_vec(vec![Some(b"foo"), Some(b"bar"), None]);
        let rhs = BinaryArray::from_opt_vec(vec![None, Some(b"yyy"), Some(b"zzz")]);
        let actual = BinaryArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want = BinaryArray::from_opt_vec(vec![None, Some(b"baryyy"), None]);
        assert_eq!(actual, want);

        // LargeBinary
        let lhs = LargeBinaryArray::from_opt_vec(vec![Some(b"foo"), Some(b"bar"), None]);
        let rhs = LargeBinaryArray::from_opt_vec(vec![None, Some(b"yyy"), Some(b"zzz")]);
        let actual = LargeBinaryArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want = LargeBinaryArray::from_opt_vec(vec![None, Some(b"baryyy"), None]);
        assert_eq!(actual, want);

        // BinaryView
        let lhs = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
        let rhs = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);
        let actual = BinaryViewArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as &[u8]), None]);
        assert_eq!(actual, want);

        // FixedSizeBinary
        let lhs = FixedSizeBinaryArray::try_from(vec![Some(b"foo" as &[u8]), Some(b"bar"), None])
            .unwrap();
        let rhs = FixedSizeBinaryArray::try_from(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")])
            .unwrap();
        let actual =
            FixedSizeBinaryArray::from(concat_elements_dyn(&lhs, &rhs).unwrap().into_data());
        let want =
            FixedSizeBinaryArray::try_from(vec![None, Some(b"baryyy" as &[u8]), None]).unwrap();
        assert_eq!(actual, want);
    }

    #[test]
    fn test_concat_dyn_different_type() {
        let lhs = StringArray::from(vec![Some("foo"), Some("bar"), None]);
        let rhs = LargeStringArray::from(vec![None, Some("1"), Some("2")]);

        let err = concat_elements_dyn(&lhs, &rhs).unwrap_err();

        assert_eq!(
            err.to_string(),
            "Compute error: Cannot concat arrays of different types: Utf8 != LargeUtf8"
        );
    }
}