1use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{Buffer, MutableBuffer, bit_util};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{ArrayData, layout};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116#[deprecated(
125 since = "52.0.0",
126 note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129 src: ArrayRef,
130 out_array: *mut FFI_ArrowArray,
131 out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133 let data = src.to_data();
134 let array = FFI_ArrowArray::new(&data);
135 let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137 unsafe { std::ptr::write_unaligned(out_array, array) };
138 unsafe { std::ptr::write_unaligned(out_schema, schema) };
139
140 Ok(())
141}
142
143fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146 if let Some(primitive) = data_type.primitive_width() {
147 return match i {
148 0 => Err(ArrowError::CDataInterface(format!(
149 "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150 ))),
151 1 => Ok(primitive * 8),
152 i => Err(ArrowError::CDataInterface(format!(
153 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154 ))),
155 };
156 }
157
158 Ok(match (data_type, i) {
159 (DataType::Boolean, 1) => 1,
160 (DataType::Boolean, _) => {
161 return Err(ArrowError::CDataInterface(format!(
162 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163 )));
164 }
165 (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
166 (DataType::FixedSizeList(f, num_elems), 1) => {
167 let child_bit_width = bit_width(f.data_type(), 1)?;
168 child_bit_width * (*num_elems as usize)
169 }
170 (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171 return Err(ArrowError::CDataInterface(format!(
172 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173 )));
174 }
175 (DataType::Utf8, 1)
179 | (DataType::Binary, 1)
180 | (DataType::List(_), 1)
181 | (DataType::Map(_, _), 1) => i32::BITS as _,
182 (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
183 (DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
185 (DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
187 (DataType::List(_), _) | (DataType::Map(_, _), _) => {
188 return Err(ArrowError::CDataInterface(format!(
189 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
190 )));
191 }
192 (DataType::Utf8, _) | (DataType::Binary, _) => {
193 return Err(ArrowError::CDataInterface(format!(
194 "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
195 )));
196 }
197 (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => {
200 i64::BITS as _
201 }
202 (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => {
203 u8::BITS as _
204 }
205 (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => {
206 return Err(ArrowError::CDataInterface(format!(
207 "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
208 )));
209 }
210 (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _,
214 (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _,
215 (DataType::Union(_, _), 0) => i8::BITS as _,
217 (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
219 (DataType::Union(_, UnionMode::Sparse), _) => {
220 return Err(ArrowError::CDataInterface(format!(
221 "The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
222 )));
223 }
224 (DataType::Union(_, UnionMode::Dense), _) => {
225 return Err(ArrowError::CDataInterface(format!(
226 "The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
227 )));
228 }
229 (_, 0) => {
230 return Err(ArrowError::CDataInterface(format!(
233 "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
234 )));
235 }
236 _ => {
237 return Err(ArrowError::CDataInterface(format!(
238 "The datatype \"{data_type}\" is still not supported in Rust implementation"
239 )));
240 }
241 })
242}
243
244unsafe fn create_buffer(
252 owner: Arc<FFI_ArrowArray>,
253 array: &FFI_ArrowArray,
254 index: usize,
255 len: usize,
256) -> Option<Buffer> {
257 if array.num_buffers() == 0 {
258 return None;
259 }
260 NonNull::new(array.buffer(index) as _)
261 .map(|ptr| unsafe { Buffer::from_custom_allocation(ptr, len, owner) })
262}
263
264pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
266 let array = FFI_ArrowArray::new(data);
267 let schema = FFI_ArrowSchema::try_from(data.data_type())?;
268 Ok((array, schema))
269}
270
271pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
277 let dt = DataType::try_from(schema)?;
278 let array = Arc::new(array);
279 let tmp = ImportedArrowArray {
280 array: &array,
281 data_type: dt,
282 owner: &array,
283 };
284 tmp.consume()
285}
286
287pub unsafe fn from_ffi_and_data_type(
293 array: FFI_ArrowArray,
294 data_type: DataType,
295) -> Result<ArrayData> {
296 let array = Arc::new(array);
297 let tmp = ImportedArrowArray {
298 array: &array,
299 data_type,
300 owner: &array,
301 };
302 tmp.consume()
303}
304
/// A borrowed [`FFI_ArrowArray`] paired with the [`DataType`] used to interpret
/// it while it is converted into an [`ArrayData`].
///
/// One value is created for the top-level array and further values are created
/// for every child and dictionary array reached during the import.
#[derive(Debug)]
struct ImportedArrowArray<'a> {
    // The array being imported: the top-level array, or one of its children or
    // its dictionary.
    array: &'a FFI_ArrowArray,
    // The type `array` is interpreted as.
    data_type: DataType,
    // The top-level array. It is cloned into every imported buffer and passed
    // on unchanged to children and dictionaries.
    owner: &'a Arc<FFI_ArrowArray>,
}
311
312impl ImportedArrowArray<'_> {
313 fn consume(self) -> Result<ArrayData> {
314 let len = self.array.len();
315 let offset = self.array.offset();
316 let null_count = match &self.data_type {
317 DataType::Null => Some(0),
318 _ => self.array.null_count_opt(),
319 };
320
321 let data_layout = layout(&self.data_type);
322 let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
323
324 let null_bit_buffer = if data_layout.can_contain_null_mask {
325 self.null_bit_buffer()
326 } else {
327 None
328 };
329
330 let mut child_data = self.consume_children()?;
331
332 if let Some(d) = self.dictionary()? {
333 assert!(child_data.is_empty());
336 child_data.push(d.consume()?);
337 }
338
339 Ok(unsafe {
341 ArrayData::new_unchecked(
342 self.data_type,
343 len,
344 null_count,
345 null_bit_buffer,
346 offset,
347 buffers,
348 child_data,
349 )
350 })
351 }
352
353 fn consume_children(&self) -> Result<Vec<ArrayData>> {
354 match &self.data_type {
355 DataType::List(field)
356 | DataType::FixedSizeList(field, _)
357 | DataType::LargeList(field)
358 | DataType::ListView(field)
359 | DataType::LargeListView(field)
360 | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
361 DataType::Struct(fields) => {
362 assert!(fields.len() == self.array.num_children());
363 fields
364 .iter()
365 .enumerate()
366 .map(|(i, field)| self.consume_child(i, field.data_type()))
367 .collect::<Result<Vec<_>>>()
368 }
369 DataType::Union(union_fields, _) => {
370 assert!(union_fields.len() == self.array.num_children());
371 union_fields
372 .iter()
373 .enumerate()
374 .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
375 .collect::<Result<Vec<_>>>()
376 }
377 DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
378 self.consume_child(0, run_ends_field.data_type())?,
379 self.consume_child(1, values_field.data_type())?,
380 ]
381 .to_vec()),
382 _ => Ok(Vec::new()),
383 }
384 }
385
386 fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
387 ImportedArrowArray {
388 array: self.array.child(index),
389 data_type: child_type.clone(),
390 owner: self.owner,
391 }
392 .consume()
393 }
394
395 fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
398 let buffer_begin = can_contain_null_mask as usize;
400 let buffer_end = self.array.num_buffers() - usize::from(variadic);
401
402 let variadic_buffer_lens = if variadic {
403 let num_variadic_buffers =
406 self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
407 if num_variadic_buffers == 0 {
408 &[]
409 } else {
410 let lengths = self.array.buffer(self.array.num_buffers() - 1);
411 unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
413 }
414 } else {
415 &[]
416 };
417
418 (buffer_begin..buffer_end)
419 .map(|index| {
420 let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
421 match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
422 Some(buf) => {
423 if buf.is_empty() {
428 Ok(MutableBuffer::new(0).into())
429 } else {
430 Ok(buf)
431 }
432 }
433 None if len == 0 => {
434 Ok(MutableBuffer::new(0).into())
437 }
438 None => Err(ArrowError::CDataInterface(format!(
439 "The external buffer at position {index} is null."
440 ))),
441 }
442 })
443 .collect()
444 }
445
446 fn buffer_len(
451 &self,
452 i: usize,
453 variadic_buffer_lengths: &[i64],
454 dt: &DataType,
455 ) -> Result<usize> {
456 let data_type = match dt {
458 DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
459 dt => dt,
460 };
461
462 let length = self.array.len() + self.array.offset();
465
466 Ok(match (&data_type, i) {
468 (DataType::Utf8, 1)
469 | (DataType::LargeUtf8, 1)
470 | (DataType::Binary, 1)
471 | (DataType::LargeBinary, 1)
472 | (DataType::List(_), 1)
473 | (DataType::LargeList(_), 1)
474 | (DataType::Map(_, _), 1) => {
475 let bits = bit_width(data_type, i)?;
477 debug_assert_eq!(bits % 8, 0);
478 (length + 1) * (bits / 8)
479 }
480 (DataType::ListView(_), 1)
481 | (DataType::ListView(_), 2)
482 | (DataType::LargeListView(_), 1)
483 | (DataType::LargeListView(_), 2) => {
484 let bits = bit_width(data_type, i)?;
485 debug_assert_eq!(bits % 8, 0);
486 length * (bits / 8)
487 }
488 (DataType::Utf8, 2) | (DataType::Binary, 2) => {
489 if self.array.is_empty() {
490 return Ok(0);
491 }
492
493 let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
495 #[allow(clippy::cast_ptr_alignment)]
498 let offset_buffer = self.array.buffer(1) as *const i32;
499 (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
501 }
502 (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
503 if self.array.is_empty() {
504 return Ok(0);
505 }
506
507 let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
509 #[allow(clippy::cast_ptr_alignment)]
512 let offset_buffer = self.array.buffer(1) as *const i64;
513 (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
515 }
516 (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
521 std::mem::size_of::<u128>() * length
522 }
523 (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
524 variadic_buffer_lengths[i - 2] as usize
525 }
526 _ => {
528 let bits = bit_width(data_type, i)?;
529 bit_util::ceil(length * bits, 8)
530 }
531 })
532 }
533
534 fn null_bit_buffer(&self) -> Option<Buffer> {
538 let length = self.array.len() + self.array.offset();
542 let buffer_len = bit_util::ceil(length, 8);
543
544 unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
545 }
546
547 fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
548 match (self.array.dictionary(), &self.data_type) {
549 (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
550 array,
551 data_type: value_type.as_ref().clone(),
552 owner: self.owner,
553 })),
554 (Some(_), _) => Err(ArrowError::CDataInterface(
555 "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
556 )),
557 (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
558 "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
559 )),
560 (_, _) => Ok(None),
561 }
562 }
563}
564
/// Round-trip tests: arrays are exported with `to_ffi` and imported back with
/// `from_ffi`, then compared with the expected contents.
#[cfg(test)]
mod tests_to_then_from_ffi {
    use std::collections::HashMap;
    use std::mem::ManuallyDrop;

    use arrow_buffer::{ArrowNativeType, NullBuffer};
    use arrow_schema::Field;

    use crate::builder::UnionBuilder;
    use crate::cast::AsArray;
    use crate::types::{Float64Type, Int8Type, Int32Type};
    use crate::*;

    use super::*;

    /// Exports `data` with `to_ffi` and immediately imports it again with
    /// `from_ffi`.
    fn ffi_round_trip(data: &ArrayData) -> Result<ArrayData> {
        let (ffi_array, ffi_schema) = to_ffi(data)?;
        // SAFETY: both structures were just produced by `to_ffi`.
        unsafe { from_ffi(ffi_array, &ffi_schema) }
    }

    /// Appends the four slots shared by the union tests and builds the array.
    fn build_union(mut builder: UnionBuilder) -> UnionArray {
        builder.append::<Int32Type>("a", 1).unwrap();
        builder.append_null::<Int32Type>("a").unwrap();
        builder.append::<Float64Type>("c", 3.0).unwrap();
        builder.append::<Int32Type>("a", 4).unwrap();
        builder.build().unwrap()
    }

    /// Checks type ids, presence of offsets and every slot of a union created
    /// by `build_union`.
    fn assert_union_contents(array: &UnionArray, expect_offsets: bool) {
        let expected_type_ids = vec![0_i8, 0, 1, 0];

        assert_eq!(*array.type_ids(), expected_type_ids);
        for (i, id) in expected_type_ids.iter().enumerate() {
            assert_eq!(id, &array.type_id(i));
        }

        assert_eq!(array.offsets().is_some(), expect_offsets);

        for i in 0..array.len() {
            let slot = array.value(i);
            match i {
                0 => {
                    let slot = slot.as_primitive::<Int32Type>();
                    assert!(!slot.is_null(0));
                    assert_eq!(slot.len(), 1);
                    assert_eq!(1_i32, slot.value(0));
                }
                1 => assert!(slot.is_null(0)),
                2 => {
                    let slot = slot.as_primitive::<Float64Type>();
                    assert!(!slot.is_null(0));
                    assert_eq!(slot.len(), 1);
                    assert_eq!(slot.value(0), 3_f64);
                }
                3 => {
                    let slot = slot.as_primitive::<Int32Type>();
                    assert!(!slot.is_null(0));
                    assert_eq!(slot.len(), 1);
                    assert_eq!(4_i32, slot.value(0));
                }
                _ => unreachable!(),
            }
        }
    }

    /// Round-trips a run-end-encoded array and compares type, run ends and
    /// values with the source.
    fn assert_run_array_round_trips(source: &RunArray<Int32Type>) -> Result<()> {
        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<RunArray<Int32Type>>()
            .unwrap();

        assert_eq!(imported.data_type(), source.data_type());
        assert_eq!(imported.run_ends().values(), source.run_ends().values());
        assert_eq!(imported.values(), source.values());

        Ok(())
    }

    #[test]
    fn test_round_trip() {
        let source = Int32Array::from(vec![1, 2, 3]);

        let imported = ffi_round_trip(&source.into_data()).unwrap();

        assert_eq!(Int32Array::from(imported), Int32Array::from(vec![1, 2, 3]));
    }

    #[test]
    fn test_import() {
        let source = Int32Array::from(vec![1, 2, 3]).into_data();
        let ffi_schema = FFI_ArrowSchema::try_from(source.data_type()).unwrap();
        let ffi_array = FFI_ArrowArray::new(&source);

        // Box the structures without running their destructors, then read them
        // back through raw pointers as a foreign consumer would.
        let boxed_schema = Box::new(ManuallyDrop::new(ffi_schema));
        let boxed_array = Box::new(ManuallyDrop::new(ffi_array));

        let schema_ptr: *const FFI_ArrowSchema = &**boxed_schema;
        let array_ptr: *const FFI_ArrowArray = &**boxed_array;

        let imported =
            unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };

        assert_eq!(Int32Array::from(imported), Int32Array::from(vec![1, 2, 3]));
    }

    #[test]
    fn test_round_trip_with_offset() -> Result<()> {
        let sliced = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]).slice(1, 2);

        let imported = make_array(ffi_round_trip(&sliced.to_data())?);
        let imported = imported.as_any().downcast_ref::<Int32Array>().unwrap();

        assert_eq!(imported, &Int32Array::from(vec![Some(2), None]));

        Ok(())
    }

    #[test]
    #[cfg(not(feature = "force_validate"))]
    fn test_decimal_round_trip() -> Result<()> {
        let source = [Some(12345_i128), Some(-12345_i128), None]
            .into_iter()
            .collect::<Decimal128Array>()
            .with_precision_and_scale(6, 2)
            .unwrap();

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported.as_any().downcast_ref::<Decimal128Array>().unwrap();

        assert_eq!(imported, &source);

        Ok(())
    }

    #[test]
    fn test_null_count_handling() {
        // An Int32 array whose null count is reset to "unknown" before import.
        let source = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
            .build()
            .unwrap();
        let mut exported = FFI_ArrowArray::new(&source);
        assert_eq!(3, exported.null_count());
        assert_eq!(Some(3), exported.null_count_opt());
        unsafe {
            exported.set_null_count(-1);
        }
        assert_eq!(None, exported.null_count_opt());
        let imported = unsafe { from_ffi_and_data_type(exported, DataType::Int32) }.unwrap();
        assert_eq!(3, imported.null_count());

        // The same for a Null array, which is imported with a null count of 0.
        let source = ArrayData::new_null(&DataType::Null, 10);
        let mut exported = FFI_ArrowArray::new(&source);
        assert_eq!(10, exported.null_count());
        assert_eq!(Some(10), exported.null_count_opt());
        unsafe {
            exported.set_null_count(-1);
        }
        assert_eq!(None, exported.null_count_opt());
        let imported = unsafe { from_ffi_and_data_type(exported, DataType::Null) }.unwrap();
        assert_eq!(0, imported.null_count());
    }

    fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
        let values = vec![Some("a"), None, Some("aaa")];
        let source = GenericStringArray::<Offset>::from(values.clone());

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<GenericStringArray<Offset>>()
            .unwrap();

        let expected = GenericStringArray::<Offset>::from(values);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_string() -> Result<()> {
        test_generic_string::<i32>()
    }

    #[test]
    fn test_large_string() -> Result<()> {
        test_generic_string::<i64>()
    }

    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
        // Child values shared by all three lists.
        let values = ArrayData::builder(DataType::Int32)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Offsets describing [[0, 1, 2], [3, 4, 5], [6, 7]].
        let offsets: Buffer = [0_usize, 3, 6, 8]
            .iter()
            .map(|&i| Offset::from_usize(i).unwrap())
            .collect();

        let list_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
            Field::new_list_field(DataType::Int32, false),
        ));

        let list_data = ArrayData::builder(list_type)
            .len(3)
            .add_buffer(offsets)
            .add_child_data(values)
            .build()
            .unwrap();

        let source = GenericListArray::<Offset>::from(list_data.clone());

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<GenericListArray<Offset>>()
            .unwrap();

        let expected = GenericListArray::<Offset>::from(list_data);
        for i in 0..3 {
            assert_eq!(&imported.value(i), &expected.value(i));
        }

        Ok(())
    }

    #[test]
    fn test_list() -> Result<()> {
        test_generic_list::<i32>()
    }

    #[test]
    fn test_large_list() -> Result<()> {
        test_generic_list::<i64>()
    }

    fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
        // Child values shared by all three list views.
        let values = ArrayData::builder(DataType::Int16)
            .len(8)
            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
            .build()
            .unwrap();

        // Offsets and sizes describing [[0, 1, 2], [3, 4, 5], [6, 7]].
        let offsets: Buffer = [0_usize, 3, 6]
            .iter()
            .map(|&i| Offset::from_usize(i).unwrap())
            .collect();

        let sizes: Buffer = [3_usize, 3, 2]
            .iter()
            .map(|&i| Offset::from_usize(i).unwrap())
            .collect();

        let list_view_type = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
            Field::new_list_field(DataType::Int16, false),
        ));

        let list_data = ArrayData::builder(list_view_type)
            .len(3)
            .add_buffer(offsets)
            .add_buffer(sizes)
            .add_child_data(values)
            .build()
            .unwrap();

        let source = GenericListViewArray::<Offset>::from(list_data.clone());

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<GenericListViewArray<Offset>>()
            .unwrap();

        for i in 0..3 {
            assert_eq!(&imported.value(i), &source.value(i));
        }

        Ok(())
    }

    #[test]
    fn test_list_view() -> Result<()> {
        test_generic_list_view::<i32>()
    }

    #[test]
    fn test_large_list_view() -> Result<()> {
        test_generic_list_view::<i64>()
    }

    fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
        let values: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
        let source = GenericBinaryArray::<Offset>::from(values.clone());

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<GenericBinaryArray<Offset>>()
            .unwrap();

        let expected = GenericBinaryArray::<Offset>::from(values);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_binary() -> Result<()> {
        test_generic_binary::<i32>()
    }

    #[test]
    fn test_large_binary() -> Result<()> {
        test_generic_binary::<i64>()
    }

    #[test]
    fn test_bool() -> Result<()> {
        let source = BooleanArray::from(vec![None, Some(true), Some(false)]);

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported.as_any().downcast_ref::<BooleanArray>().unwrap();

        let expected = BooleanArray::from(vec![None, Some(true), Some(false)]);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_time32() -> Result<()> {
        let source = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<Time32MillisecondArray>()
            .unwrap();

        let expected = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_timestamp() -> Result<()> {
        let source = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<TimestampMillisecondArray>()
            .unwrap();

        let expected = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_fixed_size_binary_array() -> Result<()> {
        let values = vec![
            None,
            Some(vec![10, 10, 10]),
            None,
            Some(vec![20, 20, 20]),
            Some(vec![30, 30, 30]),
            None,
        ];
        let expected =
            FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.clone().into_iter(), 3)?;
        let source = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<FixedSizeBinaryArray>()
            .unwrap();

        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_fixed_size_list_array() -> Result<()> {
        // Only the last of the three lists is valid.
        let mut validity_bits: [u8; 1] = [0; 1];
        bit_util::set_bit(&mut validity_bits, 2);

        let v: Vec<i32> = (0..9).collect();
        let value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref(&v))
            .build()?;

        let list_data_type =
            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
        let list_data = ArrayData::builder(list_data_type.clone())
            .len(3)
            .null_bit_buffer(Some(Buffer::from(validity_bits)))
            .add_child_data(value_data)
            .build()?;

        let imported = make_array(ffi_round_trip(&list_data)?);
        let imported = imported
            .as_any()
            .downcast_ref::<FixedSizeListArray>()
            .unwrap();

        // Build the expected array from scratch.
        let mut expected_validity_bits: [u8; 1] = [0; 1];
        bit_util::set_bit(&mut expected_validity_bits, 2);
        bit_util::set_bit(&mut expected_validity_bits, 5);

        let w = v.clone();

        let expected_value_data = ArrayData::builder(DataType::Int32)
            .len(9)
            .add_buffer(Buffer::from_slice_ref(&w))
            .build()?;

        let expected_list_data = ArrayData::builder(list_data_type)
            .len(3)
            .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
            .add_child_data(expected_value_data)
            .build()?;
        let expected = FixedSizeListArray::from(expected_list_data);

        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_dictionary() -> Result<()> {
        let source: DictionaryArray<Int8Type> = vec!["a", "aaa", "aaa"].into_iter().collect();

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<DictionaryArray<Int8Type>>()
            .unwrap();

        let expected: DictionaryArray<Int8Type> = vec!["a", "aaa", "aaa"].into_iter().collect();
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    #[allow(deprecated)]
    fn test_export_array_into_raw() -> Result<()> {
        let source = make_array(Int32Array::from(vec![1, 2, 3]).into_data());

        // Empty structures that the export overwrites in place.
        let mut out_array = FFI_ArrowArray::empty();
        let mut out_schema = FFI_ArrowSchema::empty();

        unsafe {
            export_array_into_raw(
                source,
                std::ptr::addr_of_mut!(out_array),
                std::ptr::addr_of_mut!(out_schema),
            )?;
        }

        let imported = make_array(unsafe { from_ffi(out_array, &out_schema) }?);
        let imported = imported.as_any().downcast_ref::<Int32Array>().unwrap();

        assert_eq!(imported, &Int32Array::from(vec![1, 2, 3]));
        Ok(())
    }

    #[test]
    fn test_duration() -> Result<()> {
        let source = DurationSecondArray::from(vec![None, Some(1), Some(2)]);

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported
            .as_any()
            .downcast_ref::<DurationSecondArray>()
            .unwrap();

        let expected = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
        assert_eq!(imported, &expected);

        Ok(())
    }

    #[test]
    fn test_map_array() -> Result<()> {
        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

        // Three map entries covering keys[0..3], keys[3..6] and keys[6..8].
        let entry_offsets = [0, 3, 6, 8];

        let source =
            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
                .unwrap();

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported.as_any().downcast_ref::<MapArray>().unwrap();

        assert_eq!(imported, &source);

        Ok(())
    }

    #[test]
    fn test_struct_array() -> Result<()> {
        let metadata: HashMap<String, String> =
            HashMap::from([("Hello".to_string(), "World! 😊".to_string())]);
        let field = Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata));
        let column = Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>;
        let source = StructArray::from(vec![(field, column)]);

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported.as_any().downcast_ref::<StructArray>().unwrap();

        assert_eq!(imported.data_type(), source.data_type());
        assert_eq!(imported, &source);

        Ok(())
    }

    #[test]
    fn test_union_sparse_array() -> Result<()> {
        let source = build_union(UnionBuilder::new_sparse());

        let imported = make_array(ffi_round_trip(&source.to_data())?);
        let imported = imported.as_any().downcast_ref::<UnionArray>().unwrap();

        // Sparse unions carry no offsets.
        assert_union_contents(imported, false);

        Ok(())
    }

    #[test]
    fn test_union_dense_array() -> Result<()> {
        let source = build_union(UnionBuilder::new_dense());

        let imported = UnionArray::from(ffi_round_trip(&source.to_data())?);

        // Dense unions carry offsets.
        assert_union_contents(&imported, true);

        Ok(())
    }

    #[test]
    fn test_run_array() -> Result<()> {
        let value_data =
            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);

        let run_ends_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
        let run_ends_data =
            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());

        let source = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();

        assert_run_array_round_trips(&source)
    }

    #[test]
    fn test_nullable_run_array() -> Result<()> {
        let nulls = NullBuffer::from(vec![true, false, true, true, false]);
        let value_data =
            PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(nulls));

        let run_ends_values = [5_i32, 6, 7, 8, 10];
        let run_ends_data =
            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());

        let source = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();

        assert_run_array_round_trips(&source)
    }
}
1390
1391#[cfg(test)]
1392mod tests_from_ffi {
1393 #[cfg(not(feature = "force_validate"))]
1394 use std::ptr::NonNull;
1395 use std::sync::Arc;
1396
1397 use arrow_buffer::NullBuffer;
1398 #[cfg(not(feature = "force_validate"))]
1399 use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
1400 #[cfg(feature = "force_validate")]
1401 use arrow_buffer::{bit_util, buffer::Buffer};
1402
1403 use arrow_data::ArrayData;
1404 use arrow_data::transform::MutableArrayData;
1405 use arrow_schema::{DataType, Field};
1406
1407 use super::Result;
1408
1409 use crate::builder::GenericByteViewBuilder;
1410 use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1411 use crate::{
1412 ArrayRef, GenericByteViewArray, ListArray,
1413 array::{
1414 Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1415 Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1416 },
1417 ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi},
1418 make_array,
1419 };
1420
1421 fn test_round_trip(expected: &ArrayData) -> Result<()> {
1422 let array = FFI_ArrowArray::new(expected);
1424 let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1425
1426 let result = &unsafe { from_ffi(array, &schema) }?;
1428
1429 assert_eq!(result, expected);
1430 Ok(())
1431 }
1432
/// Round-trips a nullable `UInt32` array.
#[test]
fn test_u32() -> Result<()> {
    test_round_trip(&UInt32Array::from(vec![Some(2), None, Some(1), None]).into_data())
}
1439
/// Round-trips a nullable `UInt64` array.
#[test]
fn test_u64() -> Result<()> {
    test_round_trip(&UInt64Array::from(vec![Some(2), None, Some(1), None]).into_data())
}
1446
/// Round-trips a nullable `Int64` array.
#[test]
fn test_i64() -> Result<()> {
    test_round_trip(&Int64Array::from(vec![Some(2), None, Some(1), None]).into_data())
}
1453
/// Round-trips a struct array that itself contains a nested struct column
/// (`a`) next to a boolean column (`b`) and a `UInt32` column (`c`).
#[test]
fn test_struct() -> Result<()> {
    // Nested struct: { a1: Boolean, a2: UInt32 }
    let a1: ArrayRef = Arc::new(BooleanArray::from(vec![true, true, false, false]));
    let a2: ArrayRef = Arc::new(UInt32Array::from(vec![1, 2, 3, 4]));
    let inner = StructArray::from(vec![
        (Arc::new(Field::new("a1", DataType::Boolean, false)), a1),
        (Arc::new(Field::new("a2", DataType::UInt32, false)), a2),
    ]);

    // Outer struct: { a: <inner>, b: Boolean, c: UInt32 }
    // The field for `a` must be built before `inner` is moved into its Arc.
    let a_field = Arc::new(Field::new("a", inner.data_type().clone(), false));
    let a: ArrayRef = Arc::new(inner);
    let b: ArrayRef = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let c: ArrayRef = Arc::new(UInt32Array::from(vec![42, 28, 19, 31]));
    let outer = StructArray::from(vec![
        (a_field, a),
        (Arc::new(Field::new("b", DataType::Boolean, false)), b),
        (Arc::new(Field::new("c", DataType::UInt32, false)), c),
    ]);

    test_round_trip(&outer.into_data())
}
1484
/// Round-trips an `Int32`-keyed dictionary array whose keys and string
/// values both contain nulls.
#[test]
fn test_dictionary() -> Result<()> {
    let dictionary_values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
    #[rustfmt::skip]
    let dictionary_keys = Int32Array::from(vec![
        Some(0), Some(1), None, Some(1), Some(1),
        None, Some(1), Some(2), Some(1), None,
    ]);

    let dictionary = DictionaryArray::new(dictionary_keys, Arc::new(dictionary_values));
    test_round_trip(&dictionary.into_data())
}
1505
/// Round-trips a fixed-size binary array of three 3-byte values without nulls.
#[test]
fn test_fixed_size_binary() -> Result<()> {
    let rows = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
    let exported = FixedSizeBinaryArray::try_from_iter(rows.into_iter())?.into_data();
    test_round_trip(&exported)
}
1514
/// Round-trips a fixed-size binary array (width 3) in which null slots are
/// interleaved with valid values.
#[test]
fn test_fixed_size_binary_with_nulls() -> Result<()> {
    let rows = vec![
        None,
        Some(vec![10, 10, 10]),
        None,
        Some(vec![20, 20, 20]),
        Some(vec![30, 30, 30]),
        None,
    ];
    let exported =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(rows.into_iter(), 3)?.into_data();
    test_round_trip(&exported)
}
1530
/// Round-trips a `FixedSizeList<Int64, 3>` array of three lists backed by
/// the child values `0..9`.
#[test]
fn test_fixed_size_list() -> Result<()> {
    // Child: nine Int64 values.
    let child_values = (0..9).collect::<Vec<i64>>();
    let child = ArrayData::builder(DataType::Int64)
        .len(9)
        .add_buffer(Buffer::from_slice_ref(child_values))
        .build()?;

    // Parent: three lists of three elements each.
    let item_field = Arc::new(Field::new("f", DataType::Int64, false));
    let list = ArrayData::builder(DataType::FixedSizeList(item_field, 3))
        .len(3)
        .add_child_data(child)
        .build()?;

    test_round_trip(&FixedSizeListArray::from(list).into_data())
}
1549
/// Round-trips a `FixedSizeList<Int16, 2>` array of eight lists where only
/// the slots whose validity bit is set (1, 2 and 6) are non-null.
#[test]
fn test_fixed_size_list_with_nulls() -> Result<()> {
    // One validity byte covers all eight list slots; set bits 1, 2 and 6.
    let mut validity = [0_u8; 1];
    for slot in [1, 2, 6] {
        bit_util::set_bit(&mut validity, slot);
    }

    // Child: sixteen Int16 values (two per list slot).
    let child_values = (0..16).collect::<Vec<i16>>();
    let child = ArrayData::builder(DataType::Int16)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(child_values))
        .build()?;

    let item_field = Arc::new(Field::new("f", DataType::Int16, false));
    let list = ArrayData::builder(DataType::FixedSizeList(item_field, 2))
        .len(8)
        .null_bit_buffer(Some(Buffer::from(validity)))
        .add_child_data(child)
        .build()?;

    test_round_trip(&FixedSizeListArray::from(list).into_data())
}
1575
/// Round-trips a `FixedSizeList<List<Int32>, 2>` array: four fixed-size
/// slots, each holding two variable-size lists of two `Int32` values, with
/// only validity bit 2 set on the outer array.
#[test]
fn test_fixed_size_list_nested() -> Result<()> {
    // Innermost child: sixteen Int32 values.
    let leaf_values = (0..16).collect::<Vec<i32>>();
    let leaf = ArrayData::builder(DataType::Int32)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(leaf_values))
        .build()?;

    // Middle level: eight lists of two values each (offsets 0, 2, ..., 16).
    let list_offsets = (0..=16).step_by(2).collect::<Vec<i32>>();
    let inner_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let inner_lists = ArrayData::builder(inner_type.clone())
        .len(8)
        .add_buffer(Buffer::from_slice_ref(list_offsets))
        .add_child_data(leaf)
        .build()?;

    // Outer level: four fixed-size slots; only bit 2 of the validity byte is set.
    let mut validity = [0_u8; 1];
    bit_util::set_bit(&mut validity, 2);

    let outer_type = DataType::FixedSizeList(Arc::new(Field::new("f", inner_type, false)), 2);
    let outer = ArrayData::builder(outer_type)
        .len(4)
        .null_bit_buffer(Some(Buffer::from(validity)))
        .add_child_data(inner_lists)
        .build()?;

    test_round_trip(&FixedSizeListArray::from(outer).into_data())
}
1611
/// Round-trips a `ListView<Int16>` array of three views over eight child
/// values, described by separate offsets (`0, 3, 6`) and sizes (`3, 3, 2`)
/// buffers.
///
/// Construction errors are propagated with `?`, matching the other tests in
/// this module that return `Result<()>`.
#[test]
fn test_list_view() -> Result<()> {
    // Child values 0..=7.
    let value_data = ArrayData::builder(DataType::Int16)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
        .build()?;

    // View i covers `sizes[i]` child values starting at `offsets[i]`:
    // [0, 1, 2], [3, 4, 5], [6, 7].
    let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
    let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);

    let list_view_dt =
        DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, false)));

    let list_view_data = ArrayData::builder(list_view_dt)
        .len(3)
        .add_buffer(value_offsets)
        .add_buffer(sizes_buffer)
        .add_child_data(value_data)
        .build()?;

    test_round_trip(&list_view_data)
}
1640
/// Round-trips a `ListView<Int16>` array of four views in which the last
/// slot is null (offset 8, size 0).
///
/// Construction errors are propagated with `?`, matching the other tests in
/// this module that return `Result<()>`.
#[test]
fn test_list_view_with_nulls() -> Result<()> {
    // Child values 0..=7.
    let value_data = ArrayData::builder(DataType::Int16)
        .len(8)
        .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
        .build()?;

    // View i covers `sizes[i]` child values starting at `offsets[i]`:
    // [0, 1, 2], [3, 4, 5], [6, 7], and an empty view for the null slot.
    let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
    let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);

    let list_view_dt =
        DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, true)));

    let list_view_data = ArrayData::builder(list_view_dt)
        .len(4)
        .add_buffer(value_offsets)
        .add_buffer(sizes_buffer)
        .add_child_data(value_data)
        .nulls(Some(NullBuffer::from(vec![true, true, true, false])))
        .build()?;

    test_round_trip(&list_view_data)
}
1670
/// Checks the import of a zero-length string array whose only offset is
/// non-zero (123) while its values buffer is empty: the computed offsets
/// buffer length must be 4 bytes, the data buffer length 0 bytes, and the
/// consumed array must still round-trip.
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_empty_string_with_non_zero_offset() -> Result<()> {
    use super::ImportedArrowArray;
    use arrow_buffer::{MutableBuffer, OffsetBuffer};

    // Empty values buffer and a single, non-zero offset.
    let data: Buffer = MutableBuffer::new(0).into();
    let offsets = OffsetBuffer::new(vec![123].into());

    // The offset points past the (empty) values buffer, so the unchecked
    // constructor is used. Both buffers are moved in directly: neither is
    // needed afterwards, so no clone is required.
    // NOTE(review): the array has zero elements, so presumably no value bytes
    // are ever read — confirm against `new_unchecked`'s safety contract.
    let string_array = unsafe { StringArray::new_unchecked(offsets, data, None) };

    let data = string_array.into_data();

    let array = FFI_ArrowArray::new(&data);
    let schema = FFI_ArrowSchema::try_from(data.data_type())?;

    let dt = DataType::try_from(&schema)?;
    let array = Arc::new(array);
    let imported_array = ImportedArrowArray {
        array: &array,
        data_type: dt,
        owner: &array,
    };

    // Buffer 1 is the offsets buffer, buffer 2 the values buffer.
    let offset_buf_len = imported_array.buffer_len(1, &[], &imported_array.data_type)?;
    let data_buf_len = imported_array.buffer_len(2, &[], &imported_array.data_type)?;

    assert_eq!(offset_buf_len, 4);
    assert_eq!(data_buf_len, 0);

    test_round_trip(&imported_array.consume()?)
}
1704
1705 fn roundtrip_string_array(array: StringArray) -> StringArray {
1706 let data = array.into_data();
1707
1708 let array = FFI_ArrowArray::new(&data);
1709 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1710
1711 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1712 StringArray::from(array)
1713 }
1714
1715 fn roundtrip_byte_view_array<T: ByteViewType>(
1716 array: GenericByteViewArray<T>,
1717 ) -> GenericByteViewArray<T> {
1718 let data = array.into_data();
1719
1720 let array = FFI_ArrowArray::new(&data);
1721 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1722
1723 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1724 GenericByteViewArray::<T>::from(array)
1725 }
1726
1727 fn extend_array(array: &dyn Array) -> ArrayRef {
1728 let len = array.len();
1729 let data = array.to_data();
1730
1731 let mut mutable = MutableArrayData::new(vec![&data], false, len);
1732 mutable.extend(0, 0, len);
1733 make_array(mutable.freeze())
1734 }
1735
/// Imports a 1000-element string array (and then its second half as a
/// slice) through FFI and verifies that the imported arrays expose the
/// expected values and can be copied with `extend_array`.
#[test]
fn test_extend_imported_string_slice() {
    let strings: Vec<String> = (0..1000).map(|i| format!("string: {i}")).collect();
    let string_array = StringArray::from(strings);

    // Whole array.
    let imported_full = roundtrip_string_array(string_array.clone());
    assert_eq!(imported_full.len(), 1000);
    assert_eq!(imported_full.value(0), "string: 0");
    assert_eq!(imported_full.value(499), "string: 499");

    let copied_full = extend_array(&imported_full);
    assert_eq!(
        copied_full.as_any().downcast_ref::<StringArray>().unwrap(),
        &imported_full
    );

    // Second half only: elements 500..1000.
    let imported_half = roundtrip_string_array(string_array.slice(500, 500));
    assert_eq!(imported_half.len(), 500);
    assert_eq!(imported_half.value(0), "string: 500");
    assert_eq!(imported_half.value(499), "string: 999");

    let copied_half = extend_array(&imported_half);
    assert_eq!(
        copied_half.as_any().downcast_ref::<StringArray>().unwrap(),
        &imported_half
    );
}
1770
1771 fn roundtrip_list_array(array: ListArray) -> ListArray {
1772 let data = array.into_data();
1773
1774 let array = FFI_ArrowArray::new(&data);
1775 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1776
1777 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1778 ListArray::from(array)
1779 }
1780
/// Imports the second half of a 1000-element list array (100 `Int32` values
/// per list) through FFI and verifies that it equals the original slice and
/// can be copied with `extend_array`.
#[test]
fn test_extend_imported_list_slice() {
    // List `i` holds the values `i * 1000 + 0 ..= i * 1000 + 99`.
    let rows: Vec<Option<Vec<Option<i32>>>> = (0..1000)
        .map(|i| Some((0..100).map(|j| Some(i * 1000 + j)).collect()))
        .collect();
    let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    let second_half = list_array.slice(500, 500);
    let imported = roundtrip_list_array(second_half.clone());
    assert_eq!(imported.len(), 500);
    assert_eq!(&second_half, &imported);

    let copied = extend_array(&imported);
    assert_eq!(
        copied.as_any().downcast_ref::<ListArray>().unwrap(),
        &imported
    );
}
1806
/// Test helper that views a `&str` as a borrowed value of the implementing
/// unsized type, so generic tests can build inputs from string literals.
trait NativeFromStr {
    /// Borrows `value` as `&Self` without copying.
    fn from_str(value: &str) -> &Self;
}

/// A string slice is returned unchanged.
impl NativeFromStr for str {
    fn from_str(text: &str) -> &str {
        text
    }
}

/// A byte slice is the UTF-8 encoding of the string.
impl NativeFromStr for [u8] {
    fn from_str(text: &str) -> &[u8] {
        str::as_bytes(text)
    }
}
1824
/// Round-trips an empty `Utf8View` array whose views buffer is a zero-length
/// custom allocation backed by a dangling pointer, and checks that the
/// imported array is empty and equal to a builder-produced empty array.
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_utf8_view_ffi_from_dangling_pointer() {
    let empty = GenericByteViewBuilder::<StringViewType>::new().finish();

    // Zero-length views buffer: dangling pointer, length 0, dummy owner.
    let owner = Arc::new(1);
    let dangling =
        unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, owner) };
    let views = unsafe { ScalarBuffer::new_unchecked(dangling) };

    // Reuse the data buffers and null buffer of the builder-produced array.
    let str_view: GenericByteViewArray<StringViewType> = unsafe {
        GenericByteViewArray::new_unchecked(
            views,
            empty.data_buffers().to_vec(),
            empty.nulls().cloned(),
        )
    };

    let imported = roundtrip_byte_view_array(str_view);
    assert_eq!(imported.len(), 0);
    assert_eq!(&imported, &empty);
}
1843
/// Round-trips byte-view arrays through FFI for both `Utf8View` and
/// `BinaryView`, covering an empty array, an all-inlined array, and arrays
/// that reference one and two variadic data buffers.
#[test]
fn test_round_trip_byte_view() {
    /// Imports `array` through FFI, checks that the length is preserved and
    /// that the imported array can be copied with `extend_array`.
    fn check_round_trip<V: ByteViewType>(array: GenericByteViewArray<V>) {
        let expected_len = array.len();

        let imported = roundtrip_byte_view_array(array);
        assert_eq!(imported.len(), expected_len);

        let copied = extend_array(&imported);
        assert_eq!(
            copied
                .as_any()
                .downcast_ref::<GenericByteViewArray<V>>()
                .unwrap(),
            &imported
        );
    }

    /// Runs all scenarios for one byte-view type.
    fn test_case<T>()
    where
        T: ByteViewType,
        T::Native: NativeFromStr,
    {
        // No views at all.
        check_round_trip(GenericByteViewBuilder::<T>::new().finish());

        // Only short values: everything is inlined, no data buffer.
        let mut inlined_builder = GenericByteViewBuilder::<T>::new();
        for short in ["inlined1", "inlined2", "inlined3"] {
            inlined_builder.append_value(T::Native::from_str(short));
        }
        let all_inlined = inlined_builder.finish();
        assert_eq!(all_inlined.data_buffers().len(), 0);
        check_round_trip(all_inlined);

        // One inlined value plus one view into a single data buffer.
        let mut one_block_builder = GenericByteViewBuilder::<T>::new();
        one_block_builder.append_value(T::Native::from_str("inlined"));
        let first_block =
            one_block_builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
        one_block_builder
            .try_append_view(first_block, 0, 25)
            .unwrap();
        let mixed_one_variadic = one_block_builder.finish();
        assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
        check_round_trip(mixed_one_variadic);

        // One inlined value plus views into two separate data buffers.
        let mut two_block_builder = GenericByteViewBuilder::<T>::new();
        two_block_builder.append_value(T::Native::from_str("inlined"));
        let first_block =
            two_block_builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
        two_block_builder
            .try_append_view(first_block, 0, 25)
            .unwrap();
        let second_block = two_block_builder
            .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
        two_block_builder
            .try_append_view(second_block, 0, 33)
            .unwrap();
        let mixed_two_variadic = two_block_builder.finish();
        assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
        check_round_trip(mixed_two_variadic);
    }

    test_case::<StringViewType>();
    test_case::<BinaryViewType>();
}
1914}