1use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{Buffer, MutableBuffer, bit_util};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{ArrayData, layout};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116#[deprecated(
125 since = "52.0.0",
126 note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129 src: ArrayRef,
130 out_array: *mut FFI_ArrowArray,
131 out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133 let data = src.to_data();
134 let array = FFI_ArrowArray::new(&data);
135 let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137 unsafe { std::ptr::write_unaligned(out_array, array) };
138 unsafe { std::ptr::write_unaligned(out_schema, schema) };
139
140 Ok(())
141}
142
143fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146 if let Some(primitive) = data_type.primitive_width() {
147 return match i {
148 0 => Err(ArrowError::CDataInterface(format!(
149 "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150 ))),
151 1 => Ok(primitive * 8),
152 i => Err(ArrowError::CDataInterface(format!(
153 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154 ))),
155 };
156 }
157
158 Ok(match (data_type, i) {
159 (DataType::Boolean, 1) => 1,
160 (DataType::Boolean, _) => {
161 return Err(ArrowError::CDataInterface(format!(
162 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163 )));
164 }
165 (DataType::FixedSizeBinary(num_bytes), 1) => {
166 TryInto::<usize>::try_into(*num_bytes).map_err(|_| {
167 ArrowError::InvalidArgumentError(format!(
168 "cannot determine bit_width for FixedSizeBinary({num_bytes})"
169 ))
170 })? * u8::BITS as usize
171 }
172 (DataType::FixedSizeList(f, num_elems), 1) => {
173 let child_bit_width = bit_width(f.data_type(), 1)?;
174 child_bit_width * (*num_elems as usize)
175 }
176 (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
177 return Err(ArrowError::CDataInterface(format!(
178 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
179 )));
180 }
181 (DataType::Utf8, 1)
185 | (DataType::Binary, 1)
186 | (DataType::List(_), 1)
187 | (DataType::Map(_, _), 1) => i32::BITS as _,
188 (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
189 (DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
191 (DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
193 (DataType::List(_), _) | (DataType::Map(_, _), _) => {
194 return Err(ArrowError::CDataInterface(format!(
195 "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
196 )));
197 }
198 (DataType::Utf8, _) | (DataType::Binary, _) => {
199 return Err(ArrowError::CDataInterface(format!(
200 "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
201 )));
202 }
203 (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => {
206 i64::BITS as _
207 }
208 (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => {
209 u8::BITS as _
210 }
211 (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => {
212 return Err(ArrowError::CDataInterface(format!(
213 "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
214 )));
215 }
216 (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _,
220 (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _,
221 (DataType::Union(_, _), 0) => i8::BITS as _,
223 (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
225 (DataType::Union(_, UnionMode::Sparse), _) => {
226 return Err(ArrowError::CDataInterface(format!(
227 "The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
228 )));
229 }
230 (DataType::Union(_, UnionMode::Dense), _) => {
231 return Err(ArrowError::CDataInterface(format!(
232 "The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
233 )));
234 }
235 (_, 0) => {
236 return Err(ArrowError::CDataInterface(format!(
239 "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
240 )));
241 }
242 _ => {
243 return Err(ArrowError::CDataInterface(format!(
244 "The datatype \"{data_type}\" is still not supported in Rust implementation"
245 )));
246 }
247 })
248}
249
250unsafe fn create_buffer(
258 owner: Arc<FFI_ArrowArray>,
259 array: &FFI_ArrowArray,
260 index: usize,
261 len: usize,
262) -> Option<Buffer> {
263 if array.num_buffers() == 0 {
264 return None;
265 }
266 NonNull::new(array.buffer(index) as _)
267 .map(|ptr| unsafe { Buffer::from_custom_allocation(ptr, len, owner) })
268}
269
270pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
272 let array = FFI_ArrowArray::new(data);
273 let schema = FFI_ArrowSchema::try_from(data.data_type())?;
274 Ok((array, schema))
275}
276
277pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
283 let dt = DataType::try_from(schema)?;
284 let array = Arc::new(array);
285 let tmp = ImportedArrowArray {
286 array: &array,
287 data_type: dt,
288 owner: &array,
289 };
290 let mut data = tmp.consume()?;
291 data.align_buffers();
297 Ok(data)
298}
299
300pub unsafe fn from_ffi_and_data_type(
306 array: FFI_ArrowArray,
307 data_type: DataType,
308) -> Result<ArrayData> {
309 let array = Arc::new(array);
310 let tmp = ImportedArrowArray {
311 array: &array,
312 data_type,
313 owner: &array,
314 };
315 let mut data = tmp.consume()?;
316 data.align_buffers();
322 Ok(data)
323}
324
/// One node of an FFI array that is in the process of being imported.
///
/// The root node, its children and its dictionary are each represented by a
/// value of this type; all of them share the same `owner`.
#[derive(Debug)]
struct ImportedArrowArray<'a> {
    /// The FFI array node whose buffers and children are read.
    array: &'a FFI_ArrowArray,
    /// The data type used to interpret `array`.
    data_type: DataType,
    /// The shared handle that is cloned into every `Buffer` created from
    /// this node.
    owner: &'a Arc<FFI_ArrowArray>,
}
331
332impl ImportedArrowArray<'_> {
333 fn consume(self) -> Result<ArrayData> {
334 let len = self.array.len();
335 let offset = self.array.offset();
336 let null_count = match &self.data_type {
337 DataType::Null => Some(0),
338 _ => self.array.null_count_opt(),
339 };
340
341 let data_layout = layout(&self.data_type);
342 let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
343
344 let null_bit_buffer = if data_layout.can_contain_null_mask {
345 self.null_bit_buffer()
346 } else {
347 None
348 };
349
350 let mut child_data = self.consume_children()?;
351
352 if let Some(d) = self.dictionary()? {
353 assert!(child_data.is_empty());
356 child_data.push(d.consume()?);
357 }
358
359 Ok(unsafe {
361 ArrayData::new_unchecked(
362 self.data_type,
363 len,
364 null_count,
365 null_bit_buffer,
366 offset,
367 buffers,
368 child_data,
369 )
370 })
371 }
372
373 fn consume_children(&self) -> Result<Vec<ArrayData>> {
374 match &self.data_type {
375 DataType::List(field)
376 | DataType::FixedSizeList(field, _)
377 | DataType::LargeList(field)
378 | DataType::ListView(field)
379 | DataType::LargeListView(field)
380 | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
381 DataType::Struct(fields) => {
382 assert!(fields.len() == self.array.num_children());
383 fields
384 .iter()
385 .enumerate()
386 .map(|(i, field)| self.consume_child(i, field.data_type()))
387 .collect::<Result<Vec<_>>>()
388 }
389 DataType::Union(union_fields, _) => {
390 assert!(union_fields.len() == self.array.num_children());
391 union_fields
392 .iter()
393 .enumerate()
394 .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
395 .collect::<Result<Vec<_>>>()
396 }
397 DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
398 self.consume_child(0, run_ends_field.data_type())?,
399 self.consume_child(1, values_field.data_type())?,
400 ]
401 .to_vec()),
402 _ => Ok(Vec::new()),
403 }
404 }
405
406 fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
407 ImportedArrowArray {
408 array: self.array.child(index),
409 data_type: child_type.clone(),
410 owner: self.owner,
411 }
412 .consume()
413 }
414
415 fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
418 let buffer_begin = can_contain_null_mask as usize;
420 let buffer_end = self.array.num_buffers() - usize::from(variadic);
421
422 let variadic_buffer_lens = if variadic {
423 let num_variadic_buffers =
426 self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
427 if num_variadic_buffers == 0 {
428 &[]
429 } else {
430 let lengths = self.array.buffer(self.array.num_buffers() - 1);
431 unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
433 }
434 } else {
435 &[]
436 };
437
438 (buffer_begin..buffer_end)
439 .map(|index| {
440 let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
441 match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
442 Some(buf) => {
443 if buf.is_empty() {
448 Ok(MutableBuffer::new(0).into())
449 } else {
450 Ok(buf)
451 }
452 }
453 None if len == 0 => {
454 Ok(MutableBuffer::new(0).into())
457 }
458 None => Err(ArrowError::CDataInterface(format!(
459 "The external buffer at position {index} is null."
460 ))),
461 }
462 })
463 .collect()
464 }
465
466 fn buffer_len(
471 &self,
472 i: usize,
473 variadic_buffer_lengths: &[i64],
474 dt: &DataType,
475 ) -> Result<usize> {
476 let data_type = match dt {
478 DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
479 dt => dt,
480 };
481
482 let length = self.array.len() + self.array.offset();
485
486 Ok(match (&data_type, i) {
488 (DataType::Utf8, 1)
489 | (DataType::LargeUtf8, 1)
490 | (DataType::Binary, 1)
491 | (DataType::LargeBinary, 1)
492 | (DataType::List(_), 1)
493 | (DataType::LargeList(_), 1)
494 | (DataType::Map(_, _), 1) => {
495 let bits = bit_width(data_type, i)?;
497 debug_assert_eq!(bits % 8, 0);
498 (length + 1) * (bits / 8)
499 }
500 (DataType::ListView(_), 1)
501 | (DataType::ListView(_), 2)
502 | (DataType::LargeListView(_), 1)
503 | (DataType::LargeListView(_), 2) => {
504 let bits = bit_width(data_type, i)?;
505 debug_assert_eq!(bits % 8, 0);
506 length * (bits / 8)
507 }
508 (DataType::Utf8, 2) | (DataType::Binary, 2) => {
509 if self.array.is_empty() {
510 return Ok(0);
511 }
512
513 let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
515 #[allow(clippy::cast_ptr_alignment)]
518 let offset_buffer = self.array.buffer(1) as *const i32;
519 (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
521 }
522 (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
523 if self.array.is_empty() {
524 return Ok(0);
525 }
526
527 let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
529 #[allow(clippy::cast_ptr_alignment)]
532 let offset_buffer = self.array.buffer(1) as *const i64;
533 (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
535 }
536 (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
541 std::mem::size_of::<u128>() * length
542 }
543 (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
544 variadic_buffer_lengths[i - 2] as usize
545 }
546 _ => {
548 let bits = bit_width(data_type, i)?;
549 bit_util::ceil(length * bits, 8)
550 }
551 })
552 }
553
554 fn null_bit_buffer(&self) -> Option<Buffer> {
558 let length = self.array.len() + self.array.offset();
562 let buffer_len = bit_util::ceil(length, 8);
563
564 unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
565 }
566
567 fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
568 match (self.array.dictionary(), &self.data_type) {
569 (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
570 array,
571 data_type: value_type.as_ref().clone(),
572 owner: self.owner,
573 })),
574 (Some(_), _) => Err(ArrowError::CDataInterface(
575 "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
576 )),
577 (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
578 "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
579 )),
580 (_, _) => Ok(None),
581 }
582 }
583}
584
585#[cfg(test)]
586mod tests_to_then_from_ffi {
587 use std::collections::HashMap;
588 use std::mem::ManuallyDrop;
589
590 use arrow_buffer::{ArrowNativeType, NullBuffer};
591 use arrow_schema::Field;
592
593 use crate::builder::UnionBuilder;
594 use crate::cast::AsArray;
595 use crate::types::{Float64Type, Int8Type, Int32Type};
596 use crate::*;
597
598 use super::*;
599
600 #[test]
601 fn test_round_trip() {
602 let array = Int32Array::from(vec![1, 2, 3]);
604
605 let (array, schema) = to_ffi(&array.into_data()).unwrap();
607
608 let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
610
611 assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
613 }
614
615 #[test]
616 fn test_import() {
617 let data = Int32Array::from(vec![1, 2, 3]).into_data();
621 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
622 let array = FFI_ArrowArray::new(&data);
623
624 let schema = Box::new(ManuallyDrop::new(schema));
626 let array = Box::new(ManuallyDrop::new(array));
627
628 let schema_ptr = &**schema as *const _;
629 let array_ptr = &**array as *const _;
630
631 let data =
635 unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };
636
637 let array = Int32Array::from(data);
638 assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
639 }
640
641 #[test]
642 fn test_round_trip_with_offset() -> Result<()> {
643 let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]);
645
646 let array = array.slice(1, 2);
647
648 let (array, schema) = to_ffi(&array.to_data())?;
650
651 let data = unsafe { from_ffi(array, &schema) }?;
653 let array = make_array(data);
654 let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
655
656 assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
657
658 Ok(())
660 }
661
662 #[test]
663 #[cfg(not(feature = "force_validate"))]
664 fn test_decimal_round_trip() -> Result<()> {
665 let original_array = [Some(12345_i128), Some(-12345_i128), None]
667 .into_iter()
668 .collect::<Decimal128Array>()
669 .with_precision_and_scale(6, 2)
670 .unwrap();
671
672 let (array, schema) = to_ffi(&original_array.to_data())?;
674
675 let data = unsafe { from_ffi(array, &schema) }?;
677 let array = make_array(data);
678
679 let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
681
682 assert_eq!(array, &original_array);
684
685 Ok(())
687 }
688 #[test]
691 #[cfg(not(feature = "force_validate"))]
692 fn test_decimal128_under_aligned_round_trip() -> Result<()> {
693 let aligned = Buffer::from_vec(vec![0_i128, 1_i128, 2_i128]);
697 let under_aligned = aligned.slice(8);
698 assert_eq!(under_aligned.as_ptr().align_offset(8), 0);
699 assert_ne!(under_aligned.as_ptr().align_offset(16), 0);
700
701 let data = unsafe {
704 ArrayData::builder(DataType::Decimal128(10, 2))
705 .len(2)
706 .add_buffer(under_aligned)
707 .build_unchecked()
708 };
709
710 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
711 let array = FFI_ArrowArray::new(&data);
712
713 let imported = unsafe { from_ffi(array, &schema) }?;
714 let array = Decimal128Array::from(imported);
715
716 assert_eq!(array.len(), 2);
719 assert_eq!(array.value(0), 1_i128 << 64);
720 assert_eq!(array.value(1), 2_i128 << 64);
721 Ok(())
722 }
723
724 #[test]
725 fn test_null_count_handling() {
726 let int32_data = ArrayData::builder(DataType::Int32)
727 .len(10)
728 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
729 .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
730 .build()
731 .unwrap();
732 let mut ffi_array = FFI_ArrowArray::new(&int32_data);
733 assert_eq!(3, ffi_array.null_count());
734 assert_eq!(Some(3), ffi_array.null_count_opt());
735 unsafe {
737 ffi_array.set_null_count(-1);
738 }
739 assert_eq!(None, ffi_array.null_count_opt());
740 let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap();
741 assert_eq!(3, int32_data.null_count());
742
743 let null_data = &ArrayData::new_null(&DataType::Null, 10);
744 let mut ffi_array = FFI_ArrowArray::new(null_data);
745 assert_eq!(10, ffi_array.null_count());
746 assert_eq!(Some(10), ffi_array.null_count_opt());
747 unsafe {
749 ffi_array.set_null_count(-1);
750 }
751 assert_eq!(None, ffi_array.null_count_opt());
752 let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap();
753 assert_eq!(0, null_data.null_count());
754 }
755
756 fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
757 let array = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
759
760 let (array, schema) = to_ffi(&array.to_data())?;
762
763 let data = unsafe { from_ffi(array, &schema) }?;
765 let array = make_array(data);
766
767 let array = array
769 .as_any()
770 .downcast_ref::<GenericStringArray<Offset>>()
771 .unwrap();
772
773 let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
775 assert_eq!(array, &expected);
776
777 Ok(())
779 }
780
    /// Runs the generic string round-trip with `i32` offsets.
    #[test]
    fn test_string() -> Result<()> {
        test_generic_string::<i32>()
    }
785
    /// Runs the generic string round-trip with `i64` offsets.
    #[test]
    fn test_large_string() -> Result<()> {
        test_generic_string::<i64>()
    }
790
791 fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
792 let value_data = ArrayData::builder(DataType::Int32)
794 .len(8)
795 .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
796 .build()
797 .unwrap();
798
799 let value_offsets = [0_usize, 3, 6, 8]
802 .iter()
803 .map(|i| Offset::from_usize(*i).unwrap())
804 .collect::<Buffer>();
805
806 let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
808 Field::new_list_field(DataType::Int32, false),
809 ));
810
811 let list_data = ArrayData::builder(list_data_type)
812 .len(3)
813 .add_buffer(value_offsets)
814 .add_child_data(value_data)
815 .build()
816 .unwrap();
817
818 let array = GenericListArray::<Offset>::from(list_data.clone());
820
821 let (array, schema) = to_ffi(&array.to_data())?;
823
824 let data = unsafe { from_ffi(array, &schema) }?;
826 let array = make_array(data);
827
828 let array = array
830 .as_any()
831 .downcast_ref::<GenericListArray<Offset>>()
832 .unwrap();
833
834 let expected = GenericListArray::<Offset>::from(list_data);
836 assert_eq!(&array.value(0), &expected.value(0));
837 assert_eq!(&array.value(1), &expected.value(1));
838 assert_eq!(&array.value(2), &expected.value(2));
839
840 Ok(())
842 }
843
    /// Runs the generic list round-trip with `i32` offsets.
    #[test]
    fn test_list() -> Result<()> {
        test_generic_list::<i32>()
    }
848
    /// Runs the generic list round-trip with `i64` offsets.
    #[test]
    fn test_large_list() -> Result<()> {
        test_generic_list::<i64>()
    }
853
854 fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
855 let value_data = ArrayData::builder(DataType::Int16)
857 .len(8)
858 .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
859 .build()
860 .unwrap();
861
862 let value_offsets = [0_usize, 3, 6]
865 .iter()
866 .map(|i| Offset::from_usize(*i).unwrap())
867 .collect::<Buffer>();
868
869 let sizes_buffer = [3_usize, 3, 2]
870 .iter()
871 .map(|i| Offset::from_usize(*i).unwrap())
872 .collect::<Buffer>();
873
874 let list_view_dt = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
876 Field::new_list_field(DataType::Int16, false),
877 ));
878
879 let list_data = ArrayData::builder(list_view_dt)
880 .len(3)
881 .add_buffer(value_offsets)
882 .add_buffer(sizes_buffer)
883 .add_child_data(value_data)
884 .build()
885 .unwrap();
886
887 let original = GenericListViewArray::<Offset>::from(list_data.clone());
888
889 let (array, schema) = to_ffi(&original.to_data())?;
891
892 let data = unsafe { from_ffi(array, &schema) }?;
894 let array = make_array(data);
895
896 let array = array
898 .as_any()
899 .downcast_ref::<GenericListViewArray<Offset>>()
900 .unwrap();
901
902 assert_eq!(&array.value(0), &original.value(0));
903 assert_eq!(&array.value(1), &original.value(1));
904 assert_eq!(&array.value(2), &original.value(2));
905
906 Ok(())
907 }
908
    /// Runs the generic list-view round-trip with `i32` offsets and sizes.
    #[test]
    fn test_list_view() -> Result<()> {
        test_generic_list_view::<i32>()
    }
913
    /// Runs the generic list-view round-trip with `i64` offsets and sizes.
    #[test]
    fn test_large_list_view() -> Result<()> {
        test_generic_list_view::<i64>()
    }
918
919 fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
920 let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
922 let array = GenericBinaryArray::<Offset>::from(array);
923
924 let (array, schema) = to_ffi(&array.to_data())?;
926
927 let data = unsafe { from_ffi(array, &schema) }?;
929 let array = make_array(data);
930 let array = array
931 .as_any()
932 .downcast_ref::<GenericBinaryArray<Offset>>()
933 .unwrap();
934
935 let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
937 let expected = GenericBinaryArray::<Offset>::from(expected);
938 assert_eq!(array, &expected);
939
940 Ok(())
942 }
943
    /// Runs the generic binary round-trip with `i32` offsets.
    #[test]
    fn test_binary() -> Result<()> {
        test_generic_binary::<i32>()
    }
948
    /// Runs the generic binary round-trip with `i64` offsets.
    #[test]
    fn test_large_binary() -> Result<()> {
        test_generic_binary::<i64>()
    }
953
954 #[test]
955 fn test_bool() -> Result<()> {
956 let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
958
959 let (array, schema) = to_ffi(&array.to_data())?;
961
962 let data = unsafe { from_ffi(array, &schema) }?;
964 let array = make_array(data);
965 let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
966
967 assert_eq!(
969 array,
970 &BooleanArray::from(vec![None, Some(true), Some(false)])
971 );
972
973 Ok(())
975 }
976
977 #[test]
978 fn test_time32() -> Result<()> {
979 let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
981
982 let (array, schema) = to_ffi(&array.to_data())?;
984
985 let data = unsafe { from_ffi(array, &schema) }?;
987 let array = make_array(data);
988 let array = array
989 .as_any()
990 .downcast_ref::<Time32MillisecondArray>()
991 .unwrap();
992
993 assert_eq!(
995 array,
996 &Time32MillisecondArray::from(vec![None, Some(1), Some(2)])
997 );
998
999 Ok(())
1001 }
1002
1003 #[test]
1004 fn test_timestamp() -> Result<()> {
1005 let array = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
1007
1008 let (array, schema) = to_ffi(&array.to_data())?;
1010
1011 let data = unsafe { from_ffi(array, &schema) }?;
1013 let array = make_array(data);
1014 let array = array
1015 .as_any()
1016 .downcast_ref::<TimestampMillisecondArray>()
1017 .unwrap();
1018
1019 assert_eq!(
1021 array,
1022 &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)])
1023 );
1024
1025 Ok(())
1027 }
1028
1029 #[test]
1030 fn test_fixed_size_binary_array() -> Result<()> {
1031 let values = vec![
1032 None,
1033 Some(vec![10, 10, 10]),
1034 None,
1035 Some(vec![20, 20, 20]),
1036 Some(vec![30, 30, 30]),
1037 None,
1038 ];
1039 let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
1040
1041 let (array, schema) = to_ffi(&array.to_data())?;
1043
1044 let data = unsafe { from_ffi(array, &schema) }?;
1046 let array = make_array(data);
1047 let array = array
1048 .as_any()
1049 .downcast_ref::<FixedSizeBinaryArray>()
1050 .unwrap();
1051
1052 assert_eq!(
1054 array,
1055 &FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1056 vec![
1057 None,
1058 Some(vec![10, 10, 10]),
1059 None,
1060 Some(vec![20, 20, 20]),
1061 Some(vec![30, 30, 30]),
1062 None,
1063 ]
1064 .into_iter(),
1065 3
1066 )?
1067 );
1068
1069 Ok(())
1071 }
1072
1073 #[test]
1074 fn test_fixed_size_list_array() -> Result<()> {
1075 let mut validity_bits: [u8; 1] = [0; 1];
1077 bit_util::set_bit(&mut validity_bits, 2);
1078
1079 let v: Vec<i32> = (0..9).collect();
1080 let value_data = ArrayData::builder(DataType::Int32)
1081 .len(9)
1082 .add_buffer(Buffer::from_slice_ref(&v))
1083 .build()?;
1084
1085 let list_data_type =
1086 DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
1087 let list_data = ArrayData::builder(list_data_type.clone())
1088 .len(3)
1089 .null_bit_buffer(Some(Buffer::from(validity_bits)))
1090 .add_child_data(value_data)
1091 .build()?;
1092
1093 let (array, schema) = to_ffi(&list_data)?;
1095
1096 let data = unsafe { from_ffi(array, &schema) }?;
1098 let array = make_array(data);
1099 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
1100
1101 let mut expected_validity_bits: [u8; 1] = [0; 1];
1103 bit_util::set_bit(&mut expected_validity_bits, 2);
1104 bit_util::set_bit(&mut expected_validity_bits, 5);
1105
1106 let mut w = vec![];
1107 w.extend_from_slice(&v);
1108
1109 let expected_value_data = ArrayData::builder(DataType::Int32)
1110 .len(9)
1111 .add_buffer(Buffer::from_slice_ref(&w))
1112 .build()?;
1113
1114 let expected_list_data = ArrayData::builder(list_data_type)
1115 .len(3)
1116 .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
1117 .add_child_data(expected_value_data)
1118 .build()?;
1119 let expected_array = FixedSizeListArray::from(expected_list_data);
1120
1121 assert_eq!(array, &expected_array);
1123
1124 Ok(())
1126 }
1127
1128 #[test]
1129 fn test_dictionary() -> Result<()> {
1130 let values = vec!["a", "aaa", "aaa"];
1132 let dict_array: DictionaryArray<Int8Type> = values.into_iter().collect();
1133
1134 let (array, schema) = to_ffi(&dict_array.to_data())?;
1136
1137 let data = unsafe { from_ffi(array, &schema) }?;
1139 let array = make_array(data);
1140 let actual = array
1141 .as_any()
1142 .downcast_ref::<DictionaryArray<Int8Type>>()
1143 .unwrap();
1144
1145 let new_values = vec!["a", "aaa", "aaa"];
1147 let expected: DictionaryArray<Int8Type> = new_values.into_iter().collect();
1148 assert_eq!(actual, &expected);
1149
1150 Ok(())
1152 }
1153
1154 #[test]
1155 #[allow(deprecated)]
1156 fn test_export_array_into_raw() -> Result<()> {
1157 let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
1158
1159 let mut out_array = FFI_ArrowArray::empty();
1161 let mut out_schema = FFI_ArrowSchema::empty();
1162
1163 {
1164 let out_array_ptr = std::ptr::addr_of_mut!(out_array);
1165 let out_schema_ptr = std::ptr::addr_of_mut!(out_schema);
1166 unsafe {
1167 export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1168 }
1169 }
1170
1171 let data = unsafe { from_ffi(out_array, &out_schema) }?;
1173 let array = make_array(data);
1174
1175 let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1177
1178 assert_eq!(array, &Int32Array::from(vec![1, 2, 3]));
1180 Ok(())
1181 }
1182
1183 #[test]
1184 fn test_duration() -> Result<()> {
1185 let array = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
1187
1188 let (array, schema) = to_ffi(&array.to_data())?;
1190
1191 let data = unsafe { from_ffi(array, &schema) }?;
1193 let array = make_array(data);
1194 let array = array
1195 .as_any()
1196 .downcast_ref::<DurationSecondArray>()
1197 .unwrap();
1198
1199 assert_eq!(
1201 array,
1202 &DurationSecondArray::from(vec![None, Some(1), Some(2)])
1203 );
1204
1205 Ok(())
1207 }
1208
1209 #[test]
1210 fn test_map_array() -> Result<()> {
1211 let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1212 let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1213
1214 let entry_offsets = [0, 3, 6, 8];
1217
1218 let map_array =
1219 MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1220 .unwrap();
1221
1222 let (array, schema) = to_ffi(&map_array.to_data())?;
1224
1225 let data = unsafe { from_ffi(array, &schema) }?;
1227 let array = make_array(data);
1228
1229 let array = array.as_any().downcast_ref::<MapArray>().unwrap();
1231 assert_eq!(array, &map_array);
1232
1233 Ok(())
1234 }
1235
1236 #[test]
1237 fn test_struct_array() -> Result<()> {
1238 let metadata: HashMap<String, String> =
1239 [("Hello".to_string(), "World! 😊".to_string())].into();
1240 let struct_array = StructArray::from(vec![(
1241 Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata)),
1242 Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>,
1243 )]);
1244
1245 let (array, schema) = to_ffi(&struct_array.to_data())?;
1247
1248 let data = unsafe { from_ffi(array, &schema) }?;
1250 let array = make_array(data);
1251
1252 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
1254 assert_eq!(array.data_type(), struct_array.data_type());
1255 assert_eq!(array, &struct_array);
1256
1257 Ok(())
1258 }
1259
1260 #[test]
1261 fn test_union_sparse_array() -> Result<()> {
1262 let mut builder = UnionBuilder::new_sparse();
1263 builder.append::<Int32Type>("a", 1).unwrap();
1264 builder.append_null::<Int32Type>("a").unwrap();
1265 builder.append::<Float64Type>("c", 3.0).unwrap();
1266 builder.append::<Int32Type>("a", 4).unwrap();
1267 let union = builder.build().unwrap();
1268
1269 let (array, schema) = to_ffi(&union.to_data())?;
1271
1272 let data = unsafe { from_ffi(array, &schema) }?;
1274 let array = make_array(data);
1275
1276 let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
1277
1278 let expected_type_ids = vec![0_i8, 0, 1, 0];
1279
1280 assert_eq!(*array.type_ids(), expected_type_ids);
1282 for (i, id) in expected_type_ids.iter().enumerate() {
1283 assert_eq!(id, &array.type_id(i));
1284 }
1285
1286 assert!(array.offsets().is_none());
1288
1289 for i in 0..array.len() {
1290 let slot = array.value(i);
1291 match i {
1292 0 => {
1293 let slot = slot.as_primitive::<Int32Type>();
1294 assert!(!slot.is_null(0));
1295 assert_eq!(slot.len(), 1);
1296 let value = slot.value(0);
1297 assert_eq!(1_i32, value);
1298 }
1299 1 => assert!(slot.is_null(0)),
1300 2 => {
1301 let slot = slot.as_primitive::<Float64Type>();
1302 assert!(!slot.is_null(0));
1303 assert_eq!(slot.len(), 1);
1304 let value = slot.value(0);
1305 assert_eq!(value, 3_f64);
1306 }
1307 3 => {
1308 let slot = slot.as_primitive::<Int32Type>();
1309 assert!(!slot.is_null(0));
1310 assert_eq!(slot.len(), 1);
1311 let value = slot.value(0);
1312 assert_eq!(4_i32, value);
1313 }
1314 _ => unreachable!(),
1315 }
1316 }
1317
1318 Ok(())
1319 }
1320
1321 #[test]
1322 fn test_union_dense_array() -> Result<()> {
1323 let mut builder = UnionBuilder::new_dense();
1324 builder.append::<Int32Type>("a", 1).unwrap();
1325 builder.append_null::<Int32Type>("a").unwrap();
1326 builder.append::<Float64Type>("c", 3.0).unwrap();
1327 builder.append::<Int32Type>("a", 4).unwrap();
1328 let union = builder.build().unwrap();
1329
1330 let (array, schema) = to_ffi(&union.to_data())?;
1332
1333 let data = unsafe { from_ffi(array, &schema) }?;
1335 let array = UnionArray::from(data);
1336
1337 let expected_type_ids = vec![0_i8, 0, 1, 0];
1338
1339 assert_eq!(*array.type_ids(), expected_type_ids);
1341 for (i, id) in expected_type_ids.iter().enumerate() {
1342 assert_eq!(id, &array.type_id(i));
1343 }
1344
1345 assert!(array.offsets().is_some());
1346
1347 for i in 0..array.len() {
1348 let slot = array.value(i);
1349 match i {
1350 0 => {
1351 let slot = slot.as_primitive::<Int32Type>();
1352 assert!(!slot.is_null(0));
1353 assert_eq!(slot.len(), 1);
1354 let value = slot.value(0);
1355 assert_eq!(1_i32, value);
1356 }
1357 1 => assert!(slot.is_null(0)),
1358 2 => {
1359 let slot = slot.as_primitive::<Float64Type>();
1360 assert!(!slot.is_null(0));
1361 assert_eq!(slot.len(), 1);
1362 let value = slot.value(0);
1363 assert_eq!(value, 3_f64);
1364 }
1365 3 => {
1366 let slot = slot.as_primitive::<Int32Type>();
1367 assert!(!slot.is_null(0));
1368 assert_eq!(slot.len(), 1);
1369 let value = slot.value(0);
1370 assert_eq!(4_i32, value);
1371 }
1372 _ => unreachable!(),
1373 }
1374 }
1375
1376 Ok(())
1377 }
1378
1379 #[test]
1380 fn test_run_array() -> Result<()> {
1381 let value_data =
1382 PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1383
1384 let run_ends_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
1386 let run_ends_data =
1387 PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1388
1389 let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1391
1392 let (array, schema) = to_ffi(&ree_array.to_data())?;
1394
1395 let data = unsafe { from_ffi(array, &schema) }?;
1397 let array = make_array(data);
1398
1399 let array = array
1401 .as_any()
1402 .downcast_ref::<RunArray<Int32Type>>()
1403 .unwrap();
1404 assert_eq!(array.data_type(), ree_array.data_type());
1405 assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1406 assert_eq!(array.values(), ree_array.values());
1407
1408 Ok(())
1409 }
1410
1411 #[test]
1412 fn test_nullable_run_array() -> Result<()> {
1413 let nulls = NullBuffer::from(vec![true, false, true, true, false]);
1414 let value_data =
1415 PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(nulls));
1416
1417 let run_ends_values = [5_i32, 6, 7, 8, 10];
1419 let run_ends_data =
1420 PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1421
1422 let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1424
1425 let (array, schema) = to_ffi(&ree_array.to_data())?;
1427
1428 let data = unsafe { from_ffi(array, &schema) }?;
1430 let array = make_array(data);
1431
1432 let array = array
1434 .as_any()
1435 .downcast_ref::<RunArray<Int32Type>>()
1436 .unwrap();
1437 assert_eq!(array.data_type(), ree_array.data_type());
1438 assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1439 assert_eq!(array.values(), ree_array.values());
1440
1441 Ok(())
1442 }
1443}
1444
1445#[cfg(test)]
1446mod tests_from_ffi {
1447 #[cfg(not(feature = "force_validate"))]
1448 use std::ptr::NonNull;
1449 use std::sync::Arc;
1450
1451 use arrow_buffer::NullBuffer;
1452 #[cfg(not(feature = "force_validate"))]
1453 use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
1454 #[cfg(feature = "force_validate")]
1455 use arrow_buffer::{bit_util, buffer::Buffer};
1456
1457 use arrow_data::ArrayData;
1458 use arrow_data::transform::MutableArrayData;
1459 use arrow_schema::{DataType, Field};
1460
1461 use super::Result;
1462
1463 use crate::builder::GenericByteViewBuilder;
1464 use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1465 use crate::{
1466 ArrayRef, GenericByteViewArray, ListArray,
1467 array::{
1468 Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1469 Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1470 },
1471 ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi},
1472 make_array,
1473 };
1474
1475 fn test_round_trip(expected: &ArrayData) -> Result<()> {
1476 let array = FFI_ArrowArray::new(expected);
1478 let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1479
1480 let result = &unsafe { from_ffi(array, &schema) }?;
1482
1483 assert_eq!(result, expected);
1484 Ok(())
1485 }
1486
1487 #[test]
1488 fn test_u32() -> Result<()> {
1489 let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
1490 let data = array.into_data();
1491 test_round_trip(&data)
1492 }
1493
1494 #[test]
1495 fn test_u64() -> Result<()> {
1496 let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
1497 let data = array.into_data();
1498 test_round_trip(&data)
1499 }
1500
1501 #[test]
1502 fn test_i64() -> Result<()> {
1503 let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
1504 let data = array.into_data();
1505 test_round_trip(&data)
1506 }
1507
1508 #[test]
1509 fn test_struct() -> Result<()> {
1510 let inner = StructArray::from(vec![
1511 (
1512 Arc::new(Field::new("a1", DataType::Boolean, false)),
1513 Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc<dyn Array>,
1514 ),
1515 (
1516 Arc::new(Field::new("a2", DataType::UInt32, false)),
1517 Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
1518 ),
1519 ]);
1520
1521 let array = StructArray::from(vec![
1522 (
1523 Arc::new(Field::new("a", inner.data_type().clone(), false)),
1524 Arc::new(inner) as Arc<dyn Array>,
1525 ),
1526 (
1527 Arc::new(Field::new("b", DataType::Boolean, false)),
1528 Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
1529 ),
1530 (
1531 Arc::new(Field::new("c", DataType::UInt32, false)),
1532 Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
1533 ),
1534 ]);
1535 let data = array.into_data();
1536 test_round_trip(&data)
1537 }
1538
1539 #[test]
1540 fn test_dictionary() -> Result<()> {
1541 let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
1542 let keys = Int32Array::from(vec![
1543 Some(0),
1544 Some(1),
1545 None,
1546 Some(1),
1547 Some(1),
1548 None,
1549 Some(1),
1550 Some(2),
1551 Some(1),
1552 None,
1553 ]);
1554 let array = DictionaryArray::new(keys, Arc::new(values));
1555
1556 let data = array.into_data();
1557 test_round_trip(&data)
1558 }
1559
1560 #[test]
1561 fn test_fixed_size_binary() -> Result<()> {
1562 let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
1563 let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
1564
1565 let data = array.into_data();
1566 test_round_trip(&data)
1567 }
1568
1569 #[test]
1570 fn test_fixed_size_binary_with_nulls() -> Result<()> {
1571 let values = vec![
1572 None,
1573 Some(vec![10, 10, 10]),
1574 None,
1575 Some(vec![20, 20, 20]),
1576 Some(vec![30, 30, 30]),
1577 None,
1578 ];
1579 let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
1580
1581 let data = array.into_data();
1582 test_round_trip(&data)
1583 }
1584
1585 #[test]
1586 fn test_fixed_size_list() -> Result<()> {
1587 let v: Vec<i64> = (0..9).collect();
1588 let value_data = ArrayData::builder(DataType::Int64)
1589 .len(9)
1590 .add_buffer(Buffer::from_slice_ref(v))
1591 .build()?;
1592 let list_data_type =
1593 DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3);
1594 let list_data = ArrayData::builder(list_data_type)
1595 .len(3)
1596 .add_child_data(value_data)
1597 .build()?;
1598 let array = FixedSizeListArray::from(list_data);
1599
1600 let data = array.into_data();
1601 test_round_trip(&data)
1602 }
1603
1604 #[test]
1605 fn test_fixed_size_list_with_nulls() -> Result<()> {
1606 let mut validity_bits: [u8; 1] = [0; 1];
1608 bit_util::set_bit(&mut validity_bits, 1);
1609 bit_util::set_bit(&mut validity_bits, 2);
1610 bit_util::set_bit(&mut validity_bits, 6);
1611
1612 let v: Vec<i16> = (0..16).collect();
1613 let value_data = ArrayData::builder(DataType::Int16)
1614 .len(16)
1615 .add_buffer(Buffer::from_slice_ref(v))
1616 .build()?;
1617 let list_data_type =
1618 DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2);
1619 let list_data = ArrayData::builder(list_data_type)
1620 .len(8)
1621 .null_bit_buffer(Some(Buffer::from(validity_bits)))
1622 .add_child_data(value_data)
1623 .build()?;
1624 let array = FixedSizeListArray::from(list_data);
1625
1626 let data = array.into_data();
1627 test_round_trip(&data)
1628 }
1629
1630 #[test]
1631 fn test_fixed_size_list_nested() -> Result<()> {
1632 let v: Vec<i32> = (0..16).collect();
1633 let value_data = ArrayData::builder(DataType::Int32)
1634 .len(16)
1635 .add_buffer(Buffer::from_slice_ref(v))
1636 .build()?;
1637
1638 let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
1639 let value_offsets = Buffer::from_slice_ref(offsets);
1640 let inner_list_data_type =
1641 DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1642 let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
1643 .len(8)
1644 .add_buffer(value_offsets)
1645 .add_child_data(value_data)
1646 .build()?;
1647
1648 let mut validity_bits: [u8; 1] = [0; 1];
1650 bit_util::set_bit(&mut validity_bits, 2);
1651
1652 let list_data_type =
1653 DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2);
1654 let list_data = ArrayData::builder(list_data_type)
1655 .len(4)
1656 .null_bit_buffer(Some(Buffer::from(validity_bits)))
1657 .add_child_data(inner_list_data)
1658 .build()?;
1659
1660 let array = FixedSizeListArray::from(list_data);
1661
1662 let data = array.into_data();
1663 test_round_trip(&data)
1664 }
1665
1666 #[test]
1667 fn test_list_view() -> Result<()> {
1668 let value_data = ArrayData::builder(DataType::Int16)
1670 .len(8)
1671 .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1672 .build()
1673 .unwrap();
1674
1675 let value_offsets = Buffer::from(vec![0_i32, 3, 6]);
1678 let sizes_buffer = Buffer::from(vec![3_i32, 3, 2]);
1679
1680 let list_view_dt =
1682 DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, false)));
1683
1684 let list_view_data = ArrayData::builder(list_view_dt)
1685 .len(3)
1686 .add_buffer(value_offsets)
1687 .add_buffer(sizes_buffer)
1688 .add_child_data(value_data)
1689 .build()
1690 .unwrap();
1691
1692 test_round_trip(&list_view_data)
1693 }
1694
1695 #[test]
1696 fn test_list_view_with_nulls() -> Result<()> {
1697 let value_data = ArrayData::builder(DataType::Int16)
1699 .len(8)
1700 .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
1701 .build()
1702 .unwrap();
1703
1704 let value_offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
1707 let sizes_buffer = Buffer::from(vec![3_i32, 3, 2, 0]);
1708
1709 let list_view_dt =
1711 DataType::ListView(Arc::new(Field::new_list_field(DataType::Int16, true)));
1712
1713 let list_view_data = ArrayData::builder(list_view_dt)
1714 .len(4)
1715 .add_buffer(value_offsets)
1716 .add_buffer(sizes_buffer)
1717 .add_child_data(value_data)
1718 .nulls(Some(NullBuffer::from(vec![true, true, true, false])))
1719 .build()
1720 .unwrap();
1721
1722 test_round_trip(&list_view_data)
1723 }
1724
1725 #[test]
1726 #[cfg(not(feature = "force_validate"))]
1727 fn test_empty_string_with_non_zero_offset() -> Result<()> {
1728 use super::ImportedArrowArray;
1729 use arrow_buffer::{MutableBuffer, OffsetBuffer};
1730
1731 let data: Buffer = MutableBuffer::new(0).into();
1733 let offsets = OffsetBuffer::new(vec![123].into());
1734 let string_array =
1735 unsafe { StringArray::new_unchecked(offsets.clone(), data.clone(), None) };
1736
1737 let data = string_array.into_data();
1738
1739 let array = FFI_ArrowArray::new(&data);
1740 let schema = FFI_ArrowSchema::try_from(data.data_type())?;
1741
1742 let dt = DataType::try_from(&schema)?;
1743 let array = Arc::new(array);
1744 let imported_array = ImportedArrowArray {
1745 array: &array,
1746 data_type: dt,
1747 owner: &array,
1748 };
1749
1750 let offset_buf_len = imported_array.buffer_len(1, &[], &imported_array.data_type)?;
1751 let data_buf_len = imported_array.buffer_len(2, &[], &imported_array.data_type)?;
1752
1753 assert_eq!(offset_buf_len, 4);
1754 assert_eq!(data_buf_len, 0);
1755
1756 test_round_trip(&imported_array.consume()?)
1757 }
1758
1759 fn roundtrip_string_array(array: StringArray) -> StringArray {
1760 let data = array.into_data();
1761
1762 let array = FFI_ArrowArray::new(&data);
1763 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1764
1765 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1766 StringArray::from(array)
1767 }
1768
1769 fn roundtrip_byte_view_array<T: ByteViewType>(
1770 array: GenericByteViewArray<T>,
1771 ) -> GenericByteViewArray<T> {
1772 let data = array.into_data();
1773
1774 let array = FFI_ArrowArray::new(&data);
1775 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1776
1777 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1778 GenericByteViewArray::<T>::from(array)
1779 }
1780
1781 fn extend_array(array: &dyn Array) -> ArrayRef {
1782 let len = array.len();
1783 let data = array.to_data();
1784
1785 let mut mutable = MutableArrayData::new(vec![&data], false, len);
1786 mutable.extend(0, 0, len);
1787 make_array(mutable.freeze())
1788 }
1789
1790 #[test]
1791 fn test_extend_imported_string_slice() {
1792 let mut strings = vec![];
1793
1794 for i in 0..1000 {
1795 strings.push(format!("string: {i}"));
1796 }
1797
1798 let string_array = StringArray::from(strings);
1799
1800 let imported = roundtrip_string_array(string_array.clone());
1801 assert_eq!(imported.len(), 1000);
1802 assert_eq!(imported.value(0), "string: 0");
1803 assert_eq!(imported.value(499), "string: 499");
1804
1805 let copied = extend_array(&imported);
1806 assert_eq!(
1807 copied.as_any().downcast_ref::<StringArray>().unwrap(),
1808 &imported
1809 );
1810
1811 let slice = string_array.slice(500, 500);
1812
1813 let imported = roundtrip_string_array(slice);
1814 assert_eq!(imported.len(), 500);
1815 assert_eq!(imported.value(0), "string: 500");
1816 assert_eq!(imported.value(499), "string: 999");
1817
1818 let copied = extend_array(&imported);
1819 assert_eq!(
1820 copied.as_any().downcast_ref::<StringArray>().unwrap(),
1821 &imported
1822 );
1823 }
1824
1825 fn roundtrip_list_array(array: ListArray) -> ListArray {
1826 let data = array.into_data();
1827
1828 let array = FFI_ArrowArray::new(&data);
1829 let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1830
1831 let array = unsafe { from_ffi(array, &schema) }.unwrap();
1832 ListArray::from(array)
1833 }
1834
1835 #[test]
1836 fn test_extend_imported_list_slice() {
1837 let mut data = vec![];
1838
1839 for i in 0..1000 {
1840 let mut list = vec![];
1841 for j in 0..100 {
1842 list.push(Some(i * 1000 + j));
1843 }
1844 data.push(Some(list));
1845 }
1846
1847 let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1848
1849 let slice = list_array.slice(500, 500);
1850 let imported = roundtrip_list_array(slice.clone());
1851 assert_eq!(imported.len(), 500);
1852 assert_eq!(&slice, &imported);
1853
1854 let copied = extend_array(&imported);
1855 assert_eq!(
1856 copied.as_any().downcast_ref::<ListArray>().unwrap(),
1857 &imported
1858 );
1859 }
1860
1861 trait NativeFromStr {
1864 fn from_str(value: &str) -> &Self;
1865 }
1866
1867 impl NativeFromStr for str {
1868 fn from_str(value: &str) -> &Self {
1869 value
1870 }
1871 }
1872
1873 impl NativeFromStr for [u8] {
1874 fn from_str(value: &str) -> &Self {
1875 value.as_bytes()
1876 }
1877 }
1878
1879 #[test]
1880 #[cfg(not(feature = "force_validate"))]
1881 fn test_utf8_view_ffi_from_dangling_pointer() {
1882 let empty = GenericByteViewBuilder::<StringViewType>::new().finish();
1883 let buffers = empty.data_buffers().to_vec();
1884 let nulls = empty.nulls().cloned();
1885
1886 let alloc = Arc::new(1);
1888 let buffer = unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, alloc) };
1889 let views = unsafe { ScalarBuffer::new_unchecked(buffer) };
1890
1891 let str_view: GenericByteViewArray<StringViewType> =
1892 unsafe { GenericByteViewArray::new_unchecked(views, buffers, nulls) };
1893 let imported = roundtrip_byte_view_array(str_view);
1894 assert_eq!(imported.len(), 0);
1895 assert_eq!(&imported, &empty);
1896 }
1897
1898 #[test]
1899 fn test_round_trip_byte_view() {
1900 fn test_case<T>()
1901 where
1902 T: ByteViewType,
1903 T::Native: NativeFromStr,
1904 {
1905 macro_rules! run_test_case {
1906 ($array:expr) => {{
1907 let len = $array.len();
1909 let imported = roundtrip_byte_view_array($array);
1910 assert_eq!(imported.len(), len);
1911
1912 let copied = extend_array(&imported);
1913 assert_eq!(
1914 copied
1915 .as_any()
1916 .downcast_ref::<GenericByteViewArray<T>>()
1917 .unwrap(),
1918 &imported
1919 );
1920 }};
1921 }
1922
1923 let empty = GenericByteViewBuilder::<T>::new().finish();
1925 run_test_case!(empty);
1926
1927 let mut all_inlined = GenericByteViewBuilder::<T>::new();
1929 all_inlined.append_value(T::Native::from_str("inlined1"));
1930 all_inlined.append_value(T::Native::from_str("inlined2"));
1931 all_inlined.append_value(T::Native::from_str("inlined3"));
1932 let all_inlined = all_inlined.finish();
1933 assert_eq!(all_inlined.data_buffers().len(), 0);
1934 run_test_case!(all_inlined);
1935
1936 let mixed_one_variadic = {
1938 let mut builder = GenericByteViewBuilder::<T>::new();
1939 builder.append_value(T::Native::from_str("inlined"));
1940 let block_id =
1941 builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1942 builder.try_append_view(block_id, 0, 25).unwrap();
1943 builder.finish()
1944 };
1945 assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
1946 run_test_case!(mixed_one_variadic);
1947
1948 let mixed_two_variadic = {
1950 let mut builder = GenericByteViewBuilder::<T>::new();
1951 builder.append_value(T::Native::from_str("inlined"));
1952 let block_id =
1953 builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1954 builder.try_append_view(block_id, 0, 25).unwrap();
1955
1956 let block_id = builder
1957 .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
1958 builder.try_append_view(block_id, 0, 33).unwrap();
1959 builder.finish()
1960 };
1961 assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
1962 run_test_case!(mixed_two_variadic);
1963 }
1964
1965 test_case::<StringViewType>();
1966 test_case::<BinaryViewType>();
1967 }
1968}