arrow_array/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19//!
20//! Generally, this module is divided in two main interfaces:
21//! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
22//! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
23//!
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow, such as `format` to `DataType`,
//! `Buffer`, etc. This is handled by `from_ffi` and `to_ffi`.
26//!
27//!
28//! Export to FFI
29//!
30//! ```rust
31//! # use std::sync::Arc;
32//! # use arrow_array::{Int32Array, Array, make_array};
33//! # use arrow_data::ArrayData;
34//! # use arrow_array::ffi::{to_ffi, from_ffi};
35//! # use arrow_schema::ArrowError;
36//! # fn main() -> Result<(), ArrowError> {
37//! // create an array natively
38//!
39//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
40//! let data = array.into_data();
41//!
42//! // Export it
43//! let (out_array, out_schema) = to_ffi(&data)?;
44//!
45//! // import it
46//! let data = unsafe { from_ffi(out_array, &out_schema) }?;
47//! let array = Int32Array::from(data);
48//!
49//! // verify
50//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)]));
51//! #
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! Import from FFI
57//!
58//! ```
59//! # use std::ptr::addr_of_mut;
60//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray};
61//! # use arrow_array::{ArrayRef, make_array};
62//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema};
63//! #
64//! /// A foreign data container that can export to C Data interface
65//! struct ForeignArray {};
66//!
67//! impl ForeignArray {
68//!     /// Export from foreign array representation to C Data interface
69//!     /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
70//!     fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
71//!         // ...
72//!     }
73//! }
74//!
75//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
76//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
77//!     let mut schema = FFI_ArrowSchema::empty();
78//!     let mut array = FFI_ArrowArray::empty();
79//!     foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
80//!     Ok(make_array(unsafe { from_ffi(array, &schema) }?))
81//! }
82//! ```
83
84/*
85# Design:
86
87Main assumptions:
* A memory region is deallocated according to its own release mechanism.
89* Rust shares memory regions between arrays.
90* A memory region should be deallocated when no-one is using it.
91
92The design of this module is as follows:
93
94`ArrowArray` contains two `Arc`s, one per ABI-compatible `struct`, each containing data
95according to the C Data Interface. These Arcs are used for ref counting of the structs
96within Rust and lifetime management.
97
Each ABI-compatible `struct` knows how to `drop` itself, calling `release`.
99
100To import an array, unsafely create an `ArrowArray` from two pointers using [ArrowArray::try_from_raw].
101To export an array, create an `ArrowArray` using [ArrowArray::try_new].
102*/
103
104use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{bit_util, Buffer, MutableBuffer};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{layout, ArrayData};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
117/// # Safety
118/// Assumes that these pointers represent valid C Data Interfaces, both in memory
119/// representation and lifetime via the `release` mechanism.
120///
121/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and
122/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
123/// Usually the raw pointers are provided by the array data consumer.
124#[deprecated(
125    since = "52.0.0",
126    note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129    src: ArrayRef,
130    out_array: *mut FFI_ArrowArray,
131    out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133    let data = src.to_data();
134    let array = FFI_ArrowArray::new(&data);
135    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137    std::ptr::write_unaligned(out_array, array);
138    std::ptr::write_unaligned(out_schema, schema);
139
140    Ok(())
141}
142
143// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144// This is set by the Arrow specification
145fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146    if let Some(primitive) = data_type.primitive_width() {
147        return match i {
148            0 => Err(ArrowError::CDataInterface(format!(
149                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150            ))),
151            1 => Ok(primitive * 8),
152            i => Err(ArrowError::CDataInterface(format!(
153                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154            ))),
155        };
156    }
157
158    Ok(match (data_type, i) {
159        (DataType::Boolean, 1) => 1,
160        (DataType::Boolean, _) => {
161            return Err(ArrowError::CDataInterface(format!(
162                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163            )))
164        }
165        (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
166        (DataType::FixedSizeList(f, num_elems), 1) => {
167            let child_bit_width = bit_width(f.data_type(), 1)?;
168            child_bit_width * (*num_elems as usize)
169        },
170        (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171            return Err(ArrowError::CDataInterface(format!(
172                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173            )))
174        },
175        // Variable-size list and map have one i32 buffer.
176        // Variable-sized binaries: have two buffers.
177        // "small": first buffer is i32, second is in bytes
178        (DataType::Utf8, 1) | (DataType::Binary, 1) | (DataType::List(_), 1) | (DataType::Map(_, _), 1) => i32::BITS as _,
179        (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
180        (DataType::List(_), _) | (DataType::Map(_, _), _) => {
181            return Err(ArrowError::CDataInterface(format!(
182                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
183            )))
184        }
185        (DataType::Utf8, _) | (DataType::Binary, _) => {
186            return Err(ArrowError::CDataInterface(format!(
187                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
188            )))
189        }
190        // Variable-sized binaries: have two buffers.
191        // LargeUtf8: first buffer is i64, second is in bytes
192        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => i64::BITS as _,
193        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _,
194        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
195            return Err(ArrowError::CDataInterface(format!(
196                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
197            )))
198        }
199        // Variable-sized views: have 3 or more buffers.
200        // Buffer 1 are the u128 views
201        // Buffers 2...N-1 are u8 byte buffers
202        (DataType::Utf8View, 1) | (DataType::BinaryView,1) => u128::BITS as _,
203        (DataType::Utf8View, _) | (DataType::BinaryView, _) => {
204            u8::BITS as _
205        }
206        // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0.
207        (DataType::Union(_, _), 0) => i8::BITS as _,
208        // Only DenseUnion has 2nd buffer
209        (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
210        (DataType::Union(_, UnionMode::Sparse), _) => {
211            return Err(ArrowError::CDataInterface(format!(
212                "The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
213            )))
214        }
215        (DataType::Union(_, UnionMode::Dense), _) => {
216            return Err(ArrowError::CDataInterface(format!(
217                "The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
218            )))
219        }
220        (_, 0) => {
221            // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
222            // UnionArray, they should be handled above.
223            return Err(ArrowError::CDataInterface(format!(
224                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
225            )))
226        }
227        _ => {
228            return Err(ArrowError::CDataInterface(format!(
229                "The datatype \"{data_type:?}\" is still not supported in Rust implementation"
230            )))
231        }
232    })
233}
234
235/// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer).
236/// `bits` is the number of bits that the native type of this buffer has.
237/// The size of the buffer will be `ceil(self.length * bits, 8)`.
238/// # Panic
239/// This function panics if `i` is larger or equal to `n_buffers`.
240/// # Safety
241/// This function assumes that `ceil(self.length * bits, 8)` is the size of the buffer
242unsafe fn create_buffer(
243    owner: Arc<FFI_ArrowArray>,
244    array: &FFI_ArrowArray,
245    index: usize,
246    len: usize,
247) -> Option<Buffer> {
248    if array.num_buffers() == 0 {
249        return None;
250    }
251    NonNull::new(array.buffer(index) as _)
252        .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner))
253}
254
255/// Export to the C Data Interface
256pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
257    let array = FFI_ArrowArray::new(data);
258    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
259    Ok((array, schema))
260}
261
262/// Import [ArrayData] from the C Data Interface
263///
264/// # Safety
265///
266/// This struct assumes that the incoming data agrees with the C data interface.
267pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
268    let dt = DataType::try_from(schema)?;
269    let array = Arc::new(array);
270    let tmp = ImportedArrowArray {
271        array: &array,
272        data_type: dt,
273        owner: &array,
274    };
275    tmp.consume()
276}
277
278/// Import [ArrayData] from the C Data Interface
279///
280/// # Safety
281///
282/// This struct assumes that the incoming data agrees with the C data interface.
283pub unsafe fn from_ffi_and_data_type(
284    array: FFI_ArrowArray,
285    data_type: DataType,
286) -> Result<ArrayData> {
287    let array = Arc::new(array);
288    let tmp = ImportedArrowArray {
289        array: &array,
290        data_type,
291        owner: &array,
292    };
293    tmp.consume()
294}
295
296#[derive(Debug)]
297struct ImportedArrowArray<'a> {
298    array: &'a FFI_ArrowArray,
299    data_type: DataType,
300    owner: &'a Arc<FFI_ArrowArray>,
301}
302
303impl ImportedArrowArray<'_> {
304    fn consume(self) -> Result<ArrayData> {
305        let len = self.array.len();
306        let offset = self.array.offset();
307        let null_count = match &self.data_type {
308            DataType::Null => Some(0),
309            _ => self.array.null_count_opt(),
310        };
311
312        let data_layout = layout(&self.data_type);
313        let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
314
315        let null_bit_buffer = if data_layout.can_contain_null_mask {
316            self.null_bit_buffer()
317        } else {
318            None
319        };
320
321        let mut child_data = self.consume_children()?;
322
323        if let Some(d) = self.dictionary()? {
324            // For dictionary type there should only be a single child, so we don't need to worry if
325            // there are other children added above.
326            assert!(child_data.is_empty());
327            child_data.push(d.consume()?);
328        }
329
330        // Should FFI be checking validity?
331        Ok(unsafe {
332            ArrayData::new_unchecked(
333                self.data_type,
334                len,
335                null_count,
336                null_bit_buffer,
337                offset,
338                buffers,
339                child_data,
340            )
341        })
342    }
343
344    fn consume_children(&self) -> Result<Vec<ArrayData>> {
345        match &self.data_type {
346            DataType::List(field)
347            | DataType::FixedSizeList(field, _)
348            | DataType::LargeList(field)
349            | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
350            DataType::Struct(fields) => {
351                assert!(fields.len() == self.array.num_children());
352                fields
353                    .iter()
354                    .enumerate()
355                    .map(|(i, field)| self.consume_child(i, field.data_type()))
356                    .collect::<Result<Vec<_>>>()
357            }
358            DataType::Union(union_fields, _) => {
359                assert!(union_fields.len() == self.array.num_children());
360                union_fields
361                    .iter()
362                    .enumerate()
363                    .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
364                    .collect::<Result<Vec<_>>>()
365            }
366            DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
367                self.consume_child(0, run_ends_field.data_type())?,
368                self.consume_child(1, values_field.data_type())?,
369            ]
370            .to_vec()),
371            _ => Ok(Vec::new()),
372        }
373    }
374
375    fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
376        ImportedArrowArray {
377            array: self.array.child(index),
378            data_type: child_type.clone(),
379            owner: self.owner,
380        }
381        .consume()
382    }
383
384    /// returns all buffers, as organized by Rust (i.e. null buffer is skipped if it's present
385    /// in the spec of the type)
386    fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
387        // + 1: skip null buffer
388        let buffer_begin = can_contain_null_mask as usize;
389        let buffer_end = self.array.num_buffers() - usize::from(variadic);
390
391        let variadic_buffer_lens = if variadic {
392            // Each views array has 1 (optional) null buffer, 1 views buffer, 1 lengths buffer.
393            // Rest are variadic.
394            let num_variadic_buffers =
395                self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
396            if num_variadic_buffers == 0 {
397                &[]
398            } else {
399                let lengths = self.array.buffer(self.array.num_buffers() - 1);
400                // SAFETY: is lengths is non-null, then it must be valid for up to num_variadic_buffers.
401                unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
402            }
403        } else {
404            &[]
405        };
406
407        (buffer_begin..buffer_end)
408            .map(|index| {
409                let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
410                match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
411                    Some(buf) => Ok(buf),
412                    None if len == 0 => {
413                        // Null data buffer, which Rust doesn't allow. So create
414                        // an empty buffer.
415                        Ok(MutableBuffer::new(0).into())
416                    }
417                    None => Err(ArrowError::CDataInterface(format!(
418                        "The external buffer at position {index} is null."
419                    ))),
420                }
421            })
422            .collect()
423    }
424
425    /// Returns the length, in bytes, of the buffer `i` (indexed according to the C data interface)
426    /// Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
427    /// for variable-sized buffers, such as the second buffer of a stringArray, we need
428    /// to fetch offset buffer's len to build the second buffer.
429    fn buffer_len(
430        &self,
431        i: usize,
432        variadic_buffer_lengths: &[i64],
433        dt: &DataType,
434    ) -> Result<usize> {
435        // Special handling for dictionary type as we only care about the key type in the case.
436        let data_type = match dt {
437            DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
438            dt => dt,
439        };
440
441        // `ffi::ArrowArray` records array offset, we need to add it back to the
442        // buffer length to get the actual buffer length.
443        let length = self.array.len() + self.array.offset();
444
445        // Inner type is not important for buffer length.
446        Ok(match (&data_type, i) {
447            (DataType::Utf8, 1)
448            | (DataType::LargeUtf8, 1)
449            | (DataType::Binary, 1)
450            | (DataType::LargeBinary, 1)
451            | (DataType::List(_), 1)
452            | (DataType::LargeList(_), 1)
453            | (DataType::Map(_, _), 1) => {
454                // the len of the offset buffer (buffer 1) equals length + 1
455                let bits = bit_width(data_type, i)?;
456                debug_assert_eq!(bits % 8, 0);
457                (length + 1) * (bits / 8)
458            }
459            (DataType::Utf8, 2) | (DataType::Binary, 2) => {
460                if self.array.is_empty() {
461                    return Ok(0);
462                }
463
464                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
465                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
466                // first buffer is the null buffer => add(1)
467                // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
468                #[allow(clippy::cast_ptr_alignment)]
469                let offset_buffer = self.array.buffer(1) as *const i32;
470                // get last offset
471                (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
472            }
473            (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
474                if self.array.is_empty() {
475                    return Ok(0);
476                }
477
478                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
479                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
480                // first buffer is the null buffer => add(1)
481                // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
482                #[allow(clippy::cast_ptr_alignment)]
483                let offset_buffer = self.array.buffer(1) as *const i64;
484                // get last offset
485                (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
486            }
487            // View types: these have variadic buffers.
488            // Buffer 1 is the views buffer, which stores 1 u128 per length of the array.
489            // Buffers 2..N-1 are the buffers holding the byte data. Their lengths are variable.
490            // Buffer N is of length (N - 2) and stores i64 containing the lengths of buffers 2..N-1
491            (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
492                std::mem::size_of::<u128>() * length
493            }
494            (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
495                variadic_buffer_lengths[i - 2] as usize
496            }
497            // buffer len of primitive types
498            _ => {
499                let bits = bit_width(data_type, i)?;
500                bit_util::ceil(length * bits, 8)
501            }
502        })
503    }
504
505    /// returns the null bit buffer.
506    /// Rust implementation uses a buffer that is not part of the array of buffers.
507    /// The C Data interface's null buffer is part of the array of buffers.
508    fn null_bit_buffer(&self) -> Option<Buffer> {
509        // similar to `self.buffer_len(0)`, but without `Result`.
510        // `ffi::ArrowArray` records array offset, we need to add it back to the
511        // buffer length to get the actual buffer length.
512        let length = self.array.len() + self.array.offset();
513        let buffer_len = bit_util::ceil(length, 8);
514
515        unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
516    }
517
518    fn dictionary(&self) -> Result<Option<ImportedArrowArray>> {
519        match (self.array.dictionary(), &self.data_type) {
520            (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
521                array,
522                data_type: value_type.as_ref().clone(),
523                owner: self.owner,
524            })),
525            (Some(_), _) => Err(ArrowError::CDataInterface(
526                "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
527            )),
528            (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
529                "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
530            )),
531            (_, _) => Ok(None),
532        }
533    }
534}
535
536#[cfg(test)]
537mod tests_to_then_from_ffi {
538    use std::collections::HashMap;
539    use std::mem::ManuallyDrop;
540
541    use arrow_buffer::NullBuffer;
542    use arrow_schema::Field;
543
544    use crate::builder::UnionBuilder;
545    use crate::cast::AsArray;
546    use crate::types::{Float64Type, Int32Type, Int8Type};
547    use crate::*;
548
549    use super::*;
550
551    #[test]
552    fn test_round_trip() {
553        // create an array natively
554        let array = Int32Array::from(vec![1, 2, 3]);
555
556        // export it
557        let (array, schema) = to_ffi(&array.into_data()).unwrap();
558
559        // (simulate consumer) import it
560        let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
561
562        // verify
563        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
564    }
565
566    #[test]
567    fn test_import() {
568        // Model receiving const pointers from an external system
569
570        // Create an array natively
571        let data = Int32Array::from(vec![1, 2, 3]).into_data();
572        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
573        let array = FFI_ArrowArray::new(&data);
574
575        // Use ManuallyDrop to avoid Box:Drop recursing
576        let schema = Box::new(ManuallyDrop::new(schema));
577        let array = Box::new(ManuallyDrop::new(array));
578
579        let schema_ptr = &**schema as *const _;
580        let array_ptr = &**array as *const _;
581
582        // We can read them back to memory
583        // SAFETY:
584        // Pointers are aligned and valid
585        let data =
586            unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };
587
588        let array = Int32Array::from(data);
589        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
590    }
591
592    #[test]
593    fn test_round_trip_with_offset() -> Result<()> {
594        // create an array natively
595        let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]);
596
597        let array = array.slice(1, 2);
598
599        // export it
600        let (array, schema) = to_ffi(&array.to_data())?;
601
602        // (simulate consumer) import it
603        let data = unsafe { from_ffi(array, &schema) }?;
604        let array = make_array(data);
605        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
606
607        assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
608
609        // (drop/release)
610        Ok(())
611    }
612
613    #[test]
614    #[cfg(not(feature = "force_validate"))]
615    fn test_decimal_round_trip() -> Result<()> {
616        // create an array natively
617        let original_array = [Some(12345_i128), Some(-12345_i128), None]
618            .into_iter()
619            .collect::<Decimal128Array>()
620            .with_precision_and_scale(6, 2)
621            .unwrap();
622
623        // export it
624        let (array, schema) = to_ffi(&original_array.to_data())?;
625
626        // (simulate consumer) import it
627        let data = unsafe { from_ffi(array, &schema) }?;
628        let array = make_array(data);
629
630        // perform some operation
631        let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
632
633        // verify
634        assert_eq!(array, &original_array);
635
636        // (drop/release)
637        Ok(())
638    }
639    // case with nulls is tested in the docs, through the example on this module.
640
641    #[test]
642    fn test_null_count_handling() {
643        let int32_data = ArrayData::builder(DataType::Int32)
644            .len(10)
645            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
646            .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
647            .build()
648            .unwrap();
649        let mut ffi_array = FFI_ArrowArray::new(&int32_data);
650        assert_eq!(3, ffi_array.null_count());
651        assert_eq!(Some(3), ffi_array.null_count_opt());
652        // Simulating uninitialized state
653        unsafe {
654            ffi_array.set_null_count(-1);
655        }
656        assert_eq!(None, ffi_array.null_count_opt());
657        let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap();
658        assert_eq!(3, int32_data.null_count());
659
660        let null_data = &ArrayData::new_null(&DataType::Null, 10);
661        let mut ffi_array = FFI_ArrowArray::new(null_data);
662        assert_eq!(10, ffi_array.null_count());
663        assert_eq!(Some(10), ffi_array.null_count_opt());
664        // Simulating uninitialized state
665        unsafe {
666            ffi_array.set_null_count(-1);
667        }
668        assert_eq!(None, ffi_array.null_count_opt());
669        let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap();
670        assert_eq!(0, null_data.null_count());
671    }
672
673    fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
674        // create an array natively
675        let array = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
676
677        // export it
678        let (array, schema) = to_ffi(&array.to_data())?;
679
680        // (simulate consumer) import it
681        let data = unsafe { from_ffi(array, &schema) }?;
682        let array = make_array(data);
683
684        // perform some operation
685        let array = array
686            .as_any()
687            .downcast_ref::<GenericStringArray<Offset>>()
688            .unwrap();
689
690        // verify
691        let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
692        assert_eq!(array, &expected);
693
694        // (drop/release)
695        Ok(())
696    }
697
698    #[test]
699    fn test_string() -> Result<()> {
700        test_generic_string::<i32>()
701    }
702
703    #[test]
704    fn test_large_string() -> Result<()> {
705        test_generic_string::<i64>()
706    }
707
708    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
709        // Construct a value array
710        let value_data = ArrayData::builder(DataType::Int32)
711            .len(8)
712            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
713            .build()
714            .unwrap();
715
716        // Construct a buffer for value offsets, for the nested array:
717        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
718        let value_offsets = [0_usize, 3, 6, 8]
719            .iter()
720            .map(|i| Offset::from_usize(*i).unwrap())
721            .collect::<Buffer>();
722
723        // Construct a list array from the above two
724        let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
725            Field::new_list_field(DataType::Int32, false),
726        ));
727
728        let list_data = ArrayData::builder(list_data_type)
729            .len(3)
730            .add_buffer(value_offsets)
731            .add_child_data(value_data)
732            .build()
733            .unwrap();
734
735        // create an array natively
736        let array = GenericListArray::<Offset>::from(list_data.clone());
737
738        // export it
739        let (array, schema) = to_ffi(&array.to_data())?;
740
741        // (simulate consumer) import it
742        let data = unsafe { from_ffi(array, &schema) }?;
743        let array = make_array(data);
744
745        // downcast
746        let array = array
747            .as_any()
748            .downcast_ref::<GenericListArray<Offset>>()
749            .unwrap();
750
751        // verify
752        let expected = GenericListArray::<Offset>::from(list_data);
753        assert_eq!(&array.value(0), &expected.value(0));
754        assert_eq!(&array.value(1), &expected.value(1));
755        assert_eq!(&array.value(2), &expected.value(2));
756
757        // (drop/release)
758        Ok(())
759    }
760
/// Runs the generic list round-trip with `i32` offsets.
#[test]
fn test_list() -> Result<()> {
    test_generic_list::<i32>()
}
765
/// Runs the generic list round-trip with `i64` offsets.
#[test]
fn test_large_list() -> Result<()> {
    test_generic_list::<i64>()
}
770
771    fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
772        // create an array natively
773        let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
774        let array = GenericBinaryArray::<Offset>::from(array);
775
776        // export it
777        let (array, schema) = to_ffi(&array.to_data())?;
778
779        // (simulate consumer) import it
780        let data = unsafe { from_ffi(array, &schema) }?;
781        let array = make_array(data);
782        let array = array
783            .as_any()
784            .downcast_ref::<GenericBinaryArray<Offset>>()
785            .unwrap();
786
787        // verify
788        let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
789        let expected = GenericBinaryArray::<Offset>::from(expected);
790        assert_eq!(array, &expected);
791
792        // (drop/release)
793        Ok(())
794    }
795
/// Runs the generic binary round-trip with `i32` offsets.
#[test]
fn test_binary() -> Result<()> {
    test_generic_binary::<i32>()
}
800
/// Runs the generic binary round-trip with `i64` offsets.
#[test]
fn test_large_binary() -> Result<()> {
    test_generic_binary::<i64>()
}
805
/// Round-trips a nullable boolean array through `to_ffi` / `from_ffi`.
#[test]
fn test_bool() -> Result<()> {
    let values = vec![None, Some(true), Some(false)];
    let source = BooleanArray::from(values.clone());

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported.as_any().downcast_ref::<BooleanArray>().unwrap();

    // The imported array must equal one built from the same values.
    assert_eq!(actual, &BooleanArray::from(values));

    // The imported data is released when the locals drop.
    Ok(())
}
828
/// Round-trips a nullable `Time32MillisecondArray` through `to_ffi` / `from_ffi`.
#[test]
fn test_time32() -> Result<()> {
    let values: Vec<Option<i32>> = vec![None, Some(1), Some(2)];
    let source = Time32MillisecondArray::from(values.clone());

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<Time32MillisecondArray>()
        .unwrap();

    // The imported array must equal one built from the same values.
    assert_eq!(actual, &Time32MillisecondArray::from(values));

    // The imported data is released when the locals drop.
    Ok(())
}
854
/// Round-trips a nullable `TimestampMillisecondArray` through `to_ffi` / `from_ffi`.
#[test]
fn test_timestamp() -> Result<()> {
    let values: Vec<Option<i64>> = vec![None, Some(1), Some(2)];
    let source = TimestampMillisecondArray::from(values.clone());

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<TimestampMillisecondArray>()
        .unwrap();

    // The imported array must equal one built from the same values.
    assert_eq!(actual, &TimestampMillisecondArray::from(values));

    // The imported data is released when the locals drop.
    Ok(())
}
880
/// Round-trips a sparse `FixedSizeBinaryArray` (value width 3, three nulls) through
/// `to_ffi` / `from_ffi`.
#[test]
fn test_fixed_size_binary_array() -> Result<()> {
    // Shared input for both the exported array and the reference array.
    let values: Vec<Option<Vec<u8>>> = vec![
        None,
        Some(vec![10, 10, 10]),
        None,
        Some(vec![20, 20, 20]),
        Some(vec![30, 30, 30]),
        None,
    ];
    let source =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.clone().into_iter(), 3)?;

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<FixedSizeBinaryArray>()
        .unwrap();

    // Rebuild the reference from the same values and compare.
    let expected = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
    assert_eq!(actual, &expected);

    // The imported data is released when the locals drop.
    Ok(())
}
924
/// Round-trips a `FixedSizeList` array (3 lists of 3 `Int32` values, with a validity
/// bitmap) through `to_ffi` / `from_ffi` and compares it with an independently
/// built reference array.
///
/// The reference uses exactly the same validity bitmap and child values as the
/// exported array. Previously it set an extra validity bit at index 5, which lies
/// beyond the 3-element length, and copied the values into a second vector; neither
/// contributed to the comparison and the "0010 0100" comment was misleading.
#[test]
fn test_fixed_size_list_array() -> Result<()> {
    // Validity bitmap 0000 0100: only bit 2 is set.
    let mut validity_bits: [u8; 1] = [0; 1];
    bit_util::set_bit(&mut validity_bits, 2);

    // Nine child values, i.e. three lists of three.
    let v: Vec<i32> = (0..9).collect();
    let value_data = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref(&v))
        .build()?;

    let list_data_type =
        DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
    let list_data = ArrayData::builder(list_data_type.clone())
        .len(3)
        .null_bit_buffer(Some(Buffer::from(validity_bits)))
        .add_child_data(value_data)
        .build()?;

    // export it
    let (array, schema) = to_ffi(&list_data)?;

    // (simulate consumer) import it
    let data = unsafe { from_ffi(array, &schema) }?;
    let array = make_array(data);
    let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();

    // Build the reference from fresh buffers so it shares no memory with the
    // exported data, using the same bitmap (`[u8; 1]` is `Copy`) and values.
    let expected_value_data = ArrayData::builder(DataType::Int32)
        .len(9)
        .add_buffer(Buffer::from_slice_ref(&v))
        .build()?;

    let expected_list_data = ArrayData::builder(list_data_type)
        .len(3)
        .null_bit_buffer(Some(Buffer::from(validity_bits)))
        .add_child_data(expected_value_data)
        .build()?;
    let expected_array = FixedSizeListArray::from(expected_list_data);

    // verify
    assert_eq!(array, &expected_array);

    // (drop/release)
    Ok(())
}
979
/// Round-trips an `Int8`-keyed dictionary array built from `"a", "aaa", "aaa"` through
/// `to_ffi` / `from_ffi`.
#[test]
fn test_dictionary() -> Result<()> {
    // Builds the same dictionary twice: once to export, once as the reference.
    let build = || -> DictionaryArray<Int8Type> { vec!["a", "aaa", "aaa"].into_iter().collect() };
    let source = build();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<DictionaryArray<Int8Type>>()
        .unwrap();

    let expected = build();
    assert_eq!(actual, &expected);

    // The imported data is released when the locals drop.
    Ok(())
}
1005
/// Exports an `Int32Array` into caller-provided `FFI_ArrowArray` / `FFI_ArrowSchema`
/// slots via `export_array_into_raw`, then imports it back and compares.
#[test]
#[allow(deprecated)]
fn test_export_array_into_raw() -> Result<()> {
    let source = make_array(Int32Array::from(vec![1, 2, 3]).into_data());

    // Empty structs standing in for storage supplied by a consumer.
    let mut out_array = FFI_ArrowArray::empty();
    let mut out_schema = FFI_ArrowSchema::empty();

    // Hand raw pointers to both slots to the exporter.
    unsafe {
        export_array_into_raw(
            source,
            std::ptr::addr_of_mut!(out_array),
            std::ptr::addr_of_mut!(out_schema),
        )?;
    }

    // Consumer side: import from the now-populated structs.
    let data = unsafe { from_ffi(out_array, &out_schema) }?;
    let imported = make_array(data);
    let actual = imported.as_any().downcast_ref::<Int32Array>().unwrap();

    assert_eq!(actual, &Int32Array::from(vec![1, 2, 3]));
    Ok(())
}
1034
/// Round-trips a nullable `DurationSecondArray` through `to_ffi` / `from_ffi`.
#[test]
fn test_duration() -> Result<()> {
    let values: Vec<Option<i64>> = vec![None, Some(1), Some(2)];
    let source = DurationSecondArray::from(values.clone());

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<DurationSecondArray>()
        .unwrap();

    // The imported array must equal one built from the same values.
    assert_eq!(actual, &DurationSecondArray::from(values));

    // The imported data is released when the locals drop.
    Ok(())
}
1060
/// Round-trips a `MapArray` with string keys and `UInt32` values through
/// `to_ffi` / `from_ffi`.
#[test]
fn test_map_array() -> Result<()> {
    let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);

    // Entry offsets grouping the keys as [[a, b, c], [d, e, f], [g, h]].
    let entry_offsets = [0, 3, 6, 8];

    let source = MapArray::new_from_strings(
        vec!["a", "b", "c", "d", "e", "f", "g", "h"].into_iter(),
        &values_data,
        &entry_offsets,
    )
    .unwrap();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import, downcast and compare with the original.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported.as_any().downcast_ref::<MapArray>().unwrap();
    assert_eq!(actual, &source);

    Ok(())
}
1087
/// Round-trips a single-column `StructArray` whose field carries metadata (including a
/// non-ASCII value) and checks that both the data type and the contents survive.
#[test]
fn test_struct_array() -> Result<()> {
    let metadata: HashMap<String, String> =
        HashMap::from([("Hello".to_string(), "World! 😊".to_string())]);

    let field = Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata));
    let column: Arc<dyn Array> = Arc::new(Int32Array::from(vec![2, 4, 6]));
    let source = StructArray::from(vec![(field, column)]);

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported.as_any().downcast_ref::<StructArray>().unwrap();

    // Compare the data type first, then the full array.
    assert_eq!(actual.data_type(), source.data_type());
    assert_eq!(actual, &source);

    Ok(())
}
1111
/// Round-trips a sparse union (`Int32` field "a", `Float64` field "c") through
/// `to_ffi` / `from_ffi` and inspects type ids, offsets and every slot.
#[test]
fn test_union_sparse_array() -> Result<()> {
    let mut builder = UnionBuilder::new_sparse();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let source = builder.build().unwrap();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let union = imported.as_any().downcast_ref::<UnionArray>().unwrap();

    // Type ids must match both in bulk and per index.
    let expected_type_ids = vec![0_i8, 0, 1, 0];
    assert_eq!(*union.type_ids(), expected_type_ids);
    for (i, id) in expected_type_ids.iter().enumerate() {
        assert_eq!(id, &union.type_id(i));
    }

    // A sparse union carries no offsets buffer.
    assert!(union.offsets().is_none());

    for i in 0..union.len() {
        let slot = union.value(i);
        match i {
            // Slots 0 and 3 hold the Int32 values 1 and 4 respectively.
            0 | 3 => {
                let ints = slot.as_primitive::<Int32Type>();
                assert!(!ints.is_null(0));
                assert_eq!(ints.len(), 1);
                let want = if i == 0 { 1_i32 } else { 4_i32 };
                assert_eq!(want, ints.value(0));
            }
            // Slot 1 is the appended null.
            1 => assert!(slot.is_null(0)),
            // Slot 2 holds the Float64 value.
            2 => {
                let floats = slot.as_primitive::<Float64Type>();
                assert!(!floats.is_null(0));
                assert_eq!(floats.len(), 1);
                assert_eq!(floats.value(0), 3_f64);
            }
            _ => unreachable!(),
        }
    }

    Ok(())
}
1172
/// Round-trips a dense union (`Int32` field "a", `Float64` field "c") through
/// `to_ffi` / `from_ffi` and inspects type ids, offsets and every slot.
#[test]
fn test_union_dense_array() -> Result<()> {
    let mut builder = UnionBuilder::new_dense();
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null::<Int32Type>("a").unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let source = builder.build().unwrap();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import straight into a `UnionArray`.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let union = UnionArray::from(data);

    // Type ids must match both in bulk and per index.
    let expected_type_ids = vec![0_i8, 0, 1, 0];
    assert_eq!(*union.type_ids(), expected_type_ids);
    for (i, id) in expected_type_ids.iter().enumerate() {
        assert_eq!(id, &union.type_id(i));
    }

    // Unlike the sparse case, the dense union must expose offsets.
    assert!(union.offsets().is_some());

    for i in 0..union.len() {
        let slot = union.value(i);
        match i {
            // Slots 0 and 3 hold the Int32 values 1 and 4 respectively.
            0 | 3 => {
                let ints = slot.as_primitive::<Int32Type>();
                assert!(!ints.is_null(0));
                assert_eq!(ints.len(), 1);
                let want = if i == 0 { 1_i32 } else { 4_i32 };
                assert_eq!(want, ints.value(0));
            }
            // Slot 1 is the appended null.
            1 => assert!(slot.is_null(0)),
            // Slot 2 holds the Float64 value.
            2 => {
                let floats = slot.as_primitive::<Float64Type>();
                assert!(!floats.is_null(0));
                assert_eq!(floats.len(), 1);
                assert_eq!(floats.value(0), 3_f64);
            }
            _ => unreachable!(),
        }
    }

    Ok(())
}
1230
/// Round-trips a run-end encoded array (`Int32` run ends, `Int8` values) through
/// `to_ffi` / `from_ffi` and compares data type, run ends and values.
#[test]
fn test_run_array() -> Result<()> {
    let values =
        PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);

    // One run end per value.
    let run_end_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
    let run_ends = PrimitiveArray::<Int32Type>::from_iter_values(run_end_values.iter().copied());

    let source = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // Compare the parts individually.
    assert_eq!(actual.data_type(), source.data_type());
    assert_eq!(actual.run_ends().values(), source.run_ends().values());
    assert_eq!(actual.values(), source.values());

    Ok(())
}
1262
/// Round-trips a run-end encoded array whose values contain nulls through
/// `to_ffi` / `from_ffi` and compares data type, run ends and values.
#[test]
fn test_nullable_run_array() -> Result<()> {
    // Values 2 and 5 (indices 1 and 4) are masked out by the null buffer.
    let validity = NullBuffer::from(vec![true, false, true, true, false]);
    let values = PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(validity));

    // One run end per value.
    let run_end_values = [5_i32, 6, 7, 8, 10];
    let run_ends = PrimitiveArray::<Int32Type>::from_iter_values(run_end_values.iter().copied());

    let source = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    // Producer side: export.
    let (ffi_array, ffi_schema) = to_ffi(&source.to_data())?;

    // Consumer side: import and downcast.
    let data = unsafe { from_ffi(ffi_array, &ffi_schema) }?;
    let imported = make_array(data);
    let actual = imported
        .as_any()
        .downcast_ref::<RunArray<Int32Type>>()
        .unwrap();

    // Compare the parts individually.
    assert_eq!(actual.data_type(), source.data_type());
    assert_eq!(actual.run_ends().values(), source.run_ends().values());
    assert_eq!(actual.values(), source.values());

    Ok(())
}
1295}
1296
1297#[cfg(test)]
1298mod tests_from_ffi {
1299    use std::sync::Arc;
1300
1301    use arrow_buffer::{bit_util, buffer::Buffer};
1302    use arrow_data::transform::MutableArrayData;
1303    use arrow_data::ArrayData;
1304    use arrow_schema::{DataType, Field};
1305
1306    use super::Result;
1307    use crate::builder::GenericByteViewBuilder;
1308    use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1309    use crate::{
1310        array::{
1311            Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1312            Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1313        },
1314        ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
1315        make_array, ArrayRef, GenericByteViewArray, ListArray,
1316    };
1317
1318    fn test_round_trip(expected: &ArrayData) -> Result<()> {
1319        // here we export the array
1320        let array = FFI_ArrowArray::new(expected);
1321        let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1322
1323        // simulate an external consumer by being the consumer
1324        let result = &unsafe { from_ffi(array, &schema) }?;
1325
1326        assert_eq!(result, expected);
1327        Ok(())
1328    }
1329
/// Round-trips a nullable `UInt32Array`.
#[test]
fn test_u32() -> Result<()> {
    let exported = UInt32Array::from(vec![Some(2), None, Some(1), None]).into_data();
    test_round_trip(&exported)
}
1336
/// Round-trips a nullable `UInt64Array`.
#[test]
fn test_u64() -> Result<()> {
    let exported = UInt64Array::from(vec![Some(2), None, Some(1), None]).into_data();
    test_round_trip(&exported)
}
1343
/// Round-trips a nullable `Int64Array`.
#[test]
fn test_i64() -> Result<()> {
    let exported = Int64Array::from(vec![Some(2), None, Some(1), None]).into_data();
    test_round_trip(&exported)
}
1350
/// Round-trips a struct array that nests another struct array as its first column.
#[test]
fn test_struct() -> Result<()> {
    // Inner struct: { a1: Boolean, a2: UInt32 }.
    let a1: Arc<dyn Array> = Arc::new(BooleanArray::from(vec![true, true, false, false]));
    let a2: Arc<dyn Array> = Arc::new(UInt32Array::from(vec![1, 2, 3, 4]));
    let inner = StructArray::from(vec![
        (Arc::new(Field::new("a1", DataType::Boolean, false)), a1),
        (Arc::new(Field::new("a2", DataType::UInt32, false)), a2),
    ]);

    // Outer struct: { a: <inner>, b: Boolean, c: UInt32 }.
    let a_field = Arc::new(Field::new("a", inner.data_type().clone(), false));
    let a: Arc<dyn Array> = Arc::new(inner);
    let b: Arc<dyn Array> = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let c: Arc<dyn Array> = Arc::new(UInt32Array::from(vec![42, 28, 19, 31]));
    let outer = StructArray::from(vec![
        (a_field, a),
        (Arc::new(Field::new("b", DataType::Boolean, false)), b),
        (Arc::new(Field::new("c", DataType::UInt32, false)), c),
    ]);

    test_round_trip(&outer.into_data())
}
1381
/// Round-trips a dictionary array with nullable `Int32` keys and nullable string values.
#[test]
fn test_dictionary() -> Result<()> {
    let dictionary_values = StringArray::from(vec![Some("foo"), Some("bar"), None]);

    // Keys reference the three dictionary entries; some keys are null.
    let key_values = vec![
        Some(0),
        Some(1),
        None,
        Some(1),
        Some(1),
        None,
        Some(1),
        Some(2),
        Some(1),
        None,
    ];
    let keys = Int32Array::from(key_values);

    let dictionary = DictionaryArray::new(keys, Arc::new(dictionary_values));
    test_round_trip(&dictionary.into_data())
}
1402
/// Round-trips a `FixedSizeBinaryArray` of three 3-byte values without nulls.
#[test]
fn test_fixed_size_binary() -> Result<()> {
    let rows = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
    let exported = FixedSizeBinaryArray::try_from_iter(rows.into_iter())?.into_data();
    test_round_trip(&exported)
}
1411
/// Round-trips a sparse `FixedSizeBinaryArray` (value width 3) with leading, interior and
/// trailing nulls.
#[test]
fn test_fixed_size_binary_with_nulls() -> Result<()> {
    let rows = vec![
        None,
        Some(vec![10, 10, 10]),
        None,
        Some(vec![20, 20, 20]),
        Some(vec![30, 30, 30]),
        None,
    ];
    let sparse = FixedSizeBinaryArray::try_from_sparse_iter_with_size(rows.into_iter(), 3)?;

    test_round_trip(&sparse.into_data())
}
1427
/// Round-trips a `FixedSizeList` of three lists with three `Int64` values each, no nulls.
#[test]
fn test_fixed_size_list() -> Result<()> {
    // Nine child values: 0..9.
    let child_values: Vec<i64> = (0..9).collect();
    let child = ArrayData::builder(DataType::Int64)
        .len(9)
        .add_buffer(Buffer::from_slice_ref(child_values))
        .build()?;

    let item_field = Arc::new(Field::new("f", DataType::Int64, false));
    let list_data = ArrayData::builder(DataType::FixedSizeList(item_field, 3))
        .len(3)
        .add_child_data(child)
        .build()?;

    let exported = FixedSizeListArray::from(list_data).into_data();
    test_round_trip(&exported)
}
1446
/// Round-trips a `FixedSizeList` of eight 2-element `Int16` lists with a validity bitmap.
#[test]
fn test_fixed_size_list_with_nulls() -> Result<()> {
    // Validity bitmap 0100 0110: bits 1, 2 and 6 are set.
    let mut validity: [u8; 1] = [0; 1];
    for bit in [1, 2, 6] {
        bit_util::set_bit(&mut validity, bit);
    }

    // Sixteen child values: 0..16.
    let child_values: Vec<i16> = (0..16).collect();
    let child = ArrayData::builder(DataType::Int16)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(child_values))
        .build()?;

    let item_field = Arc::new(Field::new("f", DataType::Int16, false));
    let list_data = ArrayData::builder(DataType::FixedSizeList(item_field, 2))
        .len(8)
        .null_bit_buffer(Some(Buffer::from(validity)))
        .add_child_data(child)
        .build()?;

    let exported = FixedSizeListArray::from(list_data).into_data();
    test_round_trip(&exported)
}
1472
/// Round-trips a `FixedSizeList` (size 2, with a validity bitmap) whose items are
/// variable-size `List<Int32>` arrays.
#[test]
fn test_fixed_size_list_nested() -> Result<()> {
    // Innermost values: 0..16.
    let leaf_values: Vec<i32> = (0..16).collect();
    let leaf = ArrayData::builder(DataType::Int32)
        .len(16)
        .add_buffer(Buffer::from_slice_ref(leaf_values))
        .build()?;

    // Eight inner lists of two values each.
    let inner_offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
    let inner_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
    let inner = ArrayData::builder(inner_type.clone())
        .len(8)
        .add_buffer(Buffer::from_slice_ref(inner_offsets))
        .add_child_data(leaf)
        .build()?;

    // Validity bitmap 0000 0100: only bit 2 is set.
    let mut validity: [u8; 1] = [0; 1];
    bit_util::set_bit(&mut validity, 2);

    // Four outer lists, each grouping two inner lists.
    let outer_type = DataType::FixedSizeList(Arc::new(Field::new("f", inner_type, false)), 2);
    let outer = ArrayData::builder(outer_type)
        .len(4)
        .null_bit_buffer(Some(Buffer::from(validity)))
        .add_child_data(inner)
        .build()?;

    let exported = FixedSizeListArray::from(outer).into_data();
    test_round_trip(&exported)
}
1508
/// Imports an empty string array whose single offset is non-zero (123), checks the
/// buffer lengths reported by `ImportedArrowArray`, and round-trips the consumed data.
///
/// Compiled only when the `force_validate` feature is disabled.
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_empty_string_with_non_zero_offset() -> Result<()> {
    use super::ImportedArrowArray;
    use arrow_buffer::{MutableBuffer, OffsetBuffer};

    // Mimic what a producer might hand over: zero values, one offset of 123.
    let values: Buffer = MutableBuffer::new(0).into();
    let offsets = OffsetBuffer::new(vec![123].into());
    // The unchecked constructor is used on purpose so the unusual offset is accepted as-is.
    let string_array =
        unsafe { StringArray::new_unchecked(offsets.clone(), values.clone(), None) };

    let exported = string_array.into_data();

    let ffi_array = Arc::new(FFI_ArrowArray::new(&exported));
    let ffi_schema = FFI_ArrowSchema::try_from(exported.data_type())?;

    let imported = ImportedArrowArray {
        array: &ffi_array,
        data_type: DataType::try_from(&ffi_schema)?,
        owner: &ffi_array,
    };

    // Buffer 1 is the offsets buffer, buffer 2 the value bytes.
    let offset_buf_len = imported.buffer_len(1, &[], &imported.data_type)?;
    let data_buf_len = imported.buffer_len(2, &[], &imported.data_type)?;

    assert_eq!(offset_buf_len, 4);
    assert_eq!(data_buf_len, 0);

    test_round_trip(&imported.consume()?)
}
1542
1543    fn roundtrip_string_array(array: StringArray) -> StringArray {
1544        let data = array.into_data();
1545
1546        let array = FFI_ArrowArray::new(&data);
1547        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1548
1549        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1550        StringArray::from(array)
1551    }
1552
1553    fn roundtrip_byte_view_array<T: ByteViewType>(
1554        array: GenericByteViewArray<T>,
1555    ) -> GenericByteViewArray<T> {
1556        let data = array.into_data();
1557
1558        let array = FFI_ArrowArray::new(&data);
1559        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1560
1561        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1562        GenericByteViewArray::<T>::from(array)
1563    }
1564
1565    fn extend_array(array: &dyn Array) -> ArrayRef {
1566        let len = array.len();
1567        let data = array.to_data();
1568
1569        let mut mutable = MutableArrayData::new(vec![&data], false, len);
1570        mutable.extend(0, 0, len);
1571        make_array(mutable.freeze())
1572    }
1573
/// Round-trips a 1000-element string array and its second half through FFI, and
/// checks that copying the imported arrays via `extend_array` reproduces them.
#[test]
fn test_extend_imported_string_slice() {
    let strings: Vec<String> = (0..1000).map(|i| format!("string: {}", i)).collect();
    let string_array = StringArray::from(strings);

    // Full array.
    let full = roundtrip_string_array(string_array.clone());
    assert_eq!(full.len(), 1000);
    assert_eq!(full.value(0), "string: 0");
    assert_eq!(full.value(499), "string: 499");

    let full_copy = extend_array(&full);
    assert_eq!(
        full_copy.as_any().downcast_ref::<StringArray>().unwrap(),
        &full
    );

    // Second half only: elements 500..1000.
    let half = roundtrip_string_array(string_array.slice(500, 500));
    assert_eq!(half.len(), 500);
    assert_eq!(half.value(0), "string: 500");
    assert_eq!(half.value(499), "string: 999");

    let half_copy = extend_array(&half);
    assert_eq!(
        half_copy.as_any().downcast_ref::<StringArray>().unwrap(),
        &half
    );
}
1608
1609    fn roundtrip_list_array(array: ListArray) -> ListArray {
1610        let data = array.into_data();
1611
1612        let array = FFI_ArrowArray::new(&data);
1613        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1614
1615        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1616        ListArray::from(array)
1617    }
1618
/// Round-trips the second half of a 1000-row list array (100 `Int32` values per row)
/// through FFI, and checks that copying the imported array via `extend_array`
/// reproduces it.
#[test]
fn test_extend_imported_list_slice() {
    // Row `i` holds the values `i * 1000 + 0 ..= i * 1000 + 99`.
    let rows: Vec<Option<Vec<Option<i32>>>> = (0..1000_i32)
        .map(|i| Some((0..100_i32).map(|j| Some(i * 1000 + j)).collect()))
        .collect();

    let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Rows 500..1000.
    let slice = list_array.slice(500, 500);
    let imported = roundtrip_list_array(slice.clone());
    assert_eq!(imported.len(), 500);
    assert_eq!(&slice, &imported);

    let copied = extend_array(&imported);
    assert_eq!(
        copied.as_any().downcast_ref::<ListArray>().unwrap(),
        &imported
    );
}
1644
/// Test-only conversion from a `&str` to a borrowed native scalar, so the same string
/// literals can serve as either `BinaryViewType::Native` or `StringViewType::Native`
/// values.
trait NativeFromStr {
    /// Borrows `value` as `Self`.
    fn from_str(value: &str) -> &Self;
}

/// A `str` is returned unchanged.
impl NativeFromStr for str {
    fn from_str(value: &str) -> &str {
        value
    }
}

/// A `[u8]` is the string's underlying byte slice.
impl NativeFromStr for [u8] {
    fn from_str(value: &str) -> &[u8] {
        str::as_bytes(value)
    }
}
1662
1663    #[test]
1664    fn test_round_trip_byte_view() {
1665        fn test_case<T>()
1666        where
1667            T: ByteViewType,
1668            T::Native: NativeFromStr,
1669        {
1670            macro_rules! run_test_case {
1671                ($array:expr) => {{
1672                    // round-trip through C  Data Interface
1673                    let len = $array.len();
1674                    let imported = roundtrip_byte_view_array($array);
1675                    assert_eq!(imported.len(), len);
1676
1677                    let copied = extend_array(&imported);
1678                    assert_eq!(
1679                        copied
1680                            .as_any()
1681                            .downcast_ref::<GenericByteViewArray<T>>()
1682                            .unwrap(),
1683                        &imported
1684                    );
1685                }};
1686            }
1687
1688            // Empty test case.
1689            let empty = GenericByteViewBuilder::<T>::new().finish();
1690            run_test_case!(empty);
1691
1692            // All inlined strings test case.
1693            let mut all_inlined = GenericByteViewBuilder::<T>::new();
1694            all_inlined.append_value(T::Native::from_str("inlined1"));
1695            all_inlined.append_value(T::Native::from_str("inlined2"));
1696            all_inlined.append_value(T::Native::from_str("inlined3"));
1697            let all_inlined = all_inlined.finish();
1698            assert_eq!(all_inlined.data_buffers().len(), 0);
1699            run_test_case!(all_inlined);
1700
1701            // some inlined + non-inlined, 1 variadic buffer.
1702            let mixed_one_variadic = {
1703                let mut builder = GenericByteViewBuilder::<T>::new();
1704                builder.append_value(T::Native::from_str("inlined"));
1705                let block_id =
1706                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1707                builder.try_append_view(block_id, 0, 25).unwrap();
1708                builder.finish()
1709            };
1710            assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
1711            run_test_case!(mixed_one_variadic);
1712
1713            // inlined + non-inlined, 2 variadic buffers.
1714            let mixed_two_variadic = {
1715                let mut builder = GenericByteViewBuilder::<T>::new();
1716                builder.append_value(T::Native::from_str("inlined"));
1717                let block_id =
1718                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1719                builder.try_append_view(block_id, 0, 25).unwrap();
1720
1721                let block_id = builder
1722                    .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
1723                builder.try_append_view(block_id, 0, 33).unwrap();
1724                builder.finish()
1725            };
1726            assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
1727            run_test_case!(mixed_two_variadic);
1728        }
1729
1730        test_case::<StringViewType>();
1731        test_case::<BinaryViewType>();
1732    }
1733}