arrow_array/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19//!
//! Generally, this module is divided into two main interfaces:
//! The first interface maps the C ABI to native Rust types, i.e. it converts C pointers and
//! `c_char` to native Rust. This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
//!
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow,
//! such as `format` to `DataType`, `Buffer`, etc. This is handled by `from_ffi` and `to_ffi`.
26//!
27//!
28//! Export to FFI
29//!
30//! ```rust
31//! # use std::sync::Arc;
32//! # use arrow_array::{Int32Array, Array, make_array};
33//! # use arrow_data::ArrayData;
34//! # use arrow_array::ffi::{to_ffi, from_ffi};
35//! # use arrow_schema::ArrowError;
36//! # fn main() -> Result<(), ArrowError> {
37//! // create an array natively
38//!
39//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
40//! let data = array.into_data();
41//!
42//! // Export it
43//! let (out_array, out_schema) = to_ffi(&data)?;
44//!
45//! // import it
46//! let data = unsafe { from_ffi(out_array, &out_schema) }?;
47//! let array = Int32Array::from(data);
48//!
49//! // verify
50//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)]));
51//! #
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! Import from FFI
57//!
58//! ```
59//! # use std::ptr::addr_of_mut;
60//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray};
61//! # use arrow_array::{ArrayRef, make_array};
62//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema};
63//! #
64//! /// A foreign data container that can export to C Data interface
65//! struct ForeignArray {};
66//!
67//! impl ForeignArray {
68//!     /// Export from foreign array representation to C Data interface
69//!     /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
70//!     fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
71//!         // ...
72//!     }
73//! }
74//!
75//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
76//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
77//!     let mut schema = FFI_ArrowSchema::empty();
78//!     let mut array = FFI_ArrowArray::empty();
79//!     foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
80//!     Ok(make_array(unsafe { from_ffi(array, &schema) }?))
81//! }
82//! ```
83
/*
# Design:

Main assumptions:
* A memory region is deallocated according to its own release mechanism.
* Rust shares memory regions between arrays.
* A memory region should be deallocated when no one is using it.

The design of this module is as follows:

`ArrowArray` contains two `Arc`s, one per ABI-compatible `struct`, each containing data
according to the C Data Interface. These Arcs are used for ref counting of the structs
within Rust and lifetime management.

Each ABI-compatible `struct` knows how to `drop` itself, calling `release`.

To import an array, unsafely create an `ArrowArray` from two pointers using [ArrowArray::try_from_raw].
To export an array, create an `ArrowArray` using [ArrowArray::try_new].
*/
103
104use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{bit_util, Buffer, MutableBuffer};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{layout, ArrayData};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
117/// # Safety
118/// Assumes that these pointers represent valid C Data Interfaces, both in memory
119/// representation and lifetime via the `release` mechanism.
120///
121/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and
122/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
123/// Usually the raw pointers are provided by the array data consumer.
124#[deprecated(
125    since = "52.0.0",
126    note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129    src: ArrayRef,
130    out_array: *mut FFI_ArrowArray,
131    out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133    let data = src.to_data();
134    let array = FFI_ArrowArray::new(&data);
135    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137    std::ptr::write_unaligned(out_array, array);
138    std::ptr::write_unaligned(out_schema, schema);
139
140    Ok(())
141}
142
143// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144// This is set by the Arrow specification
145fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146    if let Some(primitive) = data_type.primitive_width() {
147        return match i {
148            0 => Err(ArrowError::CDataInterface(format!(
149                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150            ))),
151            1 => Ok(primitive * 8),
152            i => Err(ArrowError::CDataInterface(format!(
153                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154            ))),
155        };
156    }
157
158    Ok(match (data_type, i) {
159        (DataType::Boolean, 1) => 1,
160        (DataType::Boolean, _) => {
161            return Err(ArrowError::CDataInterface(format!(
162                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163            )))
164        }
165        (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
166        (DataType::FixedSizeList(f, num_elems), 1) => {
167            let child_bit_width = bit_width(f.data_type(), 1)?;
168            child_bit_width * (*num_elems as usize)
169        },
170        (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171            return Err(ArrowError::CDataInterface(format!(
172                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173            )))
174        },
175        // Variable-size list and map have one i32 buffer.
176        // Variable-sized binaries: have two buffers.
177        // "small": first buffer is i32, second is in bytes
178        (DataType::Utf8, 1) | (DataType::Binary, 1) | (DataType::List(_), 1) | (DataType::Map(_, _), 1) => i32::BITS as _,
179        (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
180        (DataType::List(_), _) | (DataType::Map(_, _), _) => {
181            return Err(ArrowError::CDataInterface(format!(
182                "The datatype \"{data_type:?}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
183            )))
184        }
185        (DataType::Utf8, _) | (DataType::Binary, _) => {
186            return Err(ArrowError::CDataInterface(format!(
187                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
188            )))
189        }
190        // Variable-sized binaries: have two buffers.
191        // LargeUtf8: first buffer is i64, second is in bytes
192        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => i64::BITS as _,
193        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2)=> u8::BITS as _,
194        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _)=> {
195            return Err(ArrowError::CDataInterface(format!(
196                "The datatype \"{data_type:?}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
197            )))
198        }
199        // Variable-sized views: have 3 or more buffers.
200        // Buffer 1 are the u128 views
201        // Buffers 2...N-1 are u8 byte buffers
202        (DataType::Utf8View, 1) | (DataType::BinaryView,1) => u128::BITS as _,
203        (DataType::Utf8View, _) | (DataType::BinaryView, _) => {
204            u8::BITS as _
205        }
206        // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0.
207        (DataType::Union(_, _), 0) => i8::BITS as _,
208        // Only DenseUnion has 2nd buffer
209        (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
210        (DataType::Union(_, UnionMode::Sparse), _) => {
211            return Err(ArrowError::CDataInterface(format!(
212                "The datatype \"{data_type:?}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
213            )))
214        }
215        (DataType::Union(_, UnionMode::Dense), _) => {
216            return Err(ArrowError::CDataInterface(format!(
217                "The datatype \"{data_type:?}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
218            )))
219        }
220        (_, 0) => {
221            // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
222            // UnionArray, they should be handled above.
223            return Err(ArrowError::CDataInterface(format!(
224                "The datatype \"{data_type:?}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
225            )))
226        }
227        _ => {
228            return Err(ArrowError::CDataInterface(format!(
229                "The datatype \"{data_type:?}\" is still not supported in Rust implementation"
230            )))
231        }
232    })
233}
234
235/// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer).
236/// `bits` is the number of bits that the native type of this buffer has.
237/// The size of the buffer will be `ceil(self.length * bits, 8)`.
238/// # Panic
239/// This function panics if `i` is larger or equal to `n_buffers`.
240/// # Safety
241/// This function assumes that `ceil(self.length * bits, 8)` is the size of the buffer
242unsafe fn create_buffer(
243    owner: Arc<FFI_ArrowArray>,
244    array: &FFI_ArrowArray,
245    index: usize,
246    len: usize,
247) -> Option<Buffer> {
248    if array.num_buffers() == 0 {
249        return None;
250    }
251    NonNull::new(array.buffer(index) as _)
252        .map(|ptr| Buffer::from_custom_allocation(ptr, len, owner))
253}
254
255/// Export to the C Data Interface
256pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
257    let array = FFI_ArrowArray::new(data);
258    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
259    Ok((array, schema))
260}
261
262/// Import [ArrayData] from the C Data Interface
263///
264/// # Safety
265///
266/// This struct assumes that the incoming data agrees with the C data interface.
267pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
268    let dt = DataType::try_from(schema)?;
269    let array = Arc::new(array);
270    let tmp = ImportedArrowArray {
271        array: &array,
272        data_type: dt,
273        owner: &array,
274    };
275    tmp.consume()
276}
277
278/// Import [ArrayData] from the C Data Interface
279///
280/// # Safety
281///
282/// This struct assumes that the incoming data agrees with the C data interface.
283pub unsafe fn from_ffi_and_data_type(
284    array: FFI_ArrowArray,
285    data_type: DataType,
286) -> Result<ArrayData> {
287    let array = Arc::new(array);
288    let tmp = ImportedArrowArray {
289        array: &array,
290        data_type,
291        owner: &array,
292    };
293    tmp.consume()
294}
295
296#[derive(Debug)]
297struct ImportedArrowArray<'a> {
298    array: &'a FFI_ArrowArray,
299    data_type: DataType,
300    owner: &'a Arc<FFI_ArrowArray>,
301}
302
303impl ImportedArrowArray<'_> {
304    fn consume(self) -> Result<ArrayData> {
305        let len = self.array.len();
306        let offset = self.array.offset();
307        let null_count = match &self.data_type {
308            DataType::Null => Some(0),
309            _ => self.array.null_count_opt(),
310        };
311
312        let data_layout = layout(&self.data_type);
313        let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
314
315        let null_bit_buffer = if data_layout.can_contain_null_mask {
316            self.null_bit_buffer()
317        } else {
318            None
319        };
320
321        let mut child_data = self.consume_children()?;
322
323        if let Some(d) = self.dictionary()? {
324            // For dictionary type there should only be a single child, so we don't need to worry if
325            // there are other children added above.
326            assert!(child_data.is_empty());
327            child_data.push(d.consume()?);
328        }
329
330        // Should FFI be checking validity?
331        Ok(unsafe {
332            ArrayData::new_unchecked(
333                self.data_type,
334                len,
335                null_count,
336                null_bit_buffer,
337                offset,
338                buffers,
339                child_data,
340            )
341        })
342    }
343
344    fn consume_children(&self) -> Result<Vec<ArrayData>> {
345        match &self.data_type {
346            DataType::List(field)
347            | DataType::FixedSizeList(field, _)
348            | DataType::LargeList(field)
349            | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
350            DataType::Struct(fields) => {
351                assert!(fields.len() == self.array.num_children());
352                fields
353                    .iter()
354                    .enumerate()
355                    .map(|(i, field)| self.consume_child(i, field.data_type()))
356                    .collect::<Result<Vec<_>>>()
357            }
358            DataType::Union(union_fields, _) => {
359                assert!(union_fields.len() == self.array.num_children());
360                union_fields
361                    .iter()
362                    .enumerate()
363                    .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
364                    .collect::<Result<Vec<_>>>()
365            }
366            DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
367                self.consume_child(0, run_ends_field.data_type())?,
368                self.consume_child(1, values_field.data_type())?,
369            ]
370            .to_vec()),
371            _ => Ok(Vec::new()),
372        }
373    }
374
375    fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
376        ImportedArrowArray {
377            array: self.array.child(index),
378            data_type: child_type.clone(),
379            owner: self.owner,
380        }
381        .consume()
382    }
383
384    /// returns all buffers, as organized by Rust (i.e. null buffer is skipped if it's present
385    /// in the spec of the type)
386    fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
387        // + 1: skip null buffer
388        let buffer_begin = can_contain_null_mask as usize;
389        let buffer_end = self.array.num_buffers() - usize::from(variadic);
390
391        let variadic_buffer_lens = if variadic {
392            // Each views array has 1 (optional) null buffer, 1 views buffer, 1 lengths buffer.
393            // Rest are variadic.
394            let num_variadic_buffers =
395                self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
396            if num_variadic_buffers == 0 {
397                &[]
398            } else {
399                let lengths = self.array.buffer(self.array.num_buffers() - 1);
400                // SAFETY: is lengths is non-null, then it must be valid for up to num_variadic_buffers.
401                unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
402            }
403        } else {
404            &[]
405        };
406
407        (buffer_begin..buffer_end)
408            .map(|index| {
409                let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
410                match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
411                    Some(buf) => {
412                        // External libraries may use a dangling pointer for a buffer with length 0.
413                        // We respect the array length specified in the C Data Interface. Actually,
414                        // if the length is incorrect, we cannot create a correct buffer even if
415                        // the pointer is valid.
416                        if buf.is_empty() {
417                            Ok(MutableBuffer::new(0).into())
418                        } else {
419                            Ok(buf)
420                        }
421                    }
422                    None if len == 0 => {
423                        // Null data buffer, which Rust doesn't allow. So create
424                        // an empty buffer.
425                        Ok(MutableBuffer::new(0).into())
426                    }
427                    None => Err(ArrowError::CDataInterface(format!(
428                        "The external buffer at position {index} is null."
429                    ))),
430                }
431            })
432            .collect()
433    }
434
435    /// Returns the length, in bytes, of the buffer `i` (indexed according to the C data interface)
436    /// Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
437    /// for variable-sized buffers, such as the second buffer of a stringArray, we need
438    /// to fetch offset buffer's len to build the second buffer.
439    fn buffer_len(
440        &self,
441        i: usize,
442        variadic_buffer_lengths: &[i64],
443        dt: &DataType,
444    ) -> Result<usize> {
445        // Special handling for dictionary type as we only care about the key type in the case.
446        let data_type = match dt {
447            DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
448            dt => dt,
449        };
450
451        // `ffi::ArrowArray` records array offset, we need to add it back to the
452        // buffer length to get the actual buffer length.
453        let length = self.array.len() + self.array.offset();
454
455        // Inner type is not important for buffer length.
456        Ok(match (&data_type, i) {
457            (DataType::Utf8, 1)
458            | (DataType::LargeUtf8, 1)
459            | (DataType::Binary, 1)
460            | (DataType::LargeBinary, 1)
461            | (DataType::List(_), 1)
462            | (DataType::LargeList(_), 1)
463            | (DataType::Map(_, _), 1) => {
464                // the len of the offset buffer (buffer 1) equals length + 1
465                let bits = bit_width(data_type, i)?;
466                debug_assert_eq!(bits % 8, 0);
467                (length + 1) * (bits / 8)
468            }
469            (DataType::Utf8, 2) | (DataType::Binary, 2) => {
470                if self.array.is_empty() {
471                    return Ok(0);
472                }
473
474                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
475                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
476                // first buffer is the null buffer => add(1)
477                // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
478                #[allow(clippy::cast_ptr_alignment)]
479                let offset_buffer = self.array.buffer(1) as *const i32;
480                // get last offset
481                (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
482            }
483            (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
484                if self.array.is_empty() {
485                    return Ok(0);
486                }
487
488                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
489                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
490                // first buffer is the null buffer => add(1)
491                // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
492                #[allow(clippy::cast_ptr_alignment)]
493                let offset_buffer = self.array.buffer(1) as *const i64;
494                // get last offset
495                (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
496            }
497            // View types: these have variadic buffers.
498            // Buffer 1 is the views buffer, which stores 1 u128 per length of the array.
499            // Buffers 2..N-1 are the buffers holding the byte data. Their lengths are variable.
500            // Buffer N is of length (N - 2) and stores i64 containing the lengths of buffers 2..N-1
501            (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
502                std::mem::size_of::<u128>() * length
503            }
504            (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
505                variadic_buffer_lengths[i - 2] as usize
506            }
507            // buffer len of primitive types
508            _ => {
509                let bits = bit_width(data_type, i)?;
510                bit_util::ceil(length * bits, 8)
511            }
512        })
513    }
514
515    /// returns the null bit buffer.
516    /// Rust implementation uses a buffer that is not part of the array of buffers.
517    /// The C Data interface's null buffer is part of the array of buffers.
518    fn null_bit_buffer(&self) -> Option<Buffer> {
519        // similar to `self.buffer_len(0)`, but without `Result`.
520        // `ffi::ArrowArray` records array offset, we need to add it back to the
521        // buffer length to get the actual buffer length.
522        let length = self.array.len() + self.array.offset();
523        let buffer_len = bit_util::ceil(length, 8);
524
525        unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
526    }
527
528    fn dictionary(&self) -> Result<Option<ImportedArrowArray>> {
529        match (self.array.dictionary(), &self.data_type) {
530            (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
531                array,
532                data_type: value_type.as_ref().clone(),
533                owner: self.owner,
534            })),
535            (Some(_), _) => Err(ArrowError::CDataInterface(
536                "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
537            )),
538            (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
539                "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
540            )),
541            (_, _) => Ok(None),
542        }
543    }
544}
545
546#[cfg(test)]
547mod tests_to_then_from_ffi {
548    use std::collections::HashMap;
549    use std::mem::ManuallyDrop;
550
551    use arrow_buffer::NullBuffer;
552    use arrow_schema::Field;
553
554    use crate::builder::UnionBuilder;
555    use crate::cast::AsArray;
556    use crate::types::{Float64Type, Int32Type, Int8Type};
557    use crate::*;
558
559    use super::*;
560
561    #[test]
562    fn test_round_trip() {
563        // create an array natively
564        let array = Int32Array::from(vec![1, 2, 3]);
565
566        // export it
567        let (array, schema) = to_ffi(&array.into_data()).unwrap();
568
569        // (simulate consumer) import it
570        let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
571
572        // verify
573        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
574    }
575
576    #[test]
577    fn test_import() {
578        // Model receiving const pointers from an external system
579
580        // Create an array natively
581        let data = Int32Array::from(vec![1, 2, 3]).into_data();
582        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
583        let array = FFI_ArrowArray::new(&data);
584
585        // Use ManuallyDrop to avoid Box:Drop recursing
586        let schema = Box::new(ManuallyDrop::new(schema));
587        let array = Box::new(ManuallyDrop::new(array));
588
589        let schema_ptr = &**schema as *const _;
590        let array_ptr = &**array as *const _;
591
592        // We can read them back to memory
593        // SAFETY:
594        // Pointers are aligned and valid
595        let data =
596            unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };
597
598        let array = Int32Array::from(data);
599        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
600    }
601
602    #[test]
603    fn test_round_trip_with_offset() -> Result<()> {
604        // create an array natively
605        let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]);
606
607        let array = array.slice(1, 2);
608
609        // export it
610        let (array, schema) = to_ffi(&array.to_data())?;
611
612        // (simulate consumer) import it
613        let data = unsafe { from_ffi(array, &schema) }?;
614        let array = make_array(data);
615        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
616
617        assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
618
619        // (drop/release)
620        Ok(())
621    }
622
623    #[test]
624    #[cfg(not(feature = "force_validate"))]
625    fn test_decimal_round_trip() -> Result<()> {
626        // create an array natively
627        let original_array = [Some(12345_i128), Some(-12345_i128), None]
628            .into_iter()
629            .collect::<Decimal128Array>()
630            .with_precision_and_scale(6, 2)
631            .unwrap();
632
633        // export it
634        let (array, schema) = to_ffi(&original_array.to_data())?;
635
636        // (simulate consumer) import it
637        let data = unsafe { from_ffi(array, &schema) }?;
638        let array = make_array(data);
639
640        // perform some operation
641        let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
642
643        // verify
644        assert_eq!(array, &original_array);
645
646        // (drop/release)
647        Ok(())
648    }
649    // case with nulls is tested in the docs, through the example on this module.
650
651    #[test]
652    fn test_null_count_handling() {
653        let int32_data = ArrayData::builder(DataType::Int32)
654            .len(10)
655            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
656            .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
657            .build()
658            .unwrap();
659        let mut ffi_array = FFI_ArrowArray::new(&int32_data);
660        assert_eq!(3, ffi_array.null_count());
661        assert_eq!(Some(3), ffi_array.null_count_opt());
662        // Simulating uninitialized state
663        unsafe {
664            ffi_array.set_null_count(-1);
665        }
666        assert_eq!(None, ffi_array.null_count_opt());
667        let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap();
668        assert_eq!(3, int32_data.null_count());
669
670        let null_data = &ArrayData::new_null(&DataType::Null, 10);
671        let mut ffi_array = FFI_ArrowArray::new(null_data);
672        assert_eq!(10, ffi_array.null_count());
673        assert_eq!(Some(10), ffi_array.null_count_opt());
674        // Simulating uninitialized state
675        unsafe {
676            ffi_array.set_null_count(-1);
677        }
678        assert_eq!(None, ffi_array.null_count_opt());
679        let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap();
680        assert_eq!(0, null_data.null_count());
681    }
682
683    fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
684        // create an array natively
685        let array = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
686
687        // export it
688        let (array, schema) = to_ffi(&array.to_data())?;
689
690        // (simulate consumer) import it
691        let data = unsafe { from_ffi(array, &schema) }?;
692        let array = make_array(data);
693
694        // perform some operation
695        let array = array
696            .as_any()
697            .downcast_ref::<GenericStringArray<Offset>>()
698            .unwrap();
699
700        // verify
701        let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
702        assert_eq!(array, &expected);
703
704        // (drop/release)
705        Ok(())
706    }
707
708    #[test]
709    fn test_string() -> Result<()> {
710        test_generic_string::<i32>()
711    }
712
713    #[test]
714    fn test_large_string() -> Result<()> {
715        test_generic_string::<i64>()
716    }
717
718    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
719        // Construct a value array
720        let value_data = ArrayData::builder(DataType::Int32)
721            .len(8)
722            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
723            .build()
724            .unwrap();
725
726        // Construct a buffer for value offsets, for the nested array:
727        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
728        let value_offsets = [0_usize, 3, 6, 8]
729            .iter()
730            .map(|i| Offset::from_usize(*i).unwrap())
731            .collect::<Buffer>();
732
733        // Construct a list array from the above two
734        let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
735            Field::new_list_field(DataType::Int32, false),
736        ));
737
738        let list_data = ArrayData::builder(list_data_type)
739            .len(3)
740            .add_buffer(value_offsets)
741            .add_child_data(value_data)
742            .build()
743            .unwrap();
744
745        // create an array natively
746        let array = GenericListArray::<Offset>::from(list_data.clone());
747
748        // export it
749        let (array, schema) = to_ffi(&array.to_data())?;
750
751        // (simulate consumer) import it
752        let data = unsafe { from_ffi(array, &schema) }?;
753        let array = make_array(data);
754
755        // downcast
756        let array = array
757            .as_any()
758            .downcast_ref::<GenericListArray<Offset>>()
759            .unwrap();
760
761        // verify
762        let expected = GenericListArray::<Offset>::from(list_data);
763        assert_eq!(&array.value(0), &expected.value(0));
764        assert_eq!(&array.value(1), &expected.value(1));
765        assert_eq!(&array.value(2), &expected.value(2));
766
767        // (drop/release)
768        Ok(())
769    }
770
771    #[test]
772    fn test_list() -> Result<()> {
773        test_generic_list::<i32>()
774    }
775
776    #[test]
777    fn test_large_list() -> Result<()> {
778        test_generic_list::<i64>()
779    }
780
781    fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
782        // create an array natively
783        let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
784        let array = GenericBinaryArray::<Offset>::from(array);
785
786        // export it
787        let (array, schema) = to_ffi(&array.to_data())?;
788
789        // (simulate consumer) import it
790        let data = unsafe { from_ffi(array, &schema) }?;
791        let array = make_array(data);
792        let array = array
793            .as_any()
794            .downcast_ref::<GenericBinaryArray<Offset>>()
795            .unwrap();
796
797        // verify
798        let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
799        let expected = GenericBinaryArray::<Offset>::from(expected);
800        assert_eq!(array, &expected);
801
802        // (drop/release)
803        Ok(())
804    }
805
806    #[test]
807    fn test_binary() -> Result<()> {
808        test_generic_binary::<i32>()
809    }
810
811    #[test]
812    fn test_large_binary() -> Result<()> {
813        test_generic_binary::<i64>()
814    }
815
816    #[test]
817    fn test_bool() -> Result<()> {
818        // create an array natively
819        let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
820
821        // export it
822        let (array, schema) = to_ffi(&array.to_data())?;
823
824        // (simulate consumer) import it
825        let data = unsafe { from_ffi(array, &schema) }?;
826        let array = make_array(data);
827        let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
828
829        // verify
830        assert_eq!(
831            array,
832            &BooleanArray::from(vec![None, Some(true), Some(false)])
833        );
834
835        // (drop/release)
836        Ok(())
837    }
838
839    #[test]
840    fn test_time32() -> Result<()> {
841        // create an array natively
842        let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
843
844        // export it
845        let (array, schema) = to_ffi(&array.to_data())?;
846
847        // (simulate consumer) import it
848        let data = unsafe { from_ffi(array, &schema) }?;
849        let array = make_array(data);
850        let array = array
851            .as_any()
852            .downcast_ref::<Time32MillisecondArray>()
853            .unwrap();
854
855        // verify
856        assert_eq!(
857            array,
858            &Time32MillisecondArray::from(vec![None, Some(1), Some(2)])
859        );
860
861        // (drop/release)
862        Ok(())
863    }
864
865    #[test]
866    fn test_timestamp() -> Result<()> {
867        // create an array natively
868        let array = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
869
870        // export it
871        let (array, schema) = to_ffi(&array.to_data())?;
872
873        // (simulate consumer) import it
874        let data = unsafe { from_ffi(array, &schema) }?;
875        let array = make_array(data);
876        let array = array
877            .as_any()
878            .downcast_ref::<TimestampMillisecondArray>()
879            .unwrap();
880
881        // verify
882        assert_eq!(
883            array,
884            &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)])
885        );
886
887        // (drop/release)
888        Ok(())
889    }
890
891    #[test]
892    fn test_fixed_size_binary_array() -> Result<()> {
893        let values = vec![
894            None,
895            Some(vec![10, 10, 10]),
896            None,
897            Some(vec![20, 20, 20]),
898            Some(vec![30, 30, 30]),
899            None,
900        ];
901        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
902
903        // export it
904        let (array, schema) = to_ffi(&array.to_data())?;
905
906        // (simulate consumer) import it
907        let data = unsafe { from_ffi(array, &schema) }?;
908        let array = make_array(data);
909        let array = array
910            .as_any()
911            .downcast_ref::<FixedSizeBinaryArray>()
912            .unwrap();
913
914        // verify
915        assert_eq!(
916            array,
917            &FixedSizeBinaryArray::try_from_sparse_iter_with_size(
918                vec![
919                    None,
920                    Some(vec![10, 10, 10]),
921                    None,
922                    Some(vec![20, 20, 20]),
923                    Some(vec![30, 30, 30]),
924                    None,
925                ]
926                .into_iter(),
927                3
928            )?
929        );
930
931        // (drop/release)
932        Ok(())
933    }
934
935    #[test]
936    fn test_fixed_size_list_array() -> Result<()> {
937        // 0000 0100
938        let mut validity_bits: [u8; 1] = [0; 1];
939        bit_util::set_bit(&mut validity_bits, 2);
940
941        let v: Vec<i32> = (0..9).collect();
942        let value_data = ArrayData::builder(DataType::Int32)
943            .len(9)
944            .add_buffer(Buffer::from_slice_ref(&v))
945            .build()?;
946
947        let list_data_type =
948            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
949        let list_data = ArrayData::builder(list_data_type.clone())
950            .len(3)
951            .null_bit_buffer(Some(Buffer::from(validity_bits)))
952            .add_child_data(value_data)
953            .build()?;
954
955        // export it
956        let (array, schema) = to_ffi(&list_data)?;
957
958        // (simulate consumer) import it
959        let data = unsafe { from_ffi(array, &schema) }?;
960        let array = make_array(data);
961        let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
962
963        // 0010 0100
964        let mut expected_validity_bits: [u8; 1] = [0; 1];
965        bit_util::set_bit(&mut expected_validity_bits, 2);
966        bit_util::set_bit(&mut expected_validity_bits, 5);
967
968        let mut w = vec![];
969        w.extend_from_slice(&v);
970
971        let expected_value_data = ArrayData::builder(DataType::Int32)
972            .len(9)
973            .add_buffer(Buffer::from_slice_ref(&w))
974            .build()?;
975
976        let expected_list_data = ArrayData::builder(list_data_type)
977            .len(3)
978            .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
979            .add_child_data(expected_value_data)
980            .build()?;
981        let expected_array = FixedSizeListArray::from(expected_list_data);
982
983        // verify
984        assert_eq!(array, &expected_array);
985
986        // (drop/release)
987        Ok(())
988    }
989
990    #[test]
991    fn test_dictionary() -> Result<()> {
992        // create an array natively
993        let values = vec!["a", "aaa", "aaa"];
994        let dict_array: DictionaryArray<Int8Type> = values.into_iter().collect();
995
996        // export it
997        let (array, schema) = to_ffi(&dict_array.to_data())?;
998
999        // (simulate consumer) import it
1000        let data = unsafe { from_ffi(array, &schema) }?;
1001        let array = make_array(data);
1002        let actual = array
1003            .as_any()
1004            .downcast_ref::<DictionaryArray<Int8Type>>()
1005            .unwrap();
1006
1007        // verify
1008        let new_values = vec!["a", "aaa", "aaa"];
1009        let expected: DictionaryArray<Int8Type> = new_values.into_iter().collect();
1010        assert_eq!(actual, &expected);
1011
1012        // (drop/release)
1013        Ok(())
1014    }
1015
1016    #[test]
1017    #[allow(deprecated)]
1018    fn test_export_array_into_raw() -> Result<()> {
1019        let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
1020
1021        // Assume two raw pointers provided by the consumer
1022        let mut out_array = FFI_ArrowArray::empty();
1023        let mut out_schema = FFI_ArrowSchema::empty();
1024
1025        {
1026            let out_array_ptr = std::ptr::addr_of_mut!(out_array);
1027            let out_schema_ptr = std::ptr::addr_of_mut!(out_schema);
1028            unsafe {
1029                export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1030            }
1031        }
1032
1033        // (simulate consumer) import it
1034        let data = unsafe { from_ffi(out_array, &out_schema) }?;
1035        let array = make_array(data);
1036
1037        // perform some operation
1038        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1039
1040        // verify
1041        assert_eq!(array, &Int32Array::from(vec![1, 2, 3]));
1042        Ok(())
1043    }
1044
1045    #[test]
1046    fn test_duration() -> Result<()> {
1047        // create an array natively
1048        let array = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
1049
1050        // export it
1051        let (array, schema) = to_ffi(&array.to_data())?;
1052
1053        // (simulate consumer) import it
1054        let data = unsafe { from_ffi(array, &schema) }?;
1055        let array = make_array(data);
1056        let array = array
1057            .as_any()
1058            .downcast_ref::<DurationSecondArray>()
1059            .unwrap();
1060
1061        // verify
1062        assert_eq!(
1063            array,
1064            &DurationSecondArray::from(vec![None, Some(1), Some(2)])
1065        );
1066
1067        // (drop/release)
1068        Ok(())
1069    }
1070
1071    #[test]
1072    fn test_map_array() -> Result<()> {
1073        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1074        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1075
1076        // Construct a buffer for value offsets, for the nested array:
1077        //  [[a, b, c], [d, e, f], [g, h]]
1078        let entry_offsets = [0, 3, 6, 8];
1079
1080        let map_array =
1081            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1082                .unwrap();
1083
1084        // export it
1085        let (array, schema) = to_ffi(&map_array.to_data())?;
1086
1087        // (simulate consumer) import it
1088        let data = unsafe { from_ffi(array, &schema) }?;
1089        let array = make_array(data);
1090
1091        // perform some operation
1092        let array = array.as_any().downcast_ref::<MapArray>().unwrap();
1093        assert_eq!(array, &map_array);
1094
1095        Ok(())
1096    }
1097
1098    #[test]
1099    fn test_struct_array() -> Result<()> {
1100        let metadata: HashMap<String, String> =
1101            [("Hello".to_string(), "World! 😊".to_string())].into();
1102        let struct_array = StructArray::from(vec![(
1103            Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata)),
1104            Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>,
1105        )]);
1106
1107        // export it
1108        let (array, schema) = to_ffi(&struct_array.to_data())?;
1109
1110        // (simulate consumer) import it
1111        let data = unsafe { from_ffi(array, &schema) }?;
1112        let array = make_array(data);
1113
1114        // perform some operation
1115        let array = array.as_any().downcast_ref::<StructArray>().unwrap();
1116        assert_eq!(array.data_type(), struct_array.data_type());
1117        assert_eq!(array, &struct_array);
1118
1119        Ok(())
1120    }
1121
1122    #[test]
1123    fn test_union_sparse_array() -> Result<()> {
1124        let mut builder = UnionBuilder::new_sparse();
1125        builder.append::<Int32Type>("a", 1).unwrap();
1126        builder.append_null::<Int32Type>("a").unwrap();
1127        builder.append::<Float64Type>("c", 3.0).unwrap();
1128        builder.append::<Int32Type>("a", 4).unwrap();
1129        let union = builder.build().unwrap();
1130
1131        // export it
1132        let (array, schema) = to_ffi(&union.to_data())?;
1133
1134        // (simulate consumer) import it
1135        let data = unsafe { from_ffi(array, &schema) }?;
1136        let array = make_array(data);
1137
1138        let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
1139
1140        let expected_type_ids = vec![0_i8, 0, 1, 0];
1141
1142        // Check type ids
1143        assert_eq!(*array.type_ids(), expected_type_ids);
1144        for (i, id) in expected_type_ids.iter().enumerate() {
1145            assert_eq!(id, &array.type_id(i));
1146        }
1147
1148        // Check offsets, sparse union should only have a single buffer, i.e. no offsets
1149        assert!(array.offsets().is_none());
1150
1151        for i in 0..array.len() {
1152            let slot = array.value(i);
1153            match i {
1154                0 => {
1155                    let slot = slot.as_primitive::<Int32Type>();
1156                    assert!(!slot.is_null(0));
1157                    assert_eq!(slot.len(), 1);
1158                    let value = slot.value(0);
1159                    assert_eq!(1_i32, value);
1160                }
1161                1 => assert!(slot.is_null(0)),
1162                2 => {
1163                    let slot = slot.as_primitive::<Float64Type>();
1164                    assert!(!slot.is_null(0));
1165                    assert_eq!(slot.len(), 1);
1166                    let value = slot.value(0);
1167                    assert_eq!(value, 3_f64);
1168                }
1169                3 => {
1170                    let slot = slot.as_primitive::<Int32Type>();
1171                    assert!(!slot.is_null(0));
1172                    assert_eq!(slot.len(), 1);
1173                    let value = slot.value(0);
1174                    assert_eq!(4_i32, value);
1175                }
1176                _ => unreachable!(),
1177            }
1178        }
1179
1180        Ok(())
1181    }
1182
1183    #[test]
1184    fn test_union_dense_array() -> Result<()> {
1185        let mut builder = UnionBuilder::new_dense();
1186        builder.append::<Int32Type>("a", 1).unwrap();
1187        builder.append_null::<Int32Type>("a").unwrap();
1188        builder.append::<Float64Type>("c", 3.0).unwrap();
1189        builder.append::<Int32Type>("a", 4).unwrap();
1190        let union = builder.build().unwrap();
1191
1192        // export it
1193        let (array, schema) = to_ffi(&union.to_data())?;
1194
1195        // (simulate consumer) import it
1196        let data = unsafe { from_ffi(array, &schema) }?;
1197        let array = UnionArray::from(data);
1198
1199        let expected_type_ids = vec![0_i8, 0, 1, 0];
1200
1201        // Check type ids
1202        assert_eq!(*array.type_ids(), expected_type_ids);
1203        for (i, id) in expected_type_ids.iter().enumerate() {
1204            assert_eq!(id, &array.type_id(i));
1205        }
1206
1207        assert!(array.offsets().is_some());
1208
1209        for i in 0..array.len() {
1210            let slot = array.value(i);
1211            match i {
1212                0 => {
1213                    let slot = slot.as_primitive::<Int32Type>();
1214                    assert!(!slot.is_null(0));
1215                    assert_eq!(slot.len(), 1);
1216                    let value = slot.value(0);
1217                    assert_eq!(1_i32, value);
1218                }
1219                1 => assert!(slot.is_null(0)),
1220                2 => {
1221                    let slot = slot.as_primitive::<Float64Type>();
1222                    assert!(!slot.is_null(0));
1223                    assert_eq!(slot.len(), 1);
1224                    let value = slot.value(0);
1225                    assert_eq!(value, 3_f64);
1226                }
1227                3 => {
1228                    let slot = slot.as_primitive::<Int32Type>();
1229                    assert!(!slot.is_null(0));
1230                    assert_eq!(slot.len(), 1);
1231                    let value = slot.value(0);
1232                    assert_eq!(4_i32, value);
1233                }
1234                _ => unreachable!(),
1235            }
1236        }
1237
1238        Ok(())
1239    }
1240
1241    #[test]
1242    fn test_run_array() -> Result<()> {
1243        let value_data =
1244            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1245
1246        // Construct a run_ends array:
1247        let run_ends_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
1248        let run_ends_data =
1249            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1250
1251        // Construct a run ends encoded array from the above two
1252        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1253
1254        // export it
1255        let (array, schema) = to_ffi(&ree_array.to_data())?;
1256
1257        // (simulate consumer) import it
1258        let data = unsafe { from_ffi(array, &schema) }?;
1259        let array = make_array(data);
1260
1261        // perform some operation
1262        let array = array
1263            .as_any()
1264            .downcast_ref::<RunArray<Int32Type>>()
1265            .unwrap();
1266        assert_eq!(array.data_type(), ree_array.data_type());
1267        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1268        assert_eq!(array.values(), ree_array.values());
1269
1270        Ok(())
1271    }
1272
1273    #[test]
1274    fn test_nullable_run_array() -> Result<()> {
1275        let nulls = NullBuffer::from(vec![true, false, true, true, false]);
1276        let value_data =
1277            PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(nulls));
1278
1279        // Construct a run_ends array:
1280        let run_ends_values = [5_i32, 6, 7, 8, 10];
1281        let run_ends_data =
1282            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1283
1284        // Construct a run ends encoded array from the above two
1285        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1286
1287        // export it
1288        let (array, schema) = to_ffi(&ree_array.to_data())?;
1289
1290        // (simulate consumer) import it
1291        let data = unsafe { from_ffi(array, &schema) }?;
1292        let array = make_array(data);
1293
1294        // perform some operation
1295        let array = array
1296            .as_any()
1297            .downcast_ref::<RunArray<Int32Type>>()
1298            .unwrap();
1299        assert_eq!(array.data_type(), ree_array.data_type());
1300        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1301        assert_eq!(array.values(), ree_array.values());
1302
1303        Ok(())
1304    }
1305}
1306
1307#[cfg(test)]
1308mod tests_from_ffi {
1309    #[cfg(not(feature = "force_validate"))]
1310    use std::ptr::NonNull;
1311    use std::sync::Arc;
1312
1313    #[cfg(feature = "force_validate")]
1314    use arrow_buffer::{bit_util, buffer::Buffer};
1315    #[cfg(not(feature = "force_validate"))]
1316    use arrow_buffer::{bit_util, buffer::Buffer, ScalarBuffer};
1317
1318    use arrow_data::transform::MutableArrayData;
1319    use arrow_data::ArrayData;
1320    use arrow_schema::{DataType, Field};
1321
1322    use super::Result;
1323    use crate::builder::GenericByteViewBuilder;
1324    use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1325    use crate::{
1326        array::{
1327            Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1328            Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1329        },
1330        ffi::{from_ffi, FFI_ArrowArray, FFI_ArrowSchema},
1331        make_array, ArrayRef, GenericByteViewArray, ListArray,
1332    };
1333
1334    fn test_round_trip(expected: &ArrayData) -> Result<()> {
1335        // here we export the array
1336        let array = FFI_ArrowArray::new(expected);
1337        let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1338
1339        // simulate an external consumer by being the consumer
1340        let result = &unsafe { from_ffi(array, &schema) }?;
1341
1342        assert_eq!(result, expected);
1343        Ok(())
1344    }
1345
1346    #[test]
1347    fn test_u32() -> Result<()> {
1348        let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
1349        let data = array.into_data();
1350        test_round_trip(&data)
1351    }
1352
1353    #[test]
1354    fn test_u64() -> Result<()> {
1355        let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
1356        let data = array.into_data();
1357        test_round_trip(&data)
1358    }
1359
1360    #[test]
1361    fn test_i64() -> Result<()> {
1362        let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
1363        let data = array.into_data();
1364        test_round_trip(&data)
1365    }
1366
1367    #[test]
1368    fn test_struct() -> Result<()> {
1369        let inner = StructArray::from(vec![
1370            (
1371                Arc::new(Field::new("a1", DataType::Boolean, false)),
1372                Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc<dyn Array>,
1373            ),
1374            (
1375                Arc::new(Field::new("a2", DataType::UInt32, false)),
1376                Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
1377            ),
1378        ]);
1379
1380        let array = StructArray::from(vec![
1381            (
1382                Arc::new(Field::new("a", inner.data_type().clone(), false)),
1383                Arc::new(inner) as Arc<dyn Array>,
1384            ),
1385            (
1386                Arc::new(Field::new("b", DataType::Boolean, false)),
1387                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
1388            ),
1389            (
1390                Arc::new(Field::new("c", DataType::UInt32, false)),
1391                Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
1392            ),
1393        ]);
1394        let data = array.into_data();
1395        test_round_trip(&data)
1396    }
1397
1398    #[test]
1399    fn test_dictionary() -> Result<()> {
1400        let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
1401        let keys = Int32Array::from(vec![
1402            Some(0),
1403            Some(1),
1404            None,
1405            Some(1),
1406            Some(1),
1407            None,
1408            Some(1),
1409            Some(2),
1410            Some(1),
1411            None,
1412        ]);
1413        let array = DictionaryArray::new(keys, Arc::new(values));
1414
1415        let data = array.into_data();
1416        test_round_trip(&data)
1417    }
1418
1419    #[test]
1420    fn test_fixed_size_binary() -> Result<()> {
1421        let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
1422        let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
1423
1424        let data = array.into_data();
1425        test_round_trip(&data)
1426    }
1427
1428    #[test]
1429    fn test_fixed_size_binary_with_nulls() -> Result<()> {
1430        let values = vec![
1431            None,
1432            Some(vec![10, 10, 10]),
1433            None,
1434            Some(vec![20, 20, 20]),
1435            Some(vec![30, 30, 30]),
1436            None,
1437        ];
1438        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
1439
1440        let data = array.into_data();
1441        test_round_trip(&data)
1442    }
1443
1444    #[test]
1445    fn test_fixed_size_list() -> Result<()> {
1446        let v: Vec<i64> = (0..9).collect();
1447        let value_data = ArrayData::builder(DataType::Int64)
1448            .len(9)
1449            .add_buffer(Buffer::from_slice_ref(v))
1450            .build()?;
1451        let list_data_type =
1452            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3);
1453        let list_data = ArrayData::builder(list_data_type)
1454            .len(3)
1455            .add_child_data(value_data)
1456            .build()?;
1457        let array = FixedSizeListArray::from(list_data);
1458
1459        let data = array.into_data();
1460        test_round_trip(&data)
1461    }
1462
1463    #[test]
1464    fn test_fixed_size_list_with_nulls() -> Result<()> {
1465        // 0100 0110
1466        let mut validity_bits: [u8; 1] = [0; 1];
1467        bit_util::set_bit(&mut validity_bits, 1);
1468        bit_util::set_bit(&mut validity_bits, 2);
1469        bit_util::set_bit(&mut validity_bits, 6);
1470
1471        let v: Vec<i16> = (0..16).collect();
1472        let value_data = ArrayData::builder(DataType::Int16)
1473            .len(16)
1474            .add_buffer(Buffer::from_slice_ref(v))
1475            .build()?;
1476        let list_data_type =
1477            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2);
1478        let list_data = ArrayData::builder(list_data_type)
1479            .len(8)
1480            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1481            .add_child_data(value_data)
1482            .build()?;
1483        let array = FixedSizeListArray::from(list_data);
1484
1485        let data = array.into_data();
1486        test_round_trip(&data)
1487    }
1488
1489    #[test]
1490    fn test_fixed_size_list_nested() -> Result<()> {
1491        let v: Vec<i32> = (0..16).collect();
1492        let value_data = ArrayData::builder(DataType::Int32)
1493            .len(16)
1494            .add_buffer(Buffer::from_slice_ref(v))
1495            .build()?;
1496
1497        let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
1498        let value_offsets = Buffer::from_slice_ref(offsets);
1499        let inner_list_data_type =
1500            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1501        let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
1502            .len(8)
1503            .add_buffer(value_offsets)
1504            .add_child_data(value_data)
1505            .build()?;
1506
1507        // 0000 0100
1508        let mut validity_bits: [u8; 1] = [0; 1];
1509        bit_util::set_bit(&mut validity_bits, 2);
1510
1511        let list_data_type =
1512            DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2);
1513        let list_data = ArrayData::builder(list_data_type)
1514            .len(4)
1515            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1516            .add_child_data(inner_list_data)
1517            .build()?;
1518
1519        let array = FixedSizeListArray::from(list_data);
1520
1521        let data = array.into_data();
1522        test_round_trip(&data)
1523    }
1524
1525    #[test]
1526    #[cfg(not(feature = "force_validate"))]
1527    fn test_empty_string_with_non_zero_offset() -> Result<()> {
1528        use super::ImportedArrowArray;
1529        use arrow_buffer::{MutableBuffer, OffsetBuffer};
1530
1531        // Simulate an empty string array with a non-zero offset from a producer
1532        let data: Buffer = MutableBuffer::new(0).into();
1533        let offsets = OffsetBuffer::new(vec![123].into());
1534        let string_array =
1535            unsafe { StringArray::new_unchecked(offsets.clone(), data.clone(), None) };
1536
1537        let data = string_array.into_data();
1538
1539        let array = FFI_ArrowArray::new(&data);
1540        let schema = FFI_ArrowSchema::try_from(data.data_type())?;
1541
1542        let dt = DataType::try_from(&schema)?;
1543        let array = Arc::new(array);
1544        let imported_array = ImportedArrowArray {
1545            array: &array,
1546            data_type: dt,
1547            owner: &array,
1548        };
1549
1550        let offset_buf_len = imported_array.buffer_len(1, &[], &imported_array.data_type)?;
1551        let data_buf_len = imported_array.buffer_len(2, &[], &imported_array.data_type)?;
1552
1553        assert_eq!(offset_buf_len, 4);
1554        assert_eq!(data_buf_len, 0);
1555
1556        test_round_trip(&imported_array.consume()?)
1557    }
1558
1559    fn roundtrip_string_array(array: StringArray) -> StringArray {
1560        let data = array.into_data();
1561
1562        let array = FFI_ArrowArray::new(&data);
1563        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1564
1565        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1566        StringArray::from(array)
1567    }
1568
1569    fn roundtrip_byte_view_array<T: ByteViewType>(
1570        array: GenericByteViewArray<T>,
1571    ) -> GenericByteViewArray<T> {
1572        let data = array.into_data();
1573
1574        let array = FFI_ArrowArray::new(&data);
1575        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1576
1577        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1578        GenericByteViewArray::<T>::from(array)
1579    }
1580
1581    fn extend_array(array: &dyn Array) -> ArrayRef {
1582        let len = array.len();
1583        let data = array.to_data();
1584
1585        let mut mutable = MutableArrayData::new(vec![&data], false, len);
1586        mutable.extend(0, 0, len);
1587        make_array(mutable.freeze())
1588    }
1589
1590    #[test]
1591    fn test_extend_imported_string_slice() {
1592        let mut strings = vec![];
1593
1594        for i in 0..1000 {
1595            strings.push(format!("string: {i}"));
1596        }
1597
1598        let string_array = StringArray::from(strings);
1599
1600        let imported = roundtrip_string_array(string_array.clone());
1601        assert_eq!(imported.len(), 1000);
1602        assert_eq!(imported.value(0), "string: 0");
1603        assert_eq!(imported.value(499), "string: 499");
1604
1605        let copied = extend_array(&imported);
1606        assert_eq!(
1607            copied.as_any().downcast_ref::<StringArray>().unwrap(),
1608            &imported
1609        );
1610
1611        let slice = string_array.slice(500, 500);
1612
1613        let imported = roundtrip_string_array(slice);
1614        assert_eq!(imported.len(), 500);
1615        assert_eq!(imported.value(0), "string: 500");
1616        assert_eq!(imported.value(499), "string: 999");
1617
1618        let copied = extend_array(&imported);
1619        assert_eq!(
1620            copied.as_any().downcast_ref::<StringArray>().unwrap(),
1621            &imported
1622        );
1623    }
1624
1625    fn roundtrip_list_array(array: ListArray) -> ListArray {
1626        let data = array.into_data();
1627
1628        let array = FFI_ArrowArray::new(&data);
1629        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1630
1631        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1632        ListArray::from(array)
1633    }
1634
1635    #[test]
1636    fn test_extend_imported_list_slice() {
1637        let mut data = vec![];
1638
1639        for i in 0..1000 {
1640            let mut list = vec![];
1641            for j in 0..100 {
1642                list.push(Some(i * 1000 + j));
1643            }
1644            data.push(Some(list));
1645        }
1646
1647        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1648
1649        let slice = list_array.slice(500, 500);
1650        let imported = roundtrip_list_array(slice.clone());
1651        assert_eq!(imported.len(), 500);
1652        assert_eq!(&slice, &imported);
1653
1654        let copied = extend_array(&imported);
1655        assert_eq!(
1656            copied.as_any().downcast_ref::<ListArray>().unwrap(),
1657            &imported
1658        );
1659    }
1660
1661    /// Helper trait to allow us to use easily strings as either BinaryViewType::Native or
1662    /// StringViewType::Native scalars.
1663    trait NativeFromStr {
1664        fn from_str(value: &str) -> &Self;
1665    }
1666
1667    impl NativeFromStr for str {
1668        fn from_str(value: &str) -> &Self {
1669            value
1670        }
1671    }
1672
1673    impl NativeFromStr for [u8] {
1674        fn from_str(value: &str) -> &Self {
1675            value.as_bytes()
1676        }
1677    }
1678
1679    #[test]
1680    #[cfg(not(feature = "force_validate"))]
1681    fn test_utf8_view_ffi_from_dangling_pointer() {
1682        let empty = GenericByteViewBuilder::<StringViewType>::new().finish();
1683        let buffers = empty.data_buffers().to_vec();
1684        let nulls = empty.nulls().cloned();
1685
1686        // Create a dangling pointer to a view buffer with zero length.
1687        let alloc = Arc::new(1);
1688        let buffer = unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, alloc) };
1689        let views = unsafe { ScalarBuffer::new_unchecked(buffer) };
1690
1691        let str_view: GenericByteViewArray<StringViewType> =
1692            unsafe { GenericByteViewArray::new_unchecked(views, buffers, nulls) };
1693        let imported = roundtrip_byte_view_array(str_view);
1694        assert_eq!(imported.len(), 0);
1695        assert_eq!(&imported, &empty);
1696    }
1697
1698    #[test]
1699    fn test_round_trip_byte_view() {
1700        fn test_case<T>()
1701        where
1702            T: ByteViewType,
1703            T::Native: NativeFromStr,
1704        {
1705            macro_rules! run_test_case {
1706                ($array:expr) => {{
1707                    // round-trip through C  Data Interface
1708                    let len = $array.len();
1709                    let imported = roundtrip_byte_view_array($array);
1710                    assert_eq!(imported.len(), len);
1711
1712                    let copied = extend_array(&imported);
1713                    assert_eq!(
1714                        copied
1715                            .as_any()
1716                            .downcast_ref::<GenericByteViewArray<T>>()
1717                            .unwrap(),
1718                        &imported
1719                    );
1720                }};
1721            }
1722
1723            // Empty test case.
1724            let empty = GenericByteViewBuilder::<T>::new().finish();
1725            run_test_case!(empty);
1726
1727            // All inlined strings test case.
1728            let mut all_inlined = GenericByteViewBuilder::<T>::new();
1729            all_inlined.append_value(T::Native::from_str("inlined1"));
1730            all_inlined.append_value(T::Native::from_str("inlined2"));
1731            all_inlined.append_value(T::Native::from_str("inlined3"));
1732            let all_inlined = all_inlined.finish();
1733            assert_eq!(all_inlined.data_buffers().len(), 0);
1734            run_test_case!(all_inlined);
1735
1736            // some inlined + non-inlined, 1 variadic buffer.
1737            let mixed_one_variadic = {
1738                let mut builder = GenericByteViewBuilder::<T>::new();
1739                builder.append_value(T::Native::from_str("inlined"));
1740                let block_id =
1741                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1742                builder.try_append_view(block_id, 0, 25).unwrap();
1743                builder.finish()
1744            };
1745            assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
1746            run_test_case!(mixed_one_variadic);
1747
1748            // inlined + non-inlined, 2 variadic buffers.
1749            let mixed_two_variadic = {
1750                let mut builder = GenericByteViewBuilder::<T>::new();
1751                builder.append_value(T::Native::from_str("inlined"));
1752                let block_id =
1753                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1754                builder.try_append_view(block_id, 0, 25).unwrap();
1755
1756                let block_id = builder
1757                    .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
1758                builder.try_append_view(block_id, 0, 33).unwrap();
1759                builder.finish()
1760            };
1761            assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
1762            run_test_case!(mixed_two_variadic);
1763        }
1764
1765        test_case::<StringViewType>();
1766        test_case::<BinaryViewType>();
1767    }
1768}