arrow_array/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19//!
20//! Generally, this module is divided in two main interfaces:
21//! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
22//! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
23//!
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow such as `format` to `DataType`,
25//! `Buffer`, etc. This is handled by `from_ffi` and `to_ffi`.
26//!
27//!
28//! Export to FFI
29//!
30//! ```rust
31//! # use std::sync::Arc;
32//! # use arrow_array::{Int32Array, Array, make_array};
33//! # use arrow_data::ArrayData;
34//! # use arrow_array::ffi::{to_ffi, from_ffi};
35//! # use arrow_schema::ArrowError;
36//! # fn main() -> Result<(), ArrowError> {
37//! // create an array natively
38//!
39//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
40//! let data = array.into_data();
41//!
42//! // Export it
43//! let (out_array, out_schema) = to_ffi(&data)?;
44//!
45//! // import it
46//! let data = unsafe { from_ffi(out_array, &out_schema) }?;
47//! let array = Int32Array::from(data);
48//!
49//! // verify
50//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)]));
51//! #
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! Import from FFI
57//!
58//! ```
59//! # use std::ptr::addr_of_mut;
60//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray};
61//! # use arrow_array::{ArrayRef, make_array};
62//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema};
63//! #
64//! /// A foreign data container that can export to C Data interface
65//! struct ForeignArray {};
66//!
67//! impl ForeignArray {
68//!     /// Export from foreign array representation to C Data interface
69//!     /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
70//!     fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
71//!         // ...
72//!     }
73//! }
74//!
75//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
76//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
77//!     let mut schema = FFI_ArrowSchema::empty();
78//!     let mut array = FFI_ArrowArray::empty();
79//!     foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
80//!     Ok(make_array(unsafe { from_ffi(array, &schema) }?))
81//! }
82//! ```
83
84/*
85# Design:
86
87Main assumptions:
* A memory region is deallocated according to its own release mechanism.
89* Rust shares memory regions between arrays.
90* A memory region should be deallocated when no-one is using it.
91
92The design of this module is as follows:
93
94`ArrowArray` contains two `Arc`s, one per ABI-compatible `struct`, each containing data
95according to the C Data Interface. These Arcs are used for ref counting of the structs
96within Rust and lifetime management.
97
Each ABI-compatible `struct` knows how to `drop` itself, calling `release`.
99
100To import an array, unsafely create an `ArrowArray` from two pointers using [ArrowArray::try_from_raw].
101To export an array, create an `ArrowArray` using [ArrowArray::try_new].
102*/
103
104use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{Buffer, MutableBuffer, bit_util};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{ArrayData, layout};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
117/// # Safety
118/// Assumes that these pointers represent valid C Data Interfaces, both in memory
119/// representation and lifetime via the `release` mechanism.
120///
121/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and
122/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
123/// Usually the raw pointers are provided by the array data consumer.
124#[deprecated(
125    since = "52.0.0",
126    note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129    src: ArrayRef,
130    out_array: *mut FFI_ArrowArray,
131    out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133    let data = src.to_data();
134    let array = FFI_ArrowArray::new(&data);
135    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137    unsafe { std::ptr::write_unaligned(out_array, array) };
138    unsafe { std::ptr::write_unaligned(out_schema, schema) };
139
140    Ok(())
141}
142
143/// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144/// This is set by the Arrow specification
145fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146    if let Some(primitive) = data_type.primitive_width() {
147        return match i {
148            0 => Err(ArrowError::CDataInterface(format!(
149                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150            ))),
151            1 => Ok(primitive * 8),
152            i => Err(ArrowError::CDataInterface(format!(
153                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154            ))),
155        };
156    }
157
158    Ok(match (data_type, i) {
159        (DataType::Boolean, 1) => 1,
160        (DataType::Boolean, _) => {
161            return Err(ArrowError::CDataInterface(format!(
162                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163            )));
164        }
165        (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
166        (DataType::FixedSizeList(f, num_elems), 1) => {
167            let child_bit_width = bit_width(f.data_type(), 1)?;
168            child_bit_width * (*num_elems as usize)
169        }
170        (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171            return Err(ArrowError::CDataInterface(format!(
172                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173            )));
174        }
175        // Variable-size list and map have one i32 buffer.
176        // Variable-sized binaries: have two buffers.
177        // "small": first buffer is i32, second is in bytes
178        (DataType::Utf8, 1)
179        | (DataType::Binary, 1)
180        | (DataType::List(_), 1)
181        | (DataType::Map(_, _), 1) => i32::BITS as _,
182        (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
183        // List views have two i32 buffers, offsets and sizes
184        (DataType::ListView(_), 1) | (DataType::ListView(_), 2) => i32::BITS as _,
185        // Large list views have two i64 buffers, offsets and sizes
186        (DataType::LargeListView(_), 1) | (DataType::LargeListView(_), 2) => i64::BITS as _,
187        (DataType::List(_), _) | (DataType::Map(_, _), _) => {
188            return Err(ArrowError::CDataInterface(format!(
189                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
190            )));
191        }
192        (DataType::Utf8, _) | (DataType::Binary, _) => {
193            return Err(ArrowError::CDataInterface(format!(
194                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
195            )));
196        }
197        // Variable-sized binaries: have two buffers.
198        // LargeUtf8: first buffer is i64, second is in bytes
199        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => {
200            i64::BITS as _
201        }
202        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => {
203            u8::BITS as _
204        }
205        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => {
206            return Err(ArrowError::CDataInterface(format!(
207                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
208            )));
209        }
210        // Variable-sized views: have 3 or more buffers.
211        // Buffer 1 are the u128 views
212        // Buffers 2...N-1 are u8 byte buffers
213        (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _,
214        (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _,
215        // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0.
216        (DataType::Union(_, _), 0) => i8::BITS as _,
217        // Only DenseUnion has 2nd buffer
218        (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
219        (DataType::Union(_, UnionMode::Sparse), _) => {
220            return Err(ArrowError::CDataInterface(format!(
221                "The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
222            )));
223        }
224        (DataType::Union(_, UnionMode::Dense), _) => {
225            return Err(ArrowError::CDataInterface(format!(
226                "The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
227            )));
228        }
229        (_, 0) => {
230            // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
231            // UnionArray, they should be handled above.
232            return Err(ArrowError::CDataInterface(format!(
233                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
234            )));
235        }
236        _ => {
237            return Err(ArrowError::CDataInterface(format!(
238                "The datatype \"{data_type}\" is still not supported in Rust implementation"
239            )));
240        }
241    })
242}
243
244/// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer).
245/// `bits` is the number of bits that the native type of this buffer has.
246/// The size of the buffer will be `ceil(self.length * bits, 8)`.
247/// # Panic
248/// This function panics if `i` is larger or equal to `n_buffers`.
249/// # Safety
250/// This function assumes that `ceil(self.length * bits, 8)` is the size of the buffer
251unsafe fn create_buffer(
252    owner: Arc<FFI_ArrowArray>,
253    array: &FFI_ArrowArray,
254    index: usize,
255    len: usize,
256) -> Option<Buffer> {
257    if array.num_buffers() == 0 {
258        return None;
259    }
260    NonNull::new(array.buffer(index) as _)
261        .map(|ptr| unsafe { Buffer::from_custom_allocation(ptr, len, owner) })
262}
263
264/// Export to the C Data Interface
265pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
266    let array = FFI_ArrowArray::new(data);
267    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
268    Ok((array, schema))
269}
270
271/// Import [ArrayData] from the C Data Interface
272///
273/// # Safety
274///
275/// This struct assumes that the incoming data agrees with the C data interface.
276pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
277    let dt = DataType::try_from(schema)?;
278    let array = Arc::new(array);
279    let tmp = ImportedArrowArray {
280        array: &array,
281        data_type: dt,
282        owner: &array,
283    };
284    tmp.consume()
285}
286
287/// Import [ArrayData] from the C Data Interface
288///
289/// # Safety
290///
291/// This struct assumes that the incoming data agrees with the C data interface.
292pub unsafe fn from_ffi_and_data_type(
293    array: FFI_ArrowArray,
294    data_type: DataType,
295) -> Result<ArrayData> {
296    let array = Arc::new(array);
297    let tmp = ImportedArrowArray {
298        array: &array,
299        data_type,
300        owner: &array,
301    };
302    tmp.consume()
303}
304
/// One node of an FFI array tree that is being imported, paired with the [`DataType`] used to
/// interpret it.
#[derive(Debug)]
struct ImportedArrowArray<'a> {
    /// The FFI node being decoded: the root array, one of its children, or its dictionary.
    array: &'a FFI_ArrowArray,
    /// Data type that decides which buffers and children are read from `array`.
    data_type: DataType,
    /// Shared handle to the root FFI array; it is cloned into every [`Buffer`] created from
    /// this node (presumably so the foreign memory stays alive as long as the buffers do).
    owner: &'a Arc<FFI_ArrowArray>,
}
311
312impl ImportedArrowArray<'_> {
313    fn consume(self) -> Result<ArrayData> {
314        let len = self.array.len();
315        let offset = self.array.offset();
316        let null_count = match &self.data_type {
317            DataType::Null => Some(0),
318            _ => self.array.null_count_opt(),
319        };
320
321        let data_layout = layout(&self.data_type);
322        let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
323
324        let null_bit_buffer = if data_layout.can_contain_null_mask {
325            self.null_bit_buffer()
326        } else {
327            None
328        };
329
330        let mut child_data = self.consume_children()?;
331
332        if let Some(d) = self.dictionary()? {
333            // For dictionary type there should only be a single child, so we don't need to worry if
334            // there are other children added above.
335            assert!(child_data.is_empty());
336            child_data.push(d.consume()?);
337        }
338
339        // Should FFI be checking validity?
340        Ok(unsafe {
341            ArrayData::new_unchecked(
342                self.data_type,
343                len,
344                null_count,
345                null_bit_buffer,
346                offset,
347                buffers,
348                child_data,
349            )
350        })
351    }
352
353    fn consume_children(&self) -> Result<Vec<ArrayData>> {
354        match &self.data_type {
355            DataType::List(field)
356            | DataType::FixedSizeList(field, _)
357            | DataType::LargeList(field)
358            | DataType::ListView(field)
359            | DataType::LargeListView(field)
360            | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
361            DataType::Struct(fields) => {
362                assert!(fields.len() == self.array.num_children());
363                fields
364                    .iter()
365                    .enumerate()
366                    .map(|(i, field)| self.consume_child(i, field.data_type()))
367                    .collect::<Result<Vec<_>>>()
368            }
369            DataType::Union(union_fields, _) => {
370                assert!(union_fields.len() == self.array.num_children());
371                union_fields
372                    .iter()
373                    .enumerate()
374                    .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
375                    .collect::<Result<Vec<_>>>()
376            }
377            DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
378                self.consume_child(0, run_ends_field.data_type())?,
379                self.consume_child(1, values_field.data_type())?,
380            ]
381            .to_vec()),
382            _ => Ok(Vec::new()),
383        }
384    }
385
386    fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
387        ImportedArrowArray {
388            array: self.array.child(index),
389            data_type: child_type.clone(),
390            owner: self.owner,
391        }
392        .consume()
393    }
394
395    /// returns all buffers, as organized by Rust (i.e. null buffer is skipped if it's present
396    /// in the spec of the type)
397    fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
398        // + 1: skip null buffer
399        let buffer_begin = can_contain_null_mask as usize;
400        let buffer_end = self.array.num_buffers() - usize::from(variadic);
401
402        let variadic_buffer_lens = if variadic {
403            // Each views array has 1 (optional) null buffer, 1 views buffer, 1 lengths buffer.
404            // Rest are variadic.
405            let num_variadic_buffers =
406                self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
407            if num_variadic_buffers == 0 {
408                &[]
409            } else {
410                let lengths = self.array.buffer(self.array.num_buffers() - 1);
411                // SAFETY: is lengths is non-null, then it must be valid for up to num_variadic_buffers.
412                unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
413            }
414        } else {
415            &[]
416        };
417
418        (buffer_begin..buffer_end)
419            .map(|index| {
420                let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
421                match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
422                    Some(buf) => {
423                        // External libraries may use a dangling pointer for a buffer with length 0.
424                        // We respect the array length specified in the C Data Interface. Actually,
425                        // if the length is incorrect, we cannot create a correct buffer even if
426                        // the pointer is valid.
427                        if buf.is_empty() {
428                            Ok(MutableBuffer::new(0).into())
429                        } else {
430                            Ok(buf)
431                        }
432                    }
433                    None if len == 0 => {
434                        // Null data buffer, which Rust doesn't allow. So create
435                        // an empty buffer.
436                        Ok(MutableBuffer::new(0).into())
437                    }
438                    None => Err(ArrowError::CDataInterface(format!(
439                        "The external buffer at position {index} is null."
440                    ))),
441                }
442            })
443            .collect()
444    }
445
446    /// Returns the length, in bytes, of the buffer `i` (indexed according to the C data interface)
447    /// Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
448    /// for variable-sized buffers, such as the second buffer of a stringArray, we need
449    /// to fetch offset buffer's len to build the second buffer.
450    fn buffer_len(
451        &self,
452        i: usize,
453        variadic_buffer_lengths: &[i64],
454        dt: &DataType,
455    ) -> Result<usize> {
456        // Special handling for dictionary type as we only care about the key type in the case.
457        let data_type = match dt {
458            DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
459            dt => dt,
460        };
461
462        // `ffi::ArrowArray` records array offset, we need to add it back to the
463        // buffer length to get the actual buffer length.
464        let length = self.array.len() + self.array.offset();
465
466        // Inner type is not important for buffer length.
467        Ok(match (&data_type, i) {
468            (DataType::Utf8, 1)
469            | (DataType::LargeUtf8, 1)
470            | (DataType::Binary, 1)
471            | (DataType::LargeBinary, 1)
472            | (DataType::List(_), 1)
473            | (DataType::LargeList(_), 1)
474            | (DataType::Map(_, _), 1) => {
475                // the len of the offset buffer (buffer 1) equals length + 1
476                let bits = bit_width(data_type, i)?;
477                debug_assert_eq!(bits % 8, 0);
478                (length + 1) * (bits / 8)
479            }
480            (DataType::ListView(_), 1)
481            | (DataType::ListView(_), 2)
482            | (DataType::LargeListView(_), 1)
483            | (DataType::LargeListView(_), 2) => {
484                let bits = bit_width(data_type, i)?;
485                debug_assert_eq!(bits % 8, 0);
486                length * (bits / 8)
487            }
488            (DataType::Utf8, 2) | (DataType::Binary, 2) => {
489                if self.array.is_empty() {
490                    return Ok(0);
491                }
492
493                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
494                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
495                // first buffer is the null buffer => add(1)
496                // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
497                #[allow(clippy::cast_ptr_alignment)]
498                let offset_buffer = self.array.buffer(1) as *const i32;
499                // get last offset
500                (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
501            }
502            (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
503                if self.array.is_empty() {
504                    return Ok(0);
505                }
506
507                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
508                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
509                // first buffer is the null buffer => add(1)
510                // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
511                #[allow(clippy::cast_ptr_alignment)]
512                let offset_buffer = self.array.buffer(1) as *const i64;
513                // get last offset
514                (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
515            }
516            // View types: these have variadic buffers.
517            // Buffer 1 is the views buffer, which stores 1 u128 per length of the array.
518            // Buffers 2..N-1 are the buffers holding the byte data. Their lengths are variable.
519            // Buffer N is of length (N - 2) and stores i64 containing the lengths of buffers 2..N-1
520            (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
521                std::mem::size_of::<u128>() * length
522            }
523            (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
524                variadic_buffer_lengths[i - 2] as usize
525            }
526            // buffer len of primitive types
527            _ => {
528                let bits = bit_width(data_type, i)?;
529                bit_util::ceil(length * bits, 8)
530            }
531        })
532    }
533
534    /// returns the null bit buffer.
535    /// Rust implementation uses a buffer that is not part of the array of buffers.
536    /// The C Data interface's null buffer is part of the array of buffers.
537    fn null_bit_buffer(&self) -> Option<Buffer> {
538        // similar to `self.buffer_len(0)`, but without `Result`.
539        // `ffi::ArrowArray` records array offset, we need to add it back to the
540        // buffer length to get the actual buffer length.
541        let length = self.array.len() + self.array.offset();
542        let buffer_len = bit_util::ceil(length, 8);
543
544        unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
545    }
546
547    fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
548        match (self.array.dictionary(), &self.data_type) {
549            (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
550                array,
551                data_type: value_type.as_ref().clone(),
552                owner: self.owner,
553            })),
554            (Some(_), _) => Err(ArrowError::CDataInterface(
555                "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
556            )),
557            (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
558                "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
559            )),
560            (_, _) => Ok(None),
561        }
562    }
563}
564
565#[cfg(test)]
566mod tests_to_then_from_ffi {
567    use std::collections::HashMap;
568    use std::mem::ManuallyDrop;
569
570    use arrow_buffer::{ArrowNativeType, NullBuffer};
571    use arrow_schema::Field;
572
573    use crate::builder::UnionBuilder;
574    use crate::cast::AsArray;
575    use crate::types::{Float64Type, Int8Type, Int32Type};
576    use crate::*;
577
578    use super::*;
579
580    #[test]
581    fn test_round_trip() {
582        // create an array natively
583        let array = Int32Array::from(vec![1, 2, 3]);
584
585        // export it
586        let (array, schema) = to_ffi(&array.into_data()).unwrap();
587
588        // (simulate consumer) import it
589        let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
590
591        // verify
592        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
593    }
594
595    #[test]
596    fn test_import() {
597        // Model receiving const pointers from an external system
598
599        // Create an array natively
600        let data = Int32Array::from(vec![1, 2, 3]).into_data();
601        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
602        let array = FFI_ArrowArray::new(&data);
603
604        // Use ManuallyDrop to avoid Box:Drop recursing
605        let schema = Box::new(ManuallyDrop::new(schema));
606        let array = Box::new(ManuallyDrop::new(array));
607
608        let schema_ptr = &**schema as *const _;
609        let array_ptr = &**array as *const _;
610
611        // We can read them back to memory
612        // SAFETY:
613        // Pointers are aligned and valid
614        let data =
615            unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };
616
617        let array = Int32Array::from(data);
618        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
619    }
620
621    #[test]
622    fn test_round_trip_with_offset() -> Result<()> {
623        // create an array natively
624        let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]);
625
626        let array = array.slice(1, 2);
627
628        // export it
629        let (array, schema) = to_ffi(&array.to_data())?;
630
631        // (simulate consumer) import it
632        let data = unsafe { from_ffi(array, &schema) }?;
633        let array = make_array(data);
634        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
635
636        assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
637
638        // (drop/release)
639        Ok(())
640    }
641
642    #[test]
643    #[cfg(not(feature = "force_validate"))]
644    fn test_decimal_round_trip() -> Result<()> {
645        // create an array natively
646        let original_array = [Some(12345_i128), Some(-12345_i128), None]
647            .into_iter()
648            .collect::<Decimal128Array>()
649            .with_precision_and_scale(6, 2)
650            .unwrap();
651
652        // export it
653        let (array, schema) = to_ffi(&original_array.to_data())?;
654
655        // (simulate consumer) import it
656        let data = unsafe { from_ffi(array, &schema) }?;
657        let array = make_array(data);
658
659        // perform some operation
660        let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
661
662        // verify
663        assert_eq!(array, &original_array);
664
665        // (drop/release)
666        Ok(())
667    }
668    // case with nulls is tested in the docs, through the example on this module.
669
670    #[test]
671    fn test_null_count_handling() {
672        let int32_data = ArrayData::builder(DataType::Int32)
673            .len(10)
674            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
675            .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
676            .build()
677            .unwrap();
678        let mut ffi_array = FFI_ArrowArray::new(&int32_data);
679        assert_eq!(3, ffi_array.null_count());
680        assert_eq!(Some(3), ffi_array.null_count_opt());
681        // Simulating uninitialized state
682        unsafe {
683            ffi_array.set_null_count(-1);
684        }
685        assert_eq!(None, ffi_array.null_count_opt());
686        let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap();
687        assert_eq!(3, int32_data.null_count());
688
689        let null_data = &ArrayData::new_null(&DataType::Null, 10);
690        let mut ffi_array = FFI_ArrowArray::new(null_data);
691        assert_eq!(10, ffi_array.null_count());
692        assert_eq!(Some(10), ffi_array.null_count_opt());
693        // Simulating uninitialized state
694        unsafe {
695            ffi_array.set_null_count(-1);
696        }
697        assert_eq!(None, ffi_array.null_count_opt());
698        let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap();
699        assert_eq!(0, null_data.null_count());
700    }
701
702    fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
703        // create an array natively
704        let array = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
705
706        // export it
707        let (array, schema) = to_ffi(&array.to_data())?;
708
709        // (simulate consumer) import it
710        let data = unsafe { from_ffi(array, &schema) }?;
711        let array = make_array(data);
712
713        // perform some operation
714        let array = array
715            .as_any()
716            .downcast_ref::<GenericStringArray<Offset>>()
717            .unwrap();
718
719        // verify
720        let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
721        assert_eq!(array, &expected);
722
723        // (drop/release)
724        Ok(())
725    }
726
727    #[test]
728    fn test_string() -> Result<()> {
729        test_generic_string::<i32>()
730    }
731
732    #[test]
733    fn test_large_string() -> Result<()> {
734        test_generic_string::<i64>()
735    }
736
737    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
738        // Construct a value array
739        let value_data = ArrayData::builder(DataType::Int32)
740            .len(8)
741            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
742            .build()
743            .unwrap();
744
745        // Construct a buffer for value offsets, for the nested array:
746        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
747        let value_offsets = [0_usize, 3, 6, 8]
748            .iter()
749            .map(|i| Offset::from_usize(*i).unwrap())
750            .collect::<Buffer>();
751
752        // Construct a list array from the above two
753        let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
754            Field::new_list_field(DataType::Int32, false),
755        ));
756
757        let list_data = ArrayData::builder(list_data_type)
758            .len(3)
759            .add_buffer(value_offsets)
760            .add_child_data(value_data)
761            .build()
762            .unwrap();
763
764        // create an array natively
765        let array = GenericListArray::<Offset>::from(list_data.clone());
766
767        // export it
768        let (array, schema) = to_ffi(&array.to_data())?;
769
770        // (simulate consumer) import it
771        let data = unsafe { from_ffi(array, &schema) }?;
772        let array = make_array(data);
773
774        // downcast
775        let array = array
776            .as_any()
777            .downcast_ref::<GenericListArray<Offset>>()
778            .unwrap();
779
780        // verify
781        let expected = GenericListArray::<Offset>::from(list_data);
782        assert_eq!(&array.value(0), &expected.value(0));
783        assert_eq!(&array.value(1), &expected.value(1));
784        assert_eq!(&array.value(2), &expected.value(2));
785
786        // (drop/release)
787        Ok(())
788    }
789
790    #[test]
791    fn test_list() -> Result<()> {
792        test_generic_list::<i32>()
793    }
794
795    #[test]
796    fn test_large_list() -> Result<()> {
797        test_generic_list::<i64>()
798    }
799
800    fn test_generic_list_view<Offset: OffsetSizeTrait + ArrowNativeType>() -> Result<()> {
801        // Construct a value array
802        let value_data = ArrayData::builder(DataType::Int16)
803            .len(8)
804            .add_buffer(Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]))
805            .build()
806            .unwrap();
807
808        // Construct a buffer for value offsets, for the nested array:
809        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
810        let value_offsets = [0_usize, 3, 6]
811            .iter()
812            .map(|i| Offset::from_usize(*i).unwrap())
813            .collect::<Buffer>();
814
815        let sizes_buffer = [3_usize, 3, 2]
816            .iter()
817            .map(|i| Offset::from_usize(*i).unwrap())
818            .collect::<Buffer>();
819
820        // Construct a list array from the above two
821        let list_view_dt = GenericListViewArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
822            Field::new_list_field(DataType::Int16, false),
823        ));
824
825        let list_data = ArrayData::builder(list_view_dt)
826            .len(3)
827            .add_buffer(value_offsets)
828            .add_buffer(sizes_buffer)
829            .add_child_data(value_data)
830            .build()
831            .unwrap();
832
833        let original = GenericListViewArray::<Offset>::from(list_data.clone());
834
835        // export it
836        let (array, schema) = to_ffi(&original.to_data())?;
837
838        // (simulate consumer) import it
839        let data = unsafe { from_ffi(array, &schema) }?;
840        let array = make_array(data);
841
842        // downcast
843        let array = array
844            .as_any()
845            .downcast_ref::<GenericListViewArray<Offset>>()
846            .unwrap();
847
848        assert_eq!(&array.value(0), &original.value(0));
849        assert_eq!(&array.value(1), &original.value(1));
850        assert_eq!(&array.value(2), &original.value(2));
851
852        Ok(())
853    }
854
855    #[test]
856    fn test_list_view() -> Result<()> {
857        test_generic_list_view::<i32>()
858    }
859
860    #[test]
861    fn test_large_list_view() -> Result<()> {
862        test_generic_list_view::<i64>()
863    }
864
865    fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
866        // create an array natively
867        let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
868        let array = GenericBinaryArray::<Offset>::from(array);
869
870        // export it
871        let (array, schema) = to_ffi(&array.to_data())?;
872
873        // (simulate consumer) import it
874        let data = unsafe { from_ffi(array, &schema) }?;
875        let array = make_array(data);
876        let array = array
877            .as_any()
878            .downcast_ref::<GenericBinaryArray<Offset>>()
879            .unwrap();
880
881        // verify
882        let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
883        let expected = GenericBinaryArray::<Offset>::from(expected);
884        assert_eq!(array, &expected);
885
886        // (drop/release)
887        Ok(())
888    }
889
890    #[test]
891    fn test_binary() -> Result<()> {
892        test_generic_binary::<i32>()
893    }
894
895    #[test]
896    fn test_large_binary() -> Result<()> {
897        test_generic_binary::<i64>()
898    }
899
900    #[test]
901    fn test_bool() -> Result<()> {
902        // create an array natively
903        let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
904
905        // export it
906        let (array, schema) = to_ffi(&array.to_data())?;
907
908        // (simulate consumer) import it
909        let data = unsafe { from_ffi(array, &schema) }?;
910        let array = make_array(data);
911        let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
912
913        // verify
914        assert_eq!(
915            array,
916            &BooleanArray::from(vec![None, Some(true), Some(false)])
917        );
918
919        // (drop/release)
920        Ok(())
921    }
922
923    #[test]
924    fn test_time32() -> Result<()> {
925        // create an array natively
926        let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
927
928        // export it
929        let (array, schema) = to_ffi(&array.to_data())?;
930
931        // (simulate consumer) import it
932        let data = unsafe { from_ffi(array, &schema) }?;
933        let array = make_array(data);
934        let array = array
935            .as_any()
936            .downcast_ref::<Time32MillisecondArray>()
937            .unwrap();
938
939        // verify
940        assert_eq!(
941            array,
942            &Time32MillisecondArray::from(vec![None, Some(1), Some(2)])
943        );
944
945        // (drop/release)
946        Ok(())
947    }
948
949    #[test]
950    fn test_timestamp() -> Result<()> {
951        // create an array natively
952        let array = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
953
954        // export it
955        let (array, schema) = to_ffi(&array.to_data())?;
956
957        // (simulate consumer) import it
958        let data = unsafe { from_ffi(array, &schema) }?;
959        let array = make_array(data);
960        let array = array
961            .as_any()
962            .downcast_ref::<TimestampMillisecondArray>()
963            .unwrap();
964
965        // verify
966        assert_eq!(
967            array,
968            &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)])
969        );
970
971        // (drop/release)
972        Ok(())
973    }
974
975    #[test]
976    fn test_fixed_size_binary_array() -> Result<()> {
977        let values = vec![
978            None,
979            Some(vec![10, 10, 10]),
980            None,
981            Some(vec![20, 20, 20]),
982            Some(vec![30, 30, 30]),
983            None,
984        ];
985        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
986
987        // export it
988        let (array, schema) = to_ffi(&array.to_data())?;
989
990        // (simulate consumer) import it
991        let data = unsafe { from_ffi(array, &schema) }?;
992        let array = make_array(data);
993        let array = array
994            .as_any()
995            .downcast_ref::<FixedSizeBinaryArray>()
996            .unwrap();
997
998        // verify
999        assert_eq!(
1000            array,
1001            &FixedSizeBinaryArray::try_from_sparse_iter_with_size(
1002                vec![
1003                    None,
1004                    Some(vec![10, 10, 10]),
1005                    None,
1006                    Some(vec![20, 20, 20]),
1007                    Some(vec![30, 30, 30]),
1008                    None,
1009                ]
1010                .into_iter(),
1011                3
1012            )?
1013        );
1014
1015        // (drop/release)
1016        Ok(())
1017    }
1018
1019    #[test]
1020    fn test_fixed_size_list_array() -> Result<()> {
1021        // 0000 0100
1022        let mut validity_bits: [u8; 1] = [0; 1];
1023        bit_util::set_bit(&mut validity_bits, 2);
1024
1025        let v: Vec<i32> = (0..9).collect();
1026        let value_data = ArrayData::builder(DataType::Int32)
1027            .len(9)
1028            .add_buffer(Buffer::from_slice_ref(&v))
1029            .build()?;
1030
1031        let list_data_type =
1032            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
1033        let list_data = ArrayData::builder(list_data_type.clone())
1034            .len(3)
1035            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1036            .add_child_data(value_data)
1037            .build()?;
1038
1039        // export it
1040        let (array, schema) = to_ffi(&list_data)?;
1041
1042        // (simulate consumer) import it
1043        let data = unsafe { from_ffi(array, &schema) }?;
1044        let array = make_array(data);
1045        let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
1046
1047        // 0010 0100
1048        let mut expected_validity_bits: [u8; 1] = [0; 1];
1049        bit_util::set_bit(&mut expected_validity_bits, 2);
1050        bit_util::set_bit(&mut expected_validity_bits, 5);
1051
1052        let mut w = vec![];
1053        w.extend_from_slice(&v);
1054
1055        let expected_value_data = ArrayData::builder(DataType::Int32)
1056            .len(9)
1057            .add_buffer(Buffer::from_slice_ref(&w))
1058            .build()?;
1059
1060        let expected_list_data = ArrayData::builder(list_data_type)
1061            .len(3)
1062            .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
1063            .add_child_data(expected_value_data)
1064            .build()?;
1065        let expected_array = FixedSizeListArray::from(expected_list_data);
1066
1067        // verify
1068        assert_eq!(array, &expected_array);
1069
1070        // (drop/release)
1071        Ok(())
1072    }
1073
1074    #[test]
1075    fn test_dictionary() -> Result<()> {
1076        // create an array natively
1077        let values = vec!["a", "aaa", "aaa"];
1078        let dict_array: DictionaryArray<Int8Type> = values.into_iter().collect();
1079
1080        // export it
1081        let (array, schema) = to_ffi(&dict_array.to_data())?;
1082
1083        // (simulate consumer) import it
1084        let data = unsafe { from_ffi(array, &schema) }?;
1085        let array = make_array(data);
1086        let actual = array
1087            .as_any()
1088            .downcast_ref::<DictionaryArray<Int8Type>>()
1089            .unwrap();
1090
1091        // verify
1092        let new_values = vec!["a", "aaa", "aaa"];
1093        let expected: DictionaryArray<Int8Type> = new_values.into_iter().collect();
1094        assert_eq!(actual, &expected);
1095
1096        // (drop/release)
1097        Ok(())
1098    }
1099
1100    #[test]
1101    #[allow(deprecated)]
1102    fn test_export_array_into_raw() -> Result<()> {
1103        let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
1104
1105        // Assume two raw pointers provided by the consumer
1106        let mut out_array = FFI_ArrowArray::empty();
1107        let mut out_schema = FFI_ArrowSchema::empty();
1108
1109        {
1110            let out_array_ptr = std::ptr::addr_of_mut!(out_array);
1111            let out_schema_ptr = std::ptr::addr_of_mut!(out_schema);
1112            unsafe {
1113                export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1114            }
1115        }
1116
1117        // (simulate consumer) import it
1118        let data = unsafe { from_ffi(out_array, &out_schema) }?;
1119        let array = make_array(data);
1120
1121        // perform some operation
1122        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1123
1124        // verify
1125        assert_eq!(array, &Int32Array::from(vec![1, 2, 3]));
1126        Ok(())
1127    }
1128
1129    #[test]
1130    fn test_duration() -> Result<()> {
1131        // create an array natively
1132        let array = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
1133
1134        // export it
1135        let (array, schema) = to_ffi(&array.to_data())?;
1136
1137        // (simulate consumer) import it
1138        let data = unsafe { from_ffi(array, &schema) }?;
1139        let array = make_array(data);
1140        let array = array
1141            .as_any()
1142            .downcast_ref::<DurationSecondArray>()
1143            .unwrap();
1144
1145        // verify
1146        assert_eq!(
1147            array,
1148            &DurationSecondArray::from(vec![None, Some(1), Some(2)])
1149        );
1150
1151        // (drop/release)
1152        Ok(())
1153    }
1154
1155    #[test]
1156    fn test_map_array() -> Result<()> {
1157        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1158        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1159
1160        // Construct a buffer for value offsets, for the nested array:
1161        //  [[a, b, c], [d, e, f], [g, h]]
1162        let entry_offsets = [0, 3, 6, 8];
1163
1164        let map_array =
1165            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1166                .unwrap();
1167
1168        // export it
1169        let (array, schema) = to_ffi(&map_array.to_data())?;
1170
1171        // (simulate consumer) import it
1172        let data = unsafe { from_ffi(array, &schema) }?;
1173        let array = make_array(data);
1174
1175        // perform some operation
1176        let array = array.as_any().downcast_ref::<MapArray>().unwrap();
1177        assert_eq!(array, &map_array);
1178
1179        Ok(())
1180    }
1181
1182    #[test]
1183    fn test_struct_array() -> Result<()> {
1184        let metadata: HashMap<String, String> =
1185            [("Hello".to_string(), "World! 😊".to_string())].into();
1186        let struct_array = StructArray::from(vec![(
1187            Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata)),
1188            Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>,
1189        )]);
1190
1191        // export it
1192        let (array, schema) = to_ffi(&struct_array.to_data())?;
1193
1194        // (simulate consumer) import it
1195        let data = unsafe { from_ffi(array, &schema) }?;
1196        let array = make_array(data);
1197
1198        // perform some operation
1199        let array = array.as_any().downcast_ref::<StructArray>().unwrap();
1200        assert_eq!(array.data_type(), struct_array.data_type());
1201        assert_eq!(array, &struct_array);
1202
1203        Ok(())
1204    }
1205
1206    #[test]
1207    fn test_union_sparse_array() -> Result<()> {
1208        let mut builder = UnionBuilder::new_sparse();
1209        builder.append::<Int32Type>("a", 1).unwrap();
1210        builder.append_null::<Int32Type>("a").unwrap();
1211        builder.append::<Float64Type>("c", 3.0).unwrap();
1212        builder.append::<Int32Type>("a", 4).unwrap();
1213        let union = builder.build().unwrap();
1214
1215        // export it
1216        let (array, schema) = to_ffi(&union.to_data())?;
1217
1218        // (simulate consumer) import it
1219        let data = unsafe { from_ffi(array, &schema) }?;
1220        let array = make_array(data);
1221
1222        let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
1223
1224        let expected_type_ids = vec![0_i8, 0, 1, 0];
1225
1226        // Check type ids
1227        assert_eq!(*array.type_ids(), expected_type_ids);
1228        for (i, id) in expected_type_ids.iter().enumerate() {
1229            assert_eq!(id, &array.type_id(i));
1230        }
1231
1232        // Check offsets, sparse union should only have a single buffer, i.e. no offsets
1233        assert!(array.offsets().is_none());
1234
1235        for i in 0..array.len() {
1236            let slot = array.value(i);
1237            match i {
1238                0 => {
1239                    let slot = slot.as_primitive::<Int32Type>();
1240                    assert!(!slot.is_null(0));
1241                    assert_eq!(slot.len(), 1);
1242                    let value = slot.value(0);
1243                    assert_eq!(1_i32, value);
1244                }
1245                1 => assert!(slot.is_null(0)),
1246                2 => {
1247                    let slot = slot.as_primitive::<Float64Type>();
1248                    assert!(!slot.is_null(0));
1249                    assert_eq!(slot.len(), 1);
1250                    let value = slot.value(0);
1251                    assert_eq!(value, 3_f64);
1252                }
1253                3 => {
1254                    let slot = slot.as_primitive::<Int32Type>();
1255                    assert!(!slot.is_null(0));
1256                    assert_eq!(slot.len(), 1);
1257                    let value = slot.value(0);
1258                    assert_eq!(4_i32, value);
1259                }
1260                _ => unreachable!(),
1261            }
1262        }
1263
1264        Ok(())
1265    }
1266
1267    #[test]
1268    fn test_union_dense_array() -> Result<()> {
1269        let mut builder = UnionBuilder::new_dense();
1270        builder.append::<Int32Type>("a", 1).unwrap();
1271        builder.append_null::<Int32Type>("a").unwrap();
1272        builder.append::<Float64Type>("c", 3.0).unwrap();
1273        builder.append::<Int32Type>("a", 4).unwrap();
1274        let union = builder.build().unwrap();
1275
1276        // export it
1277        let (array, schema) = to_ffi(&union.to_data())?;
1278
1279        // (simulate consumer) import it
1280        let data = unsafe { from_ffi(array, &schema) }?;
1281        let array = UnionArray::from(data);
1282
1283        let expected_type_ids = vec![0_i8, 0, 1, 0];
1284
1285        // Check type ids
1286        assert_eq!(*array.type_ids(), expected_type_ids);
1287        for (i, id) in expected_type_ids.iter().enumerate() {
1288            assert_eq!(id, &array.type_id(i));
1289        }
1290
1291        assert!(array.offsets().is_some());
1292
1293        for i in 0..array.len() {
1294            let slot = array.value(i);
1295            match i {
1296                0 => {
1297                    let slot = slot.as_primitive::<Int32Type>();
1298                    assert!(!slot.is_null(0));
1299                    assert_eq!(slot.len(), 1);
1300                    let value = slot.value(0);
1301                    assert_eq!(1_i32, value);
1302                }
1303                1 => assert!(slot.is_null(0)),
1304                2 => {
1305                    let slot = slot.as_primitive::<Float64Type>();
1306                    assert!(!slot.is_null(0));
1307                    assert_eq!(slot.len(), 1);
1308                    let value = slot.value(0);
1309                    assert_eq!(value, 3_f64);
1310                }
1311                3 => {
1312                    let slot = slot.as_primitive::<Int32Type>();
1313                    assert!(!slot.is_null(0));
1314                    assert_eq!(slot.len(), 1);
1315                    let value = slot.value(0);
1316                    assert_eq!(4_i32, value);
1317                }
1318                _ => unreachable!(),
1319            }
1320        }
1321
1322        Ok(())
1323    }
1324
1325    #[test]
1326    fn test_run_array() -> Result<()> {
1327        let value_data =
1328            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1329
1330        // Construct a run_ends array:
1331        let run_ends_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
1332        let run_ends_data =
1333            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1334
1335        // Construct a run ends encoded array from the above two
1336        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1337
1338        // export it
1339        let (array, schema) = to_ffi(&ree_array.to_data())?;
1340
1341        // (simulate consumer) import it
1342        let data = unsafe { from_ffi(array, &schema) }?;
1343        let array = make_array(data);
1344
1345        // perform some operation
1346        let array = array
1347            .as_any()
1348            .downcast_ref::<RunArray<Int32Type>>()
1349            .unwrap();
1350        assert_eq!(array.data_type(), ree_array.data_type());
1351        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1352        assert_eq!(array.values(), ree_array.values());
1353
1354        Ok(())
1355    }
1356
1357    #[test]
1358    fn test_nullable_run_array() -> Result<()> {
1359        let nulls = NullBuffer::from(vec![true, false, true, true, false]);
1360        let value_data =
1361            PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(nulls));
1362
1363        // Construct a run_ends array:
1364        let run_ends_values = [5_i32, 6, 7, 8, 10];
1365        let run_ends_data =
1366            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1367
1368        // Construct a run ends encoded array from the above two
1369        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1370
1371        // export it
1372        let (array, schema) = to_ffi(&ree_array.to_data())?;
1373
1374        // (simulate consumer) import it
1375        let data = unsafe { from_ffi(array, &schema) }?;
1376        let array = make_array(data);
1377
1378        // perform some operation
1379        let array = array
1380            .as_any()
1381            .downcast_ref::<RunArray<Int32Type>>()
1382            .unwrap();
1383        assert_eq!(array.data_type(), ree_array.data_type());
1384        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1385        assert_eq!(array.values(), ree_array.values());
1386
1387        Ok(())
1388    }
1389}
1390
1391#[cfg(test)]
1392mod tests_from_ffi {
1393    #[cfg(not(feature = "force_validate"))]
1394    use std::ptr::NonNull;
1395    use std::sync::Arc;
1396
1397    use arrow_buffer::NullBuffer;
1398    #[cfg(not(feature = "force_validate"))]
1399    use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
1400    #[cfg(feature = "force_validate")]
1401    use arrow_buffer::{bit_util, buffer::Buffer};
1402
1403    use arrow_data::ArrayData;
1404    use arrow_data::transform::MutableArrayData;
1405    use arrow_schema::{DataType, Field};
1406
1407    use super::Result;
1408
1409    use crate::builder::GenericByteViewBuilder;
1410    use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1411    use crate::{
1412        ArrayRef, GenericByteViewArray, ListArray,
1413        array::{
1414            Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1415            Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1416        },
1417        ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi},
1418        make_array,
1419    };
1420
1421    fn test_round_trip(expected: &ArrayData) -> Result<()> {
1422        // here we export the array
1423        let array = FFI_ArrowArray::new(expected);
1424        let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1425
1426        // simulate an external consumer by being the consumer
1427        let result = &unsafe { from_ffi(array, &schema) }?;
1428
1429        assert_eq!(result, expected);
1430        Ok(())
1431    }
1432
1433    #[test]
1434    fn test_u32() -> Result<()> {
1435        let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
1436        let data = array.into_data();
1437        test_round_trip(&data)
1438    }
1439
1440    #[test]
1441    fn test_u64() -> Result<()> {
1442        let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
1443        let data = array.into_data();
1444        test_round_trip(&data)
1445    }
1446
1447    #[test]
1448    fn test_i64() -> Result<()> {
1449        let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
1450        let data = array.into_data();
1451        test_round_trip(&data)
1452    }
1453
1454    #[test]
1455    fn test_struct() -> Result<()> {
1456        let inner = StructArray::from(vec![
1457            (
1458                Arc::new(Field::new("a1", DataType::Boolean, false)),
1459                Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc<dyn Array>,
1460            ),
1461            (
1462                Arc::new(Field::new("a2", DataType::UInt32, false)),
1463                Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
1464            ),
1465        ]);
1466
1467        let array = StructArray::from(vec![
1468            (
1469                Arc::new(Field::new("a", inner.data_type().clone(), false)),
1470                Arc::new(inner) as Arc<dyn Array>,
1471            ),
1472            (
1473                Arc::new(Field::new("b", DataType::Boolean, false)),
1474                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
1475            ),
1476            (
1477                Arc::new(Field::new("c", DataType::UInt32, false)),
1478                Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
1479            ),
1480        ]);
1481        let data = array.into_data();
1482        test_round_trip(&data)
1483    }
1484
1485    #[test]
1486    fn test_dictionary() -> Result<()> {
1487        let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
1488        let keys = Int32Array::from(vec![
1489            Some(0),
1490            Some(1),
1491            None,
1492            Some(1),
1493            Some(1),
1494            None,
1495            Some(1),
1496            Some(2),
1497            Some(1),
1498            None,
1499        ]);
1500        let array = DictionaryArray::new(keys, Arc::new(values));
1501
1502        let data = array.into_data();
1503        test_round_trip(&data)
1504    }
1505
1506    #[test]
1507    fn test_fixed_size_binary() -> Result<()> {
1508        let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
1509        let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
1510
1511        let data = array.into_data();
1512        test_round_trip(&data)
1513    }
1514
1515    #[test]
1516    fn test_fixed_size_binary_with_nulls() -> Result<()> {
1517        let values = vec![
1518            None,
1519            Some(vec![10, 10, 10]),
1520            None,
1521            Some(vec![20, 20, 20]),
1522            Some(vec![30, 30, 30]),
1523            None,
1524        ];
1525        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
1526
1527        let data = array.into_data();
1528        test_round_trip(&data)
1529    }
1530
1531    #[test]
1532    fn test_fixed_size_list() -> Result<()> {
1533        let v: Vec<i64> = (0..9).collect();
1534        let value_data = ArrayData::builder(DataType::Int64)
1535            .len(9)
1536            .add_buffer(Buffer::from_slice_ref(v))
1537            .build()?;
1538        let list_data_type =
1539            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3);
1540        let list_data = ArrayData::builder(list_data_type)
1541            .len(3)
1542            .add_child_data(value_data)
1543            .build()?;
1544        let array = FixedSizeListArray::from(list_data);
1545
1546        let data = array.into_data();
1547        test_round_trip(&data)
1548    }
1549
1550    #[test]
1551    fn test_fixed_size_list_with_nulls() -> Result<()> {
1552        // 0100 0110
1553        let mut validity_bits: [u8; 1] = [0; 1];
1554        bit_util::set_bit(&mut validity_bits, 1);
1555        bit_util::set_bit(&mut validity_bits, 2);
1556        bit_util::set_bit(&mut validity_bits, 6);
1557
1558        let v: Vec<i16> = (0..16).collect();
1559        let value_data = ArrayData::builder(DataType::Int16)
1560            .len(16)
1561            .add_buffer(Buffer::from_slice_ref(v))
1562            .build()?;
1563        let list_data_type =
1564            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2);
1565        let list_data = ArrayData::builder(list_data_type)
1566            .len(8)
1567            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1568            .add_child_data(value_data)
1569            .build()?;
1570        let array = FixedSizeListArray::from(list_data);
1571
1572        let data = array.into_data();
1573        test_round_trip(&data)
1574    }
1575
1576    #[test]
1577    fn test_fixed_size_list_nested() -> Result<()> {
1578        let v: Vec<i32> = (0..16).collect();
1579        let value_data = ArrayData::builder(DataType::Int32)
1580            .len(16)
1581            .add_buffer(Buffer::from_slice_ref(v))
1582            .build()?;
1583
1584        let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
1585        let value_offsets = Buffer::from_slice_ref(offsets);
1586        let inner_list_data_type =
1587            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1588        let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
1589            .len(8)
1590            .add_buffer(value_offsets)
1591            .add_child_data(value_data)
1592            .build()?;
1593
1594        // 0000 0100
1595        let mut validity_bits: [u8; 1] = [0; 1];
1596        bit_util::set_bit(&mut validity_bits, 2);
1597
1598        let list_data_type =
1599            DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2);
1600        let list_data = ArrayData::builder(list_data_type)
1601            .len(4)
1602            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1603            .add_child_data(inner_list_data)
1604            .build()?;
1605
1606        let array = FixedSizeListArray::from(list_data);
1607
1608        let data = array.into_data();
1609        test_round_trip(&data)
1610    }
1611
/// Builds a three-element `ListView` of non-nullable `Int16` items and checks that it
/// survives `test_round_trip`.
#[test]
fn test_list_view() -> Result<()> {
    // Child values referenced by the list views.
    let child_values = Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]);
    let child = ArrayData::builder(DataType::Int16)
        .len(8)
        .add_buffer(child_values)
        .build()
        .unwrap();

    // Offsets and sizes describing the nested array:
    //  [[0, 1, 2], [3, 4, 5], [6, 7]]
    let offsets = Buffer::from(vec![0_i32, 3, 6]);
    let sizes = Buffer::from(vec![3_i32, 3, 2]);

    // Assemble the list-view array: offsets buffer first, then sizes, then the child.
    let item_field = Arc::new(Field::new_list_field(DataType::Int16, false));
    let list_view = ArrayData::builder(DataType::ListView(item_field))
        .len(3)
        .add_buffer(offsets)
        .add_buffer(sizes)
        .add_child_data(child)
        .build()
        .unwrap();

    test_round_trip(&list_view)
}
1640
/// Builds a four-element `ListView` of nullable `Int16` items whose last slot is null
/// and checks that it survives `test_round_trip`.
#[test]
fn test_list_view_with_nulls() -> Result<()> {
    // Child values referenced by the list views.
    let child_values = Buffer::from_slice_ref([0_i16, 1, 2, 3, 4, 5, 6, 7]);
    let child = ArrayData::builder(DataType::Int16)
        .len(8)
        .add_buffer(child_values)
        .build()
        .unwrap();

    // Offsets and sizes describing the nested array:
    //  [[0, 1, 2], [3, 4, 5], [6, 7], null]
    // The null slot is given offset 8 and size 0.
    let offsets = Buffer::from(vec![0_i32, 3, 6, 8]);
    let sizes = Buffer::from(vec![3_i32, 3, 2, 0]);

    // Only the final slot is marked invalid.
    let validity = NullBuffer::from(vec![true, true, true, false]);

    // Assemble the list-view array: offsets buffer first, then sizes, then the child.
    let item_field = Arc::new(Field::new_list_field(DataType::Int16, true));
    let list_view = ArrayData::builder(DataType::ListView(item_field))
        .len(4)
        .add_buffer(offsets)
        .add_buffer(sizes)
        .add_child_data(child)
        .nulls(Some(validity))
        .build()
        .unwrap();

    test_round_trip(&list_view)
}
1670
/// Imports an empty string array whose single offset is non-zero and checks the buffer
/// lengths computed for it: 4 bytes for the offsets buffer (one `i32`) and 0 bytes for
/// the data buffer. The imported data must then survive `test_round_trip`.
///
/// Compiled out when the `force_validate` feature is enabled.
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_empty_string_with_non_zero_offset() -> Result<()> {
    use super::ImportedArrowArray;
    use arrow_buffer::{MutableBuffer, OffsetBuffer};

    // Simulate an empty string array with a non-zero offset from a producer
    let values: Buffer = MutableBuffer::new(0).into();
    let offsets = OffsetBuffer::new(vec![123].into());
    // NOTE(review): `new_unchecked` is used so the non-zero offset over an empty values
    // buffer is not rejected at construction time; the array has no elements, so
    // presumably no value is ever read through that offset — confirm against the
    // safety contract of `StringArray::new_unchecked`.
    // Neither buffer is needed again, so both are moved in rather than cloned.
    let string_array = unsafe { StringArray::new_unchecked(offsets, values, None) };

    let data = string_array.into_data();

    let array = FFI_ArrowArray::new(&data);
    let schema = FFI_ArrowSchema::try_from(data.data_type())?;

    let dt = DataType::try_from(&schema)?;
    // The same `Arc` serves as both the array being read and its owner.
    let array = Arc::new(array);
    let imported_array = ImportedArrowArray {
        array: &array,
        data_type: dt,
        owner: &array,
    };

    // Buffer index 1 is the offsets buffer, index 2 the string data buffer.
    let offset_buf_len = imported_array.buffer_len(1, &[], &imported_array.data_type)?;
    let data_buf_len = imported_array.buffer_len(2, &[], &imported_array.data_type)?;

    assert_eq!(offset_buf_len, 4);
    assert_eq!(data_buf_len, 0);

    test_round_trip(&imported_array.consume()?)
}
1704
1705    fn roundtrip_string_array(array: StringArray) -> StringArray {
1706        let data = array.into_data();
1707
1708        let array = FFI_ArrowArray::new(&data);
1709        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1710
1711        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1712        StringArray::from(array)
1713    }
1714
1715    fn roundtrip_byte_view_array<T: ByteViewType>(
1716        array: GenericByteViewArray<T>,
1717    ) -> GenericByteViewArray<T> {
1718        let data = array.into_data();
1719
1720        let array = FFI_ArrowArray::new(&data);
1721        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1722
1723        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1724        GenericByteViewArray::<T>::from(array)
1725    }
1726
1727    fn extend_array(array: &dyn Array) -> ArrayRef {
1728        let len = array.len();
1729        let data = array.to_data();
1730
1731        let mut mutable = MutableArrayData::new(vec![&data], false, len);
1732        mutable.extend(0, 0, len);
1733        make_array(mutable.freeze())
1734    }
1735
/// Round-trips a 1000-element string array, and a 500-element slice of it, through
/// `roundtrip_string_array`, checking lengths, boundary values, and that a copy made
/// with `extend_array` equals the imported array.
#[test]
fn test_extend_imported_string_slice() {
    let strings: Vec<String> = (0..1000).map(|i| format!("string: {i}")).collect();
    let string_array = StringArray::from(strings);

    // A copy built by `extend_array` must compare equal to the imported array.
    let assert_copy_matches = |imported: &StringArray| {
        let copied = extend_array(imported);
        assert_eq!(
            copied.as_any().downcast_ref::<StringArray>().unwrap(),
            imported
        );
    };

    // Whole array.
    let imported = roundtrip_string_array(string_array.clone());
    assert_eq!(imported.len(), 1000);
    assert_eq!(imported.value(0), "string: 0");
    assert_eq!(imported.value(499), "string: 499");
    assert_copy_matches(&imported);

    // Second half only, i.e. an array with a non-zero starting position.
    let second_half = string_array.slice(500, 500);
    let imported = roundtrip_string_array(second_half);
    assert_eq!(imported.len(), 500);
    assert_eq!(imported.value(0), "string: 500");
    assert_eq!(imported.value(499), "string: 999");
    assert_copy_matches(&imported);
}
1770
1771    fn roundtrip_list_array(array: ListArray) -> ListArray {
1772        let data = array.into_data();
1773
1774        let array = FFI_ArrowArray::new(&data);
1775        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1776
1777        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1778        ListArray::from(array)
1779    }
1780
/// Round-trips the second half of a 1000-element list array through
/// `roundtrip_list_array` and checks that both the imported array and a copy made with
/// `extend_array` match the expected contents.
#[test]
fn test_extend_imported_list_slice() {
    // 1000 lists of 100 non-null values each; list `i` holds `i * 1000 + j` for j in 0..100.
    let rows: Vec<Option<Vec<Option<i32>>>> = (0..1000)
        .map(|i| Some((0..100).map(|j| Some(i * 1000 + j)).collect()))
        .collect();

    let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(rows);

    // Only the sliced second half is exported.
    let second_half = list_array.slice(500, 500);
    let imported = roundtrip_list_array(second_half.clone());
    assert_eq!(imported.len(), 500);
    assert_eq!(&second_half, &imported);

    let copied = extend_array(&imported);
    let copied = copied.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(copied, &imported);
}
1806
/// Test-only conversion from a string slice to a borrowed native scalar, so the same
/// literal can be used as either a `BinaryViewType::Native` or a
/// `StringViewType::Native` value.
trait NativeFromStr {
    /// Borrows `value` as `&Self`.
    fn from_str(value: &str) -> &Self;
}

/// For `str` the input is already in its native form and is returned unchanged.
impl NativeFromStr for str {
    fn from_str(value: &str) -> &str {
        value
    }
}

/// For `[u8]` the input is exposed as its underlying bytes.
impl NativeFromStr for [u8] {
    fn from_str(value: &str) -> &[u8] {
        str::as_bytes(value)
    }
}
1824
/// Round-trips an empty string-view array whose views buffer is a zero-length buffer
/// over a dangling pointer, and checks the imported array equals an ordinary empty one.
///
/// Compiled out when the `force_validate` feature is enabled.
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_utf8_view_ffi_from_dangling_pointer() {
    // Reference array produced the normal way.
    let empty = GenericByteViewBuilder::<StringViewType>::new().finish();

    // Create a dangling pointer to a view buffer with zero length.
    let owner = Arc::new(1);
    let dangling = NonNull::<u8>::dangling();
    let zero_len_buffer = unsafe { Buffer::from_custom_allocation(dangling, 0, owner) };
    let views = unsafe { ScalarBuffer::new_unchecked(zero_len_buffer) };

    // Reuse the data buffers and null mask of the reference array.
    let data_buffers = empty.data_buffers().to_vec();
    let nulls = empty.nulls().cloned();
    let str_view: GenericByteViewArray<StringViewType> =
        unsafe { GenericByteViewArray::new_unchecked(views, data_buffers, nulls) };

    let imported = roundtrip_byte_view_array(str_view);
    assert_eq!(imported.len(), 0);
    assert_eq!(&imported, &empty);
}
1843
/// Round-trips string-view and binary-view arrays of several shapes (empty, all
/// inlined, and mixed with one or two variadic data buffers) and checks that a copy
/// made with `extend_array` equals each imported array.
#[test]
fn test_round_trip_byte_view() {
    /// Sends `array` through `roundtrip_byte_view_array`, checks the length is kept,
    /// then checks that an `extend_array` copy equals the imported array.
    fn check_round_trip<T: ByteViewType>(array: GenericByteViewArray<T>) {
        // round-trip through C Data Interface
        let expected_len = array.len();
        let imported = roundtrip_byte_view_array(array);
        assert_eq!(imported.len(), expected_len);

        let copied = extend_array(&imported);
        let copied = copied
            .as_any()
            .downcast_ref::<GenericByteViewArray<T>>()
            .unwrap();
        assert_eq!(copied, &imported);
    }

    /// Runs every array shape for one byte-view type.
    fn test_case<T>()
    where
        T: ByteViewType,
        T::Native: NativeFromStr,
    {
        // Empty test case.
        check_round_trip(GenericByteViewBuilder::<T>::new().finish());

        // All inlined strings test case.
        let mut inlined_builder = GenericByteViewBuilder::<T>::new();
        for text in ["inlined1", "inlined2", "inlined3"] {
            inlined_builder.append_value(T::Native::from_str(text));
        }
        let all_inlined = inlined_builder.finish();
        assert_eq!(all_inlined.data_buffers().len(), 0);
        check_round_trip(all_inlined);

        // some inlined + non-inlined, 1 variadic buffer.
        let mut builder = GenericByteViewBuilder::<T>::new();
        builder.append_value(T::Native::from_str("inlined"));
        let first_block =
            builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
        builder.try_append_view(first_block, 0, 25).unwrap();
        let mixed_one_variadic = builder.finish();
        assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
        check_round_trip(mixed_one_variadic);

        // inlined + non-inlined, 2 variadic buffers.
        let mut builder = GenericByteViewBuilder::<T>::new();
        builder.append_value(T::Native::from_str("inlined"));
        let first_block =
            builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
        builder.try_append_view(first_block, 0, 25).unwrap();
        let second_block = builder
            .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
        builder.try_append_view(second_block, 0, 33).unwrap();
        let mixed_two_variadic = builder.finish();
        assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
        check_round_trip(mixed_two_variadic);
    }

    test_case::<StringViewType>();
    test_case::<BinaryViewType>();
}
1914}