arrow_array/
ffi.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
19//!
20//! Generally, this module is divided in two main interfaces:
21//! One interface maps C ABI to native Rust types, i.e. convert c-pointers, c_char, to native rust.
22//! This is handled by [FFI_ArrowSchema] and [FFI_ArrowArray].
23//!
//! The second interface maps native Rust types to the Rust-specific implementation of Arrow, such as `format` to `DataType`,
//! `Buffer`, etc. This is handled by `from_ffi` and `to_ffi`.
26//!
27//!
28//! Export to FFI
29//!
30//! ```rust
31//! # use std::sync::Arc;
32//! # use arrow_array::{Int32Array, Array, make_array};
33//! # use arrow_data::ArrayData;
34//! # use arrow_array::ffi::{to_ffi, from_ffi};
35//! # use arrow_schema::ArrowError;
36//! # fn main() -> Result<(), ArrowError> {
37//! // create an array natively
38//!
39//! let array = Int32Array::from(vec![Some(1), None, Some(3)]);
40//! let data = array.into_data();
41//!
42//! // Export it
43//! let (out_array, out_schema) = to_ffi(&data)?;
44//!
45//! // import it
46//! let data = unsafe { from_ffi(out_array, &out_schema) }?;
47//! let array = Int32Array::from(data);
48//!
49//! // verify
50//! assert_eq!(array, Int32Array::from(vec![Some(1), None, Some(3)]));
51//! #
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! Import from FFI
57//!
58//! ```
59//! # use std::ptr::addr_of_mut;
60//! # use arrow_array::ffi::{from_ffi, FFI_ArrowArray};
61//! # use arrow_array::{ArrayRef, make_array};
62//! # use arrow_schema::{ArrowError, ffi::FFI_ArrowSchema};
63//! #
64//! /// A foreign data container that can export to C Data interface
65//! struct ForeignArray {};
66//!
67//! impl ForeignArray {
68//!     /// Export from foreign array representation to C Data interface
69//!     /// e.g. <https://github.com/apache/arrow/blob/fc1f9ebbc4c3ae77d5cfc2f9322f4373d3d19b8a/python/pyarrow/array.pxi#L1552>
70//!     fn export_to_c(&self, array: *mut FFI_ArrowArray, schema: *mut FFI_ArrowSchema) {
71//!         // ...
72//!     }
73//! }
74//!
75//! /// Import an [`ArrayRef`] from a [`ForeignArray`]
76//! fn import_array(foreign: &ForeignArray) -> Result<ArrayRef, ArrowError> {
77//!     let mut schema = FFI_ArrowSchema::empty();
78//!     let mut array = FFI_ArrowArray::empty();
79//!     foreign.export_to_c(addr_of_mut!(array), addr_of_mut!(schema));
80//!     Ok(make_array(unsafe { from_ffi(array, &schema) }?))
81//! }
82//! ```
83
84/*
85# Design:
86
87Main assumptions:
* A memory region is deallocated according to its own release mechanism.
89* Rust shares memory regions between arrays.
90* A memory region should be deallocated when no-one is using it.
91
92The design of this module is as follows:
93
94`ArrowArray` contains two `Arc`s, one per ABI-compatible `struct`, each containing data
95according to the C Data Interface. These Arcs are used for ref counting of the structs
96within Rust and lifetime management.
97
Each ABI-compatible `struct` knows how to `drop` itself, calling `release`.
99
100To import an array, unsafely create an `ArrowArray` from two pointers using [ArrowArray::try_from_raw].
101To export an array, create an `ArrowArray` using [ArrowArray::try_new].
102*/
103
104use std::{mem::size_of, ptr::NonNull, sync::Arc};
105
106use arrow_buffer::{Buffer, MutableBuffer, bit_util};
107pub use arrow_data::ffi::FFI_ArrowArray;
108use arrow_data::{ArrayData, layout};
109pub use arrow_schema::ffi::FFI_ArrowSchema;
110use arrow_schema::{ArrowError, DataType, UnionMode};
111
112use crate::array::ArrayRef;
113
114type Result<T> = std::result::Result<T, ArrowError>;
115
116/// Exports an array to raw pointers of the C Data Interface provided by the consumer.
117/// # Safety
118/// Assumes that these pointers represent valid C Data Interfaces, both in memory
119/// representation and lifetime via the `release` mechanism.
120///
121/// This function copies the content of two FFI structs [arrow_data::ffi::FFI_ArrowArray] and
122/// [arrow_schema::ffi::FFI_ArrowSchema] in the array to the location pointed by the raw pointers.
123/// Usually the raw pointers are provided by the array data consumer.
124#[deprecated(
125    since = "52.0.0",
126    note = "Use FFI_ArrowArray::new and FFI_ArrowSchema::try_from"
127)]
128pub unsafe fn export_array_into_raw(
129    src: ArrayRef,
130    out_array: *mut FFI_ArrowArray,
131    out_schema: *mut FFI_ArrowSchema,
132) -> Result<()> {
133    let data = src.to_data();
134    let array = FFI_ArrowArray::new(&data);
135    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
136
137    unsafe { std::ptr::write_unaligned(out_array, array) };
138    unsafe { std::ptr::write_unaligned(out_schema, schema) };
139
140    Ok(())
141}
142
143// returns the number of bits that buffer `i` (in the C data interface) is expected to have.
144// This is set by the Arrow specification
145fn bit_width(data_type: &DataType, i: usize) -> Result<usize> {
146    if let Some(primitive) = data_type.primitive_width() {
147        return match i {
148            0 => Err(ArrowError::CDataInterface(format!(
149                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
150            ))),
151            1 => Ok(primitive * 8),
152            i => Err(ArrowError::CDataInterface(format!(
153                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
154            ))),
155        };
156    }
157
158    Ok(match (data_type, i) {
159        (DataType::Boolean, 1) => 1,
160        (DataType::Boolean, _) => {
161            return Err(ArrowError::CDataInterface(format!(
162                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
163            )));
164        }
165        (DataType::FixedSizeBinary(num_bytes), 1) => *num_bytes as usize * u8::BITS as usize,
166        (DataType::FixedSizeList(f, num_elems), 1) => {
167            let child_bit_width = bit_width(f.data_type(), 1)?;
168            child_bit_width * (*num_elems as usize)
169        }
170        (DataType::FixedSizeBinary(_), _) | (DataType::FixedSizeList(_, _), _) => {
171            return Err(ArrowError::CDataInterface(format!(
172                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
173            )));
174        }
175        // Variable-size list and map have one i32 buffer.
176        // Variable-sized binaries: have two buffers.
177        // "small": first buffer is i32, second is in bytes
178        (DataType::Utf8, 1)
179        | (DataType::Binary, 1)
180        | (DataType::List(_), 1)
181        | (DataType::Map(_, _), 1) => i32::BITS as _,
182        (DataType::Utf8, 2) | (DataType::Binary, 2) => u8::BITS as _,
183        (DataType::List(_), _) | (DataType::Map(_, _), _) => {
184            return Err(ArrowError::CDataInterface(format!(
185                "The datatype \"{data_type}\" expects 2 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
186            )));
187        }
188        (DataType::Utf8, _) | (DataType::Binary, _) => {
189            return Err(ArrowError::CDataInterface(format!(
190                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
191            )));
192        }
193        // Variable-sized binaries: have two buffers.
194        // LargeUtf8: first buffer is i64, second is in bytes
195        (DataType::LargeUtf8, 1) | (DataType::LargeBinary, 1) | (DataType::LargeList(_), 1) => {
196            i64::BITS as _
197        }
198        (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) | (DataType::LargeList(_), 2) => {
199            u8::BITS as _
200        }
201        (DataType::LargeUtf8, _) | (DataType::LargeBinary, _) | (DataType::LargeList(_), _) => {
202            return Err(ArrowError::CDataInterface(format!(
203                "The datatype \"{data_type}\" expects 3 buffers, but requested {i}. Please verify that the C data interface is correctly implemented."
204            )));
205        }
206        // Variable-sized views: have 3 or more buffers.
207        // Buffer 1 are the u128 views
208        // Buffers 2...N-1 are u8 byte buffers
209        (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => u128::BITS as _,
210        (DataType::Utf8View, _) | (DataType::BinaryView, _) => u8::BITS as _,
211        // type ids. UnionArray doesn't have null bitmap so buffer index begins with 0.
212        (DataType::Union(_, _), 0) => i8::BITS as _,
213        // Only DenseUnion has 2nd buffer
214        (DataType::Union(_, UnionMode::Dense), 1) => i32::BITS as _,
215        (DataType::Union(_, UnionMode::Sparse), _) => {
216            return Err(ArrowError::CDataInterface(format!(
217                "The datatype \"{data_type}\" expects 1 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
218            )));
219        }
220        (DataType::Union(_, UnionMode::Dense), _) => {
221            return Err(ArrowError::CDataInterface(format!(
222                "The datatype \"{data_type}\" expects 2 buffer, but requested {i}. Please verify that the C data interface is correctly implemented."
223            )));
224        }
225        (_, 0) => {
226            // We don't call this `bit_width` to compute buffer length for null buffer. If any types that don't have null buffer like
227            // UnionArray, they should be handled above.
228            return Err(ArrowError::CDataInterface(format!(
229                "The datatype \"{data_type}\" doesn't expect buffer at index 0. Please verify that the C data interface is correctly implemented."
230            )));
231        }
232        _ => {
233            return Err(ArrowError::CDataInterface(format!(
234                "The datatype \"{data_type}\" is still not supported in Rust implementation"
235            )));
236        }
237    })
238}
239
240/// returns a new buffer corresponding to the index `i` of the FFI array. It may not exist (null pointer).
241/// `bits` is the number of bits that the native type of this buffer has.
242/// The size of the buffer will be `ceil(self.length * bits, 8)`.
243/// # Panic
244/// This function panics if `i` is larger or equal to `n_buffers`.
245/// # Safety
246/// This function assumes that `ceil(self.length * bits, 8)` is the size of the buffer
247unsafe fn create_buffer(
248    owner: Arc<FFI_ArrowArray>,
249    array: &FFI_ArrowArray,
250    index: usize,
251    len: usize,
252) -> Option<Buffer> {
253    if array.num_buffers() == 0 {
254        return None;
255    }
256    NonNull::new(array.buffer(index) as _)
257        .map(|ptr| unsafe { Buffer::from_custom_allocation(ptr, len, owner) })
258}
259
260/// Export to the C Data Interface
261pub fn to_ffi(data: &ArrayData) -> Result<(FFI_ArrowArray, FFI_ArrowSchema)> {
262    let array = FFI_ArrowArray::new(data);
263    let schema = FFI_ArrowSchema::try_from(data.data_type())?;
264    Ok((array, schema))
265}
266
267/// Import [ArrayData] from the C Data Interface
268///
269/// # Safety
270///
271/// This struct assumes that the incoming data agrees with the C data interface.
272pub unsafe fn from_ffi(array: FFI_ArrowArray, schema: &FFI_ArrowSchema) -> Result<ArrayData> {
273    let dt = DataType::try_from(schema)?;
274    let array = Arc::new(array);
275    let tmp = ImportedArrowArray {
276        array: &array,
277        data_type: dt,
278        owner: &array,
279    };
280    tmp.consume()
281}
282
283/// Import [ArrayData] from the C Data Interface
284///
285/// # Safety
286///
287/// This struct assumes that the incoming data agrees with the C data interface.
288pub unsafe fn from_ffi_and_data_type(
289    array: FFI_ArrowArray,
290    data_type: DataType,
291) -> Result<ArrayData> {
292    let array = Arc::new(array);
293    let tmp = ImportedArrowArray {
294        array: &array,
295        data_type,
296        owner: &array,
297    };
298    tmp.consume()
299}
300
301#[derive(Debug)]
302struct ImportedArrowArray<'a> {
303    array: &'a FFI_ArrowArray,
304    data_type: DataType,
305    owner: &'a Arc<FFI_ArrowArray>,
306}
307
308impl ImportedArrowArray<'_> {
309    fn consume(self) -> Result<ArrayData> {
310        let len = self.array.len();
311        let offset = self.array.offset();
312        let null_count = match &self.data_type {
313            DataType::Null => Some(0),
314            _ => self.array.null_count_opt(),
315        };
316
317        let data_layout = layout(&self.data_type);
318        let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?;
319
320        let null_bit_buffer = if data_layout.can_contain_null_mask {
321            self.null_bit_buffer()
322        } else {
323            None
324        };
325
326        let mut child_data = self.consume_children()?;
327
328        if let Some(d) = self.dictionary()? {
329            // For dictionary type there should only be a single child, so we don't need to worry if
330            // there are other children added above.
331            assert!(child_data.is_empty());
332            child_data.push(d.consume()?);
333        }
334
335        // Should FFI be checking validity?
336        Ok(unsafe {
337            ArrayData::new_unchecked(
338                self.data_type,
339                len,
340                null_count,
341                null_bit_buffer,
342                offset,
343                buffers,
344                child_data,
345            )
346        })
347    }
348
349    fn consume_children(&self) -> Result<Vec<ArrayData>> {
350        match &self.data_type {
351            DataType::List(field)
352            | DataType::FixedSizeList(field, _)
353            | DataType::LargeList(field)
354            | DataType::Map(field, _) => Ok([self.consume_child(0, field.data_type())?].to_vec()),
355            DataType::Struct(fields) => {
356                assert!(fields.len() == self.array.num_children());
357                fields
358                    .iter()
359                    .enumerate()
360                    .map(|(i, field)| self.consume_child(i, field.data_type()))
361                    .collect::<Result<Vec<_>>>()
362            }
363            DataType::Union(union_fields, _) => {
364                assert!(union_fields.len() == self.array.num_children());
365                union_fields
366                    .iter()
367                    .enumerate()
368                    .map(|(i, (_, field))| self.consume_child(i, field.data_type()))
369                    .collect::<Result<Vec<_>>>()
370            }
371            DataType::RunEndEncoded(run_ends_field, values_field) => Ok([
372                self.consume_child(0, run_ends_field.data_type())?,
373                self.consume_child(1, values_field.data_type())?,
374            ]
375            .to_vec()),
376            _ => Ok(Vec::new()),
377        }
378    }
379
380    fn consume_child(&self, index: usize, child_type: &DataType) -> Result<ArrayData> {
381        ImportedArrowArray {
382            array: self.array.child(index),
383            data_type: child_type.clone(),
384            owner: self.owner,
385        }
386        .consume()
387    }
388
389    /// returns all buffers, as organized by Rust (i.e. null buffer is skipped if it's present
390    /// in the spec of the type)
391    fn buffers(&self, can_contain_null_mask: bool, variadic: bool) -> Result<Vec<Buffer>> {
392        // + 1: skip null buffer
393        let buffer_begin = can_contain_null_mask as usize;
394        let buffer_end = self.array.num_buffers() - usize::from(variadic);
395
396        let variadic_buffer_lens = if variadic {
397            // Each views array has 1 (optional) null buffer, 1 views buffer, 1 lengths buffer.
398            // Rest are variadic.
399            let num_variadic_buffers =
400                self.array.num_buffers() - (2 + usize::from(can_contain_null_mask));
401            if num_variadic_buffers == 0 {
402                &[]
403            } else {
404                let lengths = self.array.buffer(self.array.num_buffers() - 1);
405                // SAFETY: is lengths is non-null, then it must be valid for up to num_variadic_buffers.
406                unsafe { std::slice::from_raw_parts(lengths.cast::<i64>(), num_variadic_buffers) }
407            }
408        } else {
409            &[]
410        };
411
412        (buffer_begin..buffer_end)
413            .map(|index| {
414                let len = self.buffer_len(index, variadic_buffer_lens, &self.data_type)?;
415                match unsafe { create_buffer(self.owner.clone(), self.array, index, len) } {
416                    Some(buf) => {
417                        // External libraries may use a dangling pointer for a buffer with length 0.
418                        // We respect the array length specified in the C Data Interface. Actually,
419                        // if the length is incorrect, we cannot create a correct buffer even if
420                        // the pointer is valid.
421                        if buf.is_empty() {
422                            Ok(MutableBuffer::new(0).into())
423                        } else {
424                            Ok(buf)
425                        }
426                    }
427                    None if len == 0 => {
428                        // Null data buffer, which Rust doesn't allow. So create
429                        // an empty buffer.
430                        Ok(MutableBuffer::new(0).into())
431                    }
432                    None => Err(ArrowError::CDataInterface(format!(
433                        "The external buffer at position {index} is null."
434                    ))),
435                }
436            })
437            .collect()
438    }
439
440    /// Returns the length, in bytes, of the buffer `i` (indexed according to the C data interface)
441    /// Rust implementation uses fixed-sized buffers, which require knowledge of their `len`.
442    /// for variable-sized buffers, such as the second buffer of a stringArray, we need
443    /// to fetch offset buffer's len to build the second buffer.
444    fn buffer_len(
445        &self,
446        i: usize,
447        variadic_buffer_lengths: &[i64],
448        dt: &DataType,
449    ) -> Result<usize> {
450        // Special handling for dictionary type as we only care about the key type in the case.
451        let data_type = match dt {
452            DataType::Dictionary(key_data_type, _) => key_data_type.as_ref(),
453            dt => dt,
454        };
455
456        // `ffi::ArrowArray` records array offset, we need to add it back to the
457        // buffer length to get the actual buffer length.
458        let length = self.array.len() + self.array.offset();
459
460        // Inner type is not important for buffer length.
461        Ok(match (&data_type, i) {
462            (DataType::Utf8, 1)
463            | (DataType::LargeUtf8, 1)
464            | (DataType::Binary, 1)
465            | (DataType::LargeBinary, 1)
466            | (DataType::List(_), 1)
467            | (DataType::LargeList(_), 1)
468            | (DataType::Map(_, _), 1) => {
469                // the len of the offset buffer (buffer 1) equals length + 1
470                let bits = bit_width(data_type, i)?;
471                debug_assert_eq!(bits % 8, 0);
472                (length + 1) * (bits / 8)
473            }
474            (DataType::Utf8, 2) | (DataType::Binary, 2) => {
475                if self.array.is_empty() {
476                    return Ok(0);
477                }
478
479                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
480                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
481                // first buffer is the null buffer => add(1)
482                // we assume that pointer is aligned for `i32`, as Utf8 uses `i32` offsets.
483                #[allow(clippy::cast_ptr_alignment)]
484                let offset_buffer = self.array.buffer(1) as *const i32;
485                // get last offset
486                (unsafe { *offset_buffer.add(len / size_of::<i32>() - 1) }) as usize
487            }
488            (DataType::LargeUtf8, 2) | (DataType::LargeBinary, 2) => {
489                if self.array.is_empty() {
490                    return Ok(0);
491                }
492
493                // the len of the data buffer (buffer 2) equals the last value of the offset buffer (buffer 1)
494                let len = self.buffer_len(1, variadic_buffer_lengths, dt)?;
495                // first buffer is the null buffer => add(1)
496                // we assume that pointer is aligned for `i64`, as Large uses `i64` offsets.
497                #[allow(clippy::cast_ptr_alignment)]
498                let offset_buffer = self.array.buffer(1) as *const i64;
499                // get last offset
500                (unsafe { *offset_buffer.add(len / size_of::<i64>() - 1) }) as usize
501            }
502            // View types: these have variadic buffers.
503            // Buffer 1 is the views buffer, which stores 1 u128 per length of the array.
504            // Buffers 2..N-1 are the buffers holding the byte data. Their lengths are variable.
505            // Buffer N is of length (N - 2) and stores i64 containing the lengths of buffers 2..N-1
506            (DataType::Utf8View, 1) | (DataType::BinaryView, 1) => {
507                std::mem::size_of::<u128>() * length
508            }
509            (DataType::Utf8View, i) | (DataType::BinaryView, i) => {
510                variadic_buffer_lengths[i - 2] as usize
511            }
512            // buffer len of primitive types
513            _ => {
514                let bits = bit_width(data_type, i)?;
515                bit_util::ceil(length * bits, 8)
516            }
517        })
518    }
519
520    /// returns the null bit buffer.
521    /// Rust implementation uses a buffer that is not part of the array of buffers.
522    /// The C Data interface's null buffer is part of the array of buffers.
523    fn null_bit_buffer(&self) -> Option<Buffer> {
524        // similar to `self.buffer_len(0)`, but without `Result`.
525        // `ffi::ArrowArray` records array offset, we need to add it back to the
526        // buffer length to get the actual buffer length.
527        let length = self.array.len() + self.array.offset();
528        let buffer_len = bit_util::ceil(length, 8);
529
530        unsafe { create_buffer(self.owner.clone(), self.array, 0, buffer_len) }
531    }
532
533    fn dictionary(&self) -> Result<Option<ImportedArrowArray<'_>>> {
534        match (self.array.dictionary(), &self.data_type) {
535            (Some(array), DataType::Dictionary(_, value_type)) => Ok(Some(ImportedArrowArray {
536                array,
537                data_type: value_type.as_ref().clone(),
538                owner: self.owner,
539            })),
540            (Some(_), _) => Err(ArrowError::CDataInterface(
541                "Got dictionary in FFI_ArrowArray for non-dictionary data type".to_string(),
542            )),
543            (None, DataType::Dictionary(_, _)) => Err(ArrowError::CDataInterface(
544                "Missing dictionary in FFI_ArrowArray for dictionary data type".to_string(),
545            )),
546            (_, _) => Ok(None),
547        }
548    }
549}
550
551#[cfg(test)]
552mod tests_to_then_from_ffi {
553    use std::collections::HashMap;
554    use std::mem::ManuallyDrop;
555
556    use arrow_buffer::NullBuffer;
557    use arrow_schema::Field;
558
559    use crate::builder::UnionBuilder;
560    use crate::cast::AsArray;
561    use crate::types::{Float64Type, Int8Type, Int32Type};
562    use crate::*;
563
564    use super::*;
565
566    #[test]
567    fn test_round_trip() {
568        // create an array natively
569        let array = Int32Array::from(vec![1, 2, 3]);
570
571        // export it
572        let (array, schema) = to_ffi(&array.into_data()).unwrap();
573
574        // (simulate consumer) import it
575        let array = Int32Array::from(unsafe { from_ffi(array, &schema) }.unwrap());
576
577        // verify
578        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
579    }
580
581    #[test]
582    fn test_import() {
583        // Model receiving const pointers from an external system
584
585        // Create an array natively
586        let data = Int32Array::from(vec![1, 2, 3]).into_data();
587        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
588        let array = FFI_ArrowArray::new(&data);
589
590        // Use ManuallyDrop to avoid Box:Drop recursing
591        let schema = Box::new(ManuallyDrop::new(schema));
592        let array = Box::new(ManuallyDrop::new(array));
593
594        let schema_ptr = &**schema as *const _;
595        let array_ptr = &**array as *const _;
596
597        // We can read them back to memory
598        // SAFETY:
599        // Pointers are aligned and valid
600        let data =
601            unsafe { from_ffi(std::ptr::read(array_ptr), &std::ptr::read(schema_ptr)).unwrap() };
602
603        let array = Int32Array::from(data);
604        assert_eq!(array, Int32Array::from(vec![1, 2, 3]));
605    }
606
607    #[test]
608    fn test_round_trip_with_offset() -> Result<()> {
609        // create an array natively
610        let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), None]);
611
612        let array = array.slice(1, 2);
613
614        // export it
615        let (array, schema) = to_ffi(&array.to_data())?;
616
617        // (simulate consumer) import it
618        let data = unsafe { from_ffi(array, &schema) }?;
619        let array = make_array(data);
620        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
621
622        assert_eq!(array, &Int32Array::from(vec![Some(2), None]));
623
624        // (drop/release)
625        Ok(())
626    }
627
628    #[test]
629    #[cfg(not(feature = "force_validate"))]
630    fn test_decimal_round_trip() -> Result<()> {
631        // create an array natively
632        let original_array = [Some(12345_i128), Some(-12345_i128), None]
633            .into_iter()
634            .collect::<Decimal128Array>()
635            .with_precision_and_scale(6, 2)
636            .unwrap();
637
638        // export it
639        let (array, schema) = to_ffi(&original_array.to_data())?;
640
641        // (simulate consumer) import it
642        let data = unsafe { from_ffi(array, &schema) }?;
643        let array = make_array(data);
644
645        // perform some operation
646        let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
647
648        // verify
649        assert_eq!(array, &original_array);
650
651        // (drop/release)
652        Ok(())
653    }
654    // case with nulls is tested in the docs, through the example on this module.
655
656    #[test]
657    fn test_null_count_handling() {
658        let int32_data = ArrayData::builder(DataType::Int32)
659            .len(10)
660            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
661            .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001])))
662            .build()
663            .unwrap();
664        let mut ffi_array = FFI_ArrowArray::new(&int32_data);
665        assert_eq!(3, ffi_array.null_count());
666        assert_eq!(Some(3), ffi_array.null_count_opt());
667        // Simulating uninitialized state
668        unsafe {
669            ffi_array.set_null_count(-1);
670        }
671        assert_eq!(None, ffi_array.null_count_opt());
672        let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap();
673        assert_eq!(3, int32_data.null_count());
674
675        let null_data = &ArrayData::new_null(&DataType::Null, 10);
676        let mut ffi_array = FFI_ArrowArray::new(null_data);
677        assert_eq!(10, ffi_array.null_count());
678        assert_eq!(Some(10), ffi_array.null_count_opt());
679        // Simulating uninitialized state
680        unsafe {
681            ffi_array.set_null_count(-1);
682        }
683        assert_eq!(None, ffi_array.null_count_opt());
684        let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap();
685        assert_eq!(0, null_data.null_count());
686    }
687
688    fn test_generic_string<Offset: OffsetSizeTrait>() -> Result<()> {
689        // create an array natively
690        let array = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
691
692        // export it
693        let (array, schema) = to_ffi(&array.to_data())?;
694
695        // (simulate consumer) import it
696        let data = unsafe { from_ffi(array, &schema) }?;
697        let array = make_array(data);
698
699        // perform some operation
700        let array = array
701            .as_any()
702            .downcast_ref::<GenericStringArray<Offset>>()
703            .unwrap();
704
705        // verify
706        let expected = GenericStringArray::<Offset>::from(vec![Some("a"), None, Some("aaa")]);
707        assert_eq!(array, &expected);
708
709        // (drop/release)
710        Ok(())
711    }
712
713    #[test]
714    fn test_string() -> Result<()> {
715        test_generic_string::<i32>()
716    }
717
718    #[test]
719    fn test_large_string() -> Result<()> {
720        test_generic_string::<i64>()
721    }
722
723    fn test_generic_list<Offset: OffsetSizeTrait>() -> Result<()> {
724        // Construct a value array
725        let value_data = ArrayData::builder(DataType::Int32)
726            .len(8)
727            .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7]))
728            .build()
729            .unwrap();
730
731        // Construct a buffer for value offsets, for the nested array:
732        //  [[0, 1, 2], [3, 4, 5], [6, 7]]
733        let value_offsets = [0_usize, 3, 6, 8]
734            .iter()
735            .map(|i| Offset::from_usize(*i).unwrap())
736            .collect::<Buffer>();
737
738        // Construct a list array from the above two
739        let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Arc::new(
740            Field::new_list_field(DataType::Int32, false),
741        ));
742
743        let list_data = ArrayData::builder(list_data_type)
744            .len(3)
745            .add_buffer(value_offsets)
746            .add_child_data(value_data)
747            .build()
748            .unwrap();
749
750        // create an array natively
751        let array = GenericListArray::<Offset>::from(list_data.clone());
752
753        // export it
754        let (array, schema) = to_ffi(&array.to_data())?;
755
756        // (simulate consumer) import it
757        let data = unsafe { from_ffi(array, &schema) }?;
758        let array = make_array(data);
759
760        // downcast
761        let array = array
762            .as_any()
763            .downcast_ref::<GenericListArray<Offset>>()
764            .unwrap();
765
766        // verify
767        let expected = GenericListArray::<Offset>::from(list_data);
768        assert_eq!(&array.value(0), &expected.value(0));
769        assert_eq!(&array.value(1), &expected.value(1));
770        assert_eq!(&array.value(2), &expected.value(2));
771
772        // (drop/release)
773        Ok(())
774    }
775
776    #[test]
777    fn test_list() -> Result<()> {
778        test_generic_list::<i32>()
779    }
780
781    #[test]
782    fn test_large_list() -> Result<()> {
783        test_generic_list::<i64>()
784    }
785
786    fn test_generic_binary<Offset: OffsetSizeTrait>() -> Result<()> {
787        // create an array natively
788        let array: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
789        let array = GenericBinaryArray::<Offset>::from(array);
790
791        // export it
792        let (array, schema) = to_ffi(&array.to_data())?;
793
794        // (simulate consumer) import it
795        let data = unsafe { from_ffi(array, &schema) }?;
796        let array = make_array(data);
797        let array = array
798            .as_any()
799            .downcast_ref::<GenericBinaryArray<Offset>>()
800            .unwrap();
801
802        // verify
803        let expected: Vec<Option<&[u8]>> = vec![Some(b"a"), None, Some(b"aaa")];
804        let expected = GenericBinaryArray::<Offset>::from(expected);
805        assert_eq!(array, &expected);
806
807        // (drop/release)
808        Ok(())
809    }
810
811    #[test]
812    fn test_binary() -> Result<()> {
813        test_generic_binary::<i32>()
814    }
815
816    #[test]
817    fn test_large_binary() -> Result<()> {
818        test_generic_binary::<i64>()
819    }
820
821    #[test]
822    fn test_bool() -> Result<()> {
823        // create an array natively
824        let array = BooleanArray::from(vec![None, Some(true), Some(false)]);
825
826        // export it
827        let (array, schema) = to_ffi(&array.to_data())?;
828
829        // (simulate consumer) import it
830        let data = unsafe { from_ffi(array, &schema) }?;
831        let array = make_array(data);
832        let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
833
834        // verify
835        assert_eq!(
836            array,
837            &BooleanArray::from(vec![None, Some(true), Some(false)])
838        );
839
840        // (drop/release)
841        Ok(())
842    }
843
844    #[test]
845    fn test_time32() -> Result<()> {
846        // create an array natively
847        let array = Time32MillisecondArray::from(vec![None, Some(1), Some(2)]);
848
849        // export it
850        let (array, schema) = to_ffi(&array.to_data())?;
851
852        // (simulate consumer) import it
853        let data = unsafe { from_ffi(array, &schema) }?;
854        let array = make_array(data);
855        let array = array
856            .as_any()
857            .downcast_ref::<Time32MillisecondArray>()
858            .unwrap();
859
860        // verify
861        assert_eq!(
862            array,
863            &Time32MillisecondArray::from(vec![None, Some(1), Some(2)])
864        );
865
866        // (drop/release)
867        Ok(())
868    }
869
870    #[test]
871    fn test_timestamp() -> Result<()> {
872        // create an array natively
873        let array = TimestampMillisecondArray::from(vec![None, Some(1), Some(2)]);
874
875        // export it
876        let (array, schema) = to_ffi(&array.to_data())?;
877
878        // (simulate consumer) import it
879        let data = unsafe { from_ffi(array, &schema) }?;
880        let array = make_array(data);
881        let array = array
882            .as_any()
883            .downcast_ref::<TimestampMillisecondArray>()
884            .unwrap();
885
886        // verify
887        assert_eq!(
888            array,
889            &TimestampMillisecondArray::from(vec![None, Some(1), Some(2)])
890        );
891
892        // (drop/release)
893        Ok(())
894    }
895
896    #[test]
897    fn test_fixed_size_binary_array() -> Result<()> {
898        let values = vec![
899            None,
900            Some(vec![10, 10, 10]),
901            None,
902            Some(vec![20, 20, 20]),
903            Some(vec![30, 30, 30]),
904            None,
905        ];
906        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
907
908        // export it
909        let (array, schema) = to_ffi(&array.to_data())?;
910
911        // (simulate consumer) import it
912        let data = unsafe { from_ffi(array, &schema) }?;
913        let array = make_array(data);
914        let array = array
915            .as_any()
916            .downcast_ref::<FixedSizeBinaryArray>()
917            .unwrap();
918
919        // verify
920        assert_eq!(
921            array,
922            &FixedSizeBinaryArray::try_from_sparse_iter_with_size(
923                vec![
924                    None,
925                    Some(vec![10, 10, 10]),
926                    None,
927                    Some(vec![20, 20, 20]),
928                    Some(vec![30, 30, 30]),
929                    None,
930                ]
931                .into_iter(),
932                3
933            )?
934        );
935
936        // (drop/release)
937        Ok(())
938    }
939
940    #[test]
941    fn test_fixed_size_list_array() -> Result<()> {
942        // 0000 0100
943        let mut validity_bits: [u8; 1] = [0; 1];
944        bit_util::set_bit(&mut validity_bits, 2);
945
946        let v: Vec<i32> = (0..9).collect();
947        let value_data = ArrayData::builder(DataType::Int32)
948            .len(9)
949            .add_buffer(Buffer::from_slice_ref(&v))
950            .build()?;
951
952        let list_data_type =
953            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int32, false)), 3);
954        let list_data = ArrayData::builder(list_data_type.clone())
955            .len(3)
956            .null_bit_buffer(Some(Buffer::from(validity_bits)))
957            .add_child_data(value_data)
958            .build()?;
959
960        // export it
961        let (array, schema) = to_ffi(&list_data)?;
962
963        // (simulate consumer) import it
964        let data = unsafe { from_ffi(array, &schema) }?;
965        let array = make_array(data);
966        let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
967
968        // 0010 0100
969        let mut expected_validity_bits: [u8; 1] = [0; 1];
970        bit_util::set_bit(&mut expected_validity_bits, 2);
971        bit_util::set_bit(&mut expected_validity_bits, 5);
972
973        let mut w = vec![];
974        w.extend_from_slice(&v);
975
976        let expected_value_data = ArrayData::builder(DataType::Int32)
977            .len(9)
978            .add_buffer(Buffer::from_slice_ref(&w))
979            .build()?;
980
981        let expected_list_data = ArrayData::builder(list_data_type)
982            .len(3)
983            .null_bit_buffer(Some(Buffer::from(expected_validity_bits)))
984            .add_child_data(expected_value_data)
985            .build()?;
986        let expected_array = FixedSizeListArray::from(expected_list_data);
987
988        // verify
989        assert_eq!(array, &expected_array);
990
991        // (drop/release)
992        Ok(())
993    }
994
995    #[test]
996    fn test_dictionary() -> Result<()> {
997        // create an array natively
998        let values = vec!["a", "aaa", "aaa"];
999        let dict_array: DictionaryArray<Int8Type> = values.into_iter().collect();
1000
1001        // export it
1002        let (array, schema) = to_ffi(&dict_array.to_data())?;
1003
1004        // (simulate consumer) import it
1005        let data = unsafe { from_ffi(array, &schema) }?;
1006        let array = make_array(data);
1007        let actual = array
1008            .as_any()
1009            .downcast_ref::<DictionaryArray<Int8Type>>()
1010            .unwrap();
1011
1012        // verify
1013        let new_values = vec!["a", "aaa", "aaa"];
1014        let expected: DictionaryArray<Int8Type> = new_values.into_iter().collect();
1015        assert_eq!(actual, &expected);
1016
1017        // (drop/release)
1018        Ok(())
1019    }
1020
1021    #[test]
1022    #[allow(deprecated)]
1023    fn test_export_array_into_raw() -> Result<()> {
1024        let array = make_array(Int32Array::from(vec![1, 2, 3]).into_data());
1025
1026        // Assume two raw pointers provided by the consumer
1027        let mut out_array = FFI_ArrowArray::empty();
1028        let mut out_schema = FFI_ArrowSchema::empty();
1029
1030        {
1031            let out_array_ptr = std::ptr::addr_of_mut!(out_array);
1032            let out_schema_ptr = std::ptr::addr_of_mut!(out_schema);
1033            unsafe {
1034                export_array_into_raw(array, out_array_ptr, out_schema_ptr)?;
1035            }
1036        }
1037
1038        // (simulate consumer) import it
1039        let data = unsafe { from_ffi(out_array, &out_schema) }?;
1040        let array = make_array(data);
1041
1042        // perform some operation
1043        let array = array.as_any().downcast_ref::<Int32Array>().unwrap();
1044
1045        // verify
1046        assert_eq!(array, &Int32Array::from(vec![1, 2, 3]));
1047        Ok(())
1048    }
1049
1050    #[test]
1051    fn test_duration() -> Result<()> {
1052        // create an array natively
1053        let array = DurationSecondArray::from(vec![None, Some(1), Some(2)]);
1054
1055        // export it
1056        let (array, schema) = to_ffi(&array.to_data())?;
1057
1058        // (simulate consumer) import it
1059        let data = unsafe { from_ffi(array, &schema) }?;
1060        let array = make_array(data);
1061        let array = array
1062            .as_any()
1063            .downcast_ref::<DurationSecondArray>()
1064            .unwrap();
1065
1066        // verify
1067        assert_eq!(
1068            array,
1069            &DurationSecondArray::from(vec![None, Some(1), Some(2)])
1070        );
1071
1072        // (drop/release)
1073        Ok(())
1074    }
1075
1076    #[test]
1077    fn test_map_array() -> Result<()> {
1078        let keys = vec!["a", "b", "c", "d", "e", "f", "g", "h"];
1079        let values_data = UInt32Array::from(vec![0u32, 10, 20, 30, 40, 50, 60, 70]);
1080
1081        // Construct a buffer for value offsets, for the nested array:
1082        //  [[a, b, c], [d, e, f], [g, h]]
1083        let entry_offsets = [0, 3, 6, 8];
1084
1085        let map_array =
1086            MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets)
1087                .unwrap();
1088
1089        // export it
1090        let (array, schema) = to_ffi(&map_array.to_data())?;
1091
1092        // (simulate consumer) import it
1093        let data = unsafe { from_ffi(array, &schema) }?;
1094        let array = make_array(data);
1095
1096        // perform some operation
1097        let array = array.as_any().downcast_ref::<MapArray>().unwrap();
1098        assert_eq!(array, &map_array);
1099
1100        Ok(())
1101    }
1102
1103    #[test]
1104    fn test_struct_array() -> Result<()> {
1105        let metadata: HashMap<String, String> =
1106            [("Hello".to_string(), "World! 😊".to_string())].into();
1107        let struct_array = StructArray::from(vec![(
1108            Arc::new(Field::new("a", DataType::Int32, false).with_metadata(metadata)),
1109            Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>,
1110        )]);
1111
1112        // export it
1113        let (array, schema) = to_ffi(&struct_array.to_data())?;
1114
1115        // (simulate consumer) import it
1116        let data = unsafe { from_ffi(array, &schema) }?;
1117        let array = make_array(data);
1118
1119        // perform some operation
1120        let array = array.as_any().downcast_ref::<StructArray>().unwrap();
1121        assert_eq!(array.data_type(), struct_array.data_type());
1122        assert_eq!(array, &struct_array);
1123
1124        Ok(())
1125    }
1126
1127    #[test]
1128    fn test_union_sparse_array() -> Result<()> {
1129        let mut builder = UnionBuilder::new_sparse();
1130        builder.append::<Int32Type>("a", 1).unwrap();
1131        builder.append_null::<Int32Type>("a").unwrap();
1132        builder.append::<Float64Type>("c", 3.0).unwrap();
1133        builder.append::<Int32Type>("a", 4).unwrap();
1134        let union = builder.build().unwrap();
1135
1136        // export it
1137        let (array, schema) = to_ffi(&union.to_data())?;
1138
1139        // (simulate consumer) import it
1140        let data = unsafe { from_ffi(array, &schema) }?;
1141        let array = make_array(data);
1142
1143        let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
1144
1145        let expected_type_ids = vec![0_i8, 0, 1, 0];
1146
1147        // Check type ids
1148        assert_eq!(*array.type_ids(), expected_type_ids);
1149        for (i, id) in expected_type_ids.iter().enumerate() {
1150            assert_eq!(id, &array.type_id(i));
1151        }
1152
1153        // Check offsets, sparse union should only have a single buffer, i.e. no offsets
1154        assert!(array.offsets().is_none());
1155
1156        for i in 0..array.len() {
1157            let slot = array.value(i);
1158            match i {
1159                0 => {
1160                    let slot = slot.as_primitive::<Int32Type>();
1161                    assert!(!slot.is_null(0));
1162                    assert_eq!(slot.len(), 1);
1163                    let value = slot.value(0);
1164                    assert_eq!(1_i32, value);
1165                }
1166                1 => assert!(slot.is_null(0)),
1167                2 => {
1168                    let slot = slot.as_primitive::<Float64Type>();
1169                    assert!(!slot.is_null(0));
1170                    assert_eq!(slot.len(), 1);
1171                    let value = slot.value(0);
1172                    assert_eq!(value, 3_f64);
1173                }
1174                3 => {
1175                    let slot = slot.as_primitive::<Int32Type>();
1176                    assert!(!slot.is_null(0));
1177                    assert_eq!(slot.len(), 1);
1178                    let value = slot.value(0);
1179                    assert_eq!(4_i32, value);
1180                }
1181                _ => unreachable!(),
1182            }
1183        }
1184
1185        Ok(())
1186    }
1187
1188    #[test]
1189    fn test_union_dense_array() -> Result<()> {
1190        let mut builder = UnionBuilder::new_dense();
1191        builder.append::<Int32Type>("a", 1).unwrap();
1192        builder.append_null::<Int32Type>("a").unwrap();
1193        builder.append::<Float64Type>("c", 3.0).unwrap();
1194        builder.append::<Int32Type>("a", 4).unwrap();
1195        let union = builder.build().unwrap();
1196
1197        // export it
1198        let (array, schema) = to_ffi(&union.to_data())?;
1199
1200        // (simulate consumer) import it
1201        let data = unsafe { from_ffi(array, &schema) }?;
1202        let array = UnionArray::from(data);
1203
1204        let expected_type_ids = vec![0_i8, 0, 1, 0];
1205
1206        // Check type ids
1207        assert_eq!(*array.type_ids(), expected_type_ids);
1208        for (i, id) in expected_type_ids.iter().enumerate() {
1209            assert_eq!(id, &array.type_id(i));
1210        }
1211
1212        assert!(array.offsets().is_some());
1213
1214        for i in 0..array.len() {
1215            let slot = array.value(i);
1216            match i {
1217                0 => {
1218                    let slot = slot.as_primitive::<Int32Type>();
1219                    assert!(!slot.is_null(0));
1220                    assert_eq!(slot.len(), 1);
1221                    let value = slot.value(0);
1222                    assert_eq!(1_i32, value);
1223                }
1224                1 => assert!(slot.is_null(0)),
1225                2 => {
1226                    let slot = slot.as_primitive::<Float64Type>();
1227                    assert!(!slot.is_null(0));
1228                    assert_eq!(slot.len(), 1);
1229                    let value = slot.value(0);
1230                    assert_eq!(value, 3_f64);
1231                }
1232                3 => {
1233                    let slot = slot.as_primitive::<Int32Type>();
1234                    assert!(!slot.is_null(0));
1235                    assert_eq!(slot.len(), 1);
1236                    let value = slot.value(0);
1237                    assert_eq!(4_i32, value);
1238                }
1239                _ => unreachable!(),
1240            }
1241        }
1242
1243        Ok(())
1244    }
1245
1246    #[test]
1247    fn test_run_array() -> Result<()> {
1248        let value_data =
1249            PrimitiveArray::<Int8Type>::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1250
1251        // Construct a run_ends array:
1252        let run_ends_values = [4_i32, 6, 7, 9, 13, 18, 20, 22];
1253        let run_ends_data =
1254            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1255
1256        // Construct a run ends encoded array from the above two
1257        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1258
1259        // export it
1260        let (array, schema) = to_ffi(&ree_array.to_data())?;
1261
1262        // (simulate consumer) import it
1263        let data = unsafe { from_ffi(array, &schema) }?;
1264        let array = make_array(data);
1265
1266        // perform some operation
1267        let array = array
1268            .as_any()
1269            .downcast_ref::<RunArray<Int32Type>>()
1270            .unwrap();
1271        assert_eq!(array.data_type(), ree_array.data_type());
1272        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1273        assert_eq!(array.values(), ree_array.values());
1274
1275        Ok(())
1276    }
1277
1278    #[test]
1279    fn test_nullable_run_array() -> Result<()> {
1280        let nulls = NullBuffer::from(vec![true, false, true, true, false]);
1281        let value_data =
1282            PrimitiveArray::<Int8Type>::new(vec![1_i8, 2, 3, 4, 5].into(), Some(nulls));
1283
1284        // Construct a run_ends array:
1285        let run_ends_values = [5_i32, 6, 7, 8, 10];
1286        let run_ends_data =
1287            PrimitiveArray::<Int32Type>::from_iter_values(run_ends_values.iter().copied());
1288
1289        // Construct a run ends encoded array from the above two
1290        let ree_array = RunArray::<Int32Type>::try_new(&run_ends_data, &value_data).unwrap();
1291
1292        // export it
1293        let (array, schema) = to_ffi(&ree_array.to_data())?;
1294
1295        // (simulate consumer) import it
1296        let data = unsafe { from_ffi(array, &schema) }?;
1297        let array = make_array(data);
1298
1299        // perform some operation
1300        let array = array
1301            .as_any()
1302            .downcast_ref::<RunArray<Int32Type>>()
1303            .unwrap();
1304        assert_eq!(array.data_type(), ree_array.data_type());
1305        assert_eq!(array.run_ends().values(), ree_array.run_ends().values());
1306        assert_eq!(array.values(), ree_array.values());
1307
1308        Ok(())
1309    }
1310}
1311
1312#[cfg(test)]
1313mod tests_from_ffi {
1314    #[cfg(not(feature = "force_validate"))]
1315    use std::ptr::NonNull;
1316    use std::sync::Arc;
1317
1318    #[cfg(not(feature = "force_validate"))]
1319    use arrow_buffer::{ScalarBuffer, bit_util, buffer::Buffer};
1320    #[cfg(feature = "force_validate")]
1321    use arrow_buffer::{bit_util, buffer::Buffer};
1322
1323    use arrow_data::ArrayData;
1324    use arrow_data::transform::MutableArrayData;
1325    use arrow_schema::{DataType, Field};
1326
1327    use super::Result;
1328    use crate::builder::GenericByteViewBuilder;
1329    use crate::types::{BinaryViewType, ByteViewType, Int32Type, StringViewType};
1330    use crate::{
1331        ArrayRef, GenericByteViewArray, ListArray,
1332        array::{
1333            Array, BooleanArray, DictionaryArray, FixedSizeBinaryArray, FixedSizeListArray,
1334            Int32Array, Int64Array, StringArray, StructArray, UInt32Array, UInt64Array,
1335        },
1336        ffi::{FFI_ArrowArray, FFI_ArrowSchema, from_ffi},
1337        make_array,
1338    };
1339
1340    fn test_round_trip(expected: &ArrayData) -> Result<()> {
1341        // here we export the array
1342        let array = FFI_ArrowArray::new(expected);
1343        let schema = FFI_ArrowSchema::try_from(expected.data_type())?;
1344
1345        // simulate an external consumer by being the consumer
1346        let result = &unsafe { from_ffi(array, &schema) }?;
1347
1348        assert_eq!(result, expected);
1349        Ok(())
1350    }
1351
1352    #[test]
1353    fn test_u32() -> Result<()> {
1354        let array = UInt32Array::from(vec![Some(2), None, Some(1), None]);
1355        let data = array.into_data();
1356        test_round_trip(&data)
1357    }
1358
1359    #[test]
1360    fn test_u64() -> Result<()> {
1361        let array = UInt64Array::from(vec![Some(2), None, Some(1), None]);
1362        let data = array.into_data();
1363        test_round_trip(&data)
1364    }
1365
1366    #[test]
1367    fn test_i64() -> Result<()> {
1368        let array = Int64Array::from(vec![Some(2), None, Some(1), None]);
1369        let data = array.into_data();
1370        test_round_trip(&data)
1371    }
1372
1373    #[test]
1374    fn test_struct() -> Result<()> {
1375        let inner = StructArray::from(vec![
1376            (
1377                Arc::new(Field::new("a1", DataType::Boolean, false)),
1378                Arc::new(BooleanArray::from(vec![true, true, false, false])) as Arc<dyn Array>,
1379            ),
1380            (
1381                Arc::new(Field::new("a2", DataType::UInt32, false)),
1382                Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
1383            ),
1384        ]);
1385
1386        let array = StructArray::from(vec![
1387            (
1388                Arc::new(Field::new("a", inner.data_type().clone(), false)),
1389                Arc::new(inner) as Arc<dyn Array>,
1390            ),
1391            (
1392                Arc::new(Field::new("b", DataType::Boolean, false)),
1393                Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc<dyn Array>,
1394            ),
1395            (
1396                Arc::new(Field::new("c", DataType::UInt32, false)),
1397                Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
1398            ),
1399        ]);
1400        let data = array.into_data();
1401        test_round_trip(&data)
1402    }
1403
1404    #[test]
1405    fn test_dictionary() -> Result<()> {
1406        let values = StringArray::from(vec![Some("foo"), Some("bar"), None]);
1407        let keys = Int32Array::from(vec![
1408            Some(0),
1409            Some(1),
1410            None,
1411            Some(1),
1412            Some(1),
1413            None,
1414            Some(1),
1415            Some(2),
1416            Some(1),
1417            None,
1418        ]);
1419        let array = DictionaryArray::new(keys, Arc::new(values));
1420
1421        let data = array.into_data();
1422        test_round_trip(&data)
1423    }
1424
1425    #[test]
1426    fn test_fixed_size_binary() -> Result<()> {
1427        let values = vec![vec![10, 10, 10], vec![20, 20, 20], vec![30, 30, 30]];
1428        let array = FixedSizeBinaryArray::try_from_iter(values.into_iter())?;
1429
1430        let data = array.into_data();
1431        test_round_trip(&data)
1432    }
1433
1434    #[test]
1435    fn test_fixed_size_binary_with_nulls() -> Result<()> {
1436        let values = vec![
1437            None,
1438            Some(vec![10, 10, 10]),
1439            None,
1440            Some(vec![20, 20, 20]),
1441            Some(vec![30, 30, 30]),
1442            None,
1443        ];
1444        let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size(values.into_iter(), 3)?;
1445
1446        let data = array.into_data();
1447        test_round_trip(&data)
1448    }
1449
1450    #[test]
1451    fn test_fixed_size_list() -> Result<()> {
1452        let v: Vec<i64> = (0..9).collect();
1453        let value_data = ArrayData::builder(DataType::Int64)
1454            .len(9)
1455            .add_buffer(Buffer::from_slice_ref(v))
1456            .build()?;
1457        let list_data_type =
1458            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int64, false)), 3);
1459        let list_data = ArrayData::builder(list_data_type)
1460            .len(3)
1461            .add_child_data(value_data)
1462            .build()?;
1463        let array = FixedSizeListArray::from(list_data);
1464
1465        let data = array.into_data();
1466        test_round_trip(&data)
1467    }
1468
1469    #[test]
1470    fn test_fixed_size_list_with_nulls() -> Result<()> {
1471        // 0100 0110
1472        let mut validity_bits: [u8; 1] = [0; 1];
1473        bit_util::set_bit(&mut validity_bits, 1);
1474        bit_util::set_bit(&mut validity_bits, 2);
1475        bit_util::set_bit(&mut validity_bits, 6);
1476
1477        let v: Vec<i16> = (0..16).collect();
1478        let value_data = ArrayData::builder(DataType::Int16)
1479            .len(16)
1480            .add_buffer(Buffer::from_slice_ref(v))
1481            .build()?;
1482        let list_data_type =
1483            DataType::FixedSizeList(Arc::new(Field::new("f", DataType::Int16, false)), 2);
1484        let list_data = ArrayData::builder(list_data_type)
1485            .len(8)
1486            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1487            .add_child_data(value_data)
1488            .build()?;
1489        let array = FixedSizeListArray::from(list_data);
1490
1491        let data = array.into_data();
1492        test_round_trip(&data)
1493    }
1494
1495    #[test]
1496    fn test_fixed_size_list_nested() -> Result<()> {
1497        let v: Vec<i32> = (0..16).collect();
1498        let value_data = ArrayData::builder(DataType::Int32)
1499            .len(16)
1500            .add_buffer(Buffer::from_slice_ref(v))
1501            .build()?;
1502
1503        let offsets: Vec<i32> = vec![0, 2, 4, 6, 8, 10, 12, 14, 16];
1504        let value_offsets = Buffer::from_slice_ref(offsets);
1505        let inner_list_data_type =
1506            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
1507        let inner_list_data = ArrayData::builder(inner_list_data_type.clone())
1508            .len(8)
1509            .add_buffer(value_offsets)
1510            .add_child_data(value_data)
1511            .build()?;
1512
1513        // 0000 0100
1514        let mut validity_bits: [u8; 1] = [0; 1];
1515        bit_util::set_bit(&mut validity_bits, 2);
1516
1517        let list_data_type =
1518            DataType::FixedSizeList(Arc::new(Field::new("f", inner_list_data_type, false)), 2);
1519        let list_data = ArrayData::builder(list_data_type)
1520            .len(4)
1521            .null_bit_buffer(Some(Buffer::from(validity_bits)))
1522            .add_child_data(inner_list_data)
1523            .build()?;
1524
1525        let array = FixedSizeListArray::from(list_data);
1526
1527        let data = array.into_data();
1528        test_round_trip(&data)
1529    }
1530
1531    #[test]
1532    #[cfg(not(feature = "force_validate"))]
1533    fn test_empty_string_with_non_zero_offset() -> Result<()> {
1534        use super::ImportedArrowArray;
1535        use arrow_buffer::{MutableBuffer, OffsetBuffer};
1536
1537        // Simulate an empty string array with a non-zero offset from a producer
1538        let data: Buffer = MutableBuffer::new(0).into();
1539        let offsets = OffsetBuffer::new(vec![123].into());
1540        let string_array =
1541            unsafe { StringArray::new_unchecked(offsets.clone(), data.clone(), None) };
1542
1543        let data = string_array.into_data();
1544
1545        let array = FFI_ArrowArray::new(&data);
1546        let schema = FFI_ArrowSchema::try_from(data.data_type())?;
1547
1548        let dt = DataType::try_from(&schema)?;
1549        let array = Arc::new(array);
1550        let imported_array = ImportedArrowArray {
1551            array: &array,
1552            data_type: dt,
1553            owner: &array,
1554        };
1555
1556        let offset_buf_len = imported_array.buffer_len(1, &[], &imported_array.data_type)?;
1557        let data_buf_len = imported_array.buffer_len(2, &[], &imported_array.data_type)?;
1558
1559        assert_eq!(offset_buf_len, 4);
1560        assert_eq!(data_buf_len, 0);
1561
1562        test_round_trip(&imported_array.consume()?)
1563    }
1564
1565    fn roundtrip_string_array(array: StringArray) -> StringArray {
1566        let data = array.into_data();
1567
1568        let array = FFI_ArrowArray::new(&data);
1569        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1570
1571        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1572        StringArray::from(array)
1573    }
1574
1575    fn roundtrip_byte_view_array<T: ByteViewType>(
1576        array: GenericByteViewArray<T>,
1577    ) -> GenericByteViewArray<T> {
1578        let data = array.into_data();
1579
1580        let array = FFI_ArrowArray::new(&data);
1581        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1582
1583        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1584        GenericByteViewArray::<T>::from(array)
1585    }
1586
1587    fn extend_array(array: &dyn Array) -> ArrayRef {
1588        let len = array.len();
1589        let data = array.to_data();
1590
1591        let mut mutable = MutableArrayData::new(vec![&data], false, len);
1592        mutable.extend(0, 0, len);
1593        make_array(mutable.freeze())
1594    }
1595
1596    #[test]
1597    fn test_extend_imported_string_slice() {
1598        let mut strings = vec![];
1599
1600        for i in 0..1000 {
1601            strings.push(format!("string: {i}"));
1602        }
1603
1604        let string_array = StringArray::from(strings);
1605
1606        let imported = roundtrip_string_array(string_array.clone());
1607        assert_eq!(imported.len(), 1000);
1608        assert_eq!(imported.value(0), "string: 0");
1609        assert_eq!(imported.value(499), "string: 499");
1610
1611        let copied = extend_array(&imported);
1612        assert_eq!(
1613            copied.as_any().downcast_ref::<StringArray>().unwrap(),
1614            &imported
1615        );
1616
1617        let slice = string_array.slice(500, 500);
1618
1619        let imported = roundtrip_string_array(slice);
1620        assert_eq!(imported.len(), 500);
1621        assert_eq!(imported.value(0), "string: 500");
1622        assert_eq!(imported.value(499), "string: 999");
1623
1624        let copied = extend_array(&imported);
1625        assert_eq!(
1626            copied.as_any().downcast_ref::<StringArray>().unwrap(),
1627            &imported
1628        );
1629    }
1630
1631    fn roundtrip_list_array(array: ListArray) -> ListArray {
1632        let data = array.into_data();
1633
1634        let array = FFI_ArrowArray::new(&data);
1635        let schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();
1636
1637        let array = unsafe { from_ffi(array, &schema) }.unwrap();
1638        ListArray::from(array)
1639    }
1640
1641    #[test]
1642    fn test_extend_imported_list_slice() {
1643        let mut data = vec![];
1644
1645        for i in 0..1000 {
1646            let mut list = vec![];
1647            for j in 0..100 {
1648                list.push(Some(i * 1000 + j));
1649            }
1650            data.push(Some(list));
1651        }
1652
1653        let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data);
1654
1655        let slice = list_array.slice(500, 500);
1656        let imported = roundtrip_list_array(slice.clone());
1657        assert_eq!(imported.len(), 500);
1658        assert_eq!(&slice, &imported);
1659
1660        let copied = extend_array(&imported);
1661        assert_eq!(
1662            copied.as_any().downcast_ref::<ListArray>().unwrap(),
1663            &imported
1664        );
1665    }
1666
1667    /// Helper trait to allow us to use easily strings as either BinaryViewType::Native or
1668    /// StringViewType::Native scalars.
1669    trait NativeFromStr {
1670        fn from_str(value: &str) -> &Self;
1671    }
1672
1673    impl NativeFromStr for str {
1674        fn from_str(value: &str) -> &Self {
1675            value
1676        }
1677    }
1678
1679    impl NativeFromStr for [u8] {
1680        fn from_str(value: &str) -> &Self {
1681            value.as_bytes()
1682        }
1683    }
1684
1685    #[test]
1686    #[cfg(not(feature = "force_validate"))]
1687    fn test_utf8_view_ffi_from_dangling_pointer() {
1688        let empty = GenericByteViewBuilder::<StringViewType>::new().finish();
1689        let buffers = empty.data_buffers().to_vec();
1690        let nulls = empty.nulls().cloned();
1691
1692        // Create a dangling pointer to a view buffer with zero length.
1693        let alloc = Arc::new(1);
1694        let buffer = unsafe { Buffer::from_custom_allocation(NonNull::<u8>::dangling(), 0, alloc) };
1695        let views = unsafe { ScalarBuffer::new_unchecked(buffer) };
1696
1697        let str_view: GenericByteViewArray<StringViewType> =
1698            unsafe { GenericByteViewArray::new_unchecked(views, buffers, nulls) };
1699        let imported = roundtrip_byte_view_array(str_view);
1700        assert_eq!(imported.len(), 0);
1701        assert_eq!(&imported, &empty);
1702    }
1703
1704    #[test]
1705    fn test_round_trip_byte_view() {
1706        fn test_case<T>()
1707        where
1708            T: ByteViewType,
1709            T::Native: NativeFromStr,
1710        {
1711            macro_rules! run_test_case {
1712                ($array:expr) => {{
1713                    // round-trip through C  Data Interface
1714                    let len = $array.len();
1715                    let imported = roundtrip_byte_view_array($array);
1716                    assert_eq!(imported.len(), len);
1717
1718                    let copied = extend_array(&imported);
1719                    assert_eq!(
1720                        copied
1721                            .as_any()
1722                            .downcast_ref::<GenericByteViewArray<T>>()
1723                            .unwrap(),
1724                        &imported
1725                    );
1726                }};
1727            }
1728
1729            // Empty test case.
1730            let empty = GenericByteViewBuilder::<T>::new().finish();
1731            run_test_case!(empty);
1732
1733            // All inlined strings test case.
1734            let mut all_inlined = GenericByteViewBuilder::<T>::new();
1735            all_inlined.append_value(T::Native::from_str("inlined1"));
1736            all_inlined.append_value(T::Native::from_str("inlined2"));
1737            all_inlined.append_value(T::Native::from_str("inlined3"));
1738            let all_inlined = all_inlined.finish();
1739            assert_eq!(all_inlined.data_buffers().len(), 0);
1740            run_test_case!(all_inlined);
1741
1742            // some inlined + non-inlined, 1 variadic buffer.
1743            let mixed_one_variadic = {
1744                let mut builder = GenericByteViewBuilder::<T>::new();
1745                builder.append_value(T::Native::from_str("inlined"));
1746                let block_id =
1747                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1748                builder.try_append_view(block_id, 0, 25).unwrap();
1749                builder.finish()
1750            };
1751            assert_eq!(mixed_one_variadic.data_buffers().len(), 1);
1752            run_test_case!(mixed_one_variadic);
1753
1754            // inlined + non-inlined, 2 variadic buffers.
1755            let mixed_two_variadic = {
1756                let mut builder = GenericByteViewBuilder::<T>::new();
1757                builder.append_value(T::Native::from_str("inlined"));
1758                let block_id =
1759                    builder.append_block(Buffer::from("non-inlined-string-buffer".as_bytes()));
1760                builder.try_append_view(block_id, 0, 25).unwrap();
1761
1762                let block_id = builder
1763                    .append_block(Buffer::from("another-non-inlined-string-buffer".as_bytes()));
1764                builder.try_append_view(block_id, 0, 33).unwrap();
1765                builder.finish()
1766            };
1767            assert_eq!(mixed_two_variadic.data_buffers().len(), 2);
1768            run_test_case!(mixed_two_variadic);
1769        }
1770
1771        test_case::<StringViewType>();
1772        test_case::<BinaryViewType>();
1773    }
1774}