Skip to main content

arrow_array/array/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! The concrete array definitions
19
20mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81/// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html)
82///
83/// # Safety
84///
85/// Implementations of this trait must ensure that all methods implementations comply with
86/// the Arrow specification. No safety guards are placed and failing to comply with it can
87/// translate into panics or undefined behavior. For example, a value computed based on `len`
88/// may be used as a direct index into memory regions without checks.
89///
90/// Use it at your own risk knowing that this trait might be sealed in the future.
91pub unsafe trait Array: std::fmt::Debug + Send + Sync {
92    /// Returns the array as [`Any`] so that it can be
93    /// downcasted to a specific implementation.
94    ///
95    /// # Example:
96    ///
97    /// ```
98    /// # use std::sync::Arc;
99    /// # use arrow_array::{Int32Array, RecordBatch};
100    /// # use arrow_schema::{Schema, Field, DataType, ArrowError};
101    ///
102    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
103    /// let batch = RecordBatch::try_new(
104    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
105    ///     vec![Arc::new(id)]
106    /// ).unwrap();
107    ///
108    /// let int32array = batch
109    ///     .column(0)
110    ///     .as_any()
111    ///     .downcast_ref::<Int32Array>()
112    ///     .expect("Failed to downcast");
113    /// ```
114    fn as_any(&self) -> &dyn Any;
115
116    /// Returns the underlying data of this array
117    fn to_data(&self) -> ArrayData;
118
119    /// Returns the underlying data of this array
120    ///
121    /// Unlike [`Array::to_data`] this consumes self, allowing it avoid unnecessary clones
122    fn into_data(self) -> ArrayData;
123
124    /// Returns a reference to the [`DataType`] of this array.
125    ///
126    /// # Example:
127    ///
128    /// ```
129    /// use arrow_schema::DataType;
130    /// use arrow_array::{Array, Int32Array};
131    ///
132    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
133    ///
134    /// assert_eq!(*array.data_type(), DataType::Int32);
135    /// ```
136    fn data_type(&self) -> &DataType;
137
138    /// Returns a zero-copy slice of this array with the indicated offset and length.
139    ///
140    /// # Example:
141    ///
142    /// ```
143    /// use arrow_array::{Array, Int32Array};
144    ///
145    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
146    /// // Make slice over the values [2, 3, 4]
147    /// let array_slice = array.slice(1, 3);
148    ///
149    /// assert_eq!(&array_slice, &Int32Array::from(vec![2, 3, 4]));
150    /// ```
151    fn slice(&self, offset: usize, length: usize) -> ArrayRef;
152
153    /// Returns the length (i.e., number of elements) of this array.
154    ///
155    /// # Example:
156    ///
157    /// ```
158    /// use arrow_array::{Array, Int32Array};
159    ///
160    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
161    ///
162    /// assert_eq!(array.len(), 5);
163    /// ```
164    fn len(&self) -> usize;
165
166    /// Returns whether this array is empty.
167    ///
168    /// # Example:
169    ///
170    /// ```
171    /// use arrow_array::{Array, Int32Array};
172    ///
173    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
174    ///
175    /// assert_eq!(array.is_empty(), false);
176    /// ```
177    fn is_empty(&self) -> bool;
178
179    /// Shrinks the capacity of any exclusively owned buffer as much as possible
180    ///
181    /// Shared or externally allocated buffers will be ignored, and
182    /// any buffer offsets will be preserved.
183    fn shrink_to_fit(&mut self) {}
184
185    /// Returns the offset into the underlying data used by this array(-slice).
186    /// Note that the underlying data can be shared by many arrays.
187    /// This defaults to `0`.
188    ///
189    /// # Example:
190    ///
191    /// ```
192    /// use arrow_array::{Array, BooleanArray};
193    ///
194    /// let array = BooleanArray::from(vec![false, false, true, true]);
195    /// let array_slice = array.slice(1, 3);
196    ///
197    /// assert_eq!(array.offset(), 0);
198    /// assert_eq!(array_slice.offset(), 1);
199    /// ```
200    fn offset(&self) -> usize;
201
202    /// Returns the null buffer of this array if any.
203    ///
204    /// The null buffer contains the "physical" nulls of an array, that is how
205    /// the nulls are represented in the underlying arrow format.
206    ///
207    /// The physical representation is efficient, but is sometimes non intuitive
208    /// for certain array types such as those with nullable child arrays like
209    /// [`DictionaryArray::values`], [`RunArray::values`] or [`UnionArray`], or without a
210    /// null buffer, such as [`NullArray`].
211    ///
212    /// To determine if each element of such an array is "logically" null,
213    /// use the slower [`Array::logical_nulls`] to obtain a computed mask.
214    fn nulls(&self) -> Option<&NullBuffer>;
215
216    /// Returns a potentially computed [`NullBuffer`] that represents the logical
217    /// null values of this array, if any.
218    ///
219    /// Logical nulls represent the values that are null in the array,
220    /// regardless of the underlying physical arrow representation.
221    ///
222    /// For most array types, this is equivalent to the "physical" nulls
223    /// returned by [`Array::nulls`]. It is different for the following cases, because which
224    /// elements are null is not encoded in a single null buffer:
225    ///
226    /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls
227    /// * [`RunArray`] where [`RunArray::values`] contains nulls
228    /// * [`NullArray`] where all indices are nulls
229    /// * [`UnionArray`] where the selected values contains nulls
230    ///
231    /// In these cases a logical [`NullBuffer`] will be computed, encoding the
232    /// logical nullability of these arrays, beyond what is encoded in
233    /// [`Array::nulls`]
234    fn logical_nulls(&self) -> Option<NullBuffer> {
235        self.nulls().cloned()
236    }
237
238    /// Returns whether the element at `index` is null according to [`Array::nulls`]
239    ///
240    /// Note: For performance reasons, this method returns nullability solely as determined by the
241    /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always
242    /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`], [`RunArray`] and [`UnionArray`] may
243    /// encode nullability in their children. See [`Self::logical_nulls`] for more information.
244    ///
245    /// # Example:
246    ///
247    /// ```
248    /// use arrow_array::{Array, Int32Array, NullArray};
249    ///
250    /// let array = Int32Array::from(vec![Some(1), None]);
251    /// assert_eq!(array.is_null(0), false);
252    /// assert_eq!(array.is_null(1), true);
253    ///
254    /// // NullArrays do not have a null buffer, and therefore always
255    /// // return false for is_null.
256    /// let array = NullArray::new(1);
257    /// assert_eq!(array.is_null(0), false);
258    /// ```
259    fn is_null(&self, index: usize) -> bool {
260        self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
261    }
262
263    /// Returns whether the element at `index` is *not* null, the
264    /// opposite of [`Self::is_null`].
265    ///
266    /// # Example:
267    ///
268    /// ```
269    /// use arrow_array::{Array, Int32Array};
270    ///
271    /// let array = Int32Array::from(vec![Some(1), None]);
272    ///
273    /// assert_eq!(array.is_valid(0), true);
274    /// assert_eq!(array.is_valid(1), false);
275    /// ```
276    fn is_valid(&self, index: usize) -> bool {
277        !self.is_null(index)
278    }
279
280    /// Returns the total number of physical null values in this array.
281    ///
282    /// Note: this method returns the physical null count, i.e. that encoded in [`Array::nulls`],
283    /// see [`Array::logical_nulls`] for logical nullability
284    ///
285    /// # Example:
286    ///
287    /// ```
288    /// use arrow_array::{Array, Int32Array};
289    ///
290    /// // Construct an array with values [1, NULL, NULL]
291    /// let array = Int32Array::from(vec![Some(1), None, None]);
292    ///
293    /// assert_eq!(array.null_count(), 2);
294    /// ```
295    fn null_count(&self) -> usize {
296        self.nulls().map(|n| n.null_count()).unwrap_or_default()
297    }
298
299    /// Returns the total number of logical null values in this array.
300    ///
301    /// Note: this method returns the logical null count, i.e. that encoded in
302    /// [`Array::logical_nulls`]. In general this is equivalent to [`Array::null_count`] but may differ in the
303    /// presence of logical nullability, see [`Array::nulls`] and [`Array::logical_nulls`].
304    ///
305    /// # Example:
306    ///
307    /// ```
308    /// use arrow_array::{Array, Int32Array};
309    ///
310    /// // Construct an array with values [1, NULL, NULL]
311    /// let array = Int32Array::from(vec![Some(1), None, None]);
312    ///
313    /// assert_eq!(array.logical_null_count(), 2);
314    /// ```
315    fn logical_null_count(&self) -> usize {
316        self.logical_nulls()
317            .map(|n| n.null_count())
318            .unwrap_or_default()
319    }
320
321    /// Returns `false` if the array is guaranteed to not contain any logical nulls
322    ///
323    /// This is generally equivalent to `Array::logical_null_count() != 0` unless determining
324    /// the logical nulls is expensive, in which case this method can return true even for an
325    /// array without nulls.
326    ///
327    /// This is also generally equivalent to `Array::null_count() != 0` but may differ in the
328    /// presence of logical nullability, see [`Array::logical_null_count`] and [`Array::null_count`].
329    ///
330    /// Implementations will return `true` unless they can cheaply prove no logical nulls
331    /// are present. For example a [`DictionaryArray`] with nullable values will still return true,
332    /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
333    /// and therefore would not appear in [`Array::logical_nulls`].
334    fn is_nullable(&self) -> bool {
335        self.logical_null_count() != 0
336    }
337
338    /// Returns the total number of bytes of memory pointed to by this array.
339    /// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
340    /// Note that this does not always correspond to the exact memory usage of an array,
341    /// since multiple arrays can share the same buffers or slices thereof.
342    fn get_buffer_memory_size(&self) -> usize;
343
344    /// Returns the total number of bytes of memory occupied physically by this array.
345    /// This value will always be greater than returned by `get_buffer_memory_size()` and
346    /// includes the overhead of the data structures that contain the pointers to the various buffers.
347    fn get_array_memory_size(&self) -> usize;
348}
349
/// A reference-counted reference to a generic [`Array`].
///
/// This is an alias for `Arc<dyn Array>`: a shared, dynamically typed handle
/// to any type implementing [`Array`].
pub type ArrayRef = Arc<dyn Array>;
352
353/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
354unsafe impl Array for ArrayRef {
355    fn as_any(&self) -> &dyn Any {
356        self.as_ref().as_any()
357    }
358
359    fn to_data(&self) -> ArrayData {
360        self.as_ref().to_data()
361    }
362
363    fn into_data(self) -> ArrayData {
364        self.to_data()
365    }
366
367    fn data_type(&self) -> &DataType {
368        self.as_ref().data_type()
369    }
370
371    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
372        self.as_ref().slice(offset, length)
373    }
374
375    fn len(&self) -> usize {
376        self.as_ref().len()
377    }
378
379    fn is_empty(&self) -> bool {
380        self.as_ref().is_empty()
381    }
382
383    /// For shared buffers, this is a no-op.
384    fn shrink_to_fit(&mut self) {
385        if let Some(slf) = Arc::get_mut(self) {
386            slf.shrink_to_fit();
387        } else {
388            // We ignore shared buffers.
389        }
390    }
391
392    fn offset(&self) -> usize {
393        self.as_ref().offset()
394    }
395
396    fn nulls(&self) -> Option<&NullBuffer> {
397        self.as_ref().nulls()
398    }
399
400    fn logical_nulls(&self) -> Option<NullBuffer> {
401        self.as_ref().logical_nulls()
402    }
403
404    fn is_null(&self, index: usize) -> bool {
405        self.as_ref().is_null(index)
406    }
407
408    fn is_valid(&self, index: usize) -> bool {
409        self.as_ref().is_valid(index)
410    }
411
412    fn null_count(&self) -> usize {
413        self.as_ref().null_count()
414    }
415
416    fn logical_null_count(&self) -> usize {
417        self.as_ref().logical_null_count()
418    }
419
420    fn is_nullable(&self) -> bool {
421        self.as_ref().is_nullable()
422    }
423
424    fn get_buffer_memory_size(&self) -> usize {
425        self.as_ref().get_buffer_memory_size()
426    }
427
428    fn get_array_memory_size(&self) -> usize {
429        self.as_ref().get_array_memory_size()
430    }
431}
432
433unsafe impl<T: Array> Array for &T {
434    fn as_any(&self) -> &dyn Any {
435        T::as_any(self)
436    }
437
438    fn to_data(&self) -> ArrayData {
439        T::to_data(self)
440    }
441
442    fn into_data(self) -> ArrayData {
443        self.to_data()
444    }
445
446    fn data_type(&self) -> &DataType {
447        T::data_type(self)
448    }
449
450    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
451        T::slice(self, offset, length)
452    }
453
454    fn len(&self) -> usize {
455        T::len(self)
456    }
457
458    fn is_empty(&self) -> bool {
459        T::is_empty(self)
460    }
461
462    fn offset(&self) -> usize {
463        T::offset(self)
464    }
465
466    fn nulls(&self) -> Option<&NullBuffer> {
467        T::nulls(self)
468    }
469
470    fn logical_nulls(&self) -> Option<NullBuffer> {
471        T::logical_nulls(self)
472    }
473
474    fn is_null(&self, index: usize) -> bool {
475        T::is_null(self, index)
476    }
477
478    fn is_valid(&self, index: usize) -> bool {
479        T::is_valid(self, index)
480    }
481
482    fn null_count(&self) -> usize {
483        T::null_count(self)
484    }
485
486    fn logical_null_count(&self) -> usize {
487        T::logical_null_count(self)
488    }
489
490    fn is_nullable(&self) -> bool {
491        T::is_nullable(self)
492    }
493
494    fn get_buffer_memory_size(&self) -> usize {
495        T::get_buffer_memory_size(self)
496    }
497
498    fn get_array_memory_size(&self) -> usize {
499        T::get_array_memory_size(self)
500    }
501}
502
/// A generic trait for accessing the values of an [`Array`]
///
/// This trait helps write specialized implementations of algorithms for
/// different array types. Specialized implementations allow the compiler
/// to optimize the code for the specific array type, which can lead to
/// significant performance improvements.
///
/// # Example
/// For example, to write three different implementations of a string length function
/// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
/// # use arrow_buffer::ArrowNativeType;
/// # use arrow_array::cast::AsArray;
/// # use arrow_array::iterator::ArrayIter;
/// # use arrow_array::types::{Int32Type, Int64Type};
/// # use arrow_schema::{ArrowError, DataType};
/// /// This function takes a dynamically typed `ArrayRef` and
/// /// calls one of three specialized implementations
/// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
///     match arg.data_type() {
///         DataType::Utf8 => {
///             // downcast the ArrayRef to a StringArray and call the specialized implementation
///             let string_array = arg.as_string::<i32>();
///             character_length_general::<Int32Type, _>(string_array)
///         }
///         DataType::LargeUtf8 => {
///             character_length_general::<Int64Type, _>(arg.as_string::<i64>())
///         }
///         DataType::Utf8View => {
///             character_length_general::<Int32Type, _>(arg.as_string_view())
///         }
///         _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
///     }
/// }
///
/// /// A generic implementation of the character_length function
/// /// This function uses the `ArrayAccessor` trait to access the values of the array
/// /// so the compiler can generate specialized implementations for different array types
/// ///
/// /// Returns a new array with the length of each string in the input array
/// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
/// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
/// ///
/// /// This is generic on the type of the primitive array (different string arrays have
/// /// different lengths) and the type of the array accessor (different string arrays
/// /// have different ways to access the values)
/// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
///     array: V,
/// ) -> Result<ArrayRef, ArrowError>
/// where
///     T::Native: OffsetSizeTrait,
/// {
///     let iter = ArrayIter::new(array);
///     // Create a Int32Array / Int64Array with the length of each string
///     let result = iter
///         .map(|string| {
///             string.map(|string: &str| {
///                 T::Native::from_usize(string.chars().count())
///                     .expect("should not fail as string.chars will always return integer")
///             })
///         })
///         .collect::<PrimitiveArray<T>>();
///
///     // Return the result as a new ArrayRef (dynamically typed)
///     Ok(Arc::new(result) as ArrayRef)
/// }
/// ```
///
/// # Validity
///
/// An [`ArrayAccessor`] must always return a well-defined value for an index
/// that is within the bounds `0..Array::len`, including for null indexes where
/// [`Array::is_null`] is true.
///
/// The value at null indexes is unspecified, and implementations must not rely
/// on a specific value such as [`Default::default`] being returned; however, it
/// must not be undefined.
pub trait ArrayAccessor: Array {
    /// The Arrow type of the element being accessed.
    type Item: Send + Sync;

    /// Returns the element at index `index`
    /// # Panics
    /// Panics if `index` is outside the bounds of the array
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at index `index`
    /// # Safety
    /// Caller is responsible for ensuring that `index` is within the bounds of the array
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
597
/// A trait for Arrow String Arrays, currently three types are supported:
/// - `StringArray`
/// - `LargeStringArray`
/// - `StringViewArray`
///
/// This trait helps to abstract over the different types of string arrays
/// so that we don't need to duplicate the implementation for each type.
///
/// Implementors are [`ArrayAccessor`]s whose items are `&'a str`.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Returns true if all data within this string array is ASCII
    fn is_ascii(&self) -> bool;

    /// Constructs a new iterator over this array
    fn iter(&self) -> ArrayIter<Self>;
}
612
613impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
614    fn is_ascii(&self) -> bool {
615        GenericStringArray::<O>::is_ascii(self)
616    }
617
618    fn iter(&self) -> ArrayIter<Self> {
619        GenericStringArray::<O>::iter(self)
620    }
621}
622impl<'a> StringArrayType<'a> for &'a StringViewArray {
623    fn is_ascii(&self) -> bool {
624        StringViewArray::is_ascii(self)
625    }
626
627    fn iter(&self) -> ArrayIter<Self> {
628        StringViewArray::iter(self)
629    }
630}
631
/// A trait for Arrow Binary Arrays, currently four types are supported:
/// - `BinaryArray`
/// - `LargeBinaryArray`
/// - `BinaryViewArray`
/// - `FixedSizeBinaryArray`
///
/// This trait helps to abstract over the different types of binary arrays
/// so that we don't need to duplicate the implementation for each type.
///
/// Implementors are [`ArrayAccessor`]s whose items are `&'a [u8]`.
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
    /// Constructs a new iterator over this array
    fn iter(&self) -> ArrayIter<Self>;
}
644
645impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
646    fn iter(&self) -> ArrayIter<Self> {
647        GenericBinaryArray::<O>::iter(self)
648    }
649}
650impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
651    fn iter(&self) -> ArrayIter<Self> {
652        BinaryViewArray::iter(self)
653    }
654}
655impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
656    fn iter(&self) -> ArrayIter<Self> {
657        FixedSizeBinaryArray::iter(self)
658    }
659}
660
661impl PartialEq for dyn Array + '_ {
662    fn eq(&self, other: &Self) -> bool {
663        self.to_data().eq(&other.to_data())
664    }
665}
666
667impl<T: Array> PartialEq<T> for dyn Array + '_ {
668    fn eq(&self, other: &T) -> bool {
669        self.to_data().eq(&other.to_data())
670    }
671}
672
673impl PartialEq for NullArray {
674    fn eq(&self, other: &NullArray) -> bool {
675        self.to_data().eq(&other.to_data())
676    }
677}
678
679impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
680    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
681        self.to_data().eq(&other.to_data())
682    }
683}
684
685impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
686    fn eq(&self, other: &Self) -> bool {
687        self.to_data().eq(&other.to_data())
688    }
689}
690
691impl PartialEq for BooleanArray {
692    fn eq(&self, other: &BooleanArray) -> bool {
693        self.to_data().eq(&other.to_data())
694    }
695}
696
697impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
698    fn eq(&self, other: &Self) -> bool {
699        self.to_data().eq(&other.to_data())
700    }
701}
702
703impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
704    fn eq(&self, other: &Self) -> bool {
705        self.to_data().eq(&other.to_data())
706    }
707}
708
709impl PartialEq for FixedSizeBinaryArray {
710    fn eq(&self, other: &Self) -> bool {
711        self.to_data().eq(&other.to_data())
712    }
713}
714
715impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
716    fn eq(&self, other: &Self) -> bool {
717        self.to_data().eq(&other.to_data())
718    }
719}
720
721impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
722    fn eq(&self, other: &Self) -> bool {
723        self.to_data().eq(&other.to_data())
724    }
725}
726
727impl PartialEq for MapArray {
728    fn eq(&self, other: &Self) -> bool {
729        self.to_data().eq(&other.to_data())
730    }
731}
732
733impl PartialEq for FixedSizeListArray {
734    fn eq(&self, other: &Self) -> bool {
735        self.to_data().eq(&other.to_data())
736    }
737}
738
739impl PartialEq for StructArray {
740    fn eq(&self, other: &Self) -> bool {
741        self.to_data().eq(&other.to_data())
742    }
743}
744
745impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
746    fn eq(&self, other: &Self) -> bool {
747        self.to_data().eq(&other.to_data())
748    }
749}
750
751impl<R: RunEndIndexType> PartialEq for RunArray<R> {
752    fn eq(&self, other: &Self) -> bool {
753        self.to_data().eq(&other.to_data())
754    }
755}
756
757/// Constructs an [`ArrayRef`] from an [`ArrayData`].
758///
759/// # Notes:
760///
761/// It is more efficient to directly construct the concrete array type rather
762/// than using this function as creating an `ArrayData` requires at least one
763/// additional allocation (the Vec of buffers).
764///
765/// # Example:
766/// ```
767/// # use std::sync::Arc;
768/// # use arrow_data::ArrayData;
769/// # use arrow_array::{make_array, ArrayRef, Int32Array};
770/// # use arrow_buffer::{Buffer, ScalarBuffer};
771/// # use arrow_schema::DataType;
772/// // Create an Int32Array with values [1, 2, 3]
773/// let values_buffer = Buffer::from_slice_ref(&[1, 2, 3]);
774/// // ArrayData can be constructed using ArrayDataBuilder
775///  let builder = ArrayData::builder(DataType::Int32)
776///    .len(3)
777///    .add_buffer(values_buffer.clone());
778/// let array_data = builder.build().unwrap();
779/// // Create the ArrayRef from the ArrayData
780/// let array = make_array(array_data);
781///
782/// // It is equivalent to directly constructing the Int32Array
783/// let scalar_buffer = ScalarBuffer::from(values_buffer);
784/// let int32_array: ArrayRef = Arc::new(Int32Array::new(scalar_buffer, None));
785/// assert_eq!(&array, &int32_array);
786/// ```
787pub fn make_array(data: ArrayData) -> ArrayRef {
788    match data.data_type() {
789        DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
790        DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
791        DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
792        DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
793        DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
794        DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
795        DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
796        DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
797        DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
798        DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
799        DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
800        DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
801        DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
802        DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
803        DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
804        DataType::Time32(TimeUnit::Millisecond) => {
805            Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
806        }
807        DataType::Time64(TimeUnit::Microsecond) => {
808            Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
809        }
810        DataType::Time64(TimeUnit::Nanosecond) => {
811            Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
812        }
813        DataType::Timestamp(TimeUnit::Second, _) => {
814            Arc::new(TimestampSecondArray::from(data)) as ArrayRef
815        }
816        DataType::Timestamp(TimeUnit::Millisecond, _) => {
817            Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
818        }
819        DataType::Timestamp(TimeUnit::Microsecond, _) => {
820            Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
821        }
822        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
823            Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
824        }
825        DataType::Interval(IntervalUnit::YearMonth) => {
826            Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
827        }
828        DataType::Interval(IntervalUnit::DayTime) => {
829            Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
830        }
831        DataType::Interval(IntervalUnit::MonthDayNano) => {
832            Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
833        }
834        DataType::Duration(TimeUnit::Second) => {
835            Arc::new(DurationSecondArray::from(data)) as ArrayRef
836        }
837        DataType::Duration(TimeUnit::Millisecond) => {
838            Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
839        }
840        DataType::Duration(TimeUnit::Microsecond) => {
841            Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
842        }
843        DataType::Duration(TimeUnit::Nanosecond) => {
844            Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
845        }
846        DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
847        DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
848        DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
849        DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
850        DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
851        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
852        DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
853        DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
854        DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
855        DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
856        DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
857        DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
858        DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
859        DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
860        DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
861        DataType::Dictionary(key_type, _) => match key_type.as_ref() {
862            DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
863            DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
864            DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
865            DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
866            DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
867            DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
868            DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
869            DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
870            dt => unimplemented!("Unexpected dictionary key type {dt}"),
871        },
872        DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
873            DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
874            DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
875            DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
876            dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
877        },
878        DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
879        DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
880        DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
881        DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
882        DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
883        dt => unimplemented!("Unexpected data type {dt}"),
884    }
885}
886
887/// Creates a new empty array
888///
889/// ```
890/// use std::sync::Arc;
891/// use arrow_schema::DataType;
892/// use arrow_array::{ArrayRef, Int32Array, new_empty_array};
893///
894/// let empty_array = new_empty_array(&DataType::Int32);
895/// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec<i32>));
896///
897/// assert_eq!(&array, &empty_array);
898/// ```
899pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
900    let data = ArrayData::new_empty(data_type);
901    make_array(data)
902}
903
904/// Creates a new array of `data_type` of length `length` filled
905/// entirely of `NULL` values
906///
907/// ```
908/// use std::sync::Arc;
909/// use arrow_schema::DataType;
910/// use arrow_array::{ArrayRef, Int32Array, new_null_array};
911///
912/// let null_array = new_null_array(&DataType::Int32, 3);
913/// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None]));
914///
915/// assert_eq!(&array, &null_array);
916/// ```
917pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
918    make_array(ArrayData::new_null(data_type, length))
919}
920
921/// Helper function that creates an [`OffsetBuffer`] from a buffer and array offset/ length
922///
923/// # Safety
924///
925/// - buffer must contain valid arrow offsets ( [`OffsetBuffer`] ) for the
926///   given length and offset.
927unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
928    buffer: Buffer,
929    offset: usize,
930    len: usize,
931) -> OffsetBuffer<O> {
932    if len == 0 && buffer.is_empty() {
933        return OffsetBuffer::new_empty();
934    }
935
936    let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
937    // Safety:
938    // Arguments were valid
939    unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
940}
941
942/// Helper function for printing potentially long arrays.
943fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
944where
945    A: Array,
946    F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
947{
948    let head = std::cmp::min(10, array.len());
949
950    for i in 0..head {
951        if array.is_null(i) {
952            writeln!(f, "  null,")?;
953        } else {
954            write!(f, "  ")?;
955            print_item(array, i, f)?;
956            writeln!(f, ",")?;
957        }
958    }
959    if array.len() > 10 {
960        if array.len() > 20 {
961            writeln!(f, "  ...{} elements...,", array.len() - 20)?;
962        }
963
964        let tail = std::cmp::max(head, array.len() - 10);
965
966        for i in tail..array.len() {
967            if array.is_null(i) {
968                writeln!(f, "  null,")?;
969            } else {
970                write!(f, "  ")?;
971                print_item(array, i, f)?;
972                writeln!(f, ",")?;
973            }
974        }
975    }
976    Ok(())
977}
978
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// Downcasts `array` to the concrete array type `T`, panicking on a mismatch.
    fn typed<T: 'static>(array: &ArrayRef) -> &T {
        array.as_any().downcast_ref::<T>().unwrap()
    }

    /// Asserts that each of the first `count` slots of `array` is null.
    fn assert_leading_nulls(array: &dyn Array, count: usize) {
        for slot in 0..count {
            assert!(array.is_null(slot));
        }
    }

    /// An empty primitive array has no values.
    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = typed::<Int32Array>(&empty);
        assert_eq!(ints.len(), 0);
        let no_values: &[i32] = &[];
        assert_eq!(ints.values(), no_values);
    }

    /// An empty string array still carries the initial zero offset.
    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = typed::<StringArray>(&empty);
        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets().first(), Some(&0i32));
    }

    /// An empty list array still carries the initial zero offset.
    #[test]
    fn test_empty_list_primitive() {
        let item = Field::new_list_field(DataType::Int32, false);
        let empty = new_empty_array(&DataType::List(Arc::new(item)));
        let lists = typed::<ListArray>(&empty);
        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets().first(), Some(&0i32));
    }

    /// Every slot of a null boolean array is null.
    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = typed::<BooleanArray>(&nulls);
        assert_eq!(bools.len(), 9);
        assert_leading_nulls(bools, 9);
    }

    /// Every slot of a null primitive array is null.
    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = typed::<Int32Array>(&nulls);
        assert_eq!(ints.len(), 9);
        assert_leading_nulls(ints, 9);
    }

    #[test]
    fn test_null_struct() {
        // It is possible to create a null struct containing a non-nullable child
        // see https://github.com/apache/arrow-rs/pull/3244 for details
        let fields = Fields::from(vec![Field::new("data", DataType::Int64, false)]);
        let nulls = new_null_array(&DataType::Struct(fields), 9);

        let structs = typed::<StructArray>(&nulls);
        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        assert_leading_nulls(structs, 9);

        // Make sure we can slice the resulting array.
        let _sliced = structs.slice(0, 5);
    }

    /// A null string array has all-zero offsets and only null slots.
    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = typed::<StringArray>(&nulls);
        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        assert_leading_nulls(strings, 9);
    }

    /// A null list array has all-zero offsets and only null slots.
    #[test]
    fn test_null_list_primitive() {
        let item = Field::new_list_field(DataType::Int32, true);
        let nulls = new_null_array(&DataType::List(Arc::new(item)), 9);
        let lists = typed::<ListArray>(&nulls);
        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        assert_leading_nulls(lists, 9);
    }

    /// A null map array has all-zero offsets and only null slots.
    #[test]
    fn test_null_map() {
        let entry_fields = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ]);
        let entry = Field::new("entry", DataType::Struct(entry_fields), false);
        let map_type = DataType::Map(Arc::new(entry), false);

        let nulls = new_null_array(&map_type, 9);
        let maps = typed::<MapArray>(&nulls);
        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        assert_leading_nulls(maps, 9);
    }

    /// A null dictionary array equals one collected from nine `None` values.
    #[test]
    fn test_null_dictionary() {
        let collected: DictionaryArray<Int8Type> = vec![None::<&str>; 9].into_iter().collect();
        let collected: ArrayRef = Arc::new(collected);

        let nulls = new_null_array(collected.data_type(), 9);
        assert_eq!(&collected, &nulls);

        let collected_key_bytes = collected.to_data().buffers()[0].len();
        let null_key_bytes = nulls.to_data().buffers()[0].len();
        assert_eq!(collected_key_bytes, null_key_bytes);
    }

    /// Null unions report no physical nulls but are logically all null.
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let children = vec![
                Field::new("foo", DataType::Int32, true),
                Field::new("bar", DataType::Int64, true),
            ];
            let union_fields = UnionFields::try_new(vec![2, 1], children).unwrap();
            let union_type = DataType::Union(union_fields, mode);

            let nulls = new_null_array(&union_type, 4);
            let union = as_union_array(nulls.as_ref());
            assert_eq!(union.len(), 4);
            assert_eq!(union.null_count(), 0);
            assert_eq!(union.logical_null_count(), 4);

            for slot in 0..4 {
                let child = union.value(slot);
                assert_eq!(child.len(), 1);
                assert_eq!(child.null_count(), 1);
                assert_eq!(child.logical_null_count(), 1);
                assert!(child.is_null(0))
            }

            union.to_data().validate_full().unwrap();
        }
    }

    /// Null run-end encoded arrays hold one null value spanning the whole array.
    // NOTE(review): the lint is presumably triggered inside the
    // `downcast_run_array!` expansion - confirm before removing the allow.
    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_ends = Field::new("run_ends", run_end_type, false);
            let values = Field::new("values", DataType::Utf8, true);
            let ree_type = DataType::RunEndEncoded(Arc::new(run_ends), Arc::new(values));

            let nulls = new_null_array(&ree_type, 4);
            let runs = nulls.as_ref();

            downcast_run_array! {
                runs => {
                    assert_eq!(runs.len(), 4);
                    assert_eq!(runs.null_count(), 0);
                    assert_eq!(runs.logical_null_count(), 4);
                    assert_eq!(runs.values().len(), 1);
                    assert_eq!(runs.values().null_count(), 1);
                    assert_eq!(runs.run_ends().len(), 4);
                    assert_eq!(runs.run_ends().values(), &[4]);

                    let idx = runs.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    /// Null fixed-size binary arrays are fully null for several widths.
    #[test]
    fn test_null_fixed_size_binary() {
        for width in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(width), 6);
            let binary = typed::<FixedSizeBinaryArray>(&nulls);

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            assert!(binary.iter().all(|value| value.is_none()));
        }
    }

    /// A `NullArray` owns no buffers and reports only a `usize` of memory.
    #[test]
    fn test_memory_size_null() {
        let nulls = NullArray::new(32);

        assert_eq!(0, nulls.get_buffer_memory_size());

        let word = std::mem::size_of::<usize>();
        assert_eq!(word, nulls.get_array_memory_size());
    }

    #[test]
    fn test_memory_size_primitive() {
        let filled = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let baseline = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(filled.data_type()));

        // subtract empty array to avoid magic numbers for the size of additional fields
        let payload = filled.get_array_memory_size() - baseline.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>());
    }

    #[test]
    fn test_memory_size_primitive_sliced() {
        let whole = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let first_half = whole.slice(0, 64);
        let second_half = whole.slice(64, 64);

        // both slices report the full buffer memory usage, even though the buffers are shared
        assert_eq!(first_half.get_array_memory_size(), whole.get_array_memory_size());
        assert_eq!(second_half.get_array_memory_size(), whole.get_array_memory_size());
    }

    #[test]
    fn test_memory_size_primitive_nullable() {
        let sparse: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();

        let empty_data = ArrayData::builder(sparse.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(empty_data);

        // expected size is the size of the PrimitiveArray struct,
        // which includes the optional validity buffer
        // plus one buffer on the heap
        assert_eq!(
            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
            empty_with_bitmap.get_array_memory_size()
        );

        // subtract empty array to avoid magic numbers for the size of additional fields
        // the size of the validity bitmap is rounded up to 64 bytes
        let payload = sparse.get_array_memory_size() - empty_with_bitmap.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>() + 64);
    }

    /// A dictionary array's memory is the sum of its keys and values buffers.
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let filled_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();

        let empty_type = DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty_data = ArrayData::new_empty(&empty_type);

        let filled = DictionaryArray::<Int16Type>::from(filled_data);
        let baseline = DictionaryArray::<Int16Type>::from(empty_data);

        let keys_bytes = 256 * std::mem::size_of::<i16>();
        assert_eq!(
            filled.keys().get_array_memory_size() - baseline.keys().get_array_memory_size(),
            keys_bytes
        );

        let values_bytes = 16 * std::mem::size_of::<i64>();
        assert_eq!(
            filled.values().get_array_memory_size() - baseline.values().get_array_memory_size(),
            values_bytes
        );

        assert_eq!(
            filled.get_array_memory_size() - baseline.get_array_memory_size(),
            keys_bytes + values_bytes
        );
    }

    /// Test function that takes an &dyn Array
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    #[test]
    fn test_array_ref_as_array() {
        let concrete: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // works well!
        assert!(compute_my_thing(&concrete));

        // Should also work when wrapped as an ArrayRef
        let shared: ArrayRef = Arc::new(concrete);
        assert!(compute_my_thing(&shared));
        assert!(compute_my_thing(shared.as_ref()));
    }

    /// `downcast_array` recovers the concrete array from an `ArrayRef`.
    #[test]
    fn test_downcast_array() {
        let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        let erased: ArrayRef = Arc::new(original);

        let recovered: Int32Array = downcast_array(&erased);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(recovered, expected);
    }
}