Skip to main content

arrow_array/array/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! The concrete array definitions
19
20mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81/// An array in the [Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html)
82///
83/// # Safety
84///
85/// Implementations of this trait must ensure that all methods implementations comply with
86/// the Arrow specification. No safety guards are placed and failing to comply with it can
87/// translate into panics or undefined behavior. For example, a value computed based on `len`
88/// may be used as a direct index into memory regions without checks.
89///
90/// Note that it is likely impossible to correctly implement the trait for a
91/// third party type, as substantial arrow-rs functionality is based on the
92/// return values of [`Array::data_type`] and third party types cannot extend
93/// the [`DataType`] enum. So any code that attempts casting based on data type
94/// (including internal arrow library code) risks a panic or undefined behavior.
95/// See [this discussion] for more details.
96///
97/// This trait might be sealed in the future. Use at your own risk.
98///
99/// [this discussion]: https://github.com/apache/arrow-rs/pull/9234#pullrequestreview-3708950936
100pub unsafe trait Array: std::fmt::Debug + Send + Sync {
101    /// Returns the array as [`Any`] so that it can be
102    /// downcasted to a specific implementation.
103    ///
104    /// # Example:
105    ///
106    /// ```
107    /// # use std::sync::Arc;
108    /// # use arrow_array::{Int32Array, RecordBatch};
109    /// # use arrow_schema::{Schema, Field, DataType, ArrowError};
110    ///
111    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
112    /// let batch = RecordBatch::try_new(
113    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
114    ///     vec![Arc::new(id)]
115    /// ).unwrap();
116    ///
117    /// let int32array = batch
118    ///     .column(0)
119    ///     .as_any()
120    ///     .downcast_ref::<Int32Array>()
121    ///     .expect("Failed to downcast");
122    /// ```
123    fn as_any(&self) -> &dyn Any;
124
125    /// Returns the underlying data of this array
126    fn to_data(&self) -> ArrayData;
127
128    /// Returns the underlying data of this array
129    ///
130    /// Unlike [`Array::to_data`] this consumes self, allowing it avoid unnecessary clones
131    fn into_data(self) -> ArrayData;
132
133    /// Returns a reference to the [`DataType`] of this array.
134    ///
135    /// # Example:
136    ///
137    /// ```
138    /// use arrow_schema::DataType;
139    /// use arrow_array::{Array, Int32Array};
140    ///
141    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
142    ///
143    /// assert_eq!(*array.data_type(), DataType::Int32);
144    /// ```
145    fn data_type(&self) -> &DataType;
146
147    /// Returns a zero-copy slice of this array with the indicated offset and length.
148    ///
149    /// # Example:
150    ///
151    /// ```
152    /// use arrow_array::{Array, Int32Array};
153    ///
154    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
155    /// // Make slice over the values [2, 3, 4]
156    /// let array_slice = array.slice(1, 3);
157    ///
158    /// assert_eq!(&array_slice, &Int32Array::from(vec![2, 3, 4]));
159    /// ```
160    fn slice(&self, offset: usize, length: usize) -> ArrayRef;
161
162    /// Returns the length (i.e., number of elements) of this array.
163    ///
164    /// # Example:
165    ///
166    /// ```
167    /// use arrow_array::{Array, Int32Array};
168    ///
169    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
170    ///
171    /// assert_eq!(array.len(), 5);
172    /// ```
173    fn len(&self) -> usize;
174
175    /// Returns whether this array is empty.
176    ///
177    /// # Example:
178    ///
179    /// ```
180    /// use arrow_array::{Array, Int32Array};
181    ///
182    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
183    ///
184    /// assert_eq!(array.is_empty(), false);
185    /// ```
186    fn is_empty(&self) -> bool;
187
188    /// Shrinks the capacity of any exclusively owned buffer as much as possible
189    ///
190    /// Shared or externally allocated buffers will be ignored, and
191    /// any buffer offsets will be preserved.
192    fn shrink_to_fit(&mut self) {}
193
194    /// Returns the offset into the underlying data used by this array(-slice).
195    /// Note that the underlying data can be shared by many arrays.
196    /// This defaults to `0`.
197    ///
198    /// # Example:
199    ///
200    /// ```
201    /// use arrow_array::{Array, BooleanArray};
202    ///
203    /// let array = BooleanArray::from(vec![false, false, true, true]);
204    /// let array_slice = array.slice(1, 3);
205    ///
206    /// assert_eq!(array.offset(), 0);
207    /// assert_eq!(array_slice.offset(), 1);
208    /// ```
209    fn offset(&self) -> usize;
210
211    /// Returns the null buffer of this array if any.
212    ///
213    /// The null buffer contains the "physical" nulls of an array, that is how
214    /// the nulls are represented in the underlying arrow format.
215    ///
216    /// The physical representation is efficient, but is sometimes non intuitive
217    /// for certain array types such as those with nullable child arrays like
218    /// [`DictionaryArray::values`], [`RunArray::values`] or [`UnionArray`], or without a
219    /// null buffer, such as [`NullArray`].
220    ///
221    /// To determine if each element of such an array is "logically" null,
222    /// use the slower [`Array::logical_nulls`] to obtain a computed mask.
223    fn nulls(&self) -> Option<&NullBuffer>;
224
225    /// Returns a potentially computed [`NullBuffer`] that represents the logical
226    /// null values of this array, if any.
227    ///
228    /// Logical nulls represent the values that are null in the array,
229    /// regardless of the underlying physical arrow representation.
230    ///
231    /// For most array types, this is equivalent to the "physical" nulls
232    /// returned by [`Array::nulls`]. It is different for the following cases, because which
233    /// elements are null is not encoded in a single null buffer:
234    ///
235    /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls
236    /// * [`RunArray`] where [`RunArray::values`] contains nulls
237    /// * [`NullArray`] where all indices are nulls
238    /// * [`UnionArray`] where the selected values contains nulls
239    ///
240    /// In these cases a logical [`NullBuffer`] will be computed, encoding the
241    /// logical nullability of these arrays, beyond what is encoded in
242    /// [`Array::nulls`]
243    fn logical_nulls(&self) -> Option<NullBuffer> {
244        self.nulls().cloned()
245    }
246
247    /// Returns whether the element at `index` is null according to [`Array::nulls`]
248    ///
249    /// Note: For performance reasons, this method returns nullability solely as determined by the
250    /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always
251    /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`], [`RunArray`] and [`UnionArray`] may
252    /// encode nullability in their children. See [`Self::logical_nulls`] for more information.
253    ///
254    /// # Example:
255    ///
256    /// ```
257    /// use arrow_array::{Array, Int32Array, NullArray};
258    ///
259    /// let array = Int32Array::from(vec![Some(1), None]);
260    /// assert_eq!(array.is_null(0), false);
261    /// assert_eq!(array.is_null(1), true);
262    ///
263    /// // NullArrays do not have a null buffer, and therefore always
264    /// // return false for is_null.
265    /// let array = NullArray::new(1);
266    /// assert_eq!(array.is_null(0), false);
267    /// ```
268    fn is_null(&self, index: usize) -> bool {
269        self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
270    }
271
272    /// Returns whether the element at `index` is *not* null, the
273    /// opposite of [`Self::is_null`].
274    ///
275    /// # Example:
276    ///
277    /// ```
278    /// use arrow_array::{Array, Int32Array};
279    ///
280    /// let array = Int32Array::from(vec![Some(1), None]);
281    ///
282    /// assert_eq!(array.is_valid(0), true);
283    /// assert_eq!(array.is_valid(1), false);
284    /// ```
285    fn is_valid(&self, index: usize) -> bool {
286        !self.is_null(index)
287    }
288
289    /// Returns the total number of physical null values in this array.
290    ///
291    /// Note: this method returns the physical null count, i.e. that encoded in [`Array::nulls`],
292    /// see [`Array::logical_nulls`] for logical nullability
293    ///
294    /// # Example:
295    ///
296    /// ```
297    /// use arrow_array::{Array, Int32Array};
298    ///
299    /// // Construct an array with values [1, NULL, NULL]
300    /// let array = Int32Array::from(vec![Some(1), None, None]);
301    ///
302    /// assert_eq!(array.null_count(), 2);
303    /// ```
304    fn null_count(&self) -> usize {
305        self.nulls().map(|n| n.null_count()).unwrap_or_default()
306    }
307
308    /// Returns the total number of logical null values in this array.
309    ///
310    /// Note: this method returns the logical null count, i.e. that encoded in
311    /// [`Array::logical_nulls`]. In general this is equivalent to [`Array::null_count`] but may differ in the
312    /// presence of logical nullability, see [`Array::nulls`] and [`Array::logical_nulls`].
313    ///
314    /// # Example:
315    ///
316    /// ```
317    /// use arrow_array::{Array, Int32Array};
318    ///
319    /// // Construct an array with values [1, NULL, NULL]
320    /// let array = Int32Array::from(vec![Some(1), None, None]);
321    ///
322    /// assert_eq!(array.logical_null_count(), 2);
323    /// ```
324    fn logical_null_count(&self) -> usize {
325        self.logical_nulls()
326            .map(|n| n.null_count())
327            .unwrap_or_default()
328    }
329
330    /// Returns `false` if the array is guaranteed to not contain any logical nulls
331    ///
332    /// This is generally equivalent to `Array::logical_null_count() != 0` unless determining
333    /// the logical nulls is expensive, in which case this method can return true even for an
334    /// array without nulls.
335    ///
336    /// This is also generally equivalent to `Array::null_count() != 0` but may differ in the
337    /// presence of logical nullability, see [`Array::logical_null_count`] and [`Array::null_count`].
338    ///
339    /// Implementations will return `true` unless they can cheaply prove no logical nulls
340    /// are present. For example a [`DictionaryArray`] with nullable values will still return true,
341    /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
342    /// and therefore would not appear in [`Array::logical_nulls`].
343    fn is_nullable(&self) -> bool {
344        self.logical_null_count() != 0
345    }
346
347    /// Returns the total number of bytes of memory pointed to by this array.
348    /// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
349    /// Note that this does not always correspond to the exact memory usage of an array,
350    /// since multiple arrays can share the same buffers or slices thereof.
351    fn get_buffer_memory_size(&self) -> usize;
352
353    /// Returns the total number of bytes of memory occupied physically by this array.
354    /// This value will always be greater than returned by `get_buffer_memory_size()` and
355    /// includes the overhead of the data structures that contain the pointers to the various buffers.
356    fn get_array_memory_size(&self) -> usize;
357}
358
359/// A reference-counted reference to a generic `Array`
360pub type ArrayRef = Arc<dyn Array>;
361
362/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
363unsafe impl Array for ArrayRef {
364    fn as_any(&self) -> &dyn Any {
365        self.as_ref().as_any()
366    }
367
368    fn to_data(&self) -> ArrayData {
369        self.as_ref().to_data()
370    }
371
372    fn into_data(self) -> ArrayData {
373        self.to_data()
374    }
375
376    fn data_type(&self) -> &DataType {
377        self.as_ref().data_type()
378    }
379
380    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
381        self.as_ref().slice(offset, length)
382    }
383
384    fn len(&self) -> usize {
385        self.as_ref().len()
386    }
387
388    fn is_empty(&self) -> bool {
389        self.as_ref().is_empty()
390    }
391
392    /// For shared buffers, this is a no-op.
393    fn shrink_to_fit(&mut self) {
394        if let Some(slf) = Arc::get_mut(self) {
395            slf.shrink_to_fit();
396        } else {
397            // We ignore shared buffers.
398        }
399    }
400
401    fn offset(&self) -> usize {
402        self.as_ref().offset()
403    }
404
405    fn nulls(&self) -> Option<&NullBuffer> {
406        self.as_ref().nulls()
407    }
408
409    fn logical_nulls(&self) -> Option<NullBuffer> {
410        self.as_ref().logical_nulls()
411    }
412
413    fn is_null(&self, index: usize) -> bool {
414        self.as_ref().is_null(index)
415    }
416
417    fn is_valid(&self, index: usize) -> bool {
418        self.as_ref().is_valid(index)
419    }
420
421    fn null_count(&self) -> usize {
422        self.as_ref().null_count()
423    }
424
425    fn logical_null_count(&self) -> usize {
426        self.as_ref().logical_null_count()
427    }
428
429    fn is_nullable(&self) -> bool {
430        self.as_ref().is_nullable()
431    }
432
433    fn get_buffer_memory_size(&self) -> usize {
434        self.as_ref().get_buffer_memory_size()
435    }
436
437    fn get_array_memory_size(&self) -> usize {
438        self.as_ref().get_array_memory_size()
439    }
440}
441
442unsafe impl<T: Array> Array for &T {
443    fn as_any(&self) -> &dyn Any {
444        T::as_any(self)
445    }
446
447    fn to_data(&self) -> ArrayData {
448        T::to_data(self)
449    }
450
451    fn into_data(self) -> ArrayData {
452        self.to_data()
453    }
454
455    fn data_type(&self) -> &DataType {
456        T::data_type(self)
457    }
458
459    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
460        T::slice(self, offset, length)
461    }
462
463    fn len(&self) -> usize {
464        T::len(self)
465    }
466
467    fn is_empty(&self) -> bool {
468        T::is_empty(self)
469    }
470
471    fn offset(&self) -> usize {
472        T::offset(self)
473    }
474
475    fn nulls(&self) -> Option<&NullBuffer> {
476        T::nulls(self)
477    }
478
479    fn logical_nulls(&self) -> Option<NullBuffer> {
480        T::logical_nulls(self)
481    }
482
483    fn is_null(&self, index: usize) -> bool {
484        T::is_null(self, index)
485    }
486
487    fn is_valid(&self, index: usize) -> bool {
488        T::is_valid(self, index)
489    }
490
491    fn null_count(&self) -> usize {
492        T::null_count(self)
493    }
494
495    fn logical_null_count(&self) -> usize {
496        T::logical_null_count(self)
497    }
498
499    fn is_nullable(&self) -> bool {
500        T::is_nullable(self)
501    }
502
503    fn get_buffer_memory_size(&self) -> usize {
504        T::get_buffer_memory_size(self)
505    }
506
507    fn get_array_memory_size(&self) -> usize {
508        T::get_array_memory_size(self)
509    }
510}
511
/// A generic trait for accessing the values of an [`Array`]
///
/// This trait helps write specialized implementations of algorithms for
/// different array types. Specialized implementations allow the compiler
/// to optimize the code for the specific array type, which can lead to
/// significant performance improvements.
///
/// # Example
/// For example, to write three different implementations of a string length function
/// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
/// # use arrow_buffer::ArrowNativeType;
/// # use arrow_array::cast::AsArray;
/// # use arrow_array::iterator::ArrayIter;
/// # use arrow_array::types::{Int32Type, Int64Type};
/// # use arrow_schema::{ArrowError, DataType};
/// /// This function takes a dynamically typed `ArrayRef` and
/// /// calls one of three specialized implementations
/// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
///     match arg.data_type() {
///         DataType::Utf8 => {
///             // downcast the ArrayRef to a StringArray and call the specialized implementation
///             let string_array = arg.as_string::<i32>();
///             character_length_general::<Int32Type, _>(string_array)
///         }
///         DataType::LargeUtf8 => {
///             character_length_general::<Int64Type, _>(arg.as_string::<i64>())
///         }
///         DataType::Utf8View => {
///             character_length_general::<Int32Type, _>(arg.as_string_view())
///         }
///         _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
///     }
/// }
///
/// /// A generic implementation of the character_length function
/// /// This function uses the `ArrayAccessor` trait to access the values of the array
/// /// so the compiler can generate specialized implementations for different array types
/// ///
/// /// Returns a new array with the length of each string in the input array
/// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
/// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
/// ///
/// /// This is generic on the type of the primitive array (different string arrays have
/// /// different lengths) and the type of the array accessor (different string arrays
/// /// have different ways to access the values)
/// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
///     array: V,
/// ) -> Result<ArrayRef, ArrowError>
/// where
///     T::Native: OffsetSizeTrait,
/// {
///     let iter = ArrayIter::new(array);
///     // Create a Int32Array / Int64Array with the length of each string
///     let result = iter
///         .map(|string| {
///             string.map(|string: &str| {
///                 T::Native::from_usize(string.chars().count())
///                     .expect("should not fail as string.chars will always return integer")
///             })
///         })
///         .collect::<PrimitiveArray<T>>();
///
///     // Return the result as a new ArrayRef (dynamically typed)
///     Ok(Arc::new(result) as ArrayRef)
/// }
/// ```
///
/// # Validity
///
/// An [`ArrayAccessor`] must always return a well-defined value for an index
/// that is within the bounds `0..Array::len`, including for null indexes where
/// [`Array::is_null`] is true.
///
/// The value at null indexes is unspecified, and implementations must not rely
/// on a specific value such as [`Default::default`] being returned, however, it
/// must not be undefined
pub trait ArrayAccessor: Array {
    /// The Arrow type of the element being accessed.
    type Item: Send + Sync;

    /// Returns the element at position `index`
    /// # Panics
    /// Panics if `index` is outside the bounds of the array
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at position `index`
    /// # Safety
    /// Caller is responsible for ensuring that `index` is within the bounds of the array
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
606
607/// A trait for Arrow String Arrays, currently three types are supported:
608/// - `StringArray`
609/// - `LargeStringArray`
610/// - `StringViewArray`
611///
612/// This trait helps to abstract over the different types of string arrays
613/// so that we don't need to duplicate the implementation for each type.
614pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
615    /// Returns true if all data within this string array is ASCII
616    fn is_ascii(&self) -> bool;
617
618    /// Constructs a new iterator
619    fn iter(&self) -> ArrayIter<Self>;
620}
621
622impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
623    fn is_ascii(&self) -> bool {
624        GenericStringArray::<O>::is_ascii(self)
625    }
626
627    fn iter(&self) -> ArrayIter<Self> {
628        GenericStringArray::<O>::iter(self)
629    }
630}
631impl<'a> StringArrayType<'a> for &'a StringViewArray {
632    fn is_ascii(&self) -> bool {
633        StringViewArray::is_ascii(self)
634    }
635
636    fn iter(&self) -> ArrayIter<Self> {
637        StringViewArray::iter(self)
638    }
639}
640
641/// A trait for Arrow Binary Arrays, currently four types are supported:
642/// - `BinaryArray`
643/// - `LargeBinaryArray`
644/// - `BinaryViewArray`
645/// - `FixedSizeBinaryArray`
646///
647/// This trait helps to abstract over the different types of binary arrays
648/// so that we don't need to duplicate the implementation for each type.
649pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
650    /// Constructs a new iterator
651    fn iter(&self) -> ArrayIter<Self>;
652}
653
654impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
655    fn iter(&self) -> ArrayIter<Self> {
656        GenericBinaryArray::<O>::iter(self)
657    }
658}
659impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
660    fn iter(&self) -> ArrayIter<Self> {
661        BinaryViewArray::iter(self)
662    }
663}
664impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
665    fn iter(&self) -> ArrayIter<Self> {
666        FixedSizeBinaryArray::iter(self)
667    }
668}
669
670impl PartialEq for dyn Array + '_ {
671    fn eq(&self, other: &Self) -> bool {
672        self.to_data().eq(&other.to_data())
673    }
674}
675
676impl<T: Array> PartialEq<T> for dyn Array + '_ {
677    fn eq(&self, other: &T) -> bool {
678        self.to_data().eq(&other.to_data())
679    }
680}
681
682impl PartialEq for NullArray {
683    fn eq(&self, other: &NullArray) -> bool {
684        self.to_data().eq(&other.to_data())
685    }
686}
687
688impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
689    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
690        self.to_data().eq(&other.to_data())
691    }
692}
693
694impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
695    fn eq(&self, other: &Self) -> bool {
696        self.to_data().eq(&other.to_data())
697    }
698}
699
700impl PartialEq for BooleanArray {
701    fn eq(&self, other: &BooleanArray) -> bool {
702        self.to_data().eq(&other.to_data())
703    }
704}
705
706impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
707    fn eq(&self, other: &Self) -> bool {
708        self.to_data().eq(&other.to_data())
709    }
710}
711
712impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
713    fn eq(&self, other: &Self) -> bool {
714        self.to_data().eq(&other.to_data())
715    }
716}
717
718impl PartialEq for FixedSizeBinaryArray {
719    fn eq(&self, other: &Self) -> bool {
720        self.to_data().eq(&other.to_data())
721    }
722}
723
724impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
725    fn eq(&self, other: &Self) -> bool {
726        self.to_data().eq(&other.to_data())
727    }
728}
729
730impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
731    fn eq(&self, other: &Self) -> bool {
732        self.to_data().eq(&other.to_data())
733    }
734}
735
736impl PartialEq for MapArray {
737    fn eq(&self, other: &Self) -> bool {
738        self.to_data().eq(&other.to_data())
739    }
740}
741
742impl PartialEq for FixedSizeListArray {
743    fn eq(&self, other: &Self) -> bool {
744        self.to_data().eq(&other.to_data())
745    }
746}
747
748impl PartialEq for StructArray {
749    fn eq(&self, other: &Self) -> bool {
750        self.to_data().eq(&other.to_data())
751    }
752}
753
754impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
755    fn eq(&self, other: &Self) -> bool {
756        self.to_data().eq(&other.to_data())
757    }
758}
759
760impl<R: RunEndIndexType> PartialEq for RunArray<R> {
761    fn eq(&self, other: &Self) -> bool {
762        self.to_data().eq(&other.to_data())
763    }
764}
765
766/// Constructs an [`ArrayRef`] from an [`ArrayData`].
767///
768/// # Notes:
769///
770/// It is more efficient to directly construct the concrete array type rather
771/// than using this function as creating an `ArrayData` requires at least one
772/// additional allocation (the Vec of buffers).
773///
774/// # Example:
775/// ```
776/// # use std::sync::Arc;
777/// # use arrow_data::ArrayData;
778/// # use arrow_array::{make_array, ArrayRef, Int32Array};
779/// # use arrow_buffer::{Buffer, ScalarBuffer};
780/// # use arrow_schema::DataType;
781/// // Create an Int32Array with values [1, 2, 3]
782/// let values_buffer = Buffer::from_slice_ref(&[1, 2, 3]);
783/// // ArrayData can be constructed using ArrayDataBuilder
784///  let builder = ArrayData::builder(DataType::Int32)
785///    .len(3)
786///    .add_buffer(values_buffer.clone());
787/// let array_data = builder.build().unwrap();
788/// // Create the ArrayRef from the ArrayData
789/// let array = make_array(array_data);
790///
791/// // It is equivalent to directly constructing the Int32Array
792/// let scalar_buffer = ScalarBuffer::from(values_buffer);
793/// let int32_array: ArrayRef = Arc::new(Int32Array::new(scalar_buffer, None));
794/// assert_eq!(&array, &int32_array);
795/// ```
796pub fn make_array(data: ArrayData) -> ArrayRef {
797    match data.data_type() {
798        DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
799        DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
800        DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
801        DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
802        DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
803        DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
804        DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
805        DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
806        DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
807        DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
808        DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
809        DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
810        DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
811        DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
812        DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
813        DataType::Time32(TimeUnit::Millisecond) => {
814            Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
815        }
816        DataType::Time64(TimeUnit::Microsecond) => {
817            Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
818        }
819        DataType::Time64(TimeUnit::Nanosecond) => {
820            Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
821        }
822        DataType::Timestamp(TimeUnit::Second, _) => {
823            Arc::new(TimestampSecondArray::from(data)) as ArrayRef
824        }
825        DataType::Timestamp(TimeUnit::Millisecond, _) => {
826            Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
827        }
828        DataType::Timestamp(TimeUnit::Microsecond, _) => {
829            Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
830        }
831        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
832            Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
833        }
834        DataType::Interval(IntervalUnit::YearMonth) => {
835            Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
836        }
837        DataType::Interval(IntervalUnit::DayTime) => {
838            Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
839        }
840        DataType::Interval(IntervalUnit::MonthDayNano) => {
841            Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
842        }
843        DataType::Duration(TimeUnit::Second) => {
844            Arc::new(DurationSecondArray::from(data)) as ArrayRef
845        }
846        DataType::Duration(TimeUnit::Millisecond) => {
847            Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
848        }
849        DataType::Duration(TimeUnit::Microsecond) => {
850            Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
851        }
852        DataType::Duration(TimeUnit::Nanosecond) => {
853            Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
854        }
855        DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
856        DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
857        DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
858        DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
859        DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
860        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
861        DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
862        DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
863        DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
864        DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
865        DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
866        DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
867        DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
868        DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
869        DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
870        DataType::Dictionary(key_type, _) => match key_type.as_ref() {
871            DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
872            DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
873            DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
874            DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
875            DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
876            DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
877            DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
878            DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
879            dt => unimplemented!("Unexpected dictionary key type {dt}"),
880        },
881        DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
882            DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
883            DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
884            DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
885            dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
886        },
887        DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
888        DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
889        DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
890        DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
891        DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
892        dt => unimplemented!("Unexpected data type {dt}"),
893    }
894}
895
896/// Creates a new empty array
897///
898/// ```
899/// use std::sync::Arc;
900/// use arrow_schema::DataType;
901/// use arrow_array::{ArrayRef, Int32Array, new_empty_array};
902///
903/// let empty_array = new_empty_array(&DataType::Int32);
904/// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec<i32>));
905///
906/// assert_eq!(&array, &empty_array);
907/// ```
908pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
909    let data = ArrayData::new_empty(data_type);
910    make_array(data)
911}
912
913/// Creates a new array of `data_type` of length `length` filled
914/// entirely of `NULL` values
915///
916/// ```
917/// use std::sync::Arc;
918/// use arrow_schema::DataType;
919/// use arrow_array::{ArrayRef, Int32Array, new_null_array};
920///
921/// let null_array = new_null_array(&DataType::Int32, 3);
922/// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None]));
923///
924/// assert_eq!(&array, &null_array);
925/// ```
926pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
927    make_array(ArrayData::new_null(data_type, length))
928}
929
930/// Helper function that creates an [`OffsetBuffer`] from a buffer and array offset/ length
931///
932/// # Safety
933///
934/// - buffer must contain valid arrow offsets ( [`OffsetBuffer`] ) for the
935///   given length and offset.
936unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
937    buffer: Buffer,
938    offset: usize,
939    len: usize,
940) -> OffsetBuffer<O> {
941    if len == 0 && buffer.is_empty() {
942        return OffsetBuffer::new_empty();
943    }
944
945    let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
946    // Safety:
947    // Arguments were valid
948    unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
949}
950
951/// Helper function for printing potentially long arrays.
952fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
953where
954    A: Array,
955    F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
956{
957    let head = std::cmp::min(10, array.len());
958
959    for i in 0..head {
960        if array.is_null(i) {
961            writeln!(f, "  null,")?;
962        } else {
963            write!(f, "  ")?;
964            print_item(array, i, f)?;
965            writeln!(f, ",")?;
966        }
967    }
968    if array.len() > 10 {
969        if array.len() > 20 {
970            writeln!(f, "  ...{} elements...,", array.len() - 20)?;
971        }
972
973        let tail = std::cmp::max(head, array.len() - 10);
974
975        for i in tail..array.len() {
976            if array.is_null(i) {
977                writeln!(f, "  null,")?;
978            } else {
979                write!(f, "  ")?;
980                print_item(array, i, f)?;
981                writeln!(f, ",")?;
982            }
983        }
984    }
985    Ok(())
986}
987
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// An empty `Int32` array has no elements and an empty values buffer
    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = empty.as_any().downcast_ref::<Int32Array>().unwrap();
        let no_values: &[i32] = &[];

        assert_eq!(ints.len(), 0);
        assert_eq!(ints.values(), no_values);
    }

    /// An empty `Utf8` array still exposes a leading zero offset
    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = empty.as_any().downcast_ref::<StringArray>().unwrap();

        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets()[0], 0i32);
    }

    /// An empty list array still exposes a leading zero offset
    #[test]
    fn test_empty_list_primitive() {
        let item = Field::new_list_field(DataType::Int32, false);
        let list_type = DataType::List(Arc::new(item));

        let empty = new_empty_array(&list_type);
        let lists = empty.as_any().downcast_ref::<ListArray>().unwrap();

        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets()[0], 0i32);
    }

    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();

        assert_eq!(bools.len(), 9);
        assert!((0..9).all(|i| bools.is_null(i)));
    }

    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = nulls.as_any().downcast_ref::<Int32Array>().unwrap();

        assert_eq!(ints.len(), 9);
        assert!((0..9).all(|i| ints.is_null(i)));
    }

    #[test]
    fn test_null_struct() {
        // A null struct may wrap a child that is itself declared non-nullable,
        // see https://github.com/apache/arrow-rs/pull/3244 for details
        let fields = Fields::from(vec![Field::new("data", DataType::Int64, false)]);
        let struct_type = DataType::Struct(fields);

        let nulls = new_null_array(&struct_type, 9);
        let structs = nulls.as_any().downcast_ref::<StructArray>().unwrap();

        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        assert!((0..9).all(|i| structs.is_null(i)));

        // Slicing the resulting array must also succeed.
        let _sliced = structs.slice(0, 5);
    }

    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = nulls.as_any().downcast_ref::<StringArray>().unwrap();

        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| strings.is_null(i)));
    }

    #[test]
    fn test_null_list_primitive() {
        let item = Field::new_list_field(DataType::Int32, true);
        let list_type = DataType::List(Arc::new(item));

        let nulls = new_null_array(&list_type, 9);
        let lists = nulls.as_any().downcast_ref::<ListArray>().unwrap();

        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| lists.is_null(i)));
    }

    #[test]
    fn test_null_map() {
        let entry_struct = DataType::Struct(Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ]));
        let entries = Field::new("entry", entry_struct, false);
        let map_type = DataType::Map(Arc::new(entries), false);

        let nulls = new_null_array(&map_type, 9);
        let maps = nulls.as_any().downcast_ref::<MapArray>().unwrap();

        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| maps.is_null(i)));
    }

    /// A dictionary collected from nine `None`s equals the generated null
    /// array, and their first buffers have the same length
    #[test]
    fn test_null_dictionary() {
        let all_none = vec![None::<&str>; 9];

        let collected: DictionaryArray<Int8Type> = all_none.into_iter().collect();
        let collected = Arc::new(collected) as ArrayRef;

        let generated = new_null_array(collected.data_type(), 9);
        assert_eq!(&collected, &generated);

        let collected_keys_len = collected.to_data().buffers()[0].len();
        let generated_keys_len = generated.to_data().buffers()[0].len();
        assert_eq!(collected_keys_len, generated_keys_len);
    }

    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let union_fields = UnionFields::try_new(
                vec![2, 1],
                vec![
                    Field::new("foo", DataType::Int32, true),
                    Field::new("bar", DataType::Int64, true),
                ],
            )
            .unwrap();
            let union_type = DataType::Union(union_fields, mode);

            let nulls = new_null_array(&union_type, 4);
            let union = as_union_array(nulls.as_ref());

            // The union reports no nulls of its own, only logical ones
            assert_eq!(union.len(), 4);
            assert_eq!(union.null_count(), 0);
            assert_eq!(union.logical_null_count(), 4);

            for slot in 0..4 {
                let value = union.value(slot);
                assert_eq!(value.len(), 1);
                assert_eq!(value.null_count(), 1);
                assert_eq!(value.logical_null_count(), 1);
                assert!(value.is_null(0))
            }

            union.to_data().validate_full().unwrap();
        }
    }

    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_ends = Field::new("run_ends", run_end_type, false);
            let values = Field::new("values", DataType::Utf8, true);
            let ree_type = DataType::RunEndEncoded(Arc::new(run_ends), Arc::new(values));

            let nulls = new_null_array(&ree_type, 4);
            let run_array = nulls.as_ref();

            downcast_run_array! {
                run_array => {
                    assert_eq!(run_array.len(), 4);
                    assert_eq!(run_array.null_count(), 0);
                    assert_eq!(run_array.logical_null_count(), 4);
                    assert_eq!(run_array.values().len(), 1);
                    assert_eq!(run_array.values().null_count(), 1);
                    assert_eq!(run_array.run_ends().len(), 4);
                    assert_eq!(run_array.run_ends().values(), &[4]);

                    let idx = run_array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(idx, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let binary = nulls
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            assert!(binary.iter().all(|x| x.is_none()));
        }
    }

    #[test]
    fn test_memory_size_null() {
        let nulls = NullArray::new(32);

        assert_eq!(0, nulls.get_buffer_memory_size());

        let expected_array_size = std::mem::size_of::<usize>();
        assert_eq!(expected_array_size, nulls.get_array_memory_size());
    }

    #[test]
    fn test_memory_size_primitive() {
        let filled = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(filled.data_type()));

        // Compare against the empty array so that the size of the remaining
        // fields does not have to be hard-coded
        let values_size = 128 * std::mem::size_of::<i64>();
        assert_eq!(
            filled.get_array_memory_size() - empty.get_array_memory_size(),
            values_size
        );
    }

    #[test]
    fn test_memory_size_primitive_sliced() {
        let whole = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let whole_size = whole.get_array_memory_size();

        // Each slice reports the full buffer memory usage, even though the
        // buffers are shared
        for (start, length) in [(0, 64), (64, 64)] {
            let part = whole.slice(start, length);
            assert_eq!(part.get_array_memory_size(), whole_size);
        }
    }

    #[test]
    fn test_memory_size_primitive_nullable() {
        let sparse: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();

        let empty_data = ArrayData::builder(sparse.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(empty_data);

        // The expected size is that of the PrimitiveArray struct,
        // which includes the optional validity buffer
        // plus one buffer on the heap
        let struct_size = std::mem::size_of::<PrimitiveArray<Int64Type>>();
        assert_eq!(struct_size, empty_with_bitmap.get_array_memory_size());

        // Compare against the empty array so that the size of the remaining
        // fields does not have to be hard-coded;
        // the size of the validity bitmap is rounded up to 64 bytes
        let values_and_bitmap_size = 128 * std::mem::size_of::<i64>() + 64;
        assert_eq!(
            sparse.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
            values_and_bitmap_size
        );
    }

    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let key_type = Box::new(keys.data_type().clone());
        let value_type = Box::new(values.data_type().clone());
        let dict_type = DataType::Dictionary(key_type, value_type);

        // Re-label the key data as a dictionary and attach the values as its child
        let dict_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();
        let dict = DictionaryArray::<Int16Type>::from(dict_data);

        let empty_type = DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty = DictionaryArray::<Int16Type>::from(ArrayData::new_empty(&empty_type));

        let keys_size = 256 * std::mem::size_of::<i16>();
        assert_eq!(
            dict.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
            keys_size
        );

        let values_size = 16 * std::mem::size_of::<i64>();
        assert_eq!(
            dict.values().get_array_memory_size() - empty.values().get_array_memory_size(),
            values_size
        );

        assert_eq!(
            dict.get_array_memory_size() - empty.get_array_memory_size(),
            keys_size + values_size
        );
    }

    /// Stand-in for an API that accepts any array as `&dyn Array`
    fn compute_my_thing(array: &dyn Array) -> bool {
        !array.is_empty()
    }

    #[test]
    fn test_array_ref_as_array() {
        let ints: Int32Array = (1..=3i32).map(Some).collect();

        // A concrete array can be passed directly
        assert!(compute_my_thing(&ints));

        // The same must hold once it is wrapped as an ArrayRef
        let shared: ArrayRef = Arc::new(ints);
        assert!(compute_my_thing(&shared));
        assert!(compute_my_thing(shared.as_ref()));
    }

    #[test]
    fn test_downcast_array() {
        let ints: Int32Array = (1..=3i32).map(Some).collect();
        let shared: ArrayRef = Arc::new(ints);

        let recovered: Int32Array = downcast_array(&shared);

        let expected: Int32Array = (1..=3i32).map(Some).collect();
        assert_eq!(recovered, expected);
    }
}