arrow_array/array/
mod.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! The concrete array definitions
19
20mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81/// An array in the [arrow columnar format](https://arrow.apache.org/docs/format/Columnar.html)
82pub trait Array: std::fmt::Debug + Send + Sync {
83    /// Returns the array as [`Any`] so that it can be
84    /// downcasted to a specific implementation.
85    ///
86    /// # Example:
87    ///
88    /// ```
89    /// # use std::sync::Arc;
90    /// # use arrow_array::{Int32Array, RecordBatch};
91    /// # use arrow_schema::{Schema, Field, DataType, ArrowError};
92    ///
93    /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
94    /// let batch = RecordBatch::try_new(
95    ///     Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
96    ///     vec![Arc::new(id)]
97    /// ).unwrap();
98    ///
99    /// let int32array = batch
100    ///     .column(0)
101    ///     .as_any()
102    ///     .downcast_ref::<Int32Array>()
103    ///     .expect("Failed to downcast");
104    /// ```
105    fn as_any(&self) -> &dyn Any;
106
107    /// Returns the underlying data of this array
108    fn to_data(&self) -> ArrayData;
109
110    /// Returns the underlying data of this array
111    ///
112    /// Unlike [`Array::to_data`] this consumes self, allowing it avoid unnecessary clones
113    fn into_data(self) -> ArrayData;
114
115    /// Returns a reference to the [`DataType`] of this array.
116    ///
117    /// # Example:
118    ///
119    /// ```
120    /// use arrow_schema::DataType;
121    /// use arrow_array::{Array, Int32Array};
122    ///
123    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
124    ///
125    /// assert_eq!(*array.data_type(), DataType::Int32);
126    /// ```
127    fn data_type(&self) -> &DataType;
128
129    /// Returns a zero-copy slice of this array with the indicated offset and length.
130    ///
131    /// # Example:
132    ///
133    /// ```
134    /// use arrow_array::{Array, Int32Array};
135    ///
136    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
137    /// // Make slice over the values [2, 3, 4]
138    /// let array_slice = array.slice(1, 3);
139    ///
140    /// assert_eq!(&array_slice, &Int32Array::from(vec![2, 3, 4]));
141    /// ```
142    fn slice(&self, offset: usize, length: usize) -> ArrayRef;
143
144    /// Returns the length (i.e., number of elements) of this array.
145    ///
146    /// # Example:
147    ///
148    /// ```
149    /// use arrow_array::{Array, Int32Array};
150    ///
151    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
152    ///
153    /// assert_eq!(array.len(), 5);
154    /// ```
155    fn len(&self) -> usize;
156
157    /// Returns whether this array is empty.
158    ///
159    /// # Example:
160    ///
161    /// ```
162    /// use arrow_array::{Array, Int32Array};
163    ///
164    /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
165    ///
166    /// assert_eq!(array.is_empty(), false);
167    /// ```
168    fn is_empty(&self) -> bool;
169
170    /// Shrinks the capacity of any exclusively owned buffer as much as possible
171    ///
172    /// Shared or externally allocated buffers will be ignored, and
173    /// any buffer offsets will be preserved.
174    fn shrink_to_fit(&mut self) {}
175
176    /// Returns the offset into the underlying data used by this array(-slice).
177    /// Note that the underlying data can be shared by many arrays.
178    /// This defaults to `0`.
179    ///
180    /// # Example:
181    ///
182    /// ```
183    /// use arrow_array::{Array, BooleanArray};
184    ///
185    /// let array = BooleanArray::from(vec![false, false, true, true]);
186    /// let array_slice = array.slice(1, 3);
187    ///
188    /// assert_eq!(array.offset(), 0);
189    /// assert_eq!(array_slice.offset(), 1);
190    /// ```
191    fn offset(&self) -> usize;
192
193    /// Returns the null buffer of this array if any.
194    ///
195    /// The null buffer contains the "physical" nulls of an array, that is how
196    /// the nulls are represented in the underlying arrow format.
197    ///
198    /// The physical representation is efficient, but is sometimes non intuitive
199    /// for certain array types such as those with nullable child arrays like
200    /// [`DictionaryArray::values`], [`RunArray::values`] or [`UnionArray`], or without a
201    /// null buffer, such as [`NullArray`].
202    ///
203    /// To determine if each element of such an array is "logically" null,
204    /// use the slower [`Array::logical_nulls`] to obtain a computed mask.
205    fn nulls(&self) -> Option<&NullBuffer>;
206
207    /// Returns a potentially computed [`NullBuffer`] that represents the logical
208    /// null values of this array, if any.
209    ///
210    /// Logical nulls represent the values that are null in the array,
211    /// regardless of the underlying physical arrow representation.
212    ///
213    /// For most array types, this is equivalent to the "physical" nulls
214    /// returned by [`Array::nulls`]. It is different for the following cases, because which
215    /// elements are null is not encoded in a single null buffer:
216    ///
217    /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls
218    /// * [`RunArray`] where [`RunArray::values`] contains nulls
219    /// * [`NullArray`] where all indices are nulls
220    /// * [`UnionArray`] where the selected values contains nulls
221    ///
222    /// In these cases a logical [`NullBuffer`] will be computed, encoding the
223    /// logical nullability of these arrays, beyond what is encoded in
224    /// [`Array::nulls`]
225    fn logical_nulls(&self) -> Option<NullBuffer> {
226        self.nulls().cloned()
227    }
228
229    /// Returns whether the element at `index` is null according to [`Array::nulls`]
230    ///
231    /// Note: For performance reasons, this method returns nullability solely as determined by the
232    /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always
233    /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`], [`RunArray`] and [`UnionArray`] may
234    /// encode nullability in their children. See [`Self::logical_nulls`] for more information.
235    ///
236    /// # Example:
237    ///
238    /// ```
239    /// use arrow_array::{Array, Int32Array, NullArray};
240    ///
241    /// let array = Int32Array::from(vec![Some(1), None]);
242    /// assert_eq!(array.is_null(0), false);
243    /// assert_eq!(array.is_null(1), true);
244    ///
245    /// // NullArrays do not have a null buffer, and therefore always
246    /// // return false for is_null.
247    /// let array = NullArray::new(1);
248    /// assert_eq!(array.is_null(0), false);
249    /// ```
250    fn is_null(&self, index: usize) -> bool {
251        self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
252    }
253
254    /// Returns whether the element at `index` is *not* null, the
255    /// opposite of [`Self::is_null`].
256    ///
257    /// # Example:
258    ///
259    /// ```
260    /// use arrow_array::{Array, Int32Array};
261    ///
262    /// let array = Int32Array::from(vec![Some(1), None]);
263    ///
264    /// assert_eq!(array.is_valid(0), true);
265    /// assert_eq!(array.is_valid(1), false);
266    /// ```
267    fn is_valid(&self, index: usize) -> bool {
268        !self.is_null(index)
269    }
270
271    /// Returns the total number of physical null values in this array.
272    ///
273    /// Note: this method returns the physical null count, i.e. that encoded in [`Array::nulls`],
274    /// see [`Array::logical_nulls`] for logical nullability
275    ///
276    /// # Example:
277    ///
278    /// ```
279    /// use arrow_array::{Array, Int32Array};
280    ///
281    /// // Construct an array with values [1, NULL, NULL]
282    /// let array = Int32Array::from(vec![Some(1), None, None]);
283    ///
284    /// assert_eq!(array.null_count(), 2);
285    /// ```
286    fn null_count(&self) -> usize {
287        self.nulls().map(|n| n.null_count()).unwrap_or_default()
288    }
289
290    /// Returns the total number of logical null values in this array.
291    ///
292    /// Note: this method returns the logical null count, i.e. that encoded in
293    /// [`Array::logical_nulls`]. In general this is equivalent to [`Array::null_count`] but may differ in the
294    /// presence of logical nullability, see [`Array::nulls`] and [`Array::logical_nulls`].
295    ///
296    /// # Example:
297    ///
298    /// ```
299    /// use arrow_array::{Array, Int32Array};
300    ///
301    /// // Construct an array with values [1, NULL, NULL]
302    /// let array = Int32Array::from(vec![Some(1), None, None]);
303    ///
304    /// assert_eq!(array.logical_null_count(), 2);
305    /// ```
306    fn logical_null_count(&self) -> usize {
307        self.logical_nulls()
308            .map(|n| n.null_count())
309            .unwrap_or_default()
310    }
311
312    /// Returns `false` if the array is guaranteed to not contain any logical nulls
313    ///
314    /// This is generally equivalent to `Array::logical_null_count() != 0` unless determining
315    /// the logical nulls is expensive, in which case this method can return true even for an
316    /// array without nulls.
317    ///
318    /// This is also generally equivalent to `Array::null_count() != 0` but may differ in the
319    /// presence of logical nullability, see [`Array::logical_null_count`] and [`Array::null_count`].
320    ///
321    /// Implementations will return `true` unless they can cheaply prove no logical nulls
322    /// are present. For example a [`DictionaryArray`] with nullable values will still return true,
323    /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
324    /// and therefore would not appear in [`Array::logical_nulls`].
325    fn is_nullable(&self) -> bool {
326        self.logical_null_count() != 0
327    }
328
329    /// Returns the total number of bytes of memory pointed to by this array.
330    /// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
331    /// Note that this does not always correspond to the exact memory usage of an array,
332    /// since multiple arrays can share the same buffers or slices thereof.
333    fn get_buffer_memory_size(&self) -> usize;
334
335    /// Returns the total number of bytes of memory occupied physically by this array.
336    /// This value will always be greater than returned by `get_buffer_memory_size()` and
337    /// includes the overhead of the data structures that contain the pointers to the various buffers.
338    fn get_array_memory_size(&self) -> usize;
339}
340
/// A reference-counted reference to a generic [`Array`]
///
/// This is an [`Arc`] around a `dyn Array` trait object.
pub type ArrayRef = Arc<dyn Array>;
343
344/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
345impl Array for ArrayRef {
346    fn as_any(&self) -> &dyn Any {
347        self.as_ref().as_any()
348    }
349
350    fn to_data(&self) -> ArrayData {
351        self.as_ref().to_data()
352    }
353
354    fn into_data(self) -> ArrayData {
355        self.to_data()
356    }
357
358    fn data_type(&self) -> &DataType {
359        self.as_ref().data_type()
360    }
361
362    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
363        self.as_ref().slice(offset, length)
364    }
365
366    fn len(&self) -> usize {
367        self.as_ref().len()
368    }
369
370    fn is_empty(&self) -> bool {
371        self.as_ref().is_empty()
372    }
373
374    /// For shared buffers, this is a no-op.
375    fn shrink_to_fit(&mut self) {
376        if let Some(slf) = Arc::get_mut(self) {
377            slf.shrink_to_fit();
378        } else {
379            // We ignore shared buffers.
380        }
381    }
382
383    fn offset(&self) -> usize {
384        self.as_ref().offset()
385    }
386
387    fn nulls(&self) -> Option<&NullBuffer> {
388        self.as_ref().nulls()
389    }
390
391    fn logical_nulls(&self) -> Option<NullBuffer> {
392        self.as_ref().logical_nulls()
393    }
394
395    fn is_null(&self, index: usize) -> bool {
396        self.as_ref().is_null(index)
397    }
398
399    fn is_valid(&self, index: usize) -> bool {
400        self.as_ref().is_valid(index)
401    }
402
403    fn null_count(&self) -> usize {
404        self.as_ref().null_count()
405    }
406
407    fn logical_null_count(&self) -> usize {
408        self.as_ref().logical_null_count()
409    }
410
411    fn is_nullable(&self) -> bool {
412        self.as_ref().is_nullable()
413    }
414
415    fn get_buffer_memory_size(&self) -> usize {
416        self.as_ref().get_buffer_memory_size()
417    }
418
419    fn get_array_memory_size(&self) -> usize {
420        self.as_ref().get_array_memory_size()
421    }
422}
423
424impl<T: Array> Array for &T {
425    fn as_any(&self) -> &dyn Any {
426        T::as_any(self)
427    }
428
429    fn to_data(&self) -> ArrayData {
430        T::to_data(self)
431    }
432
433    fn into_data(self) -> ArrayData {
434        self.to_data()
435    }
436
437    fn data_type(&self) -> &DataType {
438        T::data_type(self)
439    }
440
441    fn slice(&self, offset: usize, length: usize) -> ArrayRef {
442        T::slice(self, offset, length)
443    }
444
445    fn len(&self) -> usize {
446        T::len(self)
447    }
448
449    fn is_empty(&self) -> bool {
450        T::is_empty(self)
451    }
452
453    fn offset(&self) -> usize {
454        T::offset(self)
455    }
456
457    fn nulls(&self) -> Option<&NullBuffer> {
458        T::nulls(self)
459    }
460
461    fn logical_nulls(&self) -> Option<NullBuffer> {
462        T::logical_nulls(self)
463    }
464
465    fn is_null(&self, index: usize) -> bool {
466        T::is_null(self, index)
467    }
468
469    fn is_valid(&self, index: usize) -> bool {
470        T::is_valid(self, index)
471    }
472
473    fn null_count(&self) -> usize {
474        T::null_count(self)
475    }
476
477    fn logical_null_count(&self) -> usize {
478        T::logical_null_count(self)
479    }
480
481    fn is_nullable(&self) -> bool {
482        T::is_nullable(self)
483    }
484
485    fn get_buffer_memory_size(&self) -> usize {
486        T::get_buffer_memory_size(self)
487    }
488
489    fn get_array_memory_size(&self) -> usize {
490        T::get_array_memory_size(self)
491    }
492}
493
/// A generic trait for accessing the values of an [`Array`]
///
/// This trait helps write specialized implementations of algorithms for
/// different array types. Specialized implementations allow the compiler
/// to optimize the code for the specific array type, which can lead to
/// significant performance improvements.
///
/// # Example
/// For example, to write three different implementations of a string length function
/// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
/// # use arrow_buffer::ArrowNativeType;
/// # use arrow_array::cast::AsArray;
/// # use arrow_array::iterator::ArrayIter;
/// # use arrow_array::types::{Int32Type, Int64Type};
/// # use arrow_schema::{ArrowError, DataType};
/// /// This function takes a dynamically typed `ArrayRef` and
/// /// calls one of three specialized implementations
/// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
///     match arg.data_type() {
///         DataType::Utf8 => {
///             // downcast the ArrayRef to a StringArray and call the specialized implementation
///             let string_array = arg.as_string::<i32>();
///             character_length_general::<Int32Type, _>(string_array)
///         }
///         DataType::LargeUtf8 => {
///             character_length_general::<Int64Type, _>(arg.as_string::<i64>())
///         }
///         DataType::Utf8View => {
///             character_length_general::<Int32Type, _>(arg.as_string_view())
///         }
///         _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
///     }
/// }
///
/// /// A generic implementation of the character_length function
/// /// This function uses the `ArrayAccessor` trait to access the values of the array
/// /// so the compiler can generate specialized implementations for different array types
/// ///
/// /// Returns a new array with the length of each string in the input array
/// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
/// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
/// ///
/// /// This is generic on the type of the primitive array (different string arrays have
/// /// different lengths) and the type of the array accessor (different string arrays
/// /// have different ways to access the values)
/// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
///     array: V,
/// ) -> Result<ArrayRef, ArrowError>
/// where
///     T::Native: OffsetSizeTrait,
/// {
///     let iter = ArrayIter::new(array);
///     // Create a Int32Array / Int64Array with the length of each string
///     let result = iter
///         .map(|string| {
///             string.map(|string: &str| {
///                 T::Native::from_usize(string.chars().count())
///                     .expect("should not fail as string.chars will always return integer")
///             })
///         })
///         .collect::<PrimitiveArray<T>>();
///
///     // Return the result as a new ArrayRef (dynamically typed)
///     Ok(Arc::new(result) as ArrayRef)
/// }
/// ```
///
/// # Validity
///
/// An [`ArrayAccessor`] must always return a well-defined value for an index
/// that is within the bounds `0..Array::len`, including for null indexes where
/// [`Array::is_null`] is true.
///
/// The value at null indexes is unspecified, and implementations must not rely
/// on a specific value such as [`Default::default`] being returned, however, it
/// must not be undefined
pub trait ArrayAccessor: Array {
    /// The Arrow type of the element being accessed.
    type Item: Send + Sync;

    /// Returns the element at index `index`
    /// # Panics
    /// Panics if `index` is outside the bounds of the array
    fn value(&self, index: usize) -> Self::Item;

    /// Returns the element at index `index`
    /// # Safety
    /// Caller is responsible for ensuring that the index is within the bounds of the array
    unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
}
588
/// A trait for Arrow String Arrays, currently three types are supported:
/// - [`StringArray`]
/// - [`LargeStringArray`]
/// - [`StringViewArray`]
///
/// This trait helps to abstract over the different types of string arrays
/// so that we don't need to duplicate the implementation for each type.
///
/// Implementors are [`ArrayAccessor`]s whose items are `&'a str`.
pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
    /// Returns true if all data within this string array is ASCII
    fn is_ascii(&self) -> bool;

    /// Constructs a new iterator
    fn iter(&self) -> ArrayIter<Self>;
}
603
604impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
605    fn is_ascii(&self) -> bool {
606        GenericStringArray::<O>::is_ascii(self)
607    }
608
609    fn iter(&self) -> ArrayIter<Self> {
610        GenericStringArray::<O>::iter(self)
611    }
612}
613impl<'a> StringArrayType<'a> for &'a StringViewArray {
614    fn is_ascii(&self) -> bool {
615        StringViewArray::is_ascii(self)
616    }
617
618    fn iter(&self) -> ArrayIter<Self> {
619        StringViewArray::iter(self)
620    }
621}
622
/// A trait for Arrow Binary Arrays, currently three types are supported:
/// - [`BinaryArray`]
/// - [`LargeBinaryArray`]
/// - [`BinaryViewArray`]
///
/// This trait helps to abstract over the different types of binary arrays
/// so that we don't need to duplicate the implementation for each type.
///
/// Implementors are [`ArrayAccessor`]s whose items are `&'a [u8]`.
pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
    /// Constructs a new iterator
    fn iter(&self) -> ArrayIter<Self>;
}
634
635impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
636    fn iter(&self) -> ArrayIter<Self> {
637        GenericBinaryArray::<O>::iter(self)
638    }
639}
640impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
641    fn iter(&self) -> ArrayIter<Self> {
642        BinaryViewArray::iter(self)
643    }
644}
645
646impl PartialEq for dyn Array + '_ {
647    fn eq(&self, other: &Self) -> bool {
648        self.to_data().eq(&other.to_data())
649    }
650}
651
652impl<T: Array> PartialEq<T> for dyn Array + '_ {
653    fn eq(&self, other: &T) -> bool {
654        self.to_data().eq(&other.to_data())
655    }
656}
657
658impl PartialEq for NullArray {
659    fn eq(&self, other: &NullArray) -> bool {
660        self.to_data().eq(&other.to_data())
661    }
662}
663
664impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
665    fn eq(&self, other: &PrimitiveArray<T>) -> bool {
666        self.to_data().eq(&other.to_data())
667    }
668}
669
670impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
671    fn eq(&self, other: &Self) -> bool {
672        self.to_data().eq(&other.to_data())
673    }
674}
675
676impl PartialEq for BooleanArray {
677    fn eq(&self, other: &BooleanArray) -> bool {
678        self.to_data().eq(&other.to_data())
679    }
680}
681
682impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
683    fn eq(&self, other: &Self) -> bool {
684        self.to_data().eq(&other.to_data())
685    }
686}
687
688impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
689    fn eq(&self, other: &Self) -> bool {
690        self.to_data().eq(&other.to_data())
691    }
692}
693
694impl PartialEq for FixedSizeBinaryArray {
695    fn eq(&self, other: &Self) -> bool {
696        self.to_data().eq(&other.to_data())
697    }
698}
699
700impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
701    fn eq(&self, other: &Self) -> bool {
702        self.to_data().eq(&other.to_data())
703    }
704}
705
706impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
707    fn eq(&self, other: &Self) -> bool {
708        self.to_data().eq(&other.to_data())
709    }
710}
711
712impl PartialEq for MapArray {
713    fn eq(&self, other: &Self) -> bool {
714        self.to_data().eq(&other.to_data())
715    }
716}
717
718impl PartialEq for FixedSizeListArray {
719    fn eq(&self, other: &Self) -> bool {
720        self.to_data().eq(&other.to_data())
721    }
722}
723
724impl PartialEq for StructArray {
725    fn eq(&self, other: &Self) -> bool {
726        self.to_data().eq(&other.to_data())
727    }
728}
729
730impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
731    fn eq(&self, other: &Self) -> bool {
732        self.to_data().eq(&other.to_data())
733    }
734}
735
736impl<R: RunEndIndexType> PartialEq for RunArray<R> {
737    fn eq(&self, other: &Self) -> bool {
738        self.to_data().eq(&other.to_data())
739    }
740}
741
742/// Constructs an array using the input `data`.
743/// Returns a reference-counted `Array` instance.
744pub fn make_array(data: ArrayData) -> ArrayRef {
745    match data.data_type() {
746        DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
747        DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
748        DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
749        DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
750        DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
751        DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
752        DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
753        DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
754        DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
755        DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
756        DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
757        DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
758        DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
759        DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
760        DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
761        DataType::Time32(TimeUnit::Millisecond) => {
762            Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
763        }
764        DataType::Time64(TimeUnit::Microsecond) => {
765            Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
766        }
767        DataType::Time64(TimeUnit::Nanosecond) => {
768            Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
769        }
770        DataType::Timestamp(TimeUnit::Second, _) => {
771            Arc::new(TimestampSecondArray::from(data)) as ArrayRef
772        }
773        DataType::Timestamp(TimeUnit::Millisecond, _) => {
774            Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
775        }
776        DataType::Timestamp(TimeUnit::Microsecond, _) => {
777            Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
778        }
779        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
780            Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
781        }
782        DataType::Interval(IntervalUnit::YearMonth) => {
783            Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
784        }
785        DataType::Interval(IntervalUnit::DayTime) => {
786            Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
787        }
788        DataType::Interval(IntervalUnit::MonthDayNano) => {
789            Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
790        }
791        DataType::Duration(TimeUnit::Second) => {
792            Arc::new(DurationSecondArray::from(data)) as ArrayRef
793        }
794        DataType::Duration(TimeUnit::Millisecond) => {
795            Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
796        }
797        DataType::Duration(TimeUnit::Microsecond) => {
798            Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
799        }
800        DataType::Duration(TimeUnit::Nanosecond) => {
801            Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
802        }
803        DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
804        DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
805        DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
806        DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
807        DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
808        DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
809        DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
810        DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
811        DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
812        DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
813        DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
814        DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
815        DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
816        DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
817        DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
818        DataType::Dictionary(ref key_type, _) => match key_type.as_ref() {
819            DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
820            DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
821            DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
822            DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
823            DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
824            DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
825            DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
826            DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
827            dt => panic!("Unexpected dictionary key type {dt:?}"),
828        },
829        DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() {
830            DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
831            DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
832            DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
833            dt => panic!("Unexpected data type for run_ends array {dt:?}"),
834        },
835        DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
836        DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
837        DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
838        dt => panic!("Unexpected data type {dt:?}"),
839    }
840}
841
842/// Creates a new empty array
843///
844/// ```
845/// use std::sync::Arc;
846/// use arrow_schema::DataType;
847/// use arrow_array::{ArrayRef, Int32Array, new_empty_array};
848///
849/// let empty_array = new_empty_array(&DataType::Int32);
850/// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec<i32>));
851///
852/// assert_eq!(&array, &empty_array);
853/// ```
854pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
855    let data = ArrayData::new_empty(data_type);
856    make_array(data)
857}
858
859/// Creates a new array of `data_type` of length `length` filled
860/// entirely of `NULL` values
861///
862/// ```
863/// use std::sync::Arc;
864/// use arrow_schema::DataType;
865/// use arrow_array::{ArrayRef, Int32Array, new_null_array};
866///
867/// let null_array = new_null_array(&DataType::Int32, 3);
868/// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None]));
869///
870/// assert_eq!(&array, &null_array);
871/// ```
872pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
873    make_array(ArrayData::new_null(data_type, length))
874}
875
876/// Helper function that gets offset from an [`ArrayData`]
877///
878/// # Safety
879///
880/// - ArrayData must contain a valid [`OffsetBuffer`] as its first buffer
881unsafe fn get_offsets<O: ArrowNativeType>(data: &ArrayData) -> OffsetBuffer<O> {
882    match data.is_empty() && data.buffers()[0].is_empty() {
883        true => OffsetBuffer::new_empty(),
884        false => {
885            let buffer =
886                ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1);
887            // Safety:
888            // ArrayData is valid
889            unsafe { OffsetBuffer::new_unchecked(buffer) }
890        }
891    }
892}
893
894/// Helper function for printing potentially long arrays.
895fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
896where
897    A: Array,
898    F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
899{
900    let head = std::cmp::min(10, array.len());
901
902    for i in 0..head {
903        if array.is_null(i) {
904            writeln!(f, "  null,")?;
905        } else {
906            write!(f, "  ")?;
907            print_item(array, i, f)?;
908            writeln!(f, ",")?;
909        }
910    }
911    if array.len() > 10 {
912        if array.len() > 20 {
913            writeln!(f, "  ...{} elements...,", array.len() - 20)?;
914        }
915
916        let tail = std::cmp::max(head, array.len() - 10);
917
918        for i in tail..array.len() {
919            if array.is_null(i) {
920                writeln!(f, "  null,")?;
921            } else {
922                write!(f, "  ")?;
923                print_item(array, i, f)?;
924                writeln!(f, ",")?;
925            }
926        }
927    }
928    Ok(())
929}
930
931#[cfg(test)]
932mod tests {
933    use super::*;
934    use crate::cast::{as_union_array, downcast_array};
935    use crate::downcast_run_array;
936    use arrow_buffer::MutableBuffer;
937    use arrow_schema::{Field, Fields, UnionFields, UnionMode};
938
939    #[test]
940    fn test_empty_primitive() {
941        let array = new_empty_array(&DataType::Int32);
942        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
943        assert_eq!(a.len(), 0);
944        let expected: &[i32] = &[];
945        assert_eq!(a.values(), expected);
946    }
947
948    #[test]
949    fn test_empty_variable_sized() {
950        let array = new_empty_array(&DataType::Utf8);
951        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
952        assert_eq!(a.len(), 0);
953        assert_eq!(a.value_offsets()[0], 0i32);
954    }
955
956    #[test]
957    fn test_empty_list_primitive() {
958        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
959        let array = new_empty_array(&data_type);
960        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
961        assert_eq!(a.len(), 0);
962        assert_eq!(a.value_offsets()[0], 0i32);
963    }
964
965    #[test]
966    fn test_null_boolean() {
967        let array = new_null_array(&DataType::Boolean, 9);
968        let a = array.as_any().downcast_ref::<BooleanArray>().unwrap();
969        assert_eq!(a.len(), 9);
970        for i in 0..9 {
971            assert!(a.is_null(i));
972        }
973    }
974
975    #[test]
976    fn test_null_primitive() {
977        let array = new_null_array(&DataType::Int32, 9);
978        let a = array.as_any().downcast_ref::<Int32Array>().unwrap();
979        assert_eq!(a.len(), 9);
980        for i in 0..9 {
981            assert!(a.is_null(i));
982        }
983    }
984
985    #[test]
986    fn test_null_struct() {
987        // It is possible to create a null struct containing a non-nullable child
988        // see https://github.com/apache/arrow-rs/pull/3244 for details
989        let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into());
990        let array = new_null_array(&struct_type, 9);
991
992        let a = array.as_any().downcast_ref::<StructArray>().unwrap();
993        assert_eq!(a.len(), 9);
994        assert_eq!(a.column(0).len(), 9);
995        for i in 0..9 {
996            assert!(a.is_null(i));
997        }
998
999        // Make sure we can slice the resulting array.
1000        a.slice(0, 5);
1001    }
1002
1003    #[test]
1004    fn test_null_variable_sized() {
1005        let array = new_null_array(&DataType::Utf8, 9);
1006        let a = array.as_any().downcast_ref::<StringArray>().unwrap();
1007        assert_eq!(a.len(), 9);
1008        assert_eq!(a.value_offsets()[9], 0i32);
1009        for i in 0..9 {
1010            assert!(a.is_null(i));
1011        }
1012    }
1013
1014    #[test]
1015    fn test_null_list_primitive() {
1016        let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true)));
1017        let array = new_null_array(&data_type, 9);
1018        let a = array.as_any().downcast_ref::<ListArray>().unwrap();
1019        assert_eq!(a.len(), 9);
1020        assert_eq!(a.value_offsets()[9], 0i32);
1021        for i in 0..9 {
1022            assert!(a.is_null(i));
1023        }
1024    }
1025
1026    #[test]
1027    fn test_null_map() {
1028        let data_type = DataType::Map(
1029            Arc::new(Field::new(
1030                "entry",
1031                DataType::Struct(Fields::from(vec![
1032                    Field::new("key", DataType::Utf8, false),
1033                    Field::new("value", DataType::Int32, true),
1034                ])),
1035                false,
1036            )),
1037            false,
1038        );
1039        let array = new_null_array(&data_type, 9);
1040        let a = array.as_any().downcast_ref::<MapArray>().unwrap();
1041        assert_eq!(a.len(), 9);
1042        assert_eq!(a.value_offsets()[9], 0i32);
1043        for i in 0..9 {
1044            assert!(a.is_null(i));
1045        }
1046    }
1047
1048    #[test]
1049    fn test_null_dictionary() {
1050        let values =
1051            vec![None, None, None, None, None, None, None, None, None] as Vec<Option<&str>>;
1052
1053        let array: DictionaryArray<Int8Type> = values.into_iter().collect();
1054        let array = Arc::new(array) as ArrayRef;
1055
1056        let null_array = new_null_array(array.data_type(), 9);
1057        assert_eq!(&array, &null_array);
1058        assert_eq!(
1059            array.to_data().buffers()[0].len(),
1060            null_array.to_data().buffers()[0].len()
1061        );
1062    }
1063
1064    #[test]
1065    fn test_null_union() {
1066        for mode in [UnionMode::Sparse, UnionMode::Dense] {
1067            let data_type = DataType::Union(
1068                UnionFields::new(
1069                    vec![2, 1],
1070                    vec![
1071                        Field::new("foo", DataType::Int32, true),
1072                        Field::new("bar", DataType::Int64, true),
1073                    ],
1074                ),
1075                mode,
1076            );
1077            let array = new_null_array(&data_type, 4);
1078
1079            let array = as_union_array(array.as_ref());
1080            assert_eq!(array.len(), 4);
1081            assert_eq!(array.null_count(), 0);
1082            assert_eq!(array.logical_null_count(), 4);
1083
1084            for i in 0..4 {
1085                let a = array.value(i);
1086                assert_eq!(a.len(), 1);
1087                assert_eq!(a.null_count(), 1);
1088                assert_eq!(a.logical_null_count(), 1);
1089                assert!(a.is_null(0))
1090            }
1091
1092            array.to_data().validate_full().unwrap();
1093        }
1094    }
1095
1096    #[test]
1097    #[allow(unused_parens)]
1098    fn test_null_runs() {
1099        for r in [DataType::Int16, DataType::Int32, DataType::Int64] {
1100            let data_type = DataType::RunEndEncoded(
1101                Arc::new(Field::new("run_ends", r, false)),
1102                Arc::new(Field::new("values", DataType::Utf8, true)),
1103            );
1104
1105            let array = new_null_array(&data_type, 4);
1106            let array = array.as_ref();
1107
1108            downcast_run_array! {
1109                array => {
1110                    assert_eq!(array.len(), 4);
1111                    assert_eq!(array.null_count(), 0);
1112                    assert_eq!(array.logical_null_count(), 4);
1113                    assert_eq!(array.values().len(), 1);
1114                    assert_eq!(array.values().null_count(), 1);
1115                    assert_eq!(array.run_ends().len(), 4);
1116                    assert_eq!(array.run_ends().values(), &[4]);
1117
1118                    let idx = array.get_physical_indices(&[0, 1, 2, 3]).unwrap();
1119                    assert_eq!(idx, &[0,0,0,0]);
1120                }
1121                d => unreachable!("{d}")
1122            }
1123        }
1124    }
1125
1126    #[test]
1127    fn test_null_fixed_size_binary() {
1128        for size in [1, 2, 7] {
1129            let array = new_null_array(&DataType::FixedSizeBinary(size), 6);
1130            let array = array
1131                .as_ref()
1132                .as_any()
1133                .downcast_ref::<FixedSizeBinaryArray>()
1134                .unwrap();
1135
1136            assert_eq!(array.len(), 6);
1137            assert_eq!(array.null_count(), 6);
1138            assert_eq!(array.logical_null_count(), 6);
1139            array.iter().for_each(|x| assert!(x.is_none()));
1140        }
1141    }
1142
1143    #[test]
1144    fn test_memory_size_null() {
1145        let null_arr = NullArray::new(32);
1146
1147        assert_eq!(0, null_arr.get_buffer_memory_size());
1148        assert_eq!(
1149            std::mem::size_of::<usize>(),
1150            null_arr.get_array_memory_size()
1151        );
1152    }
1153
1154    #[test]
1155    fn test_memory_size_primitive() {
1156        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
1157        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(arr.data_type()));
1158
1159        // subtract empty array to avoid magic numbers for the size of additional fields
1160        assert_eq!(
1161            arr.get_array_memory_size() - empty.get_array_memory_size(),
1162            128 * std::mem::size_of::<i64>()
1163        );
1164    }
1165
1166    #[test]
1167    fn test_memory_size_primitive_sliced() {
1168        let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
1169        let slice1 = arr.slice(0, 64);
1170        let slice2 = arr.slice(64, 64);
1171
1172        // both slices report the full buffer memory usage, even though the buffers are shared
1173        assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
1174        assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
1175    }
1176
1177    #[test]
1178    fn test_memory_size_primitive_nullable() {
1179        let arr: PrimitiveArray<Int64Type> = (0..128)
1180            .map(|i| if i % 20 == 0 { Some(i) } else { None })
1181            .collect();
1182        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(
1183            ArrayData::builder(arr.data_type().clone())
1184                .add_buffer(MutableBuffer::new(0).into())
1185                .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
1186                .build()
1187                .unwrap(),
1188        );
1189
1190        // expected size is the size of the PrimitiveArray struct,
1191        // which includes the optional validity buffer
1192        // plus one buffer on the heap
1193        assert_eq!(
1194            std::mem::size_of::<PrimitiveArray<Int64Type>>(),
1195            empty_with_bitmap.get_array_memory_size()
1196        );
1197
1198        // subtract empty array to avoid magic numbers for the size of additional fields
1199        // the size of the validity bitmap is rounded up to 64 bytes
1200        assert_eq!(
1201            arr.get_array_memory_size() - empty_with_bitmap.get_array_memory_size(),
1202            128 * std::mem::size_of::<i64>() + 64
1203        );
1204    }
1205
1206    #[test]
1207    fn test_memory_size_dictionary() {
1208        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
1209        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
1210            (0..256).map(|i| (i % values.len()) as i16),
1211        );
1212
1213        let dict_data_type = DataType::Dictionary(
1214            Box::new(keys.data_type().clone()),
1215            Box::new(values.data_type().clone()),
1216        );
1217        let dict_data = keys
1218            .into_data()
1219            .into_builder()
1220            .data_type(dict_data_type)
1221            .child_data(vec![values.into_data()])
1222            .build()
1223            .unwrap();
1224
1225        let empty_data = ArrayData::new_empty(&DataType::Dictionary(
1226            Box::new(DataType::Int16),
1227            Box::new(DataType::Int64),
1228        ));
1229
1230        let arr = DictionaryArray::<Int16Type>::from(dict_data);
1231        let empty = DictionaryArray::<Int16Type>::from(empty_data);
1232
1233        let expected_keys_size = 256 * std::mem::size_of::<i16>();
1234        assert_eq!(
1235            arr.keys().get_array_memory_size() - empty.keys().get_array_memory_size(),
1236            expected_keys_size
1237        );
1238
1239        let expected_values_size = 16 * std::mem::size_of::<i64>();
1240        assert_eq!(
1241            arr.values().get_array_memory_size() - empty.values().get_array_memory_size(),
1242            expected_values_size
1243        );
1244
1245        let expected_size = expected_keys_size + expected_values_size;
1246        assert_eq!(
1247            arr.get_array_memory_size() - empty.get_array_memory_size(),
1248            expected_size
1249        );
1250    }
1251
1252    /// Test function that takes an &dyn Array
1253    fn compute_my_thing(arr: &dyn Array) -> bool {
1254        !arr.is_empty()
1255    }
1256
1257    #[test]
1258    fn test_array_ref_as_array() {
1259        let arr: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1260
1261        // works well!
1262        assert!(compute_my_thing(&arr));
1263
1264        // Should also work when wrapped as an ArrayRef
1265        let arr: ArrayRef = Arc::new(arr);
1266        assert!(compute_my_thing(&arr));
1267        assert!(compute_my_thing(arr.as_ref()));
1268    }
1269
1270    #[test]
1271    fn test_downcast_array() {
1272        let array: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1273
1274        let boxed: ArrayRef = Arc::new(array);
1275        let array: Int32Array = downcast_array(&boxed);
1276
1277        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
1278        assert_eq!(array, expected);
1279    }
1280}