arrow_array/array/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! The concrete array definitions
19
20mod binary_array;
21
22use crate::types::*;
23use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ScalarBuffer};
24use arrow_data::ArrayData;
25use arrow_schema::{DataType, IntervalUnit, TimeUnit};
26use std::any::Any;
27use std::sync::Arc;
28
29pub use binary_array::*;
30
31mod boolean_array;
32pub use boolean_array::*;
33
34mod byte_array;
35pub use byte_array::*;
36
37mod dictionary_array;
38pub use dictionary_array::*;
39
40mod fixed_size_binary_array;
41pub use fixed_size_binary_array::*;
42
43mod fixed_size_list_array;
44pub use fixed_size_list_array::*;
45
46mod list_array;
47pub use list_array::*;
48
49mod map_array;
50pub use map_array::*;
51
52mod null_array;
53pub use null_array::*;
54
55mod primitive_array;
56pub use primitive_array::*;
57
58mod string_array;
59pub use string_array::*;
60
61mod struct_array;
62pub use struct_array::*;
63
64mod union_array;
65pub use union_array::*;
66
67mod run_array;
68
69pub use run_array::*;
70
71mod byte_view_array;
72
73pub use byte_view_array::*;
74
75mod list_view_array;
76
77pub use list_view_array::*;
78
79use crate::iterator::ArrayIter;
80
81/// An array in the [Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html)
82///
83/// # Safety
84///
85/// Implementations of this trait must ensure that all methods implementations comply with
86/// the Arrow specification. No safety guards are placed and failing to comply with it can
87/// translate into panics or undefined behavior. For example, a value computed based on `len`
88/// may be used as a direct index into memory regions without checks.
89///
90/// Note that it is likely impossible to correctly implement the trait for a
91/// third party type, as substantial arrow-rs functionality is based on the
92/// return values of [`Array::data_type`] and third party types cannot extend
93/// the [`DataType`] enum. So any code that attempts casting based on data type
94/// (including internal arrow library code) risks a panic or undefined behavior.
95/// See [this discussion] for more details.
96///
97/// This trait might be sealed in the future. Use at your own risk.
98///
99/// [this discussion]: https://github.com/apache/arrow-rs/pull/9234#pullrequestreview-3708950936
100pub unsafe trait Array: std::fmt::Debug + Send + Sync {
101 /// Returns the array as [`Any`] so that it can be
102 /// downcasted to a specific implementation.
103 ///
104 /// # Example:
105 ///
106 /// ```
107 /// # use std::sync::Arc;
108 /// # use arrow_array::{Int32Array, RecordBatch};
109 /// # use arrow_schema::{Schema, Field, DataType, ArrowError};
110 ///
111 /// let id = Int32Array::from(vec![1, 2, 3, 4, 5]);
112 /// let batch = RecordBatch::try_new(
113 /// Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])),
114 /// vec![Arc::new(id)]
115 /// ).unwrap();
116 ///
117 /// let int32array = batch
118 /// .column(0)
119 /// .as_any()
120 /// .downcast_ref::<Int32Array>()
121 /// .expect("Failed to downcast");
122 /// ```
123 fn as_any(&self) -> &dyn Any;
124
125 /// Returns the underlying data of this array
126 fn to_data(&self) -> ArrayData;
127
128 /// Returns the underlying data of this array
129 ///
130 /// Unlike [`Array::to_data`] this consumes self, allowing it avoid unnecessary clones
131 fn into_data(self) -> ArrayData;
132
133 /// Returns a reference to the [`DataType`] of this array.
134 ///
135 /// # Example:
136 ///
137 /// ```
138 /// use arrow_schema::DataType;
139 /// use arrow_array::{Array, Int32Array};
140 ///
141 /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
142 ///
143 /// assert_eq!(*array.data_type(), DataType::Int32);
144 /// ```
145 fn data_type(&self) -> &DataType;
146
147 /// Returns a zero-copy slice of this array with the indicated offset and length.
148 ///
149 /// # Example:
150 ///
151 /// ```
152 /// use arrow_array::{Array, Int32Array};
153 ///
154 /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
155 /// // Make slice over the values [2, 3, 4]
156 /// let array_slice = array.slice(1, 3);
157 ///
158 /// assert_eq!(&array_slice, &Int32Array::from(vec![2, 3, 4]));
159 /// ```
160 fn slice(&self, offset: usize, length: usize) -> ArrayRef;
161
162 /// Returns the length (i.e., number of elements) of this array.
163 ///
164 /// # Example:
165 ///
166 /// ```
167 /// use arrow_array::{Array, Int32Array};
168 ///
169 /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
170 ///
171 /// assert_eq!(array.len(), 5);
172 /// ```
173 fn len(&self) -> usize;
174
175 /// Returns whether this array is empty.
176 ///
177 /// # Example:
178 ///
179 /// ```
180 /// use arrow_array::{Array, Int32Array};
181 ///
182 /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]);
183 ///
184 /// assert_eq!(array.is_empty(), false);
185 /// ```
186 fn is_empty(&self) -> bool;
187
188 /// Shrinks the capacity of any exclusively owned buffer as much as possible
189 ///
190 /// Shared or externally allocated buffers will be ignored, and
191 /// any buffer offsets will be preserved.
192 fn shrink_to_fit(&mut self) {}
193
194 /// Returns the offset into the underlying data used by this array(-slice).
195 /// Note that the underlying data can be shared by many arrays.
196 /// This defaults to `0`.
197 ///
198 /// # Example:
199 ///
200 /// ```
201 /// use arrow_array::{Array, BooleanArray};
202 ///
203 /// let array = BooleanArray::from(vec![false, false, true, true]);
204 /// let array_slice = array.slice(1, 3);
205 ///
206 /// assert_eq!(array.offset(), 0);
207 /// assert_eq!(array_slice.offset(), 1);
208 /// ```
209 fn offset(&self) -> usize;
210
211 /// Returns the null buffer of this array if any.
212 ///
213 /// The null buffer contains the "physical" nulls of an array, that is how
214 /// the nulls are represented in the underlying arrow format.
215 ///
216 /// The physical representation is efficient, but is sometimes non intuitive
217 /// for certain array types such as those with nullable child arrays like
218 /// [`DictionaryArray::values`], [`RunArray::values`] or [`UnionArray`], or without a
219 /// null buffer, such as [`NullArray`].
220 ///
221 /// To determine if each element of such an array is "logically" null,
222 /// use the slower [`Array::logical_nulls`] to obtain a computed mask.
223 fn nulls(&self) -> Option<&NullBuffer>;
224
225 /// Returns a potentially computed [`NullBuffer`] that represents the logical
226 /// null values of this array, if any.
227 ///
228 /// Logical nulls represent the values that are null in the array,
229 /// regardless of the underlying physical arrow representation.
230 ///
231 /// For most array types, this is equivalent to the "physical" nulls
232 /// returned by [`Array::nulls`]. It is different for the following cases, because which
233 /// elements are null is not encoded in a single null buffer:
234 ///
235 /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls
236 /// * [`RunArray`] where [`RunArray::values`] contains nulls
237 /// * [`NullArray`] where all indices are nulls
238 /// * [`UnionArray`] where the selected values contains nulls
239 ///
240 /// In these cases a logical [`NullBuffer`] will be computed, encoding the
241 /// logical nullability of these arrays, beyond what is encoded in
242 /// [`Array::nulls`]
243 fn logical_nulls(&self) -> Option<NullBuffer> {
244 self.nulls().cloned()
245 }
246
247 /// Returns whether the element at `index` is null according to [`Array::nulls`]
248 ///
249 /// Note: For performance reasons, this method returns nullability solely as determined by the
250 /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always
251 /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`], [`RunArray`] and [`UnionArray`] may
252 /// encode nullability in their children. See [`Self::logical_nulls`] for more information.
253 ///
254 /// # Example:
255 ///
256 /// ```
257 /// use arrow_array::{Array, Int32Array, NullArray};
258 ///
259 /// let array = Int32Array::from(vec![Some(1), None]);
260 /// assert_eq!(array.is_null(0), false);
261 /// assert_eq!(array.is_null(1), true);
262 ///
263 /// // NullArrays do not have a null buffer, and therefore always
264 /// // return false for is_null.
265 /// let array = NullArray::new(1);
266 /// assert_eq!(array.is_null(0), false);
267 /// ```
268 fn is_null(&self, index: usize) -> bool {
269 self.nulls().map(|n| n.is_null(index)).unwrap_or_default()
270 }
271
272 /// Returns whether the element at `index` is *not* null, the
273 /// opposite of [`Self::is_null`].
274 ///
275 /// # Example:
276 ///
277 /// ```
278 /// use arrow_array::{Array, Int32Array};
279 ///
280 /// let array = Int32Array::from(vec![Some(1), None]);
281 ///
282 /// assert_eq!(array.is_valid(0), true);
283 /// assert_eq!(array.is_valid(1), false);
284 /// ```
285 fn is_valid(&self, index: usize) -> bool {
286 !self.is_null(index)
287 }
288
289 /// Returns the total number of physical null values in this array.
290 ///
291 /// Note: this method returns the physical null count, i.e. that encoded in [`Array::nulls`],
292 /// see [`Array::logical_nulls`] for logical nullability
293 ///
294 /// # Example:
295 ///
296 /// ```
297 /// use arrow_array::{Array, Int32Array};
298 ///
299 /// // Construct an array with values [1, NULL, NULL]
300 /// let array = Int32Array::from(vec![Some(1), None, None]);
301 ///
302 /// assert_eq!(array.null_count(), 2);
303 /// ```
304 fn null_count(&self) -> usize {
305 self.nulls().map(|n| n.null_count()).unwrap_or_default()
306 }
307
308 /// Returns the total number of logical null values in this array.
309 ///
310 /// Note: this method returns the logical null count, i.e. that encoded in
311 /// [`Array::logical_nulls`]. In general this is equivalent to [`Array::null_count`] but may differ in the
312 /// presence of logical nullability, see [`Array::nulls`] and [`Array::logical_nulls`].
313 ///
314 /// # Example:
315 ///
316 /// ```
317 /// use arrow_array::{Array, Int32Array};
318 ///
319 /// // Construct an array with values [1, NULL, NULL]
320 /// let array = Int32Array::from(vec![Some(1), None, None]);
321 ///
322 /// assert_eq!(array.logical_null_count(), 2);
323 /// ```
324 fn logical_null_count(&self) -> usize {
325 self.logical_nulls()
326 .map(|n| n.null_count())
327 .unwrap_or_default()
328 }
329
330 /// Returns `false` if the array is guaranteed to not contain any logical nulls
331 ///
332 /// This is generally equivalent to `Array::logical_null_count() != 0` unless determining
333 /// the logical nulls is expensive, in which case this method can return true even for an
334 /// array without nulls.
335 ///
336 /// This is also generally equivalent to `Array::null_count() != 0` but may differ in the
337 /// presence of logical nullability, see [`Array::logical_null_count`] and [`Array::null_count`].
338 ///
339 /// Implementations will return `true` unless they can cheaply prove no logical nulls
340 /// are present. For example a [`DictionaryArray`] with nullable values will still return true,
341 /// even if the nulls present in [`DictionaryArray::values`] are not referenced by any key,
342 /// and therefore would not appear in [`Array::logical_nulls`].
343 fn is_nullable(&self) -> bool {
344 self.logical_null_count() != 0
345 }
346
347 /// Returns the total number of bytes of memory pointed to by this array.
348 /// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
349 /// Note that this does not always correspond to the exact memory usage of an array,
350 /// since multiple arrays can share the same buffers or slices thereof.
351 fn get_buffer_memory_size(&self) -> usize;
352
353 /// Returns the total number of bytes of memory occupied physically by this array.
354 /// This value will always be greater than returned by `get_buffer_memory_size()` and
355 /// includes the overhead of the data structures that contain the pointers to the various buffers.
356 fn get_array_memory_size(&self) -> usize;
357
358 /// Claim memory used by this array in the provided memory pool.
359 ///
360 /// This recursively claims memory for:
361 /// - All data buffers in this array
362 /// - All child arrays (for nested types like List, Struct, etc.)
363 /// - The null bitmap buffer if present
364 ///
365 /// This method guarantees that the memory pool will only compute occupied memory
366 /// exactly once. For example, if this array is derived from operations like `slice`,
367 /// calling `claim` on it would not change the memory pool's usage if the underlying buffers
368 /// are already counted before.
369 ///
370 /// # Example
371 /// ```
372 /// # use arrow_array::{Int32Array, Array};
373 /// # use arrow_buffer::TrackingMemoryPool;
374 /// # use arrow_buffer::MemoryPool;
375 ///
376 /// let pool = TrackingMemoryPool::default();
377 ///
378 /// let small_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
379 /// let small_array_size = small_array.get_buffer_memory_size();
380 ///
381 /// // Claim the array's memory in the pool
382 /// small_array.claim(&pool);
383 ///
384 /// // Create and claim slices of `small_array`; should not increase memory usage
385 /// let slice1 = small_array.slice(0, 2);
386 /// let slice2 = small_array.slice(2, 2);
387 /// slice1.claim(&pool);
388 /// slice2.claim(&pool);
389 ///
390 /// assert_eq!(pool.used(), small_array_size);
391 ///
392 /// // Create a `large_array` which does not derive from the original `small_array`
393 ///
394 /// let large_array = Int32Array::from((0..1000).collect::<Vec<i32>>());
395 /// let large_array_size = large_array.get_buffer_memory_size();
396 ///
397 /// large_array.claim(&pool);
398 ///
399 /// // Trying to claim more than once is a no-op
400 /// large_array.claim(&pool);
401 /// large_array.claim(&pool);
402 ///
403 /// assert_eq!(pool.used(), small_array_size + large_array_size);
404 ///
405 /// let sum_of_all_sizes = small_array_size + large_array_size + slice1.get_buffer_memory_size() + slice2.get_buffer_memory_size();
406 ///
407 /// // `get_buffer_memory_size` works independently of the memory pool, so a sum of all the
408 /// // arrays in scope will always be >= the memory used reported by the memory pool.
409 /// assert_ne!(pool.used(), sum_of_all_sizes);
410 ///
411 /// // Until the final claim is dropped the buffer size remains accounted for
412 /// drop(small_array);
413 /// drop(slice1);
414 ///
415 /// assert_eq!(pool.used(), small_array_size + large_array_size);
416 ///
417 /// // Dropping this finally releases the buffer that was backing `small_array`
418 /// drop(slice2);
419 ///
420 /// assert_eq!(pool.used(), large_array_size);
421 /// ```
422 #[cfg(feature = "pool")]
423 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
424 self.to_data().claim(pool)
425 }
426}
427
428/// A reference-counted reference to a generic `Array`
429pub type ArrayRef = Arc<dyn Array>;
430
431/// Ergonomics: Allow use of an ArrayRef as an `&dyn Array`
432unsafe impl Array for ArrayRef {
433 fn as_any(&self) -> &dyn Any {
434 self.as_ref().as_any()
435 }
436
437 fn to_data(&self) -> ArrayData {
438 self.as_ref().to_data()
439 }
440
441 fn into_data(self) -> ArrayData {
442 self.to_data()
443 }
444
445 fn data_type(&self) -> &DataType {
446 self.as_ref().data_type()
447 }
448
449 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
450 self.as_ref().slice(offset, length)
451 }
452
453 fn len(&self) -> usize {
454 self.as_ref().len()
455 }
456
457 fn is_empty(&self) -> bool {
458 self.as_ref().is_empty()
459 }
460
461 /// For shared buffers, this is a no-op.
462 fn shrink_to_fit(&mut self) {
463 if let Some(slf) = Arc::get_mut(self) {
464 slf.shrink_to_fit();
465 } else {
466 // We ignore shared buffers.
467 }
468 }
469
470 fn offset(&self) -> usize {
471 self.as_ref().offset()
472 }
473
474 fn nulls(&self) -> Option<&NullBuffer> {
475 self.as_ref().nulls()
476 }
477
478 fn logical_nulls(&self) -> Option<NullBuffer> {
479 self.as_ref().logical_nulls()
480 }
481
482 fn is_null(&self, index: usize) -> bool {
483 self.as_ref().is_null(index)
484 }
485
486 fn is_valid(&self, index: usize) -> bool {
487 self.as_ref().is_valid(index)
488 }
489
490 fn null_count(&self) -> usize {
491 self.as_ref().null_count()
492 }
493
494 fn logical_null_count(&self) -> usize {
495 self.as_ref().logical_null_count()
496 }
497
498 fn is_nullable(&self) -> bool {
499 self.as_ref().is_nullable()
500 }
501
502 fn get_buffer_memory_size(&self) -> usize {
503 self.as_ref().get_buffer_memory_size()
504 }
505
506 fn get_array_memory_size(&self) -> usize {
507 self.as_ref().get_array_memory_size()
508 }
509
510 #[cfg(feature = "pool")]
511 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
512 self.as_ref().claim(pool)
513 }
514}
515
516unsafe impl<T: Array> Array for &T {
517 fn as_any(&self) -> &dyn Any {
518 T::as_any(self)
519 }
520
521 fn to_data(&self) -> ArrayData {
522 T::to_data(self)
523 }
524
525 fn into_data(self) -> ArrayData {
526 self.to_data()
527 }
528
529 fn data_type(&self) -> &DataType {
530 T::data_type(self)
531 }
532
533 fn slice(&self, offset: usize, length: usize) -> ArrayRef {
534 T::slice(self, offset, length)
535 }
536
537 fn len(&self) -> usize {
538 T::len(self)
539 }
540
541 fn is_empty(&self) -> bool {
542 T::is_empty(self)
543 }
544
545 fn offset(&self) -> usize {
546 T::offset(self)
547 }
548
549 fn nulls(&self) -> Option<&NullBuffer> {
550 T::nulls(self)
551 }
552
553 fn logical_nulls(&self) -> Option<NullBuffer> {
554 T::logical_nulls(self)
555 }
556
557 fn is_null(&self, index: usize) -> bool {
558 T::is_null(self, index)
559 }
560
561 fn is_valid(&self, index: usize) -> bool {
562 T::is_valid(self, index)
563 }
564
565 fn null_count(&self) -> usize {
566 T::null_count(self)
567 }
568
569 fn logical_null_count(&self) -> usize {
570 T::logical_null_count(self)
571 }
572
573 fn is_nullable(&self) -> bool {
574 T::is_nullable(self)
575 }
576
577 fn get_buffer_memory_size(&self) -> usize {
578 T::get_buffer_memory_size(self)
579 }
580
581 fn get_array_memory_size(&self) -> usize {
582 T::get_array_memory_size(self)
583 }
584
585 #[cfg(feature = "pool")]
586 fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
587 T::claim(self, pool)
588 }
589}
590
591/// A generic trait for accessing the values of an [`Array`]
592///
593/// This trait helps write specialized implementations of algorithms for
594/// different array types. Specialized implementations allow the compiler
595/// to optimize the code for the specific array type, which can lead to
596/// significant performance improvements.
597///
598/// # Example
599/// For example, to write three different implementations of a string length function
600/// for [`StringArray`], [`LargeStringArray`], and [`StringViewArray`], you can write
601///
602/// ```
603/// # use std::sync::Arc;
604/// # use arrow_array::{ArrayAccessor, ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray};
605/// # use arrow_buffer::ArrowNativeType;
606/// # use arrow_array::cast::AsArray;
607/// # use arrow_array::iterator::ArrayIter;
608/// # use arrow_array::types::{Int32Type, Int64Type};
609/// # use arrow_schema::{ArrowError, DataType};
610/// /// This function takes a dynamically typed `ArrayRef` and calls
611/// /// calls one of three specialized implementations
612/// fn character_length(arg: ArrayRef) -> Result<ArrayRef, ArrowError> {
613/// match arg.data_type() {
614/// DataType::Utf8 => {
615/// // downcast the ArrayRef to a StringArray and call the specialized implementation
616/// let string_array = arg.as_string::<i32>();
617/// character_length_general::<Int32Type, _>(string_array)
618/// }
619/// DataType::LargeUtf8 => {
620/// character_length_general::<Int64Type, _>(arg.as_string::<i64>())
621/// }
622/// DataType::Utf8View => {
623/// character_length_general::<Int32Type, _>(arg.as_string_view())
624/// }
625/// _ => Err(ArrowError::InvalidArgumentError("Unsupported data type".to_string())),
626/// }
627/// }
628///
629/// /// A generic implementation of the character_length function
630/// /// This function uses the `ArrayAccessor` trait to access the values of the array
631/// /// so the compiler can generated specialized implementations for different array types
632/// ///
633/// /// Returns a new array with the length of each string in the input array
634/// /// * Int32Array for Utf8 and Utf8View arrays (lengths are 32-bit integers)
635/// /// * Int64Array for LargeUtf8 arrays (lengths are 64-bit integers)
636/// ///
637/// /// This is generic on the type of the primitive array (different string arrays have
638/// /// different lengths) and the type of the array accessor (different string arrays
639/// /// have different ways to access the values)
640/// fn character_length_general<'a, T: ArrowPrimitiveType, V: ArrayAccessor<Item = &'a str>>(
641/// array: V,
642/// ) -> Result<ArrayRef, ArrowError>
643/// where
644/// T::Native: OffsetSizeTrait,
645/// {
646/// let iter = ArrayIter::new(array);
647/// // Create a Int32Array / Int64Array with the length of each string
648/// let result = iter
649/// .map(|string| {
650/// string.map(|string: &str| {
651/// T::Native::from_usize(string.chars().count())
652/// .expect("should not fail as string.chars will always return integer")
653/// })
654/// })
655/// .collect::<PrimitiveArray<T>>();
656///
657/// /// Return the result as a new ArrayRef (dynamically typed)
658/// Ok(Arc::new(result) as ArrayRef)
659/// }
660/// ```
661///
662/// # Validity
663///
664/// An [`ArrayAccessor`] must always return a well-defined value for an index
665/// that is within the bounds `0..Array::len`, including for null indexes where
666/// [`Array::is_null`] is true.
667///
668/// The value at null indexes is unspecified, and implementations must not rely
669/// on a specific value such as [`Default::default`] being returned, however, it
670/// must not be undefined
671pub trait ArrayAccessor: Array {
672 /// The Arrow type of the element being accessed.
673 type Item: Send + Sync;
674
675 /// Returns the element at index `i`
676 /// # Panics
677 /// Panics if the value is outside the bounds of the array
678 fn value(&self, index: usize) -> Self::Item;
679
680 /// Returns the element at index `i`
681 /// # Safety
682 /// Caller is responsible for ensuring that the index is within the bounds of the array
683 unsafe fn value_unchecked(&self, index: usize) -> Self::Item;
684}
685
686/// A trait for Arrow String Arrays, currently three types are supported:
687/// - `StringArray`
688/// - `LargeStringArray`
689/// - `StringViewArray`
690///
691/// This trait helps to abstract over the different types of string arrays
692/// so that we don't need to duplicate the implementation for each type.
693pub trait StringArrayType<'a>: ArrayAccessor<Item = &'a str> + Sized {
694 /// Returns true if all data within this string array is ASCII
695 fn is_ascii(&self) -> bool;
696
697 /// Constructs a new iterator
698 fn iter(&self) -> ArrayIter<Self>;
699}
700
701impl<'a, O: OffsetSizeTrait> StringArrayType<'a> for &'a GenericStringArray<O> {
702 fn is_ascii(&self) -> bool {
703 GenericStringArray::<O>::is_ascii(self)
704 }
705
706 fn iter(&self) -> ArrayIter<Self> {
707 GenericStringArray::<O>::iter(self)
708 }
709}
710impl<'a> StringArrayType<'a> for &'a StringViewArray {
711 fn is_ascii(&self) -> bool {
712 StringViewArray::is_ascii(self)
713 }
714
715 fn iter(&self) -> ArrayIter<Self> {
716 StringViewArray::iter(self)
717 }
718}
719
720/// A trait for Arrow Binary Arrays, currently four types are supported:
721/// - `BinaryArray`
722/// - `LargeBinaryArray`
723/// - `BinaryViewArray`
724/// - `FixedSizeBinaryArray`
725///
726/// This trait helps to abstract over the different types of binary arrays
727/// so that we don't need to duplicate the implementation for each type.
728pub trait BinaryArrayType<'a>: ArrayAccessor<Item = &'a [u8]> + Sized {
729 /// Constructs a new iterator
730 fn iter(&self) -> ArrayIter<Self>;
731}
732
733impl<'a, O: OffsetSizeTrait> BinaryArrayType<'a> for &'a GenericBinaryArray<O> {
734 fn iter(&self) -> ArrayIter<Self> {
735 GenericBinaryArray::<O>::iter(self)
736 }
737}
738impl<'a> BinaryArrayType<'a> for &'a BinaryViewArray {
739 fn iter(&self) -> ArrayIter<Self> {
740 BinaryViewArray::iter(self)
741 }
742}
743impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray {
744 fn iter(&self) -> ArrayIter<Self> {
745 FixedSizeBinaryArray::iter(self)
746 }
747}
748
749/// A trait for Arrow list-like arrays, abstracting over
750/// [`GenericListArray`], [`GenericListViewArray`], and [`FixedSizeListArray`].
751///
752/// This trait provides a uniform interface for accessing the child values and
753/// computing the element range for a given index, regardless of the underlying
754/// list layout (offsets, offsets+sizes, or fixed-size).
755pub trait ListLikeArray: Array {
756 /// Returns the child values array.
757 fn values(&self) -> &ArrayRef;
758
759 /// Returns the start and end indices into the values array for the list
760 /// element at `index`.
761 fn element_range(&self, index: usize) -> std::ops::Range<usize>;
762}
763
764impl PartialEq for dyn Array + '_ {
765 fn eq(&self, other: &Self) -> bool {
766 self.to_data().eq(&other.to_data())
767 }
768}
769
770impl<T: Array> PartialEq<T> for dyn Array + '_ {
771 fn eq(&self, other: &T) -> bool {
772 self.to_data().eq(&other.to_data())
773 }
774}
775
776impl PartialEq for NullArray {
777 fn eq(&self, other: &NullArray) -> bool {
778 self.to_data().eq(&other.to_data())
779 }
780}
781
782impl<T: ArrowPrimitiveType> PartialEq for PrimitiveArray<T> {
783 fn eq(&self, other: &PrimitiveArray<T>) -> bool {
784 self.to_data().eq(&other.to_data())
785 }
786}
787
788impl<K: ArrowDictionaryKeyType> PartialEq for DictionaryArray<K> {
789 fn eq(&self, other: &Self) -> bool {
790 self.to_data().eq(&other.to_data())
791 }
792}
793
794impl PartialEq for BooleanArray {
795 fn eq(&self, other: &BooleanArray) -> bool {
796 self.to_data().eq(&other.to_data())
797 }
798}
799
800impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericStringArray<OffsetSize> {
801 fn eq(&self, other: &Self) -> bool {
802 self.to_data().eq(&other.to_data())
803 }
804}
805
806impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericBinaryArray<OffsetSize> {
807 fn eq(&self, other: &Self) -> bool {
808 self.to_data().eq(&other.to_data())
809 }
810}
811
812impl PartialEq for FixedSizeBinaryArray {
813 fn eq(&self, other: &Self) -> bool {
814 self.to_data().eq(&other.to_data())
815 }
816}
817
818impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListArray<OffsetSize> {
819 fn eq(&self, other: &Self) -> bool {
820 self.to_data().eq(&other.to_data())
821 }
822}
823
824impl<OffsetSize: OffsetSizeTrait> PartialEq for GenericListViewArray<OffsetSize> {
825 fn eq(&self, other: &Self) -> bool {
826 self.to_data().eq(&other.to_data())
827 }
828}
829
830impl PartialEq for MapArray {
831 fn eq(&self, other: &Self) -> bool {
832 self.to_data().eq(&other.to_data())
833 }
834}
835
836impl PartialEq for FixedSizeListArray {
837 fn eq(&self, other: &Self) -> bool {
838 self.to_data().eq(&other.to_data())
839 }
840}
841
842impl PartialEq for StructArray {
843 fn eq(&self, other: &Self) -> bool {
844 self.to_data().eq(&other.to_data())
845 }
846}
847
848impl<T: ByteViewType + ?Sized> PartialEq for GenericByteViewArray<T> {
849 fn eq(&self, other: &Self) -> bool {
850 self.to_data().eq(&other.to_data())
851 }
852}
853
854impl<R: RunEndIndexType> PartialEq for RunArray<R> {
855 fn eq(&self, other: &Self) -> bool {
856 self.to_data().eq(&other.to_data())
857 }
858}
859
860/// Constructs an [`ArrayRef`] from an [`ArrayData`].
861///
862/// # Notes:
863///
864/// It is more efficient to directly construct the concrete array type rather
865/// than using this function as creating an `ArrayData` requires at least one
866/// additional allocation (the Vec of buffers).
867///
868/// # Example:
869/// ```
870/// # use std::sync::Arc;
871/// # use arrow_data::ArrayData;
872/// # use arrow_array::{make_array, ArrayRef, Int32Array};
873/// # use arrow_buffer::{Buffer, ScalarBuffer};
874/// # use arrow_schema::DataType;
875/// // Create an Int32Array with values [1, 2, 3]
876/// let values_buffer = Buffer::from_slice_ref(&[1, 2, 3]);
877/// // ArrayData can be constructed using ArrayDataBuilder
878/// let builder = ArrayData::builder(DataType::Int32)
879/// .len(3)
880/// .add_buffer(values_buffer.clone());
881/// let array_data = builder.build().unwrap();
882/// // Create the ArrayRef from the ArrayData
883/// let array = make_array(array_data);
884///
885/// // It is equivalent to directly constructing the Int32Array
886/// let scalar_buffer = ScalarBuffer::from(values_buffer);
887/// let int32_array: ArrayRef = Arc::new(Int32Array::new(scalar_buffer, None));
888/// assert_eq!(&array, &int32_array);
889/// ```
890pub fn make_array(data: ArrayData) -> ArrayRef {
891 match data.data_type() {
892 DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef,
893 DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef,
894 DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef,
895 DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef,
896 DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef,
897 DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef,
898 DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef,
899 DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef,
900 DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef,
901 DataType::Float16 => Arc::new(Float16Array::from(data)) as ArrayRef,
902 DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef,
903 DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef,
904 DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef,
905 DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef,
906 DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef,
907 DataType::Time32(TimeUnit::Millisecond) => {
908 Arc::new(Time32MillisecondArray::from(data)) as ArrayRef
909 }
910 DataType::Time64(TimeUnit::Microsecond) => {
911 Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef
912 }
913 DataType::Time64(TimeUnit::Nanosecond) => {
914 Arc::new(Time64NanosecondArray::from(data)) as ArrayRef
915 }
916 DataType::Timestamp(TimeUnit::Second, _) => {
917 Arc::new(TimestampSecondArray::from(data)) as ArrayRef
918 }
919 DataType::Timestamp(TimeUnit::Millisecond, _) => {
920 Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef
921 }
922 DataType::Timestamp(TimeUnit::Microsecond, _) => {
923 Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef
924 }
925 DataType::Timestamp(TimeUnit::Nanosecond, _) => {
926 Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef
927 }
928 DataType::Interval(IntervalUnit::YearMonth) => {
929 Arc::new(IntervalYearMonthArray::from(data)) as ArrayRef
930 }
931 DataType::Interval(IntervalUnit::DayTime) => {
932 Arc::new(IntervalDayTimeArray::from(data)) as ArrayRef
933 }
934 DataType::Interval(IntervalUnit::MonthDayNano) => {
935 Arc::new(IntervalMonthDayNanoArray::from(data)) as ArrayRef
936 }
937 DataType::Duration(TimeUnit::Second) => {
938 Arc::new(DurationSecondArray::from(data)) as ArrayRef
939 }
940 DataType::Duration(TimeUnit::Millisecond) => {
941 Arc::new(DurationMillisecondArray::from(data)) as ArrayRef
942 }
943 DataType::Duration(TimeUnit::Microsecond) => {
944 Arc::new(DurationMicrosecondArray::from(data)) as ArrayRef
945 }
946 DataType::Duration(TimeUnit::Nanosecond) => {
947 Arc::new(DurationNanosecondArray::from(data)) as ArrayRef
948 }
949 DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef,
950 DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef,
951 DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef,
952 DataType::BinaryView => Arc::new(BinaryViewArray::from(data)) as ArrayRef,
953 DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef,
954 DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef,
955 DataType::Utf8View => Arc::new(StringViewArray::from(data)) as ArrayRef,
956 DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef,
957 DataType::LargeList(_) => Arc::new(LargeListArray::from(data)) as ArrayRef,
958 DataType::ListView(_) => Arc::new(ListViewArray::from(data)) as ArrayRef,
959 DataType::LargeListView(_) => Arc::new(LargeListViewArray::from(data)) as ArrayRef,
960 DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef,
961 DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef,
962 DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef,
963 DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef,
964 DataType::Dictionary(key_type, _) => match key_type.as_ref() {
965 DataType::Int8 => Arc::new(DictionaryArray::<Int8Type>::from(data)) as ArrayRef,
966 DataType::Int16 => Arc::new(DictionaryArray::<Int16Type>::from(data)) as ArrayRef,
967 DataType::Int32 => Arc::new(DictionaryArray::<Int32Type>::from(data)) as ArrayRef,
968 DataType::Int64 => Arc::new(DictionaryArray::<Int64Type>::from(data)) as ArrayRef,
969 DataType::UInt8 => Arc::new(DictionaryArray::<UInt8Type>::from(data)) as ArrayRef,
970 DataType::UInt16 => Arc::new(DictionaryArray::<UInt16Type>::from(data)) as ArrayRef,
971 DataType::UInt32 => Arc::new(DictionaryArray::<UInt32Type>::from(data)) as ArrayRef,
972 DataType::UInt64 => Arc::new(DictionaryArray::<UInt64Type>::from(data)) as ArrayRef,
973 dt => unimplemented!("Unexpected dictionary key type {dt}"),
974 },
975 DataType::RunEndEncoded(run_ends_type, _) => match run_ends_type.data_type() {
976 DataType::Int16 => Arc::new(RunArray::<Int16Type>::from(data)) as ArrayRef,
977 DataType::Int32 => Arc::new(RunArray::<Int32Type>::from(data)) as ArrayRef,
978 DataType::Int64 => Arc::new(RunArray::<Int64Type>::from(data)) as ArrayRef,
979 dt => unimplemented!("Unexpected data type for run_ends array {dt}"),
980 },
981 DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef,
982 DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef,
983 DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef,
984 DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef,
985 DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef,
986 dt => unimplemented!("Unexpected data type {dt}"),
987 }
988}
989
990/// Creates a new empty array
991///
992/// ```
993/// use std::sync::Arc;
994/// use arrow_schema::DataType;
995/// use arrow_array::{ArrayRef, Int32Array, new_empty_array};
996///
997/// let empty_array = new_empty_array(&DataType::Int32);
998/// let array: ArrayRef = Arc::new(Int32Array::from(vec![] as Vec<i32>));
999///
1000/// assert_eq!(&array, &empty_array);
1001/// ```
1002pub fn new_empty_array(data_type: &DataType) -> ArrayRef {
1003 let data = ArrayData::new_empty(data_type);
1004 make_array(data)
1005}
1006
1007/// Creates a new array of `data_type` of length `length` filled
1008/// entirely of `NULL` values
1009///
1010/// ```
1011/// use std::sync::Arc;
1012/// use arrow_schema::DataType;
1013/// use arrow_array::{ArrayRef, Int32Array, new_null_array};
1014///
1015/// let null_array = new_null_array(&DataType::Int32, 3);
1016/// let array: ArrayRef = Arc::new(Int32Array::from(vec![None, None, None]));
1017///
1018/// assert_eq!(&array, &null_array);
1019/// ```
1020pub fn new_null_array(data_type: &DataType, length: usize) -> ArrayRef {
1021 make_array(ArrayData::new_null(data_type, length))
1022}
1023
1024/// Helper function that creates an [`OffsetBuffer`] from a buffer and array offset/ length
1025///
1026/// # Safety
1027///
1028/// - buffer must contain valid arrow offsets ( [`OffsetBuffer`] ) for the
1029/// given length and offset.
1030unsafe fn get_offsets_from_buffer<O: ArrowNativeType>(
1031 buffer: Buffer,
1032 offset: usize,
1033 len: usize,
1034) -> OffsetBuffer<O> {
1035 if len == 0 && buffer.is_empty() {
1036 return OffsetBuffer::new_empty();
1037 }
1038
1039 let scalar_buffer = ScalarBuffer::new(buffer, offset, len + 1);
1040 // Safety:
1041 // Arguments were valid
1042 unsafe { OffsetBuffer::new_unchecked(scalar_buffer) }
1043}
1044
1045/// Helper function for printing potentially long arrays.
1046fn print_long_array<A, F>(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result
1047where
1048 A: Array,
1049 F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result,
1050{
1051 let head = std::cmp::min(10, array.len());
1052
1053 for i in 0..head {
1054 if array.is_null(i) {
1055 writeln!(f, " null,")?;
1056 } else {
1057 write!(f, " ")?;
1058 print_item(array, i, f)?;
1059 writeln!(f, ",")?;
1060 }
1061 }
1062 if array.len() > 10 {
1063 if array.len() > 20 {
1064 writeln!(f, " ...{} elements...,", array.len() - 20)?;
1065 }
1066
1067 let tail = std::cmp::max(head, array.len() - 10);
1068
1069 for i in tail..array.len() {
1070 if array.is_null(i) {
1071 writeln!(f, " null,")?;
1072 } else {
1073 write!(f, " ")?;
1074 print_item(array, i, f)?;
1075 writeln!(f, ",")?;
1076 }
1077 }
1078 }
1079 Ok(())
1080}
1081
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cast::{as_union_array, downcast_array};
    use crate::downcast_run_array;
    use arrow_buffer::MutableBuffer;
    use arrow_schema::{Field, Fields, UnionFields, UnionMode};

    /// An empty primitive array has length zero and no values
    #[test]
    fn test_empty_primitive() {
        let empty = new_empty_array(&DataType::Int32);
        let ints = empty.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 0);

        let no_values: &[i32] = &[];
        assert_eq!(ints.values(), no_values);
    }

    /// An empty string array still exposes a first offset, and it is zero
    #[test]
    fn test_empty_variable_sized() {
        let empty = new_empty_array(&DataType::Utf8);
        let strings = empty.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 0);
        assert_eq!(strings.value_offsets()[0], 0i32);
    }

    /// An empty list array still exposes a first offset, and it is zero
    #[test]
    fn test_empty_list_primitive() {
        let item = Arc::new(Field::new_list_field(DataType::Int32, false));
        let list_type = DataType::List(item);

        let empty = new_empty_array(&list_type);
        let lists = empty.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 0);
        assert_eq!(lists.value_offsets()[0], 0i32);
    }

    /// Every slot of a null boolean array is null
    #[test]
    fn test_null_boolean() {
        let nulls = new_null_array(&DataType::Boolean, 9);
        let bools = nulls.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(bools.len(), 9);
        assert!((0..9).all(|i| bools.is_null(i)));
    }

    /// Every slot of a null primitive array is null
    #[test]
    fn test_null_primitive() {
        let nulls = new_null_array(&DataType::Int32, 9);
        let ints = nulls.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(ints.len(), 9);
        assert!((0..9).all(|i| ints.is_null(i)));
    }

    /// A null struct array may wrap a non-nullable child
    #[test]
    fn test_null_struct() {
        // It is possible to create a null struct containing a non-nullable child
        // see https://github.com/apache/arrow-rs/pull/3244 for details
        let fields = vec![Field::new("data", DataType::Int64, false)];
        let struct_type = DataType::Struct(fields.into());
        let nulls = new_null_array(&struct_type, 9);

        let structs = nulls.as_any().downcast_ref::<StructArray>().unwrap();
        assert_eq!(structs.len(), 9);
        assert_eq!(structs.column(0).len(), 9);
        assert!((0..9).all(|i| structs.is_null(i)));

        // Make sure we can slice the resulting array.
        structs.slice(0, 5);
    }

    /// A null string array has nine null slots and a final offset of zero
    #[test]
    fn test_null_variable_sized() {
        let nulls = new_null_array(&DataType::Utf8, 9);
        let strings = nulls.as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(strings.len(), 9);
        assert_eq!(strings.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| strings.is_null(i)));
    }

    /// A null list array has nine null slots and a final offset of zero
    #[test]
    fn test_null_list_primitive() {
        let item = Arc::new(Field::new_list_field(DataType::Int32, true));
        let list_type = DataType::List(item);

        let nulls = new_null_array(&list_type, 9);
        let lists = nulls.as_any().downcast_ref::<ListArray>().unwrap();
        assert_eq!(lists.len(), 9);
        assert_eq!(lists.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| lists.is_null(i)));
    }

    /// A null map array has nine null slots and a final offset of zero
    #[test]
    fn test_null_map() {
        let entry_fields = Fields::from(vec![
            Field::new("key", DataType::Utf8, false),
            Field::new("value", DataType::Int32, true),
        ]);
        let entries = Field::new("entry", DataType::Struct(entry_fields), false);
        let map_type = DataType::Map(Arc::new(entries), false);

        let nulls = new_null_array(&map_type, 9);
        let maps = nulls.as_any().downcast_ref::<MapArray>().unwrap();
        assert_eq!(maps.len(), 9);
        assert_eq!(maps.value_offsets()[9], 0i32);
        assert!((0..9).all(|i| maps.is_null(i)));
    }

    /// A null dictionary array equals one collected from nine `None` values
    #[test]
    fn test_null_dictionary() {
        let values: Vec<Option<&str>> = vec![None; 9];

        let collected: DictionaryArray<Int8Type> = values.into_iter().collect();
        let collected = Arc::new(collected) as ArrayRef;

        let nulls = new_null_array(collected.data_type(), 9);
        assert_eq!(&collected, &nulls);

        // The first buffer of both arrays must also have the same length
        let collected_len = collected.to_data().buffers()[0].len();
        let nulls_len = nulls.to_data().buffers()[0].len();
        assert_eq!(collected_len, nulls_len);
    }

    /// Null union arrays report logical nulls only, in both union modes
    #[test]
    fn test_null_union() {
        for mode in [UnionMode::Sparse, UnionMode::Dense] {
            let fields = UnionFields::try_new(
                vec![2, 1],
                vec![
                    Field::new("foo", DataType::Int32, true),
                    Field::new("bar", DataType::Int64, true),
                ],
            )
            .unwrap();
            let union_type = DataType::Union(fields, mode);
            let nulls = new_null_array(&union_type, 4);

            let unions = as_union_array(nulls.as_ref());
            assert_eq!(unions.len(), 4);
            assert_eq!(unions.null_count(), 0);
            assert_eq!(unions.logical_null_count(), 4);

            for i in 0..4 {
                let slot = unions.value(i);
                assert_eq!(slot.len(), 1);
                assert_eq!(slot.null_count(), 1);
                assert_eq!(slot.logical_null_count(), 1);
                assert!(slot.is_null(0));
            }

            unions.to_data().validate_full().unwrap();
        }
    }

    /// Null run-end encoded arrays hold a single null value, for every run-end type
    #[test]
    #[allow(unused_parens)]
    fn test_null_runs() {
        for run_end_type in [DataType::Int16, DataType::Int32, DataType::Int64] {
            let run_type = DataType::RunEndEncoded(
                Arc::new(Field::new("run_ends", run_end_type, false)),
                Arc::new(Field::new("values", DataType::Utf8, true)),
            );

            let nulls = new_null_array(&run_type, 4);
            let runs = nulls.as_ref();

            downcast_run_array! {
                runs => {
                    assert_eq!(runs.len(), 4);
                    assert_eq!(runs.null_count(), 0);
                    assert_eq!(runs.logical_null_count(), 4);
                    assert_eq!(runs.values().len(), 1);
                    assert_eq!(runs.values().null_count(), 1);
                    assert_eq!(runs.run_ends().len(), 4);
                    assert_eq!(runs.run_ends().values(), &[4]);

                    let physical = runs.get_physical_indices(&[0, 1, 2, 3]).unwrap();
                    assert_eq!(physical, &[0,0,0,0]);
                }
                d => unreachable!("{d}")
            }
        }
    }

    /// Null fixed-size binary arrays are fully null for several value widths
    #[test]
    fn test_null_fixed_size_binary() {
        for size in [1, 2, 7] {
            let nulls = new_null_array(&DataType::FixedSizeBinary(size), 6);
            let binary = nulls
                .as_ref()
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();

            assert_eq!(binary.len(), 6);
            assert_eq!(binary.null_count(), 6);
            assert_eq!(binary.logical_null_count(), 6);
            assert!(binary.iter().all(|value| value.is_none()));
        }
    }

    /// A `NullArray` reports no buffer memory and an array size of one `usize`
    #[test]
    fn test_memory_size_null() {
        let nulls = NullArray::new(32);

        assert_eq!(nulls.get_buffer_memory_size(), 0);
        assert_eq!(
            nulls.get_array_memory_size(),
            std::mem::size_of::<usize>()
        );
    }

    /// A primitive array's size grows by exactly the size of its values
    #[test]
    fn test_memory_size_primitive() {
        let filled = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let empty = PrimitiveArray::<Int64Type>::from(ArrayData::new_empty(filled.data_type()));

        // subtract empty array to avoid magic numbers for the size of additional fields
        let payload = filled.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>());
    }

    /// Slices report the same memory size as the array they were cut from
    #[test]
    fn test_memory_size_primitive_sliced() {
        let whole = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
        let first_half = whole.slice(0, 64);
        let second_half = whole.slice(64, 64);

        // both slices report the full buffer memory usage, even though the buffers are shared
        let whole_size = whole.get_array_memory_size();
        assert_eq!(first_half.get_array_memory_size(), whole_size);
        assert_eq!(second_half.get_array_memory_size(), whole_size);
    }

    /// A nullable primitive array additionally accounts for its validity bitmap
    #[test]
    fn test_memory_size_primitive_nullable() {
        let sparse: PrimitiveArray<Int64Type> = (0..128)
            .map(|i| if i % 20 == 0 { Some(i) } else { None })
            .collect();

        let empty_data = ArrayData::builder(sparse.data_type().clone())
            .add_buffer(MutableBuffer::new(0).into())
            .null_bit_buffer(Some(MutableBuffer::new_null(0).into()))
            .build()
            .unwrap();
        let empty_with_bitmap = PrimitiveArray::<Int64Type>::from(empty_data);

        // expected size is the size of the PrimitiveArray struct,
        // which includes the optional validity buffer
        // plus one buffer on the heap
        assert_eq!(
            empty_with_bitmap.get_array_memory_size(),
            std::mem::size_of::<PrimitiveArray<Int64Type>>()
        );

        // subtract empty array to avoid magic numbers for the size of additional fields
        // the size of the validity bitmap is rounded up to 64 bytes
        let payload = sparse.get_array_memory_size() - empty_with_bitmap.get_array_memory_size();
        assert_eq!(payload, 128 * std::mem::size_of::<i64>() + 64);
    }

    /// A dictionary array's size is the sum of its keys and values sizes
    #[test]
    fn test_memory_size_dictionary() {
        let values = PrimitiveArray::<Int64Type>::from_iter_values(0..16);
        let keys = PrimitiveArray::<Int16Type>::from_iter_values(
            (0..256).map(|i| (i % values.len()) as i16),
        );

        let dict_type = DataType::Dictionary(
            Box::new(keys.data_type().clone()),
            Box::new(values.data_type().clone()),
        );
        let filled_data = keys
            .into_data()
            .into_builder()
            .data_type(dict_type)
            .child_data(vec![values.into_data()])
            .build()
            .unwrap();
        let filled = DictionaryArray::<Int16Type>::from(filled_data);

        let empty_type =
            DataType::Dictionary(Box::new(DataType::Int16), Box::new(DataType::Int64));
        let empty = DictionaryArray::<Int16Type>::from(ArrayData::new_empty(&empty_type));

        let keys_size = 256 * std::mem::size_of::<i16>();
        let keys_delta =
            filled.keys().get_array_memory_size() - empty.keys().get_array_memory_size();
        assert_eq!(keys_delta, keys_size);

        let values_size = 16 * std::mem::size_of::<i64>();
        let values_delta =
            filled.values().get_array_memory_size() - empty.values().get_array_memory_size();
        assert_eq!(values_delta, values_size);

        let total_delta = filled.get_array_memory_size() - empty.get_array_memory_size();
        assert_eq!(total_delta, keys_size + values_size);
    }

    /// Test function that takes an &dyn Array and reports whether it is non-empty
    fn compute_my_thing(arr: &dyn Array) -> bool {
        !arr.is_empty()
    }

    /// Concrete arrays and `ArrayRef`s can both be passed where `&dyn Array` is expected
    #[test]
    fn test_array_ref_as_array() {
        let ints: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        // works well!
        assert!(compute_my_thing(&ints));

        // Should also work when wrapped as an ArrayRef
        let shared: ArrayRef = Arc::new(ints);
        assert!(compute_my_thing(&shared));
        assert!(compute_my_thing(shared.as_ref()));
    }

    /// `downcast_array` recovers the concrete array from an `ArrayRef`
    #[test]
    fn test_downcast_array() {
        let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

        let shared: ArrayRef = Arc::new(original);
        let recovered: Int32Array = downcast_array(&shared);

        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(recovered, expected);
    }
}