arrow_arith/
aggregate.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines aggregations over Arrow arrays.
19
20use arrow_array::cast::*;
21use arrow_array::iterator::ArrayIter;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_data::bit_iterator::try_for_each_valid_idx;
25use arrow_schema::*;
26use std::borrow::BorrowMut;
27use std::cmp::{self, Ordering};
28use std::ops::{BitAnd, BitOr, BitXor};
29use types::ByteViewType;
30
31/// An accumulator for primitive numeric values.
32trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
33    /// Accumulate a non-null value.
34    fn accumulate(&mut self, value: T);
35    /// Accumulate a nullable values.
36    /// If `valid` is false the `value` should not affect the accumulator state.
37    fn accumulate_nullable(&mut self, value: T, valid: bool);
38    /// Merge another accumulator into this accumulator
39    fn merge(&mut self, other: Self);
40    /// Return the aggregated value.
41    fn finish(&mut self) -> T;
42}
43
44/// Helper for branchlessly selecting either `a` or `b` based on the boolean `m`.
45/// After verifying the generated assembly this can be a simple `if`.
46#[inline(always)]
47fn select<T: Copy>(m: bool, a: T, b: T) -> T {
48    if m { a } else { b }
49}
50
51#[derive(Clone, Copy)]
52struct SumAccumulator<T: ArrowNativeTypeOp> {
53    sum: T,
54}
55
56impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> {
57    fn default() -> Self {
58        Self { sum: T::ZERO }
59    }
60}
61
62impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> {
63    fn accumulate(&mut self, value: T) {
64        self.sum = self.sum.add_wrapping(value);
65    }
66
67    fn accumulate_nullable(&mut self, value: T, valid: bool) {
68        let sum = self.sum;
69        self.sum = select(valid, sum.add_wrapping(value), sum)
70    }
71
72    fn merge(&mut self, other: Self) {
73        self.sum = self.sum.add_wrapping(other.sum);
74    }
75
76    fn finish(&mut self) -> T {
77        self.sum
78    }
79}
80
81#[derive(Clone, Copy)]
82struct MinAccumulator<T: ArrowNativeTypeOp> {
83    min: T,
84}
85
86impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> {
87    fn default() -> Self {
88        Self {
89            min: T::MAX_TOTAL_ORDER,
90        }
91    }
92}
93
94impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> {
95    fn accumulate(&mut self, value: T) {
96        let min = self.min;
97        self.min = select(value.is_lt(min), value, min);
98    }
99
100    fn accumulate_nullable(&mut self, value: T, valid: bool) {
101        let min = self.min;
102        let is_lt = valid & value.is_lt(min);
103        self.min = select(is_lt, value, min);
104    }
105
106    fn merge(&mut self, other: Self) {
107        self.accumulate(other.min)
108    }
109
110    fn finish(&mut self) -> T {
111        self.min
112    }
113}
114
115#[derive(Clone, Copy)]
116struct MaxAccumulator<T: ArrowNativeTypeOp> {
117    max: T,
118}
119
120impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> {
121    fn default() -> Self {
122        Self {
123            max: T::MIN_TOTAL_ORDER,
124        }
125    }
126}
127
128impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> {
129    fn accumulate(&mut self, value: T) {
130        let max = self.max;
131        self.max = select(value.is_gt(max), value, max);
132    }
133
134    fn accumulate_nullable(&mut self, value: T, valid: bool) {
135        let max = self.max;
136        let is_gt = value.is_gt(max) & valid;
137        self.max = select(is_gt, value, max);
138    }
139
140    fn merge(&mut self, other: Self) {
141        self.accumulate(other.max)
142    }
143
144    fn finish(&mut self) -> T {
145        self.max
146    }
147}
148
149fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
150    mut acc: [A; LANES],
151) -> A {
152    assert!(LANES > 0 && LANES.is_power_of_two());
153    let mut len = LANES;
154
155    // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern,
156    // but the generated code is still a little faster than purely sequential reduction for floats.
157    while len >= 2 {
158        let mid = len / 2;
159        let (h, t) = acc[..len].split_at_mut(mid);
160
161        for i in 0..mid {
162            h[i].merge(t[i]);
163        }
164        len /= 2;
165    }
166    acc[0]
167}
168
169#[inline(always)]
170fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
171    acc: &mut [A; LANES],
172    values: &[T; LANES],
173) {
174    for i in 0..LANES {
175        acc[i].accumulate(values[i]);
176    }
177}
178
179#[inline(always)]
180fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
181    acc: &mut [A; LANES],
182    values: &[T; LANES],
183    validity: u64,
184) {
185    let mut bit = 1;
186    for i in 0..LANES {
187        acc[i].accumulate_nullable(values[i], (validity & bit) != 0);
188        bit <<= 1;
189    }
190}
191
192fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T {
193    values
194        .iter()
195        .copied()
196        .fold(A::default(), |mut a, b| {
197            a.accumulate(b);
198            a
199        })
200        .finish()
201}
202
203#[inline(never)]
204fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
205    values: &[T],
206) -> T {
207    // aggregating into multiple independent accumulators allows the compiler to use vector registers
208    // with a single accumulator the compiler would not be allowed to reorder floating point addition
209    let mut acc = [A::default(); LANES];
210    let mut chunks = values.chunks_exact(LANES);
211    chunks.borrow_mut().for_each(|chunk| {
212        aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap());
213    });
214
215    let remainder = chunks.remainder();
216    for i in 0..remainder.len() {
217        acc[i].accumulate(remainder[i]);
218    }
219
220    reduce_accumulators(acc).finish()
221}
222
223#[inline(never)]
224fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
225    values: &[T],
226    validity: &NullBuffer,
227) -> T {
228    assert!(LANES > 0 && 64 % LANES == 0);
229    assert_eq!(values.len(), validity.len());
230
231    // aggregating into multiple independent accumulators allows the compiler to use vector registers
232    let mut acc = [A::default(); LANES];
233    // we process 64 bits of validity at a time
234    let mut values_chunks = values.chunks_exact(64);
235    let validity_chunks = validity.inner().bit_chunks();
236    let mut validity_chunks_iter = validity_chunks.iter();
237
238    values_chunks.borrow_mut().for_each(|chunk| {
239        // Safety: we asserted that values and validity have the same length and trust the iterator impl
240        let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() };
241        // chunk further based on the number of vector lanes
242        chunk.chunks_exact(LANES).for_each(|chunk| {
243            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
244            validity >>= LANES;
245        });
246    });
247
248    let remainder = values_chunks.remainder();
249    if !remainder.is_empty() {
250        let mut validity = validity_chunks.remainder_bits();
251
252        let mut remainder_chunks = remainder.chunks_exact(LANES);
253        remainder_chunks.borrow_mut().for_each(|chunk| {
254            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
255            validity >>= LANES;
256        });
257
258        let remainder = remainder_chunks.remainder();
259        if !remainder.is_empty() {
260            let mut bit = 1;
261            for i in 0..remainder.len() {
262                acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0);
263                bit <<= 1;
264            }
265        }
266    }
267
268    reduce_accumulators(acc).finish()
269}
270
271/// The preferred vector size in bytes for the target platform.
272/// Note that the avx512 target feature is still unstable and this also means it is not detected on stable rust.
273const PREFERRED_VECTOR_SIZE: usize =
274    if cfg!(all(target_arch = "x86_64", target_feature = "avx512f")) {
275        64
276    } else if cfg!(all(target_arch = "x86_64", target_feature = "avx")) {
277        32
278    } else {
279        16
280    };
281
282/// non-nullable aggregation requires fewer temporary registers so we can use more of them for accumulators
283const PREFERRED_VECTOR_SIZE_NON_NULL: usize = PREFERRED_VECTOR_SIZE * 2;
284
285/// Generic aggregation for any primitive type.
286/// Returns None if there are no non-null values in `array`.
287fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>(
288    array: &PrimitiveArray<P>,
289) -> Option<T> {
290    let null_count = array.null_count();
291    if null_count == array.len() {
292        return None;
293    }
294    let values = array.values().as_ref();
295    match array.nulls() {
296        Some(nulls) if null_count > 0 => {
297            // const generics depending on a generic type parameter are not supported
298            // so we have to match and call aggregate with the corresponding constant
299            match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() {
300                64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)),
301                32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)),
302                16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)),
303                8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)),
304                4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)),
305                2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)),
306                _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)),
307            }
308        }
309        _ => {
310            let is_float = matches!(
311                array.data_type(),
312                DataType::Float16 | DataType::Float32 | DataType::Float64
313            );
314            if is_float {
315                match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() {
316                    64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)),
317                    32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)),
318                    16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)),
319                    8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)),
320                    4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)),
321                    2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)),
322                    _ => Some(aggregate_nonnull_simple::<T, A>(values)),
323                }
324            } else {
325                // for non-null integers its better to not chunk ourselves and instead
326                // let llvm fully handle loop unrolling and vectorization
327                Some(aggregate_nonnull_simple::<T, A>(values))
328            }
329        }
330    }
331}
332
333/// Returns the minimum value in the boolean array.
334///
335/// # Example
336/// ```
337/// # use arrow_array::BooleanArray;
338/// # use arrow_arith::aggregate::min_boolean;
339/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
340/// assert_eq!(min_boolean(&a), Some(false))
341/// ```
342pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
343    // short circuit if all nulls / zero length array
344    if array.null_count() == array.len() {
345        return None;
346    }
347
348    // Note the min bool is false (0), so short circuit as soon as we see it
349    match array.nulls() {
350        None => {
351            let bit_chunks = array.values().bit_chunks();
352            if bit_chunks.iter().any(|x| {
353                // u64::MAX has all bits set, so if the value is not that, then there is a false
354                x != u64::MAX
355            }) {
356                return Some(false);
357            }
358            // If the remainder bits are not all set, then there is a false
359            if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() {
360                Some(false)
361            } else {
362                Some(true)
363            }
364        }
365        Some(nulls) => {
366            let validity_chunks = nulls.inner().bit_chunks();
367            let value_chunks = array.values().bit_chunks();
368
369            if value_chunks
370                .iter()
371                .zip(validity_chunks.iter())
372                .any(|(value, validity)| {
373                    // We are looking for a false value, but because applying the validity mask
374                    // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true.
375                    (!value & validity) != 0
376                })
377            {
378                return Some(false);
379            }
380
381            // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true
382            if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 {
383                Some(false)
384            } else {
385                Some(true)
386            }
387        }
388    }
389}
390
391/// Returns the maximum value in the boolean array
392///
393/// # Example
394/// ```
395/// # use arrow_array::BooleanArray;
396/// # use arrow_arith::aggregate::max_boolean;
397/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
398/// assert_eq!(max_boolean(&a), Some(true))
399/// ```
400pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
401    // short circuit if all nulls / zero length array
402    if array.null_count() == array.len() {
403        return None;
404    }
405
406    // Note the max bool is true (1), so short circuit as soon as we see it
407    match array.nulls() {
408        None => array
409            .values()
410            .bit_chunks()
411            .iter_padded()
412            // We found a true if any bit is set
413            .map(|x| x != 0)
414            .find(|b| *b)
415            .or(Some(false)),
416        Some(nulls) => {
417            let validity_chunks = nulls.inner().bit_chunks().iter_padded();
418            let value_chunks = array.values().bit_chunks().iter_padded();
419            value_chunks
420                .zip(validity_chunks)
421                // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk
422                .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0)
423                .find(|b| *b)
424                .or(Some(false))
425        }
426    }
427}
428
429/// Helper to compute min/max of [`ArrayAccessor`].
430fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T>
431where
432    F: Fn(&T, &T) -> bool,
433{
434    let null_count = array.null_count();
435    if null_count == array.len() {
436        None
437    } else if null_count == 0 {
438        // JUSTIFICATION
439        //  Benefit:  ~8% speedup
440        //  Soundness: `i` is always within the array bounds
441        (0..array.len())
442            .map(|i| unsafe { array.value_unchecked(i) })
443            .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
444    } else {
445        let nulls = array.nulls().unwrap();
446        unsafe {
447            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
448                let acc = array.value_unchecked(acc_idx);
449                let item = array.value_unchecked(idx);
450                if cmp(&acc, &item) { idx } else { acc_idx }
451            });
452            idx.map(|idx| array.value_unchecked(idx))
453        }
454    }
455}
456
457/// Helper to compute min/max of [`GenericByteViewArray<T>`].
458/// The specialized min/max leverages the inlined values to compare the byte views.
459/// `swap_cond` is the condition to swap current min/max with the new value.
460/// For example, `Ordering::Greater` for max and `Ordering::Less` for min.
461fn min_max_view_helper<T: ByteViewType>(
462    array: &GenericByteViewArray<T>,
463    swap_cond: cmp::Ordering,
464) -> Option<&T::Native> {
465    let null_count = array.null_count();
466    if null_count == array.len() {
467        None
468    } else if null_count == 0 {
469        let target_idx = (0..array.len()).reduce(|acc, item| {
470            // SAFETY:  array's length is correct so item is within bounds
471            let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
472            if cmp == swap_cond { item } else { acc }
473        });
474        // SAFETY: idx came from valid range `0..array.len()`
475        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
476    } else {
477        let nulls = array.nulls().unwrap();
478
479        let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
480            let cmp =
481                unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
482            if cmp == swap_cond { idx } else { acc_idx }
483        });
484
485        // SAFETY: idx came from valid range `0..array.len()`
486        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
487    }
488}
489
490/// Returns the maximum value in the binary array, according to the natural order.
491pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
492    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
493}
494
495/// Returns the maximum value in the binary view array, according to the natural order.
496pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
497    min_max_view_helper(array, Ordering::Greater)
498}
499
500/// Returns the maximum value in the fixed size binary array, according to the natural order.
501pub fn max_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
502    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
503}
504
505/// Returns the minimum value in the binary array, according to the natural order.
506pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
507    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
508}
509
510/// Returns the minimum value in the binary view array, according to the natural order.
511pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
512    min_max_view_helper(array, Ordering::Less)
513}
514
515/// Returns the minimum value in the fixed size binary array, according to the natural order.
516pub fn min_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
517    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
518}
519
520/// Returns the maximum value in the string array, according to the natural order.
521pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
522    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
523}
524
525/// Returns the maximum value in the string view array, according to the natural order.
526pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
527    min_max_view_helper(array, Ordering::Greater)
528}
529
530/// Returns the minimum value in the string array, according to the natural order.
531pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
532    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
533}
534
535/// Returns the minimum value in the string view array, according to the natural order.
536pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
537    min_max_view_helper(array, Ordering::Less)
538}
539
540/// Returns the sum of values in the array.
541///
542/// This doesn't detect overflow. Once overflowing, the result will wrap around.
543/// For an overflow-checking variant, use `sum_array_checked` instead.
544pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
545where
546    T: ArrowNumericType,
547    T::Native: ArrowNativeTypeOp,
548{
549    match array.data_type() {
550        DataType::Dictionary(_, _) => {
551            let null_count = array.null_count();
552
553            if null_count == array.len() {
554                return None;
555            }
556
557            let iter = ArrayIter::new(array);
558            let sum = iter
559                .into_iter()
560                .fold(T::default_value(), |accumulator, value| {
561                    if let Some(value) = value {
562                        accumulator.add_wrapping(value)
563                    } else {
564                        accumulator
565                    }
566                });
567
568            Some(sum)
569        }
570        _ => sum::<T>(as_primitive_array(&array)),
571    }
572}
573
574/// Returns the sum of values in the array.
575///
576/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
577/// use `sum_array` instead.
578pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
579    array: A,
580) -> Result<Option<T::Native>, ArrowError>
581where
582    T: ArrowNumericType,
583    T::Native: ArrowNativeTypeOp,
584{
585    match array.data_type() {
586        DataType::Dictionary(_, _) => {
587            let null_count = array.null_count();
588
589            if null_count == array.len() {
590                return Ok(None);
591            }
592
593            let iter = ArrayIter::new(array);
594            let sum = iter
595                .into_iter()
596                .try_fold(T::default_value(), |accumulator, value| {
597                    if let Some(value) = value {
598                        accumulator.add_checked(value)
599                    } else {
600                        Ok(accumulator)
601                    }
602                })?;
603
604            Ok(Some(sum))
605        }
606        _ => sum_checked::<T>(as_primitive_array(&array)),
607    }
608}
609
610/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
611/// array with value of `ArrowNumericType` type.
612pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
613where
614    T: ArrowNumericType,
615    T::Native: ArrowNativeType,
616{
617    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
618}
619
620/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
621/// array with value of `ArrowNumericType` type.
622pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
623where
624    T: ArrowNumericType,
625    T::Native: ArrowNativeTypeOp,
626{
627    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
628}
629
630fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
631    array: A,
632    cmp: F,
633    m: M,
634) -> Option<T::Native>
635where
636    T: ArrowNumericType,
637    F: Fn(&T::Native, &T::Native) -> bool,
638    M: Fn(&PrimitiveArray<T>) -> Option<T::Native>,
639{
640    match array.data_type() {
641        DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp),
642        _ => m(as_primitive_array(&array)),
643    }
644}
645
646macro_rules! bit_operation {
647    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
648        #[doc = $DOC]
649        ///
650        /// Returns `None` if the array is empty or only contains null values.
651        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
652        where
653            T: ArrowNumericType,
654            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
655        {
656            let default;
657            if $DEFAULT == -1 {
658                default = T::Native::ONE.neg_wrapping();
659            } else {
660                default = T::default_value();
661            }
662
663            let null_count = array.null_count();
664
665            if null_count == array.len() {
666                return None;
667            }
668
669            let data: &[T::Native] = array.values();
670
671            match array.nulls() {
672                None => {
673                    let result = data
674                        .iter()
675                        .fold(default, |accumulator, value| accumulator.$OP(*value));
676
677                    Some(result)
678                }
679                Some(nulls) => {
680                    let mut result = default;
681                    let data_chunks = data.chunks_exact(64);
682                    let remainder = data_chunks.remainder();
683
684                    let bit_chunks = nulls.inner().bit_chunks();
685                    data_chunks
686                        .zip(bit_chunks.iter())
687                        .for_each(|(chunk, mask)| {
688                            // index_mask has value 1 << i in the loop
689                            let mut index_mask = 1;
690                            chunk.iter().for_each(|value| {
691                                if (mask & index_mask) != 0 {
692                                    result = result.$OP(*value);
693                                }
694                                index_mask <<= 1;
695                            });
696                        });
697
698                    let remainder_bits = bit_chunks.remainder_bits();
699
700                    remainder.iter().enumerate().for_each(|(i, value)| {
701                        if remainder_bits & (1 << i) != 0 {
702                            result = result.$OP(*value);
703                        }
704                    });
705
706                    Some(result)
707                }
708            }
709        }
710    };
711}
712
713bit_operation!(
714    bit_and,
715    bitand,
716    BitAnd,
717    -1,
718    "Returns the bitwise and of all non-null input values."
719);
720bit_operation!(
721    bit_or,
722    bitor,
723    BitOr,
724    0,
725    "Returns the bitwise or of all non-null input values."
726);
727bit_operation!(
728    bit_xor,
729    bitxor,
730    BitXor,
731    0,
732    "Returns the bitwise xor of all non-null input values."
733);
734
735/// Returns true if all non-null input values are true, otherwise false.
736///
737/// Returns `None` if the array is empty or only contains null values.
738pub fn bool_and(array: &BooleanArray) -> Option<bool> {
739    min_boolean(array)
740}
741
742/// Returns true if any non-null input value is true, otherwise false.
743///
744/// Returns `None` if the array is empty or only contains null values.
745pub fn bool_or(array: &BooleanArray) -> Option<bool> {
746    max_boolean(array)
747}
748
749/// Returns the sum of values in the primitive array.
750///
751/// Returns `Ok(None)` if the array is empty or only contains null values.
752///
753/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
754/// use `sum` instead.
755pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
756where
757    T: ArrowNumericType,
758    T::Native: ArrowNativeTypeOp,
759{
760    let null_count = array.null_count();
761
762    if null_count == array.len() {
763        return Ok(None);
764    }
765
766    let data: &[T::Native] = array.values();
767
768    match array.nulls() {
769        None => {
770            let sum = data
771                .iter()
772                .try_fold(T::default_value(), |accumulator, value| {
773                    accumulator.add_checked(*value)
774                })?;
775
776            Ok(Some(sum))
777        }
778        Some(nulls) => {
779            let mut sum = T::default_value();
780
781            try_for_each_valid_idx(
782                nulls.len(),
783                nulls.offset(),
784                nulls.null_count(),
785                Some(nulls.validity()),
786                |idx| {
787                    unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
788                    Ok::<_, ArrowError>(())
789                },
790            )?;
791
792            Ok(Some(sum))
793        }
794    }
795}
796
797/// Returns the sum of values in the primitive array.
798///
799/// Returns `None` if the array is empty or only contains null values.
800///
801/// This doesn't detect overflow in release mode by default. Once overflowing, the result will
802/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
803pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
804where
805    T::Native: ArrowNativeTypeOp,
806{
807    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
808}
809
810/// Returns the minimum value in the array, according to the natural order.
811/// For floating point arrays any NaN values are considered to be greater than any other non-null value
812///
813/// # Example
814/// ```rust
815/// # use arrow_array::Int32Array;
816/// # use arrow_arith::aggregate::min;
817/// let array = Int32Array::from(vec![8, 2, 4]);
818/// let result = min(&array);
819/// assert_eq!(result, Some(2));
820/// ```
821pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
822where
823    T::Native: PartialOrd,
824{
825    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
826}
827
828/// Returns the maximum value in the array, according to the natural order.
829/// For floating point arrays any NaN values are considered to be greater than any other non-null value
830///
831/// # Example
832/// ```rust
833/// # use arrow_array::Int32Array;
834/// # use arrow_arith::aggregate::max;
835/// let array = Int32Array::from(vec![4, 8, 2]);
836/// let result = max(&array);
837/// assert_eq!(result, Some(8));
838/// ```
839pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
840where
841    T::Native: PartialOrd,
842{
843    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
844}
845
846#[cfg(test)]
847mod tests {
848    use super::*;
849    use arrow_array::types::*;
850    use builder::BooleanBuilder;
851    use std::sync::Arc;
852
853    #[test]
854    fn test_primitive_array_sum() {
855        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
856        assert_eq!(15, sum(&a).unwrap());
857    }
858
859    #[test]
860    fn test_primitive_array_float_sum() {
861        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
862        assert_eq!(16.5, sum(&a).unwrap());
863    }
864
865    #[test]
866    fn test_primitive_array_sum_with_nulls() {
867        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
868        assert_eq!(10, sum(&a).unwrap());
869    }
870
871    #[test]
872    fn test_primitive_array_sum_all_nulls() {
873        let a = Int32Array::from(vec![None, None, None]);
874        assert_eq!(None, sum(&a));
875    }
876
877    #[test]
878    fn test_primitive_array_sum_large_float_64() {
879        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), None);
880        assert_eq!(Some((1..=100).sum::<i64>() as f64), sum(&c));
881
882        // create an array that actually has non-zero values at the invalid indices
883        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
884        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), Some(validity));
885
886        assert_eq!(
887            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f64),
888            sum(&c)
889        );
890    }
891
892    #[test]
893    fn test_primitive_array_sum_large_float_32() {
894        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), None);
895        assert_eq!(Some((1..=100).sum::<i64>() as f32), sum(&c));
896
897        // create an array that actually has non-zero values at the invalid indices
898        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
899        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), Some(validity));
900
901        assert_eq!(
902            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f32),
903            sum(&c)
904        );
905    }
906
907    #[test]
908    fn test_primitive_array_sum_large_64() {
909        let c = Int64Array::new((1..=100).collect(), None);
910        assert_eq!(Some((1..=100).sum()), sum(&c));
911
912        // create an array that actually has non-zero values at the invalid indices
913        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
914        let c = Int64Array::new((1..=100).collect(), Some(validity));
915
916        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
917    }
918
919    #[test]
920    fn test_primitive_array_sum_large_32() {
921        let c = Int32Array::new((1..=100).collect(), None);
922        assert_eq!(Some((1..=100).sum()), sum(&c));
923
924        // create an array that actually has non-zero values at the invalid indices
925        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
926        let c = Int32Array::new((1..=100).collect(), Some(validity));
927        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
928    }
929
930    #[test]
931    fn test_primitive_array_sum_large_16() {
932        let c = Int16Array::new((1..=100).collect(), None);
933        assert_eq!(Some((1..=100).sum()), sum(&c));
934
935        // create an array that actually has non-zero values at the invalid indices
936        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
937        let c = Int16Array::new((1..=100).collect(), Some(validity));
938        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
939    }
940
941    #[test]
942    fn test_primitive_array_sum_large_8() {
943        let c = UInt8Array::new((1..=100).collect(), None);
944        assert_eq!(
945            Some((1..=100).fold(0_u8, |a, x| a.wrapping_add(x))),
946            sum(&c)
947        );
948
949        // create an array that actually has non-zero values at the invalid indices
950        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
951        let c = UInt8Array::new((1..=100).collect(), Some(validity));
952        assert_eq!(
953            Some(
954                (1..=100)
955                    .filter(|i| i % 3 == 0)
956                    .fold(0_u8, |a, x| a.wrapping_add(x))
957            ),
958            sum(&c)
959        );
960    }
961
962    #[test]
963    fn test_primitive_array_bit_and() {
964        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
965        assert_eq!(0, bit_and(&a).unwrap());
966    }
967
968    #[test]
969    fn test_primitive_array_bit_and_with_nulls() {
970        let a = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
971        assert_eq!(2, bit_and(&a).unwrap());
972    }
973
974    #[test]
975    fn test_primitive_array_bit_and_all_nulls() {
976        let a = Int32Array::from(vec![None, None, None]);
977        assert_eq!(None, bit_and(&a));
978    }
979
980    #[test]
981    fn test_primitive_array_bit_or() {
982        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
983        assert_eq!(7, bit_or(&a).unwrap());
984    }
985
986    #[test]
987    fn test_primitive_array_bit_or_with_nulls() {
988        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
989        assert_eq!(7, bit_or(&a).unwrap());
990    }
991
992    #[test]
993    fn test_primitive_array_bit_or_all_nulls() {
994        let a = Int32Array::from(vec![None, None, None]);
995        assert_eq!(None, bit_or(&a));
996    }
997
998    #[test]
999    fn test_primitive_array_bit_xor() {
1000        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1001        assert_eq!(1, bit_xor(&a).unwrap());
1002    }
1003
1004    #[test]
1005    fn test_primitive_array_bit_xor_with_nulls() {
1006        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
1007        assert_eq!(4, bit_xor(&a).unwrap());
1008    }
1009
1010    #[test]
1011    fn test_primitive_array_bit_xor_all_nulls() {
1012        let a = Int32Array::from(vec![None, None, None]);
1013        assert_eq!(None, bit_xor(&a));
1014    }
1015
1016    #[test]
1017    fn test_primitive_array_bool_and() {
1018        let a = BooleanArray::from(vec![true, false, true, false, true]);
1019        assert!(!bool_and(&a).unwrap());
1020    }
1021
1022    #[test]
1023    fn test_primitive_array_bool_and_with_nulls() {
1024        let a = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
1025        assert!(bool_and(&a).unwrap());
1026    }
1027
1028    #[test]
1029    fn test_primitive_array_bool_and_all_nulls() {
1030        let a = BooleanArray::from(vec![None, None, None]);
1031        assert_eq!(None, bool_and(&a));
1032    }
1033
1034    #[test]
1035    fn test_primitive_array_bool_or() {
1036        let a = BooleanArray::from(vec![true, false, true, false, true]);
1037        assert!(bool_or(&a).unwrap());
1038    }
1039
1040    #[test]
1041    fn test_primitive_array_bool_or_with_nulls() {
1042        let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
1043        assert!(!bool_or(&a).unwrap());
1044    }
1045
1046    #[test]
1047    fn test_primitive_array_bool_or_all_nulls() {
1048        let a = BooleanArray::from(vec![None, None, None]);
1049        assert_eq!(None, bool_or(&a));
1050    }
1051
1052    #[test]
1053    fn test_primitive_array_min_max() {
1054        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
1055        assert_eq!(5, min(&a).unwrap());
1056        assert_eq!(9, max(&a).unwrap());
1057    }
1058
1059    #[test]
1060    fn test_primitive_array_min_max_with_nulls() {
1061        let a = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
1062        assert_eq!(5, min(&a).unwrap());
1063        assert_eq!(9, max(&a).unwrap());
1064    }
1065
1066    #[test]
1067    fn test_primitive_min_max_1() {
1068        let a = Int32Array::from(vec![None, None, Some(5), Some(2)]);
1069        assert_eq!(Some(2), min(&a));
1070        assert_eq!(Some(5), max(&a));
1071    }
1072
1073    #[test]
1074    fn test_primitive_min_max_float_large_nonnull_array() {
1075        let a: Float64Array = (0..256).map(|i| Some((i + 1) as f64)).collect();
1076        // min/max are on boundaries of chunked data
1077        assert_eq!(Some(1.0), min(&a));
1078        assert_eq!(Some(256.0), max(&a));
1079
1080        // max is last value in remainder after chunking
1081        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1082        assert_eq!(Some(255.0), max(&a));
1083
1084        // max is first value in remainder after chunking
1085        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1086        assert_eq!(Some(257.0), max(&a));
1087    }
1088
1089    #[test]
1090    fn test_primitive_min_max_float_large_nullable_array() {
1091        let a: Float64Array = (0..256)
1092            .map(|i| {
1093                if (i + 1) % 3 == 0 {
1094                    None
1095                } else {
1096                    Some((i + 1) as f64)
1097                }
1098            })
1099            .collect();
1100        // min/max are on boundaries of chunked data
1101        assert_eq!(Some(1.0), min(&a));
1102        assert_eq!(Some(256.0), max(&a));
1103
1104        let a: Float64Array = (0..256)
1105            .map(|i| {
1106                if i == 0 || i == 255 {
1107                    None
1108                } else {
1109                    Some((i + 1) as f64)
1110                }
1111            })
1112            .collect();
1113        // boundaries of chunked data are null
1114        assert_eq!(Some(2.0), min(&a));
1115        assert_eq!(Some(255.0), max(&a));
1116
1117        let a: Float64Array = (0..256)
1118            .map(|i| if i != 100 { None } else { Some((i) as f64) })
1119            .collect();
1120        // a single non-null value somewhere in the middle
1121        assert_eq!(Some(100.0), min(&a));
1122        assert_eq!(Some(100.0), max(&a));
1123
1124        // max is last value in remainder after chunking
1125        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1126        assert_eq!(Some(255.0), max(&a));
1127
1128        // max is first value in remainder after chunking
1129        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1130        assert_eq!(Some(257.0), max(&a));
1131    }
1132
1133    #[test]
1134    fn test_primitive_min_max_float_edge_cases() {
1135        let a: Float64Array = (0..100).map(|_| Some(f64::NEG_INFINITY)).collect();
1136        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1137        assert_eq!(Some(f64::NEG_INFINITY), max(&a));
1138
1139        let a: Float64Array = (0..100).map(|_| Some(f64::MIN)).collect();
1140        assert_eq!(Some(f64::MIN), min(&a));
1141        assert_eq!(Some(f64::MIN), max(&a));
1142
1143        let a: Float64Array = (0..100).map(|_| Some(f64::MAX)).collect();
1144        assert_eq!(Some(f64::MAX), min(&a));
1145        assert_eq!(Some(f64::MAX), max(&a));
1146
1147        let a: Float64Array = (0..100).map(|_| Some(f64::INFINITY)).collect();
1148        assert_eq!(Some(f64::INFINITY), min(&a));
1149        assert_eq!(Some(f64::INFINITY), max(&a));
1150    }
1151
1152    #[test]
1153    fn test_primitive_min_max_float_all_nans_non_null() {
1154        let a: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
1155        assert!(max(&a).unwrap().is_nan());
1156        assert!(min(&a).unwrap().is_nan());
1157    }
1158
1159    #[test]
1160    fn test_primitive_min_max_float_negative_nan() {
1161        let a: Float64Array =
1162            Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
1163        let max = max(&a).unwrap();
1164        let min = min(&a).unwrap();
1165        assert!(max.is_nan());
1166        assert!(max.is_sign_positive());
1167
1168        assert!(min.is_nan());
1169        assert!(min.is_sign_negative());
1170    }
1171
1172    #[test]
1173    fn test_primitive_min_max_float_first_nan_nonnull() {
1174        let a: Float64Array = (0..100)
1175            .map(|i| {
1176                if i == 0 {
1177                    Some(f64::NAN)
1178                } else {
1179                    Some(i as f64)
1180                }
1181            })
1182            .collect();
1183        assert_eq!(Some(1.0), min(&a));
1184        assert!(max(&a).unwrap().is_nan());
1185    }
1186
1187    #[test]
1188    fn test_primitive_min_max_float_last_nan_nonnull() {
1189        let a: Float64Array = (0..100)
1190            .map(|i| {
1191                if i == 99 {
1192                    Some(f64::NAN)
1193                } else {
1194                    Some((i + 1) as f64)
1195                }
1196            })
1197            .collect();
1198        assert_eq!(Some(1.0), min(&a));
1199        assert!(max(&a).unwrap().is_nan());
1200    }
1201
1202    #[test]
1203    fn test_primitive_min_max_float_first_nan_nullable() {
1204        let a: Float64Array = (0..100)
1205            .map(|i| {
1206                if i == 0 {
1207                    Some(f64::NAN)
1208                } else if i % 2 == 0 {
1209                    None
1210                } else {
1211                    Some(i as f64)
1212                }
1213            })
1214            .collect();
1215        assert_eq!(Some(1.0), min(&a));
1216        assert!(max(&a).unwrap().is_nan());
1217    }
1218
1219    #[test]
1220    fn test_primitive_min_max_float_last_nan_nullable() {
1221        let a: Float64Array = (0..100)
1222            .map(|i| {
1223                if i == 99 {
1224                    Some(f64::NAN)
1225                } else if i % 2 == 0 {
1226                    None
1227                } else {
1228                    Some(i as f64)
1229                }
1230            })
1231            .collect();
1232        assert_eq!(Some(1.0), min(&a));
1233        assert!(max(&a).unwrap().is_nan());
1234    }
1235
1236    #[test]
1237    fn test_primitive_min_max_float_inf_and_nans() {
1238        let a: Float64Array = (0..100)
1239            .map(|i| {
1240                let x = match i % 10 {
1241                    0 => f64::NEG_INFINITY,
1242                    1 => f64::MIN,
1243                    2 => f64::MAX,
1244                    4 => f64::INFINITY,
1245                    5 => f64::NAN,
1246                    _ => i as f64,
1247                };
1248                Some(x)
1249            })
1250            .collect();
1251        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1252        assert!(max(&a).unwrap().is_nan());
1253    }
1254
1255    fn pad_inputs_and_test_fixed_size_binary(
1256        input: Vec<Option<&[u8]>>,
1257        expected_min: Option<&[u8]>,
1258        expected_max: Option<&[u8]>,
1259    ) {
1260        fn pad_slice(slice: &[u8], len: usize) -> Vec<u8> {
1261            let mut padded = vec![0; len];
1262            padded[..slice.len()].copy_from_slice(slice);
1263            padded
1264        }
1265
1266        let max_len = input
1267            .iter()
1268            .filter_map(|x| x.as_ref().map(|b| b.len()))
1269            .max()
1270            .unwrap_or(0);
1271        let padded_input = input
1272            .iter()
1273            .map(|x| x.as_ref().map(|b| pad_slice(b, max_len)));
1274        let input_arr =
1275            FixedSizeBinaryArray::try_from_sparse_iter_with_size(padded_input, max_len as i32)
1276                .unwrap();
1277        let padded_expected_min = expected_min.map(|b| pad_slice(b, max_len));
1278        let padded_expected_max = expected_max.map(|b| pad_slice(b, max_len));
1279
1280        assert_eq!(
1281            padded_expected_min.as_deref(),
1282            min_fixed_size_binary(&input_arr)
1283        );
1284        assert_eq!(
1285            padded_expected_max.as_deref(),
1286            max_fixed_size_binary(&input_arr)
1287        );
1288    }
1289
1290    macro_rules! test_binary {
1291        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1292            #[test]
1293            fn $NAME() {
1294                let binary = BinaryArray::from($ARRAY);
1295                assert_eq!($EXPECTED_MIN, min_binary(&binary));
1296                assert_eq!($EXPECTED_MAX, max_binary(&binary));
1297
1298                let large_binary = LargeBinaryArray::from($ARRAY);
1299                assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
1300                assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
1301
1302                let binary_view = BinaryViewArray::from($ARRAY);
1303                assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
1304                assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
1305
1306                pad_inputs_and_test_fixed_size_binary($ARRAY, $EXPECTED_MIN, $EXPECTED_MAX);
1307            }
1308        };
1309    }
1310
1311    test_binary!(
1312        test_binary_min_max_with_nulls,
1313        vec![
1314            Some("b01234567890123".as_bytes()), // long bytes
1315            None,
1316            None,
1317            Some(b"a"),
1318            Some(b"c"),
1319            Some(b"abcdedfg0123456"),
1320        ],
1321        Some("a".as_bytes()),
1322        Some("c".as_bytes())
1323    );
1324
1325    test_binary!(
1326        test_binary_min_max_no_null,
1327        vec![
1328            Some("b".as_bytes()),
1329            Some(b"abcdefghijklmnopqrst"), // long bytes
1330            Some(b"c"),
1331            Some(b"b01234567890123"), // long bytes for view types
1332        ],
1333        Some("abcdefghijklmnopqrst".as_bytes()),
1334        Some("c".as_bytes())
1335    );
1336
1337    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
1338
1339    test_binary!(
1340        test_binary_min_max_1,
1341        vec![
1342            None,
1343            Some("b01234567890123435".as_bytes()), // long bytes for view types
1344            None,
1345            Some(b"b0123xxxxxxxxxxx"),
1346            Some(b"a")
1347        ],
1348        Some("a".as_bytes()),
1349        Some("b0123xxxxxxxxxxx".as_bytes())
1350    );
1351
1352    macro_rules! test_string {
1353        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1354            #[test]
1355            fn $NAME() {
1356                let string = StringArray::from($ARRAY);
1357                assert_eq!($EXPECTED_MIN, min_string(&string));
1358                assert_eq!($EXPECTED_MAX, max_string(&string));
1359
1360                let large_string = LargeStringArray::from($ARRAY);
1361                assert_eq!($EXPECTED_MIN, min_string(&large_string));
1362                assert_eq!($EXPECTED_MAX, max_string(&large_string));
1363
1364                let string_view = StringViewArray::from($ARRAY);
1365                assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
1366                assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
1367            }
1368        };
1369    }
1370
1371    test_string!(
1372        test_string_min_max_with_nulls,
1373        vec![
1374            Some("b012345678901234"), // long bytes for view types
1375            None,
1376            None,
1377            Some("a"),
1378            Some("c"),
1379            Some("b0123xxxxxxxxxxx")
1380        ],
1381        Some("a"),
1382        Some("c")
1383    );
1384
1385    test_string!(
1386        test_string_min_max_no_null,
1387        vec![
1388            Some("b"),
1389            Some("b012345678901234"), // long bytes for view types
1390            Some("a"),
1391            Some("b012xxxxxxxxxxxx")
1392        ],
1393        Some("a"),
1394        Some("b012xxxxxxxxxxxx")
1395    );
1396
1397    test_string!(
1398        test_string_min_max_all_nulls,
1399        Vec::<Option<&str>>::from_iter([None, None]),
1400        None,
1401        None
1402    );
1403
1404    test_string!(
1405        test_string_min_max_1,
1406        vec![
1407            None,
1408            Some("c12345678901234"), // long bytes for view types
1409            None,
1410            Some("b"),
1411            Some("c1234xxxxxxxxxx")
1412        ],
1413        Some("b"),
1414        Some("c1234xxxxxxxxxx")
1415    );
1416
1417    test_string!(
1418        test_string_min_max_empty,
1419        Vec::<Option<&str>>::new(),
1420        None,
1421        None
1422    );
1423
1424    #[test]
1425    fn test_boolean_min_max_empty() {
1426        let a = BooleanArray::from(vec![] as Vec<Option<bool>>);
1427        assert_eq!(None, min_boolean(&a));
1428        assert_eq!(None, max_boolean(&a));
1429    }
1430
1431    #[test]
1432    fn test_boolean_min_max_all_null() {
1433        let a = BooleanArray::from(vec![None, None]);
1434        assert_eq!(None, min_boolean(&a));
1435        assert_eq!(None, max_boolean(&a));
1436    }
1437
1438    #[test]
1439    fn test_boolean_min_max_no_null() {
1440        let a = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
1441        assert_eq!(Some(false), min_boolean(&a));
1442        assert_eq!(Some(true), max_boolean(&a));
1443    }
1444
1445    #[test]
1446    fn test_boolean_min_max() {
1447        let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), None]);
1448        assert_eq!(Some(false), min_boolean(&a));
1449        assert_eq!(Some(true), max_boolean(&a));
1450
1451        let a = BooleanArray::from(vec![None, Some(true), None, Some(false), None]);
1452        assert_eq!(Some(false), min_boolean(&a));
1453        assert_eq!(Some(true), max_boolean(&a));
1454
1455        let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
1456        assert_eq!(Some(false), min_boolean(&a));
1457        assert_eq!(Some(true), max_boolean(&a));
1458
1459        let a = BooleanArray::from(vec![Some(true), None]);
1460        assert_eq!(Some(true), min_boolean(&a));
1461        assert_eq!(Some(true), max_boolean(&a));
1462
1463        let a = BooleanArray::from(vec![Some(false), None]);
1464        assert_eq!(Some(false), min_boolean(&a));
1465        assert_eq!(Some(false), max_boolean(&a));
1466
1467        let a = BooleanArray::from(vec![Some(true)]);
1468        assert_eq!(Some(true), min_boolean(&a));
1469        assert_eq!(Some(true), max_boolean(&a));
1470
1471        let a = BooleanArray::from(vec![Some(false)]);
1472        assert_eq!(Some(false), min_boolean(&a));
1473        assert_eq!(Some(false), max_boolean(&a));
1474    }
1475
1476    #[test]
1477    fn test_boolean_min_max_smaller() {
1478        let a = BooleanArray::from(vec![Some(false)]);
1479        assert_eq!(Some(false), min_boolean(&a));
1480        assert_eq!(Some(false), max_boolean(&a));
1481
1482        let a = BooleanArray::from(vec![None, Some(false)]);
1483        assert_eq!(Some(false), min_boolean(&a));
1484        assert_eq!(Some(false), max_boolean(&a));
1485
1486        let a = BooleanArray::from(vec![None, Some(true)]);
1487        assert_eq!(Some(true), min_boolean(&a));
1488        assert_eq!(Some(true), max_boolean(&a));
1489
1490        let a = BooleanArray::from(vec![Some(true)]);
1491        assert_eq!(Some(true), min_boolean(&a));
1492        assert_eq!(Some(true), max_boolean(&a));
1493    }
1494
1495    #[test]
1496    fn test_boolean_min_max_64_true_64_false() {
1497        let mut no_nulls = BooleanBuilder::new();
1498        no_nulls.append_slice(&[true; 64]);
1499        no_nulls.append_slice(&[false; 64]);
1500        let no_nulls = no_nulls.finish();
1501
1502        assert_eq!(Some(false), min_boolean(&no_nulls));
1503        assert_eq!(Some(true), max_boolean(&no_nulls));
1504
1505        let mut with_nulls = BooleanBuilder::new();
1506        with_nulls.append_slice(&[true; 31]);
1507        with_nulls.append_null();
1508        with_nulls.append_slice(&[true; 32]);
1509        with_nulls.append_slice(&[false; 1]);
1510        with_nulls.append_nulls(63);
1511        let with_nulls = with_nulls.finish();
1512
1513        assert_eq!(Some(false), min_boolean(&with_nulls));
1514        assert_eq!(Some(true), max_boolean(&with_nulls));
1515    }
1516
1517    #[test]
1518    fn test_boolean_min_max_64_false_64_true() {
1519        let mut no_nulls = BooleanBuilder::new();
1520        no_nulls.append_slice(&[false; 64]);
1521        no_nulls.append_slice(&[true; 64]);
1522        let no_nulls = no_nulls.finish();
1523
1524        assert_eq!(Some(false), min_boolean(&no_nulls));
1525        assert_eq!(Some(true), max_boolean(&no_nulls));
1526
1527        let mut with_nulls = BooleanBuilder::new();
1528        with_nulls.append_slice(&[false; 31]);
1529        with_nulls.append_null();
1530        with_nulls.append_slice(&[false; 32]);
1531        with_nulls.append_slice(&[true; 1]);
1532        with_nulls.append_nulls(63);
1533        let with_nulls = with_nulls.finish();
1534
1535        assert_eq!(Some(false), min_boolean(&with_nulls));
1536        assert_eq!(Some(true), max_boolean(&with_nulls));
1537    }
1538
1539    #[test]
1540    fn test_boolean_min_max_96_true() {
1541        let mut no_nulls = BooleanBuilder::new();
1542        no_nulls.append_slice(&[true; 96]);
1543        let no_nulls = no_nulls.finish();
1544
1545        assert_eq!(Some(true), min_boolean(&no_nulls));
1546        assert_eq!(Some(true), max_boolean(&no_nulls));
1547
1548        let mut with_nulls = BooleanBuilder::new();
1549        with_nulls.append_slice(&[true; 31]);
1550        with_nulls.append_null();
1551        with_nulls.append_slice(&[true; 32]);
1552        with_nulls.append_slice(&[true; 31]);
1553        with_nulls.append_null();
1554        let with_nulls = with_nulls.finish();
1555
1556        assert_eq!(Some(true), min_boolean(&with_nulls));
1557        assert_eq!(Some(true), max_boolean(&with_nulls));
1558    }
1559
1560    #[test]
1561    fn test_boolean_min_max_96_false() {
1562        let mut no_nulls = BooleanBuilder::new();
1563        no_nulls.append_slice(&[false; 96]);
1564        let no_nulls = no_nulls.finish();
1565
1566        assert_eq!(Some(false), min_boolean(&no_nulls));
1567        assert_eq!(Some(false), max_boolean(&no_nulls));
1568
1569        let mut with_nulls = BooleanBuilder::new();
1570        with_nulls.append_slice(&[false; 31]);
1571        with_nulls.append_null();
1572        with_nulls.append_slice(&[false; 32]);
1573        with_nulls.append_slice(&[false; 31]);
1574        with_nulls.append_null();
1575        let with_nulls = with_nulls.finish();
1576
1577        assert_eq!(Some(false), min_boolean(&with_nulls));
1578        assert_eq!(Some(false), max_boolean(&with_nulls));
1579    }
1580
1581    #[test]
1582    fn test_sum_dyn() {
1583        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1584        let values = Arc::new(values) as ArrayRef;
1585        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1586
1587        let dict_array = DictionaryArray::new(keys, values.clone());
1588        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1589        assert_eq!(39, sum_array::<Int8Type, _>(array).unwrap());
1590
1591        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1592        assert_eq!(15, sum_array::<Int32Type, _>(&a).unwrap());
1593
1594        let keys = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
1595        let dict_array = DictionaryArray::new(keys, values.clone());
1596        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1597        assert_eq!(26, sum_array::<Int8Type, _>(array).unwrap());
1598
1599        let keys = Int8Array::from(vec![None, None, None]);
1600        let dict_array = DictionaryArray::new(keys, values.clone());
1601        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1602        assert!(sum_array::<Int8Type, _>(array).is_none());
1603    }
1604
1605    #[test]
1606    fn test_max_min_dyn() {
1607        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1608        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1609        let values = Arc::new(values) as ArrayRef;
1610
1611        let dict_array = DictionaryArray::new(keys, values.clone());
1612        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1613        assert_eq!(14, max_array::<Int8Type, _>(array).unwrap());
1614
1615        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1616        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1617
1618        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1619        assert_eq!(5, max_array::<Int32Type, _>(&a).unwrap());
1620        assert_eq!(1, min_array::<Int32Type, _>(&a).unwrap());
1621
1622        let keys = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
1623        let dict_array = DictionaryArray::new(keys, values.clone());
1624        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1625        assert_eq!(17, max_array::<Int8Type, _>(array).unwrap());
1626        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1627        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1628
1629        let keys = Int8Array::from(vec![None, None, None]);
1630        let dict_array = DictionaryArray::new(keys, values.clone());
1631        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1632        assert!(max_array::<Int8Type, _>(array).is_none());
1633        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1634        assert!(min_array::<Int8Type, _>(array).is_none());
1635    }
1636
1637    #[test]
1638    fn test_max_min_dyn_nan() {
1639        let values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
1640        let keys = Int8Array::from_iter_values([0_i8, 1, 2]);
1641
1642        let dict_array = DictionaryArray::new(keys, Arc::new(values));
1643        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1644        assert!(max_array::<Float32Type, _>(array).unwrap().is_nan());
1645
1646        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1647        assert_eq!(2.0_f32, min_array::<Float32Type, _>(array).unwrap());
1648    }
1649
1650    #[test]
1651    fn test_min_max_sliced_primitive() {
1652        let expected = Some(4.0);
1653        let input: Float64Array = vec![None, Some(4.0)].into_iter().collect();
1654        let actual = min(&input);
1655        assert_eq!(actual, expected);
1656        let actual = max(&input);
1657        assert_eq!(actual, expected);
1658
1659        let sliced_input: Float64Array = vec![None, None, None, None, None, Some(4.0)]
1660            .into_iter()
1661            .collect();
1662        let sliced_input = sliced_input.slice(4, 2);
1663
1664        assert_eq!(&sliced_input, &input);
1665
1666        let actual = min(&sliced_input);
1667        assert_eq!(actual, expected);
1668        let actual = max(&sliced_input);
1669        assert_eq!(actual, expected);
1670    }
1671
1672    #[test]
1673    fn test_min_max_sliced_boolean() {
1674        let expected = Some(true);
1675        let input: BooleanArray = vec![None, Some(true)].into_iter().collect();
1676        let actual = min_boolean(&input);
1677        assert_eq!(actual, expected);
1678        let actual = max_boolean(&input);
1679        assert_eq!(actual, expected);
1680
1681        let sliced_input: BooleanArray = vec![None, None, None, None, None, Some(true)]
1682            .into_iter()
1683            .collect();
1684        let sliced_input = sliced_input.slice(4, 2);
1685
1686        assert_eq!(sliced_input, input);
1687
1688        let actual = min_boolean(&sliced_input);
1689        assert_eq!(actual, expected);
1690        let actual = max_boolean(&sliced_input);
1691        assert_eq!(actual, expected);
1692    }
1693
1694    #[test]
1695    fn test_min_max_sliced_string() {
1696        let expected = Some("foo");
1697        let input: StringArray = vec![None, Some("foo")].into_iter().collect();
1698        let actual = min_string(&input);
1699        assert_eq!(actual, expected);
1700        let actual = max_string(&input);
1701        assert_eq!(actual, expected);
1702
1703        let sliced_input: StringArray = vec![None, None, None, None, None, Some("foo")]
1704            .into_iter()
1705            .collect();
1706        let sliced_input = sliced_input.slice(4, 2);
1707
1708        assert_eq!(&sliced_input, &input);
1709
1710        let actual = min_string(&sliced_input);
1711        assert_eq!(actual, expected);
1712        let actual = max_string(&sliced_input);
1713        assert_eq!(actual, expected);
1714    }
1715
1716    #[test]
1717    fn test_min_max_sliced_binary() {
1718        let expected: Option<&[u8]> = Some(&[5]);
1719        let input: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
1720        let actual = min_binary(&input);
1721        assert_eq!(actual, expected);
1722        let actual = max_binary(&input);
1723        assert_eq!(actual, expected);
1724
1725        let sliced_input: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
1726            .into_iter()
1727            .collect();
1728        let sliced_input = sliced_input.slice(4, 2);
1729
1730        assert_eq!(&sliced_input, &input);
1731
1732        let actual = min_binary(&sliced_input);
1733        assert_eq!(actual, expected);
1734        let actual = max_binary(&sliced_input);
1735        assert_eq!(actual, expected);
1736    }
1737
1738    #[test]
1739    fn test_sum_overflow() {
1740        let a = Int32Array::from(vec![i32::MAX, 1]);
1741
1742        assert_eq!(sum(&a).unwrap(), -2147483648);
1743        assert_eq!(sum_array::<Int32Type, _>(&a).unwrap(), -2147483648);
1744    }
1745
1746    #[test]
1747    fn test_sum_checked_overflow() {
1748        let a = Int32Array::from(vec![i32::MAX, 1]);
1749
1750        sum_checked(&a).expect_err("overflow should be detected");
1751        sum_array_checked::<Int32Type, _>(&a).expect_err("overflow should be detected");
1752    }
1753}