arrow_arith/
aggregate.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Defines aggregations over Arrow arrays.
19
20use arrow_array::cast::*;
21use arrow_array::iterator::ArrayIter;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_data::bit_iterator::try_for_each_valid_idx;
25use arrow_schema::*;
26use std::borrow::BorrowMut;
27use std::cmp::{self, Ordering};
28use std::ops::{BitAnd, BitOr, BitXor};
29use types::ByteViewType;
30
31/// An accumulator for primitive numeric values.
32trait NumericAccumulator<T: ArrowNativeTypeOp>: Copy + Default {
33    /// Accumulate a non-null value.
34    fn accumulate(&mut self, value: T);
35    /// Accumulate a nullable values.
36    /// If `valid` is false the `value` should not affect the accumulator state.
37    fn accumulate_nullable(&mut self, value: T, valid: bool);
38    /// Merge another accumulator into this accumulator
39    fn merge(&mut self, other: Self);
40    /// Return the aggregated value.
41    fn finish(&mut self) -> T;
42}
43
44/// Helper for branchlessly selecting either `a` or `b` based on the boolean `m`.
45/// After verifying the generated assembly this can be a simple `if`.
46#[inline(always)]
47fn select<T: Copy>(m: bool, a: T, b: T) -> T {
48    if m {
49        a
50    } else {
51        b
52    }
53}
54
55#[derive(Clone, Copy)]
56struct SumAccumulator<T: ArrowNativeTypeOp> {
57    sum: T,
58}
59
60impl<T: ArrowNativeTypeOp> Default for SumAccumulator<T> {
61    fn default() -> Self {
62        Self { sum: T::ZERO }
63    }
64}
65
66impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for SumAccumulator<T> {
67    fn accumulate(&mut self, value: T) {
68        self.sum = self.sum.add_wrapping(value);
69    }
70
71    fn accumulate_nullable(&mut self, value: T, valid: bool) {
72        let sum = self.sum;
73        self.sum = select(valid, sum.add_wrapping(value), sum)
74    }
75
76    fn merge(&mut self, other: Self) {
77        self.sum = self.sum.add_wrapping(other.sum);
78    }
79
80    fn finish(&mut self) -> T {
81        self.sum
82    }
83}
84
85#[derive(Clone, Copy)]
86struct MinAccumulator<T: ArrowNativeTypeOp> {
87    min: T,
88}
89
90impl<T: ArrowNativeTypeOp> Default for MinAccumulator<T> {
91    fn default() -> Self {
92        Self {
93            min: T::MAX_TOTAL_ORDER,
94        }
95    }
96}
97
98impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MinAccumulator<T> {
99    fn accumulate(&mut self, value: T) {
100        let min = self.min;
101        self.min = select(value.is_lt(min), value, min);
102    }
103
104    fn accumulate_nullable(&mut self, value: T, valid: bool) {
105        let min = self.min;
106        let is_lt = valid & value.is_lt(min);
107        self.min = select(is_lt, value, min);
108    }
109
110    fn merge(&mut self, other: Self) {
111        self.accumulate(other.min)
112    }
113
114    fn finish(&mut self) -> T {
115        self.min
116    }
117}
118
119#[derive(Clone, Copy)]
120struct MaxAccumulator<T: ArrowNativeTypeOp> {
121    max: T,
122}
123
124impl<T: ArrowNativeTypeOp> Default for MaxAccumulator<T> {
125    fn default() -> Self {
126        Self {
127            max: T::MIN_TOTAL_ORDER,
128        }
129    }
130}
131
132impl<T: ArrowNativeTypeOp> NumericAccumulator<T> for MaxAccumulator<T> {
133    fn accumulate(&mut self, value: T) {
134        let max = self.max;
135        self.max = select(value.is_gt(max), value, max);
136    }
137
138    fn accumulate_nullable(&mut self, value: T, valid: bool) {
139        let max = self.max;
140        let is_gt = value.is_gt(max) & valid;
141        self.max = select(is_gt, value, max);
142    }
143
144    fn merge(&mut self, other: Self) {
145        self.accumulate(other.max)
146    }
147
148    fn finish(&mut self) -> T {
149        self.max
150    }
151}
152
153fn reduce_accumulators<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
154    mut acc: [A; LANES],
155) -> A {
156    assert!(LANES > 0 && LANES.is_power_of_two());
157    let mut len = LANES;
158
159    // attempt at tree reduction, unfortunately llvm does not fully recognize this pattern,
160    // but the generated code is still a little faster than purely sequential reduction for floats.
161    while len >= 2 {
162        let mid = len / 2;
163        let (h, t) = acc[..len].split_at_mut(mid);
164
165        for i in 0..mid {
166            h[i].merge(t[i]);
167        }
168        len /= 2;
169    }
170    acc[0]
171}
172
173#[inline(always)]
174fn aggregate_nonnull_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
175    acc: &mut [A; LANES],
176    values: &[T; LANES],
177) {
178    for i in 0..LANES {
179        acc[i].accumulate(values[i]);
180    }
181}
182
183#[inline(always)]
184fn aggregate_nullable_chunk<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
185    acc: &mut [A; LANES],
186    values: &[T; LANES],
187    validity: u64,
188) {
189    let mut bit = 1;
190    for i in 0..LANES {
191        acc[i].accumulate_nullable(values[i], (validity & bit) != 0);
192        bit <<= 1;
193    }
194}
195
196fn aggregate_nonnull_simple<T: ArrowNativeTypeOp, A: NumericAccumulator<T>>(values: &[T]) -> T {
197    values
198        .iter()
199        .copied()
200        .fold(A::default(), |mut a, b| {
201            a.accumulate(b);
202            a
203        })
204        .finish()
205}
206
207#[inline(never)]
208fn aggregate_nonnull_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
209    values: &[T],
210) -> T {
211    // aggregating into multiple independent accumulators allows the compiler to use vector registers
212    // with a single accumulator the compiler would not be allowed to reorder floating point addition
213    let mut acc = [A::default(); LANES];
214    let mut chunks = values.chunks_exact(LANES);
215    chunks.borrow_mut().for_each(|chunk| {
216        aggregate_nonnull_chunk(&mut acc, chunk[..LANES].try_into().unwrap());
217    });
218
219    let remainder = chunks.remainder();
220    for i in 0..remainder.len() {
221        acc[i].accumulate(remainder[i]);
222    }
223
224    reduce_accumulators(acc).finish()
225}
226
227#[inline(never)]
228fn aggregate_nullable_lanes<T: ArrowNativeTypeOp, A: NumericAccumulator<T>, const LANES: usize>(
229    values: &[T],
230    validity: &NullBuffer,
231) -> T {
232    assert!(LANES > 0 && 64 % LANES == 0);
233    assert_eq!(values.len(), validity.len());
234
235    // aggregating into multiple independent accumulators allows the compiler to use vector registers
236    let mut acc = [A::default(); LANES];
237    // we process 64 bits of validity at a time
238    let mut values_chunks = values.chunks_exact(64);
239    let validity_chunks = validity.inner().bit_chunks();
240    let mut validity_chunks_iter = validity_chunks.iter();
241
242    values_chunks.borrow_mut().for_each(|chunk| {
243        // Safety: we asserted that values and validity have the same length and trust the iterator impl
244        let mut validity = unsafe { validity_chunks_iter.next().unwrap_unchecked() };
245        // chunk further based on the number of vector lanes
246        chunk.chunks_exact(LANES).for_each(|chunk| {
247            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
248            validity >>= LANES;
249        });
250    });
251
252    let remainder = values_chunks.remainder();
253    if !remainder.is_empty() {
254        let mut validity = validity_chunks.remainder_bits();
255
256        let mut remainder_chunks = remainder.chunks_exact(LANES);
257        remainder_chunks.borrow_mut().for_each(|chunk| {
258            aggregate_nullable_chunk(&mut acc, chunk[..LANES].try_into().unwrap(), validity);
259            validity >>= LANES;
260        });
261
262        let remainder = remainder_chunks.remainder();
263        if !remainder.is_empty() {
264            let mut bit = 1;
265            for i in 0..remainder.len() {
266                acc[i].accumulate_nullable(remainder[i], (validity & bit) != 0);
267                bit <<= 1;
268            }
269        }
270    }
271
272    reduce_accumulators(acc).finish()
273}
274
275/// The preferred vector size in bytes for the target platform.
276/// Note that the avx512 target feature is still unstable and this also means it is not detected on stable rust.
277const PREFERRED_VECTOR_SIZE: usize =
278    if cfg!(all(target_arch = "x86_64", target_feature = "avx512f")) {
279        64
280    } else if cfg!(all(target_arch = "x86_64", target_feature = "avx")) {
281        32
282    } else {
283        16
284    };
285
286/// non-nullable aggregation requires fewer temporary registers so we can use more of them for accumulators
287const PREFERRED_VECTOR_SIZE_NON_NULL: usize = PREFERRED_VECTOR_SIZE * 2;
288
289/// Generic aggregation for any primitive type.
290/// Returns None if there are no non-null values in `array`.
291fn aggregate<T: ArrowNativeTypeOp, P: ArrowPrimitiveType<Native = T>, A: NumericAccumulator<T>>(
292    array: &PrimitiveArray<P>,
293) -> Option<T> {
294    let null_count = array.null_count();
295    if null_count == array.len() {
296        return None;
297    }
298    let values = array.values().as_ref();
299    match array.nulls() {
300        Some(nulls) if null_count > 0 => {
301            // const generics depending on a generic type parameter are not supported
302            // so we have to match and call aggregate with the corresponding constant
303            match PREFERRED_VECTOR_SIZE / std::mem::size_of::<T>() {
304                64 => Some(aggregate_nullable_lanes::<T, A, 64>(values, nulls)),
305                32 => Some(aggregate_nullable_lanes::<T, A, 32>(values, nulls)),
306                16 => Some(aggregate_nullable_lanes::<T, A, 16>(values, nulls)),
307                8 => Some(aggregate_nullable_lanes::<T, A, 8>(values, nulls)),
308                4 => Some(aggregate_nullable_lanes::<T, A, 4>(values, nulls)),
309                2 => Some(aggregate_nullable_lanes::<T, A, 2>(values, nulls)),
310                _ => Some(aggregate_nullable_lanes::<T, A, 1>(values, nulls)),
311            }
312        }
313        _ => {
314            let is_float = matches!(
315                array.data_type(),
316                DataType::Float16 | DataType::Float32 | DataType::Float64
317            );
318            if is_float {
319                match PREFERRED_VECTOR_SIZE_NON_NULL / std::mem::size_of::<T>() {
320                    64 => Some(aggregate_nonnull_lanes::<T, A, 64>(values)),
321                    32 => Some(aggregate_nonnull_lanes::<T, A, 32>(values)),
322                    16 => Some(aggregate_nonnull_lanes::<T, A, 16>(values)),
323                    8 => Some(aggregate_nonnull_lanes::<T, A, 8>(values)),
324                    4 => Some(aggregate_nonnull_lanes::<T, A, 4>(values)),
325                    2 => Some(aggregate_nonnull_lanes::<T, A, 2>(values)),
326                    _ => Some(aggregate_nonnull_simple::<T, A>(values)),
327                }
328            } else {
329                // for non-null integers its better to not chunk ourselves and instead
330                // let llvm fully handle loop unrolling and vectorization
331                Some(aggregate_nonnull_simple::<T, A>(values))
332            }
333        }
334    }
335}
336
337/// Returns the minimum value in the boolean array.
338///
339/// ```
340/// # use arrow_array::BooleanArray;
341/// # use arrow_arith::aggregate::min_boolean;
342///
343/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
344/// assert_eq!(min_boolean(&a), Some(false))
345/// ```
346pub fn min_boolean(array: &BooleanArray) -> Option<bool> {
347    // short circuit if all nulls / zero length array
348    if array.null_count() == array.len() {
349        return None;
350    }
351
352    // Note the min bool is false (0), so short circuit as soon as we see it
353    match array.nulls() {
354        None => {
355            let bit_chunks = array.values().bit_chunks();
356            if bit_chunks.iter().any(|x| {
357                // u64::MAX has all bits set, so if the value is not that, then there is a false
358                x != u64::MAX
359            }) {
360                return Some(false);
361            }
362            // If the remainder bits are not all set, then there is a false
363            if bit_chunks.remainder_bits().count_ones() as usize != bit_chunks.remainder_len() {
364                Some(false)
365            } else {
366                Some(true)
367            }
368        }
369        Some(nulls) => {
370            let validity_chunks = nulls.inner().bit_chunks();
371            let value_chunks = array.values().bit_chunks();
372
373            if value_chunks
374                .iter()
375                .zip(validity_chunks.iter())
376                .any(|(value, validity)| {
377                    // We are looking for a false value, but because applying the validity mask
378                    // can create a false for a true value (e.g. value: true, validity: false), we instead invert the value, so that we have to look for a true.
379                    (!value & validity) != 0
380                })
381            {
382                return Some(false);
383            }
384
385            // Same trick as above: Instead of looking for a false, we invert the value bits and look for a true
386            if (!value_chunks.remainder_bits() & validity_chunks.remainder_bits()) != 0 {
387                Some(false)
388            } else {
389                Some(true)
390            }
391        }
392    }
393}
394
395/// Returns the maximum value in the boolean array
396///
397/// ```
398/// # use arrow_array::BooleanArray;
399/// # use arrow_arith::aggregate::max_boolean;
400///
401/// let a = BooleanArray::from(vec![Some(true), None, Some(false)]);
402/// assert_eq!(max_boolean(&a), Some(true))
403/// ```
404pub fn max_boolean(array: &BooleanArray) -> Option<bool> {
405    // short circuit if all nulls / zero length array
406    if array.null_count() == array.len() {
407        return None;
408    }
409
410    // Note the max bool is true (1), so short circuit as soon as we see it
411    match array.nulls() {
412        None => array
413            .values()
414            .bit_chunks()
415            .iter_padded()
416            // We found a true if any bit is set
417            .map(|x| x != 0)
418            .find(|b| *b)
419            .or(Some(false)),
420        Some(nulls) => {
421            let validity_chunks = nulls.inner().bit_chunks().iter_padded();
422            let value_chunks = array.values().bit_chunks().iter_padded();
423            value_chunks
424                .zip(validity_chunks)
425                // We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk
426                .map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0)
427                .find(|b| *b)
428                .or(Some(false))
429        }
430    }
431}
432
433/// Helper to compute min/max of [`ArrayAccessor`].
434fn min_max_helper<T, A: ArrayAccessor<Item = T>, F>(array: A, cmp: F) -> Option<T>
435where
436    F: Fn(&T, &T) -> bool,
437{
438    let null_count = array.null_count();
439    if null_count == array.len() {
440        None
441    } else if null_count == 0 {
442        // JUSTIFICATION
443        //  Benefit:  ~8% speedup
444        //  Soundness: `i` is always within the array bounds
445        (0..array.len())
446            .map(|i| unsafe { array.value_unchecked(i) })
447            .reduce(|acc, item| if cmp(&acc, &item) { item } else { acc })
448    } else {
449        let nulls = array.nulls().unwrap();
450        unsafe {
451            let idx = nulls.valid_indices().reduce(|acc_idx, idx| {
452                let acc = array.value_unchecked(acc_idx);
453                let item = array.value_unchecked(idx);
454                if cmp(&acc, &item) {
455                    idx
456                } else {
457                    acc_idx
458                }
459            });
460            idx.map(|idx| array.value_unchecked(idx))
461        }
462    }
463}
464
465/// Helper to compute min/max of [`GenericByteViewArray<T>`].
466/// The specialized min/max leverages the inlined values to compare the byte views.
467/// `swap_cond` is the condition to swap current min/max with the new value.
468/// For example, `Ordering::Greater` for max and `Ordering::Less` for min.
469fn min_max_view_helper<T: ByteViewType>(
470    array: &GenericByteViewArray<T>,
471    swap_cond: cmp::Ordering,
472) -> Option<&T::Native> {
473    let null_count = array.null_count();
474    if null_count == array.len() {
475        None
476    } else if null_count == 0 {
477        let target_idx = (0..array.len()).reduce(|acc, item| {
478            // SAFETY:  array's length is correct so item is within bounds
479            let cmp = unsafe { GenericByteViewArray::compare_unchecked(array, item, array, acc) };
480            if cmp == swap_cond {
481                item
482            } else {
483                acc
484            }
485        });
486        // SAFETY: idx came from valid range `0..array.len()`
487        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
488    } else {
489        let nulls = array.nulls().unwrap();
490
491        let target_idx = nulls.valid_indices().reduce(|acc_idx, idx| {
492            let cmp =
493                unsafe { GenericByteViewArray::compare_unchecked(array, idx, array, acc_idx) };
494            if cmp == swap_cond {
495                idx
496            } else {
497                acc_idx
498            }
499        });
500
501        // SAFETY: idx came from valid range `0..array.len()`
502        unsafe { target_idx.map(|idx| array.value_unchecked(idx)) }
503    }
504}
505
506/// Returns the maximum value in the binary array, according to the natural order.
507pub fn max_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
508    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
509}
510
511/// Returns the maximum value in the binary view array, according to the natural order.
512pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
513    min_max_view_helper(array, Ordering::Greater)
514}
515
516/// Returns the maximum value in the fixed size binary array, according to the natural order.
517pub fn max_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
518    min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b)
519}
520
521/// Returns the minimum value in the binary array, according to the natural order.
522pub fn min_binary<T: OffsetSizeTrait>(array: &GenericBinaryArray<T>) -> Option<&[u8]> {
523    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
524}
525
526/// Returns the minimum value in the binary view array, according to the natural order.
527pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> {
528    min_max_view_helper(array, Ordering::Less)
529}
530
531/// Returns the minimum value in the fixed size binary array, according to the natural order.
532pub fn min_fixed_size_binary(array: &FixedSizeBinaryArray) -> Option<&[u8]> {
533    min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b)
534}
535
536/// Returns the maximum value in the string array, according to the natural order.
537pub fn max_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
538    min_max_helper::<&str, _, _>(array, |a, b| *a < *b)
539}
540
541/// Returns the maximum value in the string view array, according to the natural order.
542pub fn max_string_view(array: &StringViewArray) -> Option<&str> {
543    min_max_view_helper(array, Ordering::Greater)
544}
545
546/// Returns the minimum value in the string array, according to the natural order.
547pub fn min_string<T: OffsetSizeTrait>(array: &GenericStringArray<T>) -> Option<&str> {
548    min_max_helper::<&str, _, _>(array, |a, b| *a > *b)
549}
550
551/// Returns the minimum value in the string view array, according to the natural order.
552pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
553    min_max_view_helper(array, Ordering::Less)
554}
555
556/// Returns the sum of values in the array.
557///
558/// This doesn't detect overflow. Once overflowing, the result will wrap around.
559/// For an overflow-checking variant, use `sum_array_checked` instead.
560pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
561where
562    T: ArrowNumericType,
563    T::Native: ArrowNativeTypeOp,
564{
565    match array.data_type() {
566        DataType::Dictionary(_, _) => {
567            let null_count = array.null_count();
568
569            if null_count == array.len() {
570                return None;
571            }
572
573            let iter = ArrayIter::new(array);
574            let sum = iter
575                .into_iter()
576                .fold(T::default_value(), |accumulator, value| {
577                    if let Some(value) = value {
578                        accumulator.add_wrapping(value)
579                    } else {
580                        accumulator
581                    }
582                });
583
584            Some(sum)
585        }
586        _ => sum::<T>(as_primitive_array(&array)),
587    }
588}
589
590/// Returns the sum of values in the array.
591///
592/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
593/// use `sum_array` instead.
594pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
595    array: A,
596) -> Result<Option<T::Native>, ArrowError>
597where
598    T: ArrowNumericType,
599    T::Native: ArrowNativeTypeOp,
600{
601    match array.data_type() {
602        DataType::Dictionary(_, _) => {
603            let null_count = array.null_count();
604
605            if null_count == array.len() {
606                return Ok(None);
607            }
608
609            let iter = ArrayIter::new(array);
610            let sum = iter
611                .into_iter()
612                .try_fold(T::default_value(), |accumulator, value| {
613                    if let Some(value) = value {
614                        accumulator.add_checked(value)
615                    } else {
616                        Ok(accumulator)
617                    }
618                })?;
619
620            Ok(Some(sum))
621        }
622        _ => sum_checked::<T>(as_primitive_array(&array)),
623    }
624}
625
626/// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
627/// array with value of `ArrowNumericType` type.
628pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
629where
630    T: ArrowNumericType,
631    T::Native: ArrowNativeType,
632{
633    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_gt(*b), min)
634}
635
636/// Returns the max of values in the array of `ArrowNumericType` type, or dictionary
637/// array with value of `ArrowNumericType` type.
638pub fn max_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
639where
640    T: ArrowNumericType,
641    T::Native: ArrowNativeTypeOp,
642{
643    min_max_array_helper::<T, A, _, _>(array, |a, b| a.is_lt(*b), max)
644}
645
646fn min_max_array_helper<T, A: ArrayAccessor<Item = T::Native>, F, M>(
647    array: A,
648    cmp: F,
649    m: M,
650) -> Option<T::Native>
651where
652    T: ArrowNumericType,
653    F: Fn(&T::Native, &T::Native) -> bool,
654    M: Fn(&PrimitiveArray<T>) -> Option<T::Native>,
655{
656    match array.data_type() {
657        DataType::Dictionary(_, _) => min_max_helper::<T::Native, _, _>(array, cmp),
658        _ => m(as_primitive_array(&array)),
659    }
660}
661
662macro_rules! bit_operation {
663    ($NAME:ident, $OP:ident, $NATIVE:ident, $DEFAULT:expr, $DOC:expr) => {
664        #[doc = $DOC]
665        ///
666        /// Returns `None` if the array is empty or only contains null values.
667        pub fn $NAME<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
668        where
669            T: ArrowNumericType,
670            T::Native: $NATIVE<Output = T::Native> + ArrowNativeTypeOp,
671        {
672            let default;
673            if $DEFAULT == -1 {
674                default = T::Native::ONE.neg_wrapping();
675            } else {
676                default = T::default_value();
677            }
678
679            let null_count = array.null_count();
680
681            if null_count == array.len() {
682                return None;
683            }
684
685            let data: &[T::Native] = array.values();
686
687            match array.nulls() {
688                None => {
689                    let result = data
690                        .iter()
691                        .fold(default, |accumulator, value| accumulator.$OP(*value));
692
693                    Some(result)
694                }
695                Some(nulls) => {
696                    let mut result = default;
697                    let data_chunks = data.chunks_exact(64);
698                    let remainder = data_chunks.remainder();
699
700                    let bit_chunks = nulls.inner().bit_chunks();
701                    data_chunks
702                        .zip(bit_chunks.iter())
703                        .for_each(|(chunk, mask)| {
704                            // index_mask has value 1 << i in the loop
705                            let mut index_mask = 1;
706                            chunk.iter().for_each(|value| {
707                                if (mask & index_mask) != 0 {
708                                    result = result.$OP(*value);
709                                }
710                                index_mask <<= 1;
711                            });
712                        });
713
714                    let remainder_bits = bit_chunks.remainder_bits();
715
716                    remainder.iter().enumerate().for_each(|(i, value)| {
717                        if remainder_bits & (1 << i) != 0 {
718                            result = result.$OP(*value);
719                        }
720                    });
721
722                    Some(result)
723                }
724            }
725        }
726    };
727}
728
729bit_operation!(
730    bit_and,
731    bitand,
732    BitAnd,
733    -1,
734    "Returns the bitwise and of all non-null input values."
735);
736bit_operation!(
737    bit_or,
738    bitor,
739    BitOr,
740    0,
741    "Returns the bitwise or of all non-null input values."
742);
743bit_operation!(
744    bit_xor,
745    bitxor,
746    BitXor,
747    0,
748    "Returns the bitwise xor of all non-null input values."
749);
750
751/// Returns true if all non-null input values are true, otherwise false.
752///
753/// Returns `None` if the array is empty or only contains null values.
754pub fn bool_and(array: &BooleanArray) -> Option<bool> {
755    min_boolean(array)
756}
757
758/// Returns true if any non-null input value is true, otherwise false.
759///
760/// Returns `None` if the array is empty or only contains null values.
761pub fn bool_or(array: &BooleanArray) -> Option<bool> {
762    max_boolean(array)
763}
764
765/// Returns the sum of values in the primitive array.
766///
767/// Returns `Ok(None)` if the array is empty or only contains null values.
768///
769/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant,
770/// use `sum` instead.
771pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
772where
773    T: ArrowNumericType,
774    T::Native: ArrowNativeTypeOp,
775{
776    let null_count = array.null_count();
777
778    if null_count == array.len() {
779        return Ok(None);
780    }
781
782    let data: &[T::Native] = array.values();
783
784    match array.nulls() {
785        None => {
786            let sum = data
787                .iter()
788                .try_fold(T::default_value(), |accumulator, value| {
789                    accumulator.add_checked(*value)
790                })?;
791
792            Ok(Some(sum))
793        }
794        Some(nulls) => {
795            let mut sum = T::default_value();
796
797            try_for_each_valid_idx(
798                nulls.len(),
799                nulls.offset(),
800                nulls.null_count(),
801                Some(nulls.validity()),
802                |idx| {
803                    unsafe { sum = sum.add_checked(array.value_unchecked(idx))? };
804                    Ok::<_, ArrowError>(())
805                },
806            )?;
807
808            Ok(Some(sum))
809        }
810    }
811}
812
813/// Returns the sum of values in the primitive array.
814///
815/// Returns `None` if the array is empty or only contains null values.
816///
817/// This doesn't detect overflow in release mode by default. Once overflowing, the result will
818/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
819pub fn sum<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
820where
821    T::Native: ArrowNativeTypeOp,
822{
823    aggregate::<T::Native, T, SumAccumulator<T::Native>>(array)
824}
825
826/// Returns the minimum value in the array, according to the natural order.
827/// For floating point arrays any NaN values are considered to be greater than any other non-null value
828pub fn min<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
829where
830    T::Native: PartialOrd,
831{
832    aggregate::<T::Native, T, MinAccumulator<T::Native>>(array)
833}
834
835/// Returns the maximum value in the array, according to the natural order.
836/// For floating point arrays any NaN values are considered to be greater than any other non-null value
837pub fn max<T: ArrowNumericType>(array: &PrimitiveArray<T>) -> Option<T::Native>
838where
839    T::Native: PartialOrd,
840{
841    aggregate::<T::Native, T, MaxAccumulator<T::Native>>(array)
842}
843
844#[cfg(test)]
845mod tests {
846    use super::*;
847    use arrow_array::types::*;
848    use builder::BooleanBuilder;
849    use std::sync::Arc;
850
851    #[test]
852    fn test_primitive_array_sum() {
853        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
854        assert_eq!(15, sum(&a).unwrap());
855    }
856
857    #[test]
858    fn test_primitive_array_float_sum() {
859        let a = Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5]);
860        assert_eq!(16.5, sum(&a).unwrap());
861    }
862
863    #[test]
864    fn test_primitive_array_sum_with_nulls() {
865        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
866        assert_eq!(10, sum(&a).unwrap());
867    }
868
869    #[test]
870    fn test_primitive_array_sum_all_nulls() {
871        let a = Int32Array::from(vec![None, None, None]);
872        assert_eq!(None, sum(&a));
873    }
874
875    #[test]
876    fn test_primitive_array_sum_large_float_64() {
877        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), None);
878        assert_eq!(Some((1..=100).sum::<i64>() as f64), sum(&c));
879
880        // create an array that actually has non-zero values at the invalid indices
881        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
882        let c = Float64Array::new((1..=100).map(|x| x as f64).collect(), Some(validity));
883
884        assert_eq!(
885            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f64),
886            sum(&c)
887        );
888    }
889
890    #[test]
891    fn test_primitive_array_sum_large_float_32() {
892        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), None);
893        assert_eq!(Some((1..=100).sum::<i64>() as f32), sum(&c));
894
895        // create an array that actually has non-zero values at the invalid indices
896        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
897        let c = Float32Array::new((1..=100).map(|x| x as f32).collect(), Some(validity));
898
899        assert_eq!(
900            Some((1..=100).filter(|i| i % 3 == 0).sum::<i64>() as f32),
901            sum(&c)
902        );
903    }
904
905    #[test]
906    fn test_primitive_array_sum_large_64() {
907        let c = Int64Array::new((1..=100).collect(), None);
908        assert_eq!(Some((1..=100).sum()), sum(&c));
909
910        // create an array that actually has non-zero values at the invalid indices
911        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
912        let c = Int64Array::new((1..=100).collect(), Some(validity));
913
914        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
915    }
916
917    #[test]
918    fn test_primitive_array_sum_large_32() {
919        let c = Int32Array::new((1..=100).collect(), None);
920        assert_eq!(Some((1..=100).sum()), sum(&c));
921
922        // create an array that actually has non-zero values at the invalid indices
923        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
924        let c = Int32Array::new((1..=100).collect(), Some(validity));
925        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
926    }
927
928    #[test]
929    fn test_primitive_array_sum_large_16() {
930        let c = Int16Array::new((1..=100).collect(), None);
931        assert_eq!(Some((1..=100).sum()), sum(&c));
932
933        // create an array that actually has non-zero values at the invalid indices
934        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
935        let c = Int16Array::new((1..=100).collect(), Some(validity));
936        assert_eq!(Some((1..=100).filter(|i| i % 3 == 0).sum()), sum(&c));
937    }
938
939    #[test]
940    fn test_primitive_array_sum_large_8() {
941        let c = UInt8Array::new((1..=100).collect(), None);
942        assert_eq!(
943            Some((1..=100).fold(0_u8, |a, x| a.wrapping_add(x))),
944            sum(&c)
945        );
946
947        // create an array that actually has non-zero values at the invalid indices
948        let validity = NullBuffer::new((1..=100).map(|x| x % 3 == 0).collect());
949        let c = UInt8Array::new((1..=100).collect(), Some(validity));
950        assert_eq!(
951            Some(
952                (1..=100)
953                    .filter(|i| i % 3 == 0)
954                    .fold(0_u8, |a, x| a.wrapping_add(x))
955            ),
956            sum(&c)
957        );
958    }
959
960    #[test]
961    fn test_primitive_array_bit_and() {
962        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
963        assert_eq!(0, bit_and(&a).unwrap());
964    }
965
966    #[test]
967    fn test_primitive_array_bit_and_with_nulls() {
968        let a = Int32Array::from(vec![None, Some(2), Some(3), None, None]);
969        assert_eq!(2, bit_and(&a).unwrap());
970    }
971
972    #[test]
973    fn test_primitive_array_bit_and_all_nulls() {
974        let a = Int32Array::from(vec![None, None, None]);
975        assert_eq!(None, bit_and(&a));
976    }
977
978    #[test]
979    fn test_primitive_array_bit_or() {
980        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
981        assert_eq!(7, bit_or(&a).unwrap());
982    }
983
984    #[test]
985    fn test_primitive_array_bit_or_with_nulls() {
986        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
987        assert_eq!(7, bit_or(&a).unwrap());
988    }
989
990    #[test]
991    fn test_primitive_array_bit_or_all_nulls() {
992        let a = Int32Array::from(vec![None, None, None]);
993        assert_eq!(None, bit_or(&a));
994    }
995
996    #[test]
997    fn test_primitive_array_bit_xor() {
998        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
999        assert_eq!(1, bit_xor(&a).unwrap());
1000    }
1001
1002    #[test]
1003    fn test_primitive_array_bit_xor_with_nulls() {
1004        let a = Int32Array::from(vec![None, Some(2), Some(3), None, Some(5)]);
1005        assert_eq!(4, bit_xor(&a).unwrap());
1006    }
1007
1008    #[test]
1009    fn test_primitive_array_bit_xor_all_nulls() {
1010        let a = Int32Array::from(vec![None, None, None]);
1011        assert_eq!(None, bit_xor(&a));
1012    }
1013
1014    #[test]
1015    fn test_primitive_array_bool_and() {
1016        let a = BooleanArray::from(vec![true, false, true, false, true]);
1017        assert!(!bool_and(&a).unwrap());
1018    }
1019
1020    #[test]
1021    fn test_primitive_array_bool_and_with_nulls() {
1022        let a = BooleanArray::from(vec![None, Some(true), Some(true), None, Some(true)]);
1023        assert!(bool_and(&a).unwrap());
1024    }
1025
1026    #[test]
1027    fn test_primitive_array_bool_and_all_nulls() {
1028        let a = BooleanArray::from(vec![None, None, None]);
1029        assert_eq!(None, bool_and(&a));
1030    }
1031
1032    #[test]
1033    fn test_primitive_array_bool_or() {
1034        let a = BooleanArray::from(vec![true, false, true, false, true]);
1035        assert!(bool_or(&a).unwrap());
1036    }
1037
1038    #[test]
1039    fn test_primitive_array_bool_or_with_nulls() {
1040        let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]);
1041        assert!(!bool_or(&a).unwrap());
1042    }
1043
1044    #[test]
1045    fn test_primitive_array_bool_or_all_nulls() {
1046        let a = BooleanArray::from(vec![None, None, None]);
1047        assert_eq!(None, bool_or(&a));
1048    }
1049
1050    #[test]
1051    fn test_primitive_array_min_max() {
1052        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
1053        assert_eq!(5, min(&a).unwrap());
1054        assert_eq!(9, max(&a).unwrap());
1055    }
1056
1057    #[test]
1058    fn test_primitive_array_min_max_with_nulls() {
1059        let a = Int32Array::from(vec![Some(5), None, None, Some(8), Some(9)]);
1060        assert_eq!(5, min(&a).unwrap());
1061        assert_eq!(9, max(&a).unwrap());
1062    }
1063
1064    #[test]
1065    fn test_primitive_min_max_1() {
1066        let a = Int32Array::from(vec![None, None, Some(5), Some(2)]);
1067        assert_eq!(Some(2), min(&a));
1068        assert_eq!(Some(5), max(&a));
1069    }
1070
1071    #[test]
1072    fn test_primitive_min_max_float_large_nonnull_array() {
1073        let a: Float64Array = (0..256).map(|i| Some((i + 1) as f64)).collect();
1074        // min/max are on boundaries of chunked data
1075        assert_eq!(Some(1.0), min(&a));
1076        assert_eq!(Some(256.0), max(&a));
1077
1078        // max is last value in remainder after chunking
1079        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1080        assert_eq!(Some(255.0), max(&a));
1081
1082        // max is first value in remainder after chunking
1083        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1084        assert_eq!(Some(257.0), max(&a));
1085    }
1086
1087    #[test]
1088    fn test_primitive_min_max_float_large_nullable_array() {
1089        let a: Float64Array = (0..256)
1090            .map(|i| {
1091                if (i + 1) % 3 == 0 {
1092                    None
1093                } else {
1094                    Some((i + 1) as f64)
1095                }
1096            })
1097            .collect();
1098        // min/max are on boundaries of chunked data
1099        assert_eq!(Some(1.0), min(&a));
1100        assert_eq!(Some(256.0), max(&a));
1101
1102        let a: Float64Array = (0..256)
1103            .map(|i| {
1104                if i == 0 || i == 255 {
1105                    None
1106                } else {
1107                    Some((i + 1) as f64)
1108                }
1109            })
1110            .collect();
1111        // boundaries of chunked data are null
1112        assert_eq!(Some(2.0), min(&a));
1113        assert_eq!(Some(255.0), max(&a));
1114
1115        let a: Float64Array = (0..256)
1116            .map(|i| if i != 100 { None } else { Some((i) as f64) })
1117            .collect();
1118        // a single non-null value somewhere in the middle
1119        assert_eq!(Some(100.0), min(&a));
1120        assert_eq!(Some(100.0), max(&a));
1121
1122        // max is last value in remainder after chunking
1123        let a: Float64Array = (0..255).map(|i| Some((i + 1) as f64)).collect();
1124        assert_eq!(Some(255.0), max(&a));
1125
1126        // max is first value in remainder after chunking
1127        let a: Float64Array = (0..257).map(|i| Some((i + 1) as f64)).collect();
1128        assert_eq!(Some(257.0), max(&a));
1129    }
1130
1131    #[test]
1132    fn test_primitive_min_max_float_edge_cases() {
1133        let a: Float64Array = (0..100).map(|_| Some(f64::NEG_INFINITY)).collect();
1134        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1135        assert_eq!(Some(f64::NEG_INFINITY), max(&a));
1136
1137        let a: Float64Array = (0..100).map(|_| Some(f64::MIN)).collect();
1138        assert_eq!(Some(f64::MIN), min(&a));
1139        assert_eq!(Some(f64::MIN), max(&a));
1140
1141        let a: Float64Array = (0..100).map(|_| Some(f64::MAX)).collect();
1142        assert_eq!(Some(f64::MAX), min(&a));
1143        assert_eq!(Some(f64::MAX), max(&a));
1144
1145        let a: Float64Array = (0..100).map(|_| Some(f64::INFINITY)).collect();
1146        assert_eq!(Some(f64::INFINITY), min(&a));
1147        assert_eq!(Some(f64::INFINITY), max(&a));
1148    }
1149
1150    #[test]
1151    fn test_primitive_min_max_float_all_nans_non_null() {
1152        let a: Float64Array = (0..100).map(|_| Some(f64::NAN)).collect();
1153        assert!(max(&a).unwrap().is_nan());
1154        assert!(min(&a).unwrap().is_nan());
1155    }
1156
1157    #[test]
1158    fn test_primitive_min_max_float_negative_nan() {
1159        let a: Float64Array =
1160            Float64Array::from(vec![f64::NEG_INFINITY, f64::NAN, f64::INFINITY, -f64::NAN]);
1161        let max = max(&a).unwrap();
1162        let min = min(&a).unwrap();
1163        assert!(max.is_nan());
1164        assert!(max.is_sign_positive());
1165
1166        assert!(min.is_nan());
1167        assert!(min.is_sign_negative());
1168    }
1169
1170    #[test]
1171    fn test_primitive_min_max_float_first_nan_nonnull() {
1172        let a: Float64Array = (0..100)
1173            .map(|i| {
1174                if i == 0 {
1175                    Some(f64::NAN)
1176                } else {
1177                    Some(i as f64)
1178                }
1179            })
1180            .collect();
1181        assert_eq!(Some(1.0), min(&a));
1182        assert!(max(&a).unwrap().is_nan());
1183    }
1184
1185    #[test]
1186    fn test_primitive_min_max_float_last_nan_nonnull() {
1187        let a: Float64Array = (0..100)
1188            .map(|i| {
1189                if i == 99 {
1190                    Some(f64::NAN)
1191                } else {
1192                    Some((i + 1) as f64)
1193                }
1194            })
1195            .collect();
1196        assert_eq!(Some(1.0), min(&a));
1197        assert!(max(&a).unwrap().is_nan());
1198    }
1199
1200    #[test]
1201    fn test_primitive_min_max_float_first_nan_nullable() {
1202        let a: Float64Array = (0..100)
1203            .map(|i| {
1204                if i == 0 {
1205                    Some(f64::NAN)
1206                } else if i % 2 == 0 {
1207                    None
1208                } else {
1209                    Some(i as f64)
1210                }
1211            })
1212            .collect();
1213        assert_eq!(Some(1.0), min(&a));
1214        assert!(max(&a).unwrap().is_nan());
1215    }
1216
1217    #[test]
1218    fn test_primitive_min_max_float_last_nan_nullable() {
1219        let a: Float64Array = (0..100)
1220            .map(|i| {
1221                if i == 99 {
1222                    Some(f64::NAN)
1223                } else if i % 2 == 0 {
1224                    None
1225                } else {
1226                    Some(i as f64)
1227                }
1228            })
1229            .collect();
1230        assert_eq!(Some(1.0), min(&a));
1231        assert!(max(&a).unwrap().is_nan());
1232    }
1233
1234    #[test]
1235    fn test_primitive_min_max_float_inf_and_nans() {
1236        let a: Float64Array = (0..100)
1237            .map(|i| {
1238                let x = match i % 10 {
1239                    0 => f64::NEG_INFINITY,
1240                    1 => f64::MIN,
1241                    2 => f64::MAX,
1242                    4 => f64::INFINITY,
1243                    5 => f64::NAN,
1244                    _ => i as f64,
1245                };
1246                Some(x)
1247            })
1248            .collect();
1249        assert_eq!(Some(f64::NEG_INFINITY), min(&a));
1250        assert!(max(&a).unwrap().is_nan());
1251    }
1252
1253    fn pad_inputs_and_test_fixed_size_binary(
1254        input: Vec<Option<&[u8]>>,
1255        expected_min: Option<&[u8]>,
1256        expected_max: Option<&[u8]>,
1257    ) {
1258        fn pad_slice(slice: &[u8], len: usize) -> Vec<u8> {
1259            let mut padded = vec![0; len];
1260            padded[..slice.len()].copy_from_slice(slice);
1261            padded
1262        }
1263
1264        let max_len = input
1265            .iter()
1266            .filter_map(|x| x.as_ref().map(|b| b.len()))
1267            .max()
1268            .unwrap_or(0);
1269        let padded_input = input
1270            .iter()
1271            .map(|x| x.as_ref().map(|b| pad_slice(b, max_len)));
1272        let input_arr =
1273            FixedSizeBinaryArray::try_from_sparse_iter_with_size(padded_input, max_len as i32)
1274                .unwrap();
1275        let padded_expected_min = expected_min.map(|b| pad_slice(b, max_len));
1276        let padded_expected_max = expected_max.map(|b| pad_slice(b, max_len));
1277
1278        assert_eq!(
1279            padded_expected_min.as_deref(),
1280            min_fixed_size_binary(&input_arr)
1281        );
1282        assert_eq!(
1283            padded_expected_max.as_deref(),
1284            max_fixed_size_binary(&input_arr)
1285        );
1286    }
1287
1288    macro_rules! test_binary {
1289        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1290            #[test]
1291            fn $NAME() {
1292                let binary = BinaryArray::from($ARRAY);
1293                assert_eq!($EXPECTED_MIN, min_binary(&binary));
1294                assert_eq!($EXPECTED_MAX, max_binary(&binary));
1295
1296                let large_binary = LargeBinaryArray::from($ARRAY);
1297                assert_eq!($EXPECTED_MIN, min_binary(&large_binary));
1298                assert_eq!($EXPECTED_MAX, max_binary(&large_binary));
1299
1300                let binary_view = BinaryViewArray::from($ARRAY);
1301                assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view));
1302                assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view));
1303
1304                pad_inputs_and_test_fixed_size_binary($ARRAY, $EXPECTED_MIN, $EXPECTED_MAX);
1305            }
1306        };
1307    }
1308
1309    test_binary!(
1310        test_binary_min_max_with_nulls,
1311        vec![
1312            Some("b01234567890123".as_bytes()), // long bytes
1313            None,
1314            None,
1315            Some(b"a"),
1316            Some(b"c"),
1317            Some(b"abcdedfg0123456"),
1318        ],
1319        Some("a".as_bytes()),
1320        Some("c".as_bytes())
1321    );
1322
1323    test_binary!(
1324        test_binary_min_max_no_null,
1325        vec![
1326            Some("b".as_bytes()),
1327            Some(b"abcdefghijklmnopqrst"), // long bytes
1328            Some(b"c"),
1329            Some(b"b01234567890123"), // long bytes for view types
1330        ],
1331        Some("abcdefghijklmnopqrst".as_bytes()),
1332        Some("c".as_bytes())
1333    );
1334
1335    test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None);
1336
1337    test_binary!(
1338        test_binary_min_max_1,
1339        vec![
1340            None,
1341            Some("b01234567890123435".as_bytes()), // long bytes for view types
1342            None,
1343            Some(b"b0123xxxxxxxxxxx"),
1344            Some(b"a")
1345        ],
1346        Some("a".as_bytes()),
1347        Some("b0123xxxxxxxxxxx".as_bytes())
1348    );
1349
1350    macro_rules! test_string {
1351        ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => {
1352            #[test]
1353            fn $NAME() {
1354                let string = StringArray::from($ARRAY);
1355                assert_eq!($EXPECTED_MIN, min_string(&string));
1356                assert_eq!($EXPECTED_MAX, max_string(&string));
1357
1358                let large_string = LargeStringArray::from($ARRAY);
1359                assert_eq!($EXPECTED_MIN, min_string(&large_string));
1360                assert_eq!($EXPECTED_MAX, max_string(&large_string));
1361
1362                let string_view = StringViewArray::from($ARRAY);
1363                assert_eq!($EXPECTED_MIN, min_string_view(&string_view));
1364                assert_eq!($EXPECTED_MAX, max_string_view(&string_view));
1365            }
1366        };
1367    }
1368
1369    test_string!(
1370        test_string_min_max_with_nulls,
1371        vec![
1372            Some("b012345678901234"), // long bytes for view types
1373            None,
1374            None,
1375            Some("a"),
1376            Some("c"),
1377            Some("b0123xxxxxxxxxxx")
1378        ],
1379        Some("a"),
1380        Some("c")
1381    );
1382
1383    test_string!(
1384        test_string_min_max_no_null,
1385        vec![
1386            Some("b"),
1387            Some("b012345678901234"), // long bytes for view types
1388            Some("a"),
1389            Some("b012xxxxxxxxxxxx")
1390        ],
1391        Some("a"),
1392        Some("b012xxxxxxxxxxxx")
1393    );
1394
1395    test_string!(
1396        test_string_min_max_all_nulls,
1397        Vec::<Option<&str>>::from_iter([None, None]),
1398        None,
1399        None
1400    );
1401
1402    test_string!(
1403        test_string_min_max_1,
1404        vec![
1405            None,
1406            Some("c12345678901234"), // long bytes for view types
1407            None,
1408            Some("b"),
1409            Some("c1234xxxxxxxxxx")
1410        ],
1411        Some("b"),
1412        Some("c1234xxxxxxxxxx")
1413    );
1414
1415    test_string!(
1416        test_string_min_max_empty,
1417        Vec::<Option<&str>>::new(),
1418        None,
1419        None
1420    );
1421
1422    #[test]
1423    fn test_boolean_min_max_empty() {
1424        let a = BooleanArray::from(vec![] as Vec<Option<bool>>);
1425        assert_eq!(None, min_boolean(&a));
1426        assert_eq!(None, max_boolean(&a));
1427    }
1428
1429    #[test]
1430    fn test_boolean_min_max_all_null() {
1431        let a = BooleanArray::from(vec![None, None]);
1432        assert_eq!(None, min_boolean(&a));
1433        assert_eq!(None, max_boolean(&a));
1434    }
1435
1436    #[test]
1437    fn test_boolean_min_max_no_null() {
1438        let a = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
1439        assert_eq!(Some(false), min_boolean(&a));
1440        assert_eq!(Some(true), max_boolean(&a));
1441    }
1442
1443    #[test]
1444    fn test_boolean_min_max() {
1445        let a = BooleanArray::from(vec![Some(true), Some(true), None, Some(false), None]);
1446        assert_eq!(Some(false), min_boolean(&a));
1447        assert_eq!(Some(true), max_boolean(&a));
1448
1449        let a = BooleanArray::from(vec![None, Some(true), None, Some(false), None]);
1450        assert_eq!(Some(false), min_boolean(&a));
1451        assert_eq!(Some(true), max_boolean(&a));
1452
1453        let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]);
1454        assert_eq!(Some(false), min_boolean(&a));
1455        assert_eq!(Some(true), max_boolean(&a));
1456
1457        let a = BooleanArray::from(vec![Some(true), None]);
1458        assert_eq!(Some(true), min_boolean(&a));
1459        assert_eq!(Some(true), max_boolean(&a));
1460
1461        let a = BooleanArray::from(vec![Some(false), None]);
1462        assert_eq!(Some(false), min_boolean(&a));
1463        assert_eq!(Some(false), max_boolean(&a));
1464
1465        let a = BooleanArray::from(vec![Some(true)]);
1466        assert_eq!(Some(true), min_boolean(&a));
1467        assert_eq!(Some(true), max_boolean(&a));
1468
1469        let a = BooleanArray::from(vec![Some(false)]);
1470        assert_eq!(Some(false), min_boolean(&a));
1471        assert_eq!(Some(false), max_boolean(&a));
1472    }
1473
1474    #[test]
1475    fn test_boolean_min_max_smaller() {
1476        let a = BooleanArray::from(vec![Some(false)]);
1477        assert_eq!(Some(false), min_boolean(&a));
1478        assert_eq!(Some(false), max_boolean(&a));
1479
1480        let a = BooleanArray::from(vec![None, Some(false)]);
1481        assert_eq!(Some(false), min_boolean(&a));
1482        assert_eq!(Some(false), max_boolean(&a));
1483
1484        let a = BooleanArray::from(vec![None, Some(true)]);
1485        assert_eq!(Some(true), min_boolean(&a));
1486        assert_eq!(Some(true), max_boolean(&a));
1487
1488        let a = BooleanArray::from(vec![Some(true)]);
1489        assert_eq!(Some(true), min_boolean(&a));
1490        assert_eq!(Some(true), max_boolean(&a));
1491    }
1492
1493    #[test]
1494    fn test_boolean_min_max_64_true_64_false() {
1495        let mut no_nulls = BooleanBuilder::new();
1496        no_nulls.append_slice(&[true; 64]);
1497        no_nulls.append_slice(&[false; 64]);
1498        let no_nulls = no_nulls.finish();
1499
1500        assert_eq!(Some(false), min_boolean(&no_nulls));
1501        assert_eq!(Some(true), max_boolean(&no_nulls));
1502
1503        let mut with_nulls = BooleanBuilder::new();
1504        with_nulls.append_slice(&[true; 31]);
1505        with_nulls.append_null();
1506        with_nulls.append_slice(&[true; 32]);
1507        with_nulls.append_slice(&[false; 1]);
1508        with_nulls.append_nulls(63);
1509        let with_nulls = with_nulls.finish();
1510
1511        assert_eq!(Some(false), min_boolean(&with_nulls));
1512        assert_eq!(Some(true), max_boolean(&with_nulls));
1513    }
1514
1515    #[test]
1516    fn test_boolean_min_max_64_false_64_true() {
1517        let mut no_nulls = BooleanBuilder::new();
1518        no_nulls.append_slice(&[false; 64]);
1519        no_nulls.append_slice(&[true; 64]);
1520        let no_nulls = no_nulls.finish();
1521
1522        assert_eq!(Some(false), min_boolean(&no_nulls));
1523        assert_eq!(Some(true), max_boolean(&no_nulls));
1524
1525        let mut with_nulls = BooleanBuilder::new();
1526        with_nulls.append_slice(&[false; 31]);
1527        with_nulls.append_null();
1528        with_nulls.append_slice(&[false; 32]);
1529        with_nulls.append_slice(&[true; 1]);
1530        with_nulls.append_nulls(63);
1531        let with_nulls = with_nulls.finish();
1532
1533        assert_eq!(Some(false), min_boolean(&with_nulls));
1534        assert_eq!(Some(true), max_boolean(&with_nulls));
1535    }
1536
1537    #[test]
1538    fn test_boolean_min_max_96_true() {
1539        let mut no_nulls = BooleanBuilder::new();
1540        no_nulls.append_slice(&[true; 96]);
1541        let no_nulls = no_nulls.finish();
1542
1543        assert_eq!(Some(true), min_boolean(&no_nulls));
1544        assert_eq!(Some(true), max_boolean(&no_nulls));
1545
1546        let mut with_nulls = BooleanBuilder::new();
1547        with_nulls.append_slice(&[true; 31]);
1548        with_nulls.append_null();
1549        with_nulls.append_slice(&[true; 32]);
1550        with_nulls.append_slice(&[true; 31]);
1551        with_nulls.append_null();
1552        let with_nulls = with_nulls.finish();
1553
1554        assert_eq!(Some(true), min_boolean(&with_nulls));
1555        assert_eq!(Some(true), max_boolean(&with_nulls));
1556    }
1557
1558    #[test]
1559    fn test_boolean_min_max_96_false() {
1560        let mut no_nulls = BooleanBuilder::new();
1561        no_nulls.append_slice(&[false; 96]);
1562        let no_nulls = no_nulls.finish();
1563
1564        assert_eq!(Some(false), min_boolean(&no_nulls));
1565        assert_eq!(Some(false), max_boolean(&no_nulls));
1566
1567        let mut with_nulls = BooleanBuilder::new();
1568        with_nulls.append_slice(&[false; 31]);
1569        with_nulls.append_null();
1570        with_nulls.append_slice(&[false; 32]);
1571        with_nulls.append_slice(&[false; 31]);
1572        with_nulls.append_null();
1573        let with_nulls = with_nulls.finish();
1574
1575        assert_eq!(Some(false), min_boolean(&with_nulls));
1576        assert_eq!(Some(false), max_boolean(&with_nulls));
1577    }
1578
1579    #[test]
1580    fn test_sum_dyn() {
1581        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1582        let values = Arc::new(values) as ArrayRef;
1583        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1584
1585        let dict_array = DictionaryArray::new(keys, values.clone());
1586        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1587        assert_eq!(39, sum_array::<Int8Type, _>(array).unwrap());
1588
1589        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1590        assert_eq!(15, sum_array::<Int32Type, _>(&a).unwrap());
1591
1592        let keys = Int8Array::from(vec![Some(2_i8), None, Some(4)]);
1593        let dict_array = DictionaryArray::new(keys, values.clone());
1594        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1595        assert_eq!(26, sum_array::<Int8Type, _>(array).unwrap());
1596
1597        let keys = Int8Array::from(vec![None, None, None]);
1598        let dict_array = DictionaryArray::new(keys, values.clone());
1599        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1600        assert!(sum_array::<Int8Type, _>(array).is_none());
1601    }
1602
1603    #[test]
1604    fn test_max_min_dyn() {
1605        let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
1606        let keys = Int8Array::from_iter_values([2_i8, 3, 4]);
1607        let values = Arc::new(values) as ArrayRef;
1608
1609        let dict_array = DictionaryArray::new(keys, values.clone());
1610        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1611        assert_eq!(14, max_array::<Int8Type, _>(array).unwrap());
1612
1613        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1614        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1615
1616        let a = Int32Array::from(vec![1, 2, 3, 4, 5]);
1617        assert_eq!(5, max_array::<Int32Type, _>(&a).unwrap());
1618        assert_eq!(1, min_array::<Int32Type, _>(&a).unwrap());
1619
1620        let keys = Int8Array::from(vec![Some(2_i8), None, Some(7)]);
1621        let dict_array = DictionaryArray::new(keys, values.clone());
1622        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1623        assert_eq!(17, max_array::<Int8Type, _>(array).unwrap());
1624        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1625        assert_eq!(12, min_array::<Int8Type, _>(array).unwrap());
1626
1627        let keys = Int8Array::from(vec![None, None, None]);
1628        let dict_array = DictionaryArray::new(keys, values.clone());
1629        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1630        assert!(max_array::<Int8Type, _>(array).is_none());
1631        let array = dict_array.downcast_dict::<Int8Array>().unwrap();
1632        assert!(min_array::<Int8Type, _>(array).is_none());
1633    }
1634
1635    #[test]
1636    fn test_max_min_dyn_nan() {
1637        let values = Float32Array::from(vec![5.0_f32, 2.0_f32, f32::NAN]);
1638        let keys = Int8Array::from_iter_values([0_i8, 1, 2]);
1639
1640        let dict_array = DictionaryArray::new(keys, Arc::new(values));
1641        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1642        assert!(max_array::<Float32Type, _>(array).unwrap().is_nan());
1643
1644        let array = dict_array.downcast_dict::<Float32Array>().unwrap();
1645        assert_eq!(2.0_f32, min_array::<Float32Type, _>(array).unwrap());
1646    }
1647
1648    #[test]
1649    fn test_min_max_sliced_primitive() {
1650        let expected = Some(4.0);
1651        let input: Float64Array = vec![None, Some(4.0)].into_iter().collect();
1652        let actual = min(&input);
1653        assert_eq!(actual, expected);
1654        let actual = max(&input);
1655        assert_eq!(actual, expected);
1656
1657        let sliced_input: Float64Array = vec![None, None, None, None, None, Some(4.0)]
1658            .into_iter()
1659            .collect();
1660        let sliced_input = sliced_input.slice(4, 2);
1661
1662        assert_eq!(&sliced_input, &input);
1663
1664        let actual = min(&sliced_input);
1665        assert_eq!(actual, expected);
1666        let actual = max(&sliced_input);
1667        assert_eq!(actual, expected);
1668    }
1669
1670    #[test]
1671    fn test_min_max_sliced_boolean() {
1672        let expected = Some(true);
1673        let input: BooleanArray = vec![None, Some(true)].into_iter().collect();
1674        let actual = min_boolean(&input);
1675        assert_eq!(actual, expected);
1676        let actual = max_boolean(&input);
1677        assert_eq!(actual, expected);
1678
1679        let sliced_input: BooleanArray = vec![None, None, None, None, None, Some(true)]
1680            .into_iter()
1681            .collect();
1682        let sliced_input = sliced_input.slice(4, 2);
1683
1684        assert_eq!(sliced_input, input);
1685
1686        let actual = min_boolean(&sliced_input);
1687        assert_eq!(actual, expected);
1688        let actual = max_boolean(&sliced_input);
1689        assert_eq!(actual, expected);
1690    }
1691
1692    #[test]
1693    fn test_min_max_sliced_string() {
1694        let expected = Some("foo");
1695        let input: StringArray = vec![None, Some("foo")].into_iter().collect();
1696        let actual = min_string(&input);
1697        assert_eq!(actual, expected);
1698        let actual = max_string(&input);
1699        assert_eq!(actual, expected);
1700
1701        let sliced_input: StringArray = vec![None, None, None, None, None, Some("foo")]
1702            .into_iter()
1703            .collect();
1704        let sliced_input = sliced_input.slice(4, 2);
1705
1706        assert_eq!(&sliced_input, &input);
1707
1708        let actual = min_string(&sliced_input);
1709        assert_eq!(actual, expected);
1710        let actual = max_string(&sliced_input);
1711        assert_eq!(actual, expected);
1712    }
1713
1714    #[test]
1715    fn test_min_max_sliced_binary() {
1716        let expected: Option<&[u8]> = Some(&[5]);
1717        let input: BinaryArray = vec![None, Some(&[5])].into_iter().collect();
1718        let actual = min_binary(&input);
1719        assert_eq!(actual, expected);
1720        let actual = max_binary(&input);
1721        assert_eq!(actual, expected);
1722
1723        let sliced_input: BinaryArray = vec![None, None, None, None, None, Some(&[5])]
1724            .into_iter()
1725            .collect();
1726        let sliced_input = sliced_input.slice(4, 2);
1727
1728        assert_eq!(&sliced_input, &input);
1729
1730        let actual = min_binary(&sliced_input);
1731        assert_eq!(actual, expected);
1732        let actual = max_binary(&sliced_input);
1733        assert_eq!(actual, expected);
1734    }
1735
1736    #[test]
1737    fn test_sum_overflow() {
1738        let a = Int32Array::from(vec![i32::MAX, 1]);
1739
1740        assert_eq!(sum(&a).unwrap(), -2147483648);
1741        assert_eq!(sum_array::<Int32Type, _>(&a).unwrap(), -2147483648);
1742    }
1743
1744    #[test]
1745    fn test_sum_checked_overflow() {
1746        let a = Int32Array::from(vec![i32::MAX, 1]);
1747
1748        sum_checked(&a).expect_err("overflow should be detected");
1749        sum_array_checked::<Int32Type, _>(&a).expect_err("overflow should be detected");
1750    }
1751}
arrow_arith/aggregate.rs

arrow_arith/
aggregate.rs