arrow_ord/
ord.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains functions and function factories to compare arrays.
19
20use arrow_array::cast::AsArray;
21use arrow_array::types::*;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_schema::{ArrowError, DataType, SortOptions};
25use std::{cmp::Ordering, collections::HashMap};
26
27/// Compare the values at two arbitrary indices in two arrays.
28pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
29
30/// If parent sort order is descending we need to invert the value of nulls_first so that
31/// when the parent is sorted based on the produced ranks, nulls are still ordered correctly
32fn child_opts(opts: SortOptions) -> SortOptions {
33    SortOptions {
34        descending: false,
35        nulls_first: opts.nulls_first != opts.descending,
36    }
37}
38
39fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
40where
41    A: Array + Clone,
42    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
43{
44    let l = l.logical_nulls().filter(|x| x.null_count() > 0);
45    let r = r.logical_nulls().filter(|x| x.null_count() > 0);
46    match (opts.nulls_first, opts.descending) {
47        (true, true) => compare_impl::<true, true, _>(l, r, cmp),
48        (true, false) => compare_impl::<true, false, _>(l, r, cmp),
49        (false, true) => compare_impl::<false, true, _>(l, r, cmp),
50        (false, false) => compare_impl::<false, false, _>(l, r, cmp),
51    }
52}
53
54fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
55    l: Option<NullBuffer>,
56    r: Option<NullBuffer>,
57    cmp: F,
58) -> DynComparator
59where
60    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
61{
62    let cmp = move |i, j| match DESCENDING {
63        true => cmp(i, j).reverse(),
64        false => cmp(i, j),
65    };
66
67    let (left_null, right_null) = match NULLS_FIRST {
68        true => (Ordering::Less, Ordering::Greater),
69        false => (Ordering::Greater, Ordering::Less),
70    };
71
72    match (l, r) {
73        (None, None) => Box::new(cmp),
74        (Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
75            true => left_null,
76            false => cmp(i, j),
77        }),
78        (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
79            true => right_null,
80            false => cmp(i, j),
81        }),
82        (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
83            (true, true) => Ordering::Equal,
84            (true, false) => left_null,
85            (false, true) => right_null,
86            (false, false) => cmp(i, j),
87        }),
88    }
89}
90
91fn compare_primitive<T: ArrowPrimitiveType>(
92    left: &dyn Array,
93    right: &dyn Array,
94    opts: SortOptions,
95) -> DynComparator
96where
97    T::Native: ArrowNativeTypeOp,
98{
99    let left = left.as_primitive::<T>();
100    let right = right.as_primitive::<T>();
101    let l_values = left.values().clone();
102    let r_values = right.values().clone();
103
104    compare(&left, &right, opts, move |i, j| {
105        l_values[i].compare(r_values[j])
106    })
107}
108
109fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
110    let left = left.as_boolean();
111    let right = right.as_boolean();
112
113    let l_values = left.values().clone();
114    let r_values = right.values().clone();
115
116    compare(left, right, opts, move |i, j| {
117        l_values.value(i).cmp(&r_values.value(j))
118    })
119}
120
121fn compare_bytes<T: ByteArrayType>(
122    left: &dyn Array,
123    right: &dyn Array,
124    opts: SortOptions,
125) -> DynComparator {
126    let left = left.as_bytes::<T>();
127    let right = right.as_bytes::<T>();
128
129    let l = left.clone();
130    let r = right.clone();
131    compare(left, right, opts, move |i, j| {
132        let l: &[u8] = l.value(i).as_ref();
133        let r: &[u8] = r.value(j).as_ref();
134        l.cmp(r)
135    })
136}
137
138fn compare_byte_view<T: ByteViewType>(
139    left: &dyn Array,
140    right: &dyn Array,
141    opts: SortOptions,
142) -> DynComparator {
143    let left = left.as_byte_view::<T>();
144    let right = right.as_byte_view::<T>();
145
146    let l = left.clone();
147    let r = right.clone();
148    compare(left, right, opts, move |i, j| {
149        crate::cmp::compare_byte_view(&l, i, &r, j)
150    })
151}
152
153fn compare_dict<K: ArrowDictionaryKeyType>(
154    left: &dyn Array,
155    right: &dyn Array,
156    opts: SortOptions,
157) -> Result<DynComparator, ArrowError> {
158    let left = left.as_dictionary::<K>();
159    let right = right.as_dictionary::<K>();
160
161    let c_opts = child_opts(opts);
162    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
163    let left_keys = left.keys().values().clone();
164    let right_keys = right.keys().values().clone();
165
166    let f = compare(left, right, opts, move |i, j| {
167        let l = left_keys[i].as_usize();
168        let r = right_keys[j].as_usize();
169        cmp(l, r)
170    });
171    Ok(f)
172}
173
174fn compare_list<O: OffsetSizeTrait>(
175    left: &dyn Array,
176    right: &dyn Array,
177    opts: SortOptions,
178) -> Result<DynComparator, ArrowError> {
179    let left = left.as_list::<O>();
180    let right = right.as_list::<O>();
181
182    let c_opts = child_opts(opts);
183    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
184
185    let l_o = left.offsets().clone();
186    let r_o = right.offsets().clone();
187    let f = compare(left, right, opts, move |i, j| {
188        let l_end = l_o[i + 1].as_usize();
189        let l_start = l_o[i].as_usize();
190
191        let r_end = r_o[j + 1].as_usize();
192        let r_start = r_o[j].as_usize();
193
194        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
195            match cmp(i, j) {
196                Ordering::Equal => continue,
197                r => return r,
198            }
199        }
200        (l_end - l_start).cmp(&(r_end - r_start))
201    });
202    Ok(f)
203}
204
205fn compare_fixed_list(
206    left: &dyn Array,
207    right: &dyn Array,
208    opts: SortOptions,
209) -> Result<DynComparator, ArrowError> {
210    let left = left.as_fixed_size_list();
211    let right = right.as_fixed_size_list();
212
213    let c_opts = child_opts(opts);
214    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
215
216    let l_size = left.value_length().to_usize().unwrap();
217    let r_size = right.value_length().to_usize().unwrap();
218    let size_cmp = l_size.cmp(&r_size);
219
220    let f = compare(left, right, opts, move |i, j| {
221        let l_start = i * l_size;
222        let l_end = l_start + l_size;
223        let r_start = j * r_size;
224        let r_end = r_start + r_size;
225        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
226            match cmp(i, j) {
227                Ordering::Equal => continue,
228                r => return r,
229            }
230        }
231        size_cmp
232    });
233    Ok(f)
234}
235
236fn compare_map(
237    left: &dyn Array,
238    right: &dyn Array,
239    opts: SortOptions,
240) -> Result<DynComparator, ArrowError> {
241    let left = left.as_map();
242    let right = right.as_map();
243
244    let c_opts = child_opts(opts);
245    let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
246
247    let l_o = left.offsets().clone();
248    let r_o = right.offsets().clone();
249    let f = compare(left, right, opts, move |i, j| {
250        let l_end = l_o[i + 1].as_usize();
251        let l_start = l_o[i].as_usize();
252
253        let r_end = r_o[j + 1].as_usize();
254        let r_start = r_o[j].as_usize();
255
256        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
257            match cmp(i, j) {
258                Ordering::Equal => continue,
259                r => return r,
260            }
261        }
262        (l_end - l_start).cmp(&(r_end - r_start))
263    });
264    Ok(f)
265}
266
267fn compare_struct(
268    left: &dyn Array,
269    right: &dyn Array,
270    opts: SortOptions,
271) -> Result<DynComparator, ArrowError> {
272    let left = left.as_struct();
273    let right = right.as_struct();
274
275    if left.columns().len() != right.columns().len() {
276        return Err(ArrowError::InvalidArgumentError(
277            "Cannot compare StructArray with different number of columns".to_string(),
278        ));
279    }
280
281    let c_opts = child_opts(opts);
282    let columns = left.columns().iter().zip(right.columns());
283    let comparators = columns
284        .map(|(l, r)| make_comparator(l, r, c_opts))
285        .collect::<Result<Vec<_>, _>>()?;
286
287    let f = compare(left, right, opts, move |i, j| {
288        for cmp in &comparators {
289            match cmp(i, j) {
290                Ordering::Equal => continue,
291                r => return r,
292            }
293        }
294        Ordering::Equal
295    });
296    Ok(f)
297}
298
299fn compare_union(
300    left: &dyn Array,
301    right: &dyn Array,
302    opts: SortOptions,
303) -> Result<DynComparator, ArrowError> {
304    let left = left.as_union();
305    let right = right.as_union();
306
307    let (left_fields, left_mode) = match left.data_type() {
308        DataType::Union(fields, mode) => (fields, mode),
309        _ => unreachable!(),
310    };
311    let (right_fields, right_mode) = match right.data_type() {
312        DataType::Union(fields, mode) => (fields, mode),
313        _ => unreachable!(),
314    };
315
316    if left_fields != right_fields {
317        return Err(ArrowError::InvalidArgumentError(format!(
318            "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
319            left_fields, right_fields
320        )));
321    }
322
323    if left_mode != right_mode {
324        return Err(ArrowError::InvalidArgumentError(format!(
325            "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
326            left_mode, right_mode
327        )));
328    }
329
330    let c_opts = child_opts(opts);
331
332    let mut field_comparators = HashMap::with_capacity(left_fields.len());
333
334    for (type_id, _field) in left_fields.iter() {
335        let left_child = left.child(type_id);
336        let right_child = right.child(type_id);
337        let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
338
339        field_comparators.insert(type_id, cmp);
340    }
341
342    let left_type_ids = left.type_ids().clone();
343    let right_type_ids = right.type_ids().clone();
344
345    let left_offsets = left.offsets().cloned();
346    let right_offsets = right.offsets().cloned();
347
348    let f = compare(left, right, opts, move |i, j| {
349        let left_type_id = left_type_ids[i];
350        let right_type_id = right_type_ids[j];
351
352        // first, compare by type_id
353        match left_type_id.cmp(&right_type_id) {
354            Ordering::Equal => {
355                // second, compare by values
356                let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
357                let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
358
359                let cmp = field_comparators
360                    .get(&left_type_id)
361                    .expect("type id not found in field_comparators");
362
363                cmp(left_offset, right_offset)
364            }
365            other => other,
366        }
367    });
368    Ok(f)
369}
370
371/// Returns a comparison function that compares two values at two different positions
372/// between the two arrays.
373///
374/// For comparing arrays element-wise, see also the vectorised kernels in [`crate::cmp`].
375///
376/// If `nulls_first` is true `NULL` values will be considered less than any non-null value,
377/// otherwise they will be considered greater.
378///
379/// # Basic Usage
380///
381/// ```
382/// # use std::cmp::Ordering;
383/// # use arrow_array::Int32Array;
384/// # use arrow_ord::ord::make_comparator;
385/// # use arrow_schema::SortOptions;
386/// #
387/// let array1 = Int32Array::from(vec![1, 2]);
388/// let array2 = Int32Array::from(vec![3, 4]);
389///
390/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
391/// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2)
392/// assert_eq!(cmp(0, 1), Ordering::Less);
393///
394/// let array1 = Int32Array::from(vec![Some(1), None]);
395/// let array2 = Int32Array::from(vec![None, Some(2)]);
396/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
397///
398/// assert_eq!(cmp(0, 1), Ordering::Less); // Some(1) vs Some(2)
399/// assert_eq!(cmp(1, 1), Ordering::Less); // None vs Some(2)
400/// assert_eq!(cmp(1, 0), Ordering::Equal); // None vs None
401/// assert_eq!(cmp(0, 0), Ordering::Greater); // Some(1) vs None
402/// ```
403///
404/// # Postgres-compatible Nested Comparison
405///
406/// Whilst SQL prescribes ternary logic for nulls, that is comparing a value against a NULL yields
407/// a NULL, many systems, including postgres, instead apply a total ordering to comparison of
408/// nested nulls. That is nulls within nested types are either greater than any value (postgres),
409/// or less than any value (Spark).
410///
411/// In particular
412///
413/// ```ignore
414/// { a: 1, b: null } == { a: 1, b: null } => true
415/// { a: 1, b: null } == { a: 1, b: 1 } => false
416/// { a: 1, b: null } == null => null
417/// null == null => null
418/// ```
419///
420/// This could be implemented as below
421///
422/// ```
423/// # use arrow_array::{Array, BooleanArray};
424/// # use arrow_buffer::NullBuffer;
425/// # use arrow_ord::cmp;
426/// # use arrow_ord::ord::make_comparator;
427/// # use arrow_schema::{ArrowError, SortOptions};
428/// fn eq(a: &dyn Array, b: &dyn Array) -> Result<BooleanArray, ArrowError> {
429///     if !a.data_type().is_nested() {
430///         return cmp::eq(&a, &b); // Use faster vectorised kernel
431///     }
432///
433///     let cmp = make_comparator(a, b, SortOptions::default())?;
434///     let len = a.len().min(b.len());
435///     let values = (0..len).map(|i| cmp(i, i).is_eq()).collect();
436///     let nulls = NullBuffer::union(a.nulls(), b.nulls());
437///     Ok(BooleanArray::new(values, nulls))
438/// }
439/// ````
440pub fn make_comparator(
441    left: &dyn Array,
442    right: &dyn Array,
443    opts: SortOptions,
444) -> Result<DynComparator, ArrowError> {
445    use arrow_schema::DataType::*;
446
447    macro_rules! primitive_helper {
448        ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
449            Ok(compare_primitive::<$t>($left, $right, $nulls_first))
450        };
451    }
452    downcast_primitive! {
453        left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
454        (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
455        (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
456        (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
457        (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
458        (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
459        (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
460        (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
461        (FixedSizeBinary(_), FixedSizeBinary(_)) => {
462            let left = left.as_fixed_size_binary();
463            let right = right.as_fixed_size_binary();
464
465            let l = left.clone();
466            let r = right.clone();
467            Ok(compare(left, right, opts, move |i, j| {
468                l.value(i).cmp(r.value(j))
469            }))
470        },
471        (List(_), List(_)) => compare_list::<i32>(left, right, opts),
472        (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
473        (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
474        (Struct(_), Struct(_)) => compare_struct(left, right, opts),
475        (Dictionary(l_key, _), Dictionary(r_key, _)) => {
476             macro_rules! dict_helper {
477                ($t:ty, $left:expr, $right:expr, $opts: expr) => {
478                     compare_dict::<$t>($left, $right, $opts)
479                 };
480             }
481            downcast_integer! {
482                 l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
483                 _ => unreachable!()
484             }
485        },
486        (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
487        (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
488        (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
489            true => format!("The data type type {lhs:?} has no natural order"),
490            false => "Can't compare arrays of different types".to_string(),
491        }))
492    }
493}
494
495#[cfg(test)]
496mod tests {
497    use super::*;
498    use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
499    use arrow_buffer::{IntervalDayTime, OffsetBuffer, ScalarBuffer, i256};
500    use arrow_schema::{DataType, Field, Fields, UnionFields};
501    use half::f16;
502    use std::sync::Arc;
503
504    #[test]
505    fn test_fixed_size_binary() {
506        let items = vec![vec![1u8], vec![2u8]];
507        let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
508
509        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
510
511        assert_eq!(Ordering::Less, cmp(0, 1));
512    }
513
514    #[test]
515    fn test_fixed_size_binary_fixed_size_binary() {
516        let items = vec![vec![1u8]];
517        let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
518        let items = vec![vec![2u8]];
519        let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
520
521        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
522
523        assert_eq!(Ordering::Less, cmp(0, 0));
524    }
525
526    #[test]
527    fn test_i32() {
528        let array = Int32Array::from(vec![1, 2]);
529
530        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
531
532        assert_eq!(Ordering::Less, (cmp)(0, 1));
533    }
534
535    #[test]
536    fn test_i32_i32() {
537        let array1 = Int32Array::from(vec![1]);
538        let array2 = Int32Array::from(vec![2]);
539
540        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
541
542        assert_eq!(Ordering::Less, cmp(0, 0));
543    }
544
545    #[test]
546    fn test_f16() {
547        let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
548
549        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
550
551        assert_eq!(Ordering::Less, cmp(0, 1));
552    }
553
554    #[test]
555    fn test_f64() {
556        let array = Float64Array::from(vec![1.0, 2.0]);
557
558        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
559
560        assert_eq!(Ordering::Less, cmp(0, 1));
561    }
562
563    #[test]
564    fn test_f64_nan() {
565        let array = Float64Array::from(vec![1.0, f64::NAN]);
566
567        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
568
569        assert_eq!(Ordering::Less, cmp(0, 1));
570        assert_eq!(Ordering::Equal, cmp(1, 1));
571    }
572
573    #[test]
574    fn test_f64_zeros() {
575        let array = Float64Array::from(vec![-0.0, 0.0]);
576
577        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
578
579        assert_eq!(Ordering::Less, cmp(0, 1));
580        assert_eq!(Ordering::Greater, cmp(1, 0));
581    }
582
583    #[test]
584    fn test_interval_day_time() {
585        let array = IntervalDayTimeArray::from(vec![
586            // 0 days, 1 second
587            IntervalDayTimeType::make_value(0, 1000),
588            // 1 day, 2 milliseconds
589            IntervalDayTimeType::make_value(1, 2),
590            // 90M milliseconds (which is more than is in 1 day)
591            IntervalDayTimeType::make_value(0, 90_000_000),
592        ]);
593
594        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
595
596        assert_eq!(Ordering::Less, cmp(0, 1));
597        assert_eq!(Ordering::Greater, cmp(1, 0));
598
599        // somewhat confusingly, while 90M milliseconds is more than 1 day,
600        // it will compare less as the comparison is done on the underlying
601        // values not field by field
602        assert_eq!(Ordering::Greater, cmp(1, 2));
603        assert_eq!(Ordering::Less, cmp(2, 1));
604    }
605
606    #[test]
607    fn test_interval_year_month() {
608        let array = IntervalYearMonthArray::from(vec![
609            // 1 year, 0 months
610            IntervalYearMonthType::make_value(1, 0),
611            // 0 years, 13 months
612            IntervalYearMonthType::make_value(0, 13),
613            // 1 year, 1 month
614            IntervalYearMonthType::make_value(1, 1),
615        ]);
616
617        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
618
619        assert_eq!(Ordering::Less, cmp(0, 1));
620        assert_eq!(Ordering::Greater, cmp(1, 0));
621
622        // the underlying representation is months, so both quantities are the same
623        assert_eq!(Ordering::Equal, cmp(1, 2));
624        assert_eq!(Ordering::Equal, cmp(2, 1));
625    }
626
627    #[test]
628    fn test_interval_month_day_nano() {
629        let array = IntervalMonthDayNanoArray::from(vec![
630            // 100 days
631            IntervalMonthDayNanoType::make_value(0, 100, 0),
632            // 1 month
633            IntervalMonthDayNanoType::make_value(1, 0, 0),
634            // 100 day, 1 nanoseconds
635            IntervalMonthDayNanoType::make_value(0, 100, 2),
636        ]);
637
638        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
639
640        assert_eq!(Ordering::Less, cmp(0, 1));
641        assert_eq!(Ordering::Greater, cmp(1, 0));
642
643        // somewhat confusingly, while 100 days is more than 1 month in all cases
644        // it will compare less as the comparison is done on the underlying
645        // values not field by field
646        assert_eq!(Ordering::Greater, cmp(1, 2));
647        assert_eq!(Ordering::Less, cmp(2, 1));
648    }
649
650    #[test]
651    fn test_decimali32() {
652        let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
653            .into_iter()
654            .collect::<Decimal32Array>()
655            .with_precision_and_scale(8, 6)
656            .unwrap();
657
658        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
659        assert_eq!(Ordering::Less, cmp(1, 0));
660        assert_eq!(Ordering::Greater, cmp(0, 2));
661    }
662
663    #[test]
664    fn test_decimali64() {
665        let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
666            .into_iter()
667            .collect::<Decimal64Array>()
668            .with_precision_and_scale(16, 6)
669            .unwrap();
670
671        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
672        assert_eq!(Ordering::Less, cmp(1, 0));
673        assert_eq!(Ordering::Greater, cmp(0, 2));
674    }
675
676    #[test]
677    fn test_decimali128() {
678        let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
679            .into_iter()
680            .collect::<Decimal128Array>()
681            .with_precision_and_scale(23, 6)
682            .unwrap();
683
684        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
685        assert_eq!(Ordering::Less, cmp(1, 0));
686        assert_eq!(Ordering::Greater, cmp(0, 2));
687    }
688
689    #[test]
690    fn test_decimali256() {
691        let array = vec![
692            Some(i256::from_i128(5_i128)),
693            Some(i256::from_i128(2_i128)),
694            Some(i256::from_i128(3_i128)),
695        ]
696        .into_iter()
697        .collect::<Decimal256Array>()
698        .with_precision_and_scale(53, 6)
699        .unwrap();
700
701        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
702        assert_eq!(Ordering::Less, cmp(1, 0));
703        assert_eq!(Ordering::Greater, cmp(0, 2));
704    }
705
706    #[test]
707    fn test_dict() {
708        let data = vec!["a", "b", "c", "a", "a", "c", "c"];
709        let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
710
711        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
712
713        assert_eq!(Ordering::Less, cmp(0, 1));
714        assert_eq!(Ordering::Equal, cmp(3, 4));
715        assert_eq!(Ordering::Greater, cmp(2, 3));
716    }
717
718    #[test]
719    fn test_multiple_dict() {
720        let d1 = vec!["a", "b", "c", "d"];
721        let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
722        let d2 = vec!["e", "f", "g", "a"];
723        let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
724
725        let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
726
727        assert_eq!(Ordering::Less, cmp(0, 0));
728        assert_eq!(Ordering::Equal, cmp(0, 3));
729        assert_eq!(Ordering::Greater, cmp(1, 3));
730    }
731
732    #[test]
733    fn test_primitive_dict() {
734        let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
735        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
736        let array1 = DictionaryArray::new(keys, Arc::new(values));
737
738        let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
739        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
740        let array2 = DictionaryArray::new(keys, Arc::new(values));
741
742        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
743
744        assert_eq!(Ordering::Less, cmp(0, 0));
745        assert_eq!(Ordering::Less, cmp(0, 3));
746        assert_eq!(Ordering::Equal, cmp(3, 3));
747        assert_eq!(Ordering::Greater, cmp(3, 1));
748        assert_eq!(Ordering::Greater, cmp(3, 2));
749    }
750
751    #[test]
752    fn test_float_dict() {
753        let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
754        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
755        let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
756
757        let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
758        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
759        let array2 = DictionaryArray::new(keys, Arc::new(values));
760
761        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
762
763        assert_eq!(Ordering::Less, cmp(0, 0));
764        assert_eq!(Ordering::Less, cmp(0, 3));
765        assert_eq!(Ordering::Equal, cmp(3, 3));
766        assert_eq!(Ordering::Greater, cmp(3, 1));
767        assert_eq!(Ordering::Greater, cmp(3, 2));
768    }
769
770    #[test]
771    fn test_timestamp_dict() {
772        let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
773        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
774        let array1 = DictionaryArray::new(keys, Arc::new(values));
775
776        let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
777        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
778        let array2 = DictionaryArray::new(keys, Arc::new(values));
779
780        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
781
782        assert_eq!(Ordering::Less, cmp(0, 0));
783        assert_eq!(Ordering::Less, cmp(0, 3));
784        assert_eq!(Ordering::Equal, cmp(3, 3));
785        assert_eq!(Ordering::Greater, cmp(3, 1));
786        assert_eq!(Ordering::Greater, cmp(3, 2));
787    }
788
789    #[test]
790    fn test_interval_dict() {
791        let v1 = IntervalDayTime::new(0, 1);
792        let v2 = IntervalDayTime::new(0, 2);
793        let v3 = IntervalDayTime::new(12, 2);
794
795        let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
796        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
797        let array1 = DictionaryArray::new(keys, Arc::new(values));
798
799        let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
800        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
801        let array2 = DictionaryArray::new(keys, Arc::new(values));
802
803        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
804
805        assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3
806        assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1
807        assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1
808        assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2
809        assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2
810    }
811
812    #[test]
813    fn test_duration_dict() {
814        let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
815        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
816        let array1 = DictionaryArray::new(keys, Arc::new(values));
817
818        let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
819        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
820        let array2 = DictionaryArray::new(keys, Arc::new(values));
821
822        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
823
824        assert_eq!(Ordering::Less, cmp(0, 0));
825        assert_eq!(Ordering::Less, cmp(0, 3));
826        assert_eq!(Ordering::Equal, cmp(3, 3));
827        assert_eq!(Ordering::Greater, cmp(3, 1));
828        assert_eq!(Ordering::Greater, cmp(3, 2));
829    }
830
831    #[test]
832    fn test_decimal_dict() {
833        let values = Decimal128Array::from(vec![1, 0, 2, 5]);
834        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
835        let array1 = DictionaryArray::new(keys, Arc::new(values));
836
837        let values = Decimal128Array::from(vec![2, 3, 4, 5]);
838        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
839        let array2 = DictionaryArray::new(keys, Arc::new(values));
840
841        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
842
843        assert_eq!(Ordering::Less, cmp(0, 0));
844        assert_eq!(Ordering::Less, cmp(0, 3));
845        assert_eq!(Ordering::Equal, cmp(3, 3));
846        assert_eq!(Ordering::Greater, cmp(3, 1));
847        assert_eq!(Ordering::Greater, cmp(3, 2));
848    }
849
850    #[test]
851    fn test_decimal256_dict() {
852        let values = Decimal256Array::from(vec![
853            i256::from_i128(1),
854            i256::from_i128(0),
855            i256::from_i128(2),
856            i256::from_i128(5),
857        ]);
858        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
859        let array1 = DictionaryArray::new(keys, Arc::new(values));
860
861        let values = Decimal256Array::from(vec![
862            i256::from_i128(2),
863            i256::from_i128(3),
864            i256::from_i128(4),
865            i256::from_i128(5),
866        ]);
867        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
868        let array2 = DictionaryArray::new(keys, Arc::new(values));
869
870        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
871
872        assert_eq!(Ordering::Less, cmp(0, 0));
873        assert_eq!(Ordering::Less, cmp(0, 3));
874        assert_eq!(Ordering::Equal, cmp(3, 3));
875        assert_eq!(Ordering::Greater, cmp(3, 1));
876        assert_eq!(Ordering::Greater, cmp(3, 2));
877    }
878
879    fn test_bytes_impl<T: ByteArrayType>() {
880        let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
881        let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
882        let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
883
884        assert_eq!(Ordering::Less, cmp(0, 1));
885        assert_eq!(Ordering::Greater, cmp(0, 2));
886        assert_eq!(Ordering::Equal, cmp(1, 1));
887    }
888
889    #[test]
890    fn test_bytes() {
891        test_bytes_impl::<Utf8Type>();
892        test_bytes_impl::<LargeUtf8Type>();
893        test_bytes_impl::<BinaryType>();
894        test_bytes_impl::<LargeBinaryType>();
895    }
896
897    #[test]
898    fn test_lists() {
899        let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
900        a.extend([
901            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
902            Some(vec![
903                Some(vec![Some(1), Some(2), Some(3)]),
904                Some(vec![Some(1)]),
905            ]),
906            Some(vec![]),
907        ]);
908        let a = a.finish();
909        let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
910        b.extend([
911            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
912            Some(vec![
913                Some(vec![Some(1), Some(2), None]),
914                Some(vec![Some(1)]),
915            ]),
916            Some(vec![
917                Some(vec![Some(1), Some(2), Some(3), Some(4)]),
918                Some(vec![Some(1)]),
919            ]),
920            None,
921        ]);
922        let b = b.finish();
923
924        let opts = SortOptions {
925            descending: false,
926            nulls_first: true,
927        };
928        let cmp = make_comparator(&a, &b, opts).unwrap();
929        assert_eq!(cmp(0, 0), Ordering::Equal);
930        assert_eq!(cmp(0, 1), Ordering::Less);
931        assert_eq!(cmp(0, 2), Ordering::Less);
932        assert_eq!(cmp(1, 2), Ordering::Less);
933        assert_eq!(cmp(1, 3), Ordering::Greater);
934        assert_eq!(cmp(2, 0), Ordering::Less);
935
936        let opts = SortOptions {
937            descending: true,
938            nulls_first: true,
939        };
940        let cmp = make_comparator(&a, &b, opts).unwrap();
941        assert_eq!(cmp(0, 0), Ordering::Equal);
942        assert_eq!(cmp(0, 1), Ordering::Less);
943        assert_eq!(cmp(0, 2), Ordering::Less);
944        assert_eq!(cmp(1, 2), Ordering::Greater);
945        assert_eq!(cmp(1, 3), Ordering::Greater);
946        assert_eq!(cmp(2, 0), Ordering::Greater);
947
948        let opts = SortOptions {
949            descending: true,
950            nulls_first: false,
951        };
952        let cmp = make_comparator(&a, &b, opts).unwrap();
953        assert_eq!(cmp(0, 0), Ordering::Equal);
954        assert_eq!(cmp(0, 1), Ordering::Greater);
955        assert_eq!(cmp(0, 2), Ordering::Greater);
956        assert_eq!(cmp(1, 2), Ordering::Greater);
957        assert_eq!(cmp(1, 3), Ordering::Less);
958        assert_eq!(cmp(2, 0), Ordering::Greater);
959
960        let opts = SortOptions {
961            descending: false,
962            nulls_first: false,
963        };
964        let cmp = make_comparator(&a, &b, opts).unwrap();
965        assert_eq!(cmp(0, 0), Ordering::Equal);
966        assert_eq!(cmp(0, 1), Ordering::Greater);
967        assert_eq!(cmp(0, 2), Ordering::Greater);
968        assert_eq!(cmp(1, 2), Ordering::Less);
969        assert_eq!(cmp(1, 3), Ordering::Less);
970        assert_eq!(cmp(2, 0), Ordering::Less);
971    }
972
973    #[test]
974    fn test_struct() {
975        let fields = Fields::from(vec![
976            Field::new("a", DataType::Int32, true),
977            Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
978        ]);
979
980        let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
981        let mut b = ListBuilder::new(Int32Builder::new());
982        b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
983        let b = b.finish();
984
985        let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
986        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
987        let s1 = StructArray::new(fields.clone(), values, nulls);
988
989        let a = Int32Array::from(vec![None, Some(2), None]);
990        let mut b = ListBuilder::new(Int32Builder::new());
991        b.extend([None, None, Some(vec![])]);
992        let b = b.finish();
993
994        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
995        let s2 = StructArray::new(fields.clone(), values, None);
996
997        let opts = SortOptions {
998            descending: false,
999            nulls_first: true,
1000        };
1001        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1002        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1003        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1004        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1005        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1006        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1007        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1008        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1009
1010        let opts = SortOptions {
1011            descending: true,
1012            nulls_first: true,
1013        };
1014        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1015        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1016        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1017        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1018        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1019        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1020        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1021        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1022
1023        let opts = SortOptions {
1024            descending: true,
1025            nulls_first: false,
1026        };
1027        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1028        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1029        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1030        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1031        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1032        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1033        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1034        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1035
1036        let opts = SortOptions {
1037            descending: false,
1038            nulls_first: false,
1039        };
1040        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1041        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1042        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1043        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1044        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1045        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1046        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1047        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1048    }
1049
1050    #[test]
1051    fn test_map() {
1052        // Create first map array demonstrating key priority over values:
1053        // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
1054        let string_builder = StringBuilder::new();
1055        let int_builder = Int32Builder::new();
1056        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1057
1058        // {"a": 100, "b": 1} - high value for "a", low value for "b"
1059        map1_builder.keys().append_value("a");
1060        map1_builder.values().append_value(100);
1061        map1_builder.keys().append_value("b");
1062        map1_builder.values().append_value(1);
1063        map1_builder.append(true).unwrap();
1064
1065        // {"b": 999, "c": 1} - very high value for "b", low value for "c"
1066        map1_builder.keys().append_value("b");
1067        map1_builder.values().append_value(999);
1068        map1_builder.keys().append_value("c");
1069        map1_builder.values().append_value(1);
1070        map1_builder.append(true).unwrap();
1071
1072        // {}
1073        map1_builder.append(true).unwrap();
1074
1075        // {"x": 1}
1076        map1_builder.keys().append_value("x");
1077        map1_builder.values().append_value(1);
1078        map1_builder.append(true).unwrap();
1079
1080        let map1 = map1_builder.finish();
1081
1082        // Create second map array:
1083        // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
1084        let string_builder = StringBuilder::new();
1085        let int_builder = Int32Builder::new();
1086        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1087
1088        // {"a": 1, "c": 999} - low value for "a", high value for "c"
1089        map2_builder.keys().append_value("a");
1090        map2_builder.values().append_value(1);
1091        map2_builder.keys().append_value("c");
1092        map2_builder.values().append_value(999);
1093        map2_builder.append(true).unwrap();
1094
1095        // {"b": 1, "d": 999} - low value for "b", high value for "d"
1096        map2_builder.keys().append_value("b");
1097        map2_builder.values().append_value(1);
1098        map2_builder.keys().append_value("d");
1099        map2_builder.values().append_value(999);
1100        map2_builder.append(true).unwrap();
1101
1102        // {"a": 1}
1103        map2_builder.keys().append_value("a");
1104        map2_builder.values().append_value(1);
1105        map2_builder.append(true).unwrap();
1106
1107        // None
1108        map2_builder.append(false).unwrap();
1109
1110        let map2 = map2_builder.finish();
1111
1112        let opts = SortOptions {
1113            descending: false,
1114            nulls_first: true,
1115        };
1116        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1117
1118        // Test that keys have priority over values:
1119        // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1120        // First entries match (a:100 vs a:1), but 100 > 1, so Greater
1121        assert_eq!(cmp(0, 0), Ordering::Greater);
1122
1123        // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1124        // First entries match (b:999 vs b:1), but 999 > 1, so Greater
1125        assert_eq!(cmp(1, 1), Ordering::Greater);
1126
1127        // Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 1}
1128        assert_eq!(cmp(0, 1), Ordering::Less);
1129
1130        // Empty map vs non-empty
1131        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1132
1133        // Non-null vs null
1134        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1135
1136        // Key priority test: "x" > "a", regardless of values
1137        assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999}
1138
1139        // Empty vs non-empty
1140        assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
1141
1142        let opts = SortOptions {
1143            descending: true,
1144            nulls_first: true,
1145        };
1146        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1147
1148        // With descending=true, value comparison is reversed
1149        assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed)
1150        assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed)
1151        assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 999, "c": 1} (key order reversed)
1152        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1153        assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
1154
1155        let opts = SortOptions {
1156            descending: false,
1157            nulls_first: false,
1158        };
1159        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1160
1161        // Same key priority behavior with nulls_first=false
1162        assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1163        assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1164        assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last)
1165        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1166    }
1167
1168    #[test]
1169    fn test_map_vs_list_consistency() {
1170        // Create map arrays and convert them to list arrays to verify comparison consistency
1171        // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
1172        let string_builder = StringBuilder::new();
1173        let int_builder = Int32Builder::new();
1174        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1175
1176        // {"a": 1, "b": 2}
1177        map1_builder.keys().append_value("a");
1178        map1_builder.values().append_value(1);
1179        map1_builder.keys().append_value("b");
1180        map1_builder.values().append_value(2);
1181        map1_builder.append(true).unwrap();
1182
1183        // {"x": 10}
1184        map1_builder.keys().append_value("x");
1185        map1_builder.values().append_value(10);
1186        map1_builder.append(true).unwrap();
1187
1188        // {}
1189        map1_builder.append(true).unwrap();
1190
1191        // {"c": 3}
1192        map1_builder.keys().append_value("c");
1193        map1_builder.values().append_value(3);
1194        map1_builder.append(true).unwrap();
1195
1196        let map1 = map1_builder.finish();
1197
1198        // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
1199        let string_builder = StringBuilder::new();
1200        let int_builder = Int32Builder::new();
1201        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1202
1203        // {"a": 1, "b": 2}
1204        map2_builder.keys().append_value("a");
1205        map2_builder.values().append_value(1);
1206        map2_builder.keys().append_value("b");
1207        map2_builder.values().append_value(2);
1208        map2_builder.append(true).unwrap();
1209
1210        // {"y": 20}
1211        map2_builder.keys().append_value("y");
1212        map2_builder.values().append_value(20);
1213        map2_builder.append(true).unwrap();
1214
1215        // {"d": 4}
1216        map2_builder.keys().append_value("d");
1217        map2_builder.values().append_value(4);
1218        map2_builder.append(true).unwrap();
1219
1220        // None
1221        map2_builder.append(false).unwrap();
1222
1223        let map2 = map2_builder.finish();
1224
1225        // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs)
1226        let list1: ListArray = map1.clone().into();
1227        let list2: ListArray = map2.clone().into();
1228
1229        let test_cases = [
1230            SortOptions {
1231                descending: false,
1232                nulls_first: true,
1233            },
1234            SortOptions {
1235                descending: true,
1236                nulls_first: true,
1237            },
1238            SortOptions {
1239                descending: false,
1240                nulls_first: false,
1241            },
1242            SortOptions {
1243                descending: true,
1244                nulls_first: false,
1245            },
1246        ];
1247
1248        for opts in test_cases {
1249            let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
1250            let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
1251
1252            // Test all possible index combinations
1253            for i in 0..map1.len() {
1254                for j in 0..map2.len() {
1255                    let map_result = map_cmp(i, j);
1256                    let list_result = list_cmp(i, j);
1257                    assert_eq!(
1258                        map_result, list_result,
1259                        "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
1260                    );
1261                }
1262            }
1263        }
1264    }
1265
1266    #[test]
1267    fn test_dense_union() {
1268        // create a dense union array with Int32 (type_id = 0) and Utf8 (type_id=1)
1269        // the values are: [1, "b", 2, "a", 3]
1270        //  type_ids are: [0,  1,  0,  1,  0]
1271        //   offsets are: [0, 0, 1, 1, 2] from [1, 2, 3] and ["b", "a"]
1272        let int_array = Int32Array::from(vec![1, 2, 3]);
1273        let str_array = StringArray::from(vec!["b", "a"]);
1274
1275        let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1276        let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
1277
1278        let union_fields = [
1279            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1280            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1281        ]
1282        .into_iter()
1283        .collect::<UnionFields>();
1284
1285        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1286
1287        let array1 =
1288            UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
1289
1290        // create a second array: [2, "a", 1, "c"]
1291        //          type ids are: [0,  1,  0,  1]
1292        //           offsets are: [0, 0, 1, 1] from [2, 1] and ["a", "c"]
1293        let int_array2 = Int32Array::from(vec![2, 1]);
1294        let str_array2 = StringArray::from(vec!["a", "c"]);
1295        let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1296        let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
1297
1298        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1299
1300        let array2 =
1301            UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
1302
1303        let opts = SortOptions {
1304            descending: false,
1305            nulls_first: true,
1306        };
1307
1308        // comparing
1309        // [1, "b", 2, "a", 3]
1310        // [2, "a", 1, "c"]
1311        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1312
1313        // array1[0] = (type_id=0, value=1)
1314        // array2[0] = (type_id=0, value=2)
1315        assert_eq!(cmp(0, 0), Ordering::Less); // 1 < 2
1316
1317        // array1[0] = (type_id=0, value=1)
1318        // array2[1] = (type_id=1, value="a")
1319        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1320
1321        // array1[1] = (type_id=1, value="b")
1322        // array2[1] = (type_id=1, value="a")
1323        assert_eq!(cmp(1, 1), Ordering::Greater); // "b" > "a"
1324
1325        // array1[2] = (type_id=0, value=2)
1326        // array2[0] = (type_id=0, value=2)
1327        assert_eq!(cmp(2, 0), Ordering::Equal); // 2 == 2
1328
1329        // array1[3] = (type_id=1, value="a")
1330        // array2[1] = (type_id=1, value="a")
1331        assert_eq!(cmp(3, 1), Ordering::Equal); // "a" == "a"
1332
1333        // array1[1] = (type_id=1, value="b")
1334        // array2[3] = (type_id=1, value="c")
1335        assert_eq!(cmp(1, 3), Ordering::Less); // "b" < "c"
1336
1337        let opts_desc = SortOptions {
1338            descending: true,
1339            nulls_first: true,
1340        };
1341        let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
1342
1343        assert_eq!(cmp_desc(0, 0), Ordering::Greater); // 1 > 2 (reversed)
1344        assert_eq!(cmp_desc(0, 1), Ordering::Greater); // type_id 0 < 1, reversed to Greater
1345        assert_eq!(cmp_desc(1, 1), Ordering::Less); // "b" < "a" (reversed)
1346    }
1347
1348    #[test]
1349    fn test_sparse_union() {
1350        // create a sparse union array with Int32 (type_id=0) and Utf8 (type_id=1)
1351        // values: [1, "b", 3]
1352        // note, in sparse unions, child arrays have the same length as the union
1353        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1354        let str_array = StringArray::from(vec![None, Some("b"), None]);
1355        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1356
1357        let union_fields = [
1358            (0, Arc::new(Field::new("a", DataType::Int32, false))),
1359            (1, Arc::new(Field::new("b", DataType::Utf8, false))),
1360        ]
1361        .into_iter()
1362        .collect::<UnionFields>();
1363
1364        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1365
1366        let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
1367
1368        let opts = SortOptions::default();
1369        let cmp = make_comparator(&array, &array, opts).unwrap();
1370
1371        // array[0] = (type_id=0, value=1), array[2] = (type_id=0, value=3)
1372        assert_eq!(cmp(0, 2), Ordering::Less); // 1 < 3
1373        // array[0] = (type_id=0, value=1), array[1] = (type_id=1, value="b")
1374        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1375    }
1376
1377    #[test]
1378    #[should_panic(expected = "index out of bounds")]
1379    fn test_union_out_of_bounds() {
1380        // create a dense union array with 3 elements
1381        let int_array = Int32Array::from(vec![1, 2]);
1382        let str_array = StringArray::from(vec!["a"]);
1383
1384        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1385        let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
1386
1387        let union_fields = [
1388            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1389            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1390        ]
1391        .into_iter()
1392        .collect::<UnionFields>();
1393
1394        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1395
1396        let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
1397
1398        let opts = SortOptions::default();
1399        let cmp = make_comparator(&array, &array, opts).unwrap();
1400
1401        // oob
1402        cmp(0, 3);
1403    }
1404
1405    #[test]
1406    fn test_union_incompatible_fields() {
1407        // create first union with Int32 and Utf8
1408        let int_array1 = Int32Array::from(vec![1, 2]);
1409        let str_array1 = StringArray::from(vec!["a", "b"]);
1410
1411        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1412        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1413
1414        let union_fields1 = [
1415            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1416            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1417        ]
1418        .into_iter()
1419        .collect::<UnionFields>();
1420
1421        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1422
1423        let array1 =
1424            UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
1425
1426        // create second union with Int32 and Float64 (incompatible with first)
1427        let int_array2 = Int32Array::from(vec![3, 4]);
1428        let float_array2 = Float64Array::from(vec![1.0, 2.0]);
1429
1430        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1431        let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1432
1433        let union_fields2 = [
1434            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1435            (1, Arc::new(Field::new("C", DataType::Float64, false))),
1436        ]
1437        .into_iter()
1438        .collect::<UnionFields>();
1439
1440        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
1441
1442        let array2 =
1443            UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
1444
1445        let opts = SortOptions::default();
1446
1447        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1448            make_comparator(&array1, &array2, opts)
1449        else {
1450            panic!("expected error when making comparator of incompatible union arrays");
1451        };
1452
1453        assert_eq!(
1454            &out,
1455            "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
1456        );
1457    }
1458
1459    #[test]
1460    fn test_union_incompatible_modes() {
1461        // create first union as Dense with Int32 and Utf8
1462        let int_array1 = Int32Array::from(vec![1, 2]);
1463        let str_array1 = StringArray::from(vec!["a", "b"]);
1464
1465        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1466        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1467
1468        let union_fields1 = [
1469            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1470            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1471        ]
1472        .into_iter()
1473        .collect::<UnionFields>();
1474
1475        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1476
1477        let array1 =
1478            UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
1479                .unwrap();
1480
1481        // create second union as Sparse with same fields (Int32 and Utf8)
1482        let int_array2 = Int32Array::from(vec![Some(3), None]);
1483        let str_array2 = StringArray::from(vec![None, Some("c")]);
1484
1485        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1486
1487        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1488
1489        let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
1490
1491        let opts = SortOptions::default();
1492
1493        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1494            make_comparator(&array1, &array2, opts)
1495        else {
1496            panic!("expected error when making comparator of union arrays with different modes");
1497        };
1498
1499        assert_eq!(
1500            &out,
1501            "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
1502        );
1503    }
1504}