arrow_ord/
ord.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains functions and function factories to compare arrays.
19
20use arrow_array::cast::AsArray;
21use arrow_array::types::*;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_schema::{ArrowError, DataType, SortOptions};
25use std::{cmp::Ordering, collections::HashMap};
26
27/// Compare the values at two arbitrary indices in two arrays.
28pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
29
30/// If parent sort order is descending we need to invert the value of nulls_first so that
31/// when the parent is sorted based on the produced ranks, nulls are still ordered correctly
32fn child_opts(opts: SortOptions) -> SortOptions {
33    SortOptions {
34        descending: false,
35        nulls_first: opts.nulls_first != opts.descending,
36    }
37}
38
39fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
40where
41    A: Array + Clone,
42    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
43{
44    let l = l.logical_nulls().filter(|x| x.null_count() > 0);
45    let r = r.logical_nulls().filter(|x| x.null_count() > 0);
46    match (opts.nulls_first, opts.descending) {
47        (true, true) => compare_impl::<true, true, _>(l, r, cmp),
48        (true, false) => compare_impl::<true, false, _>(l, r, cmp),
49        (false, true) => compare_impl::<false, true, _>(l, r, cmp),
50        (false, false) => compare_impl::<false, false, _>(l, r, cmp),
51    }
52}
53
54fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
55    l: Option<NullBuffer>,
56    r: Option<NullBuffer>,
57    cmp: F,
58) -> DynComparator
59where
60    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
61{
62    let cmp = move |i, j| match DESCENDING {
63        true => cmp(i, j).reverse(),
64        false => cmp(i, j),
65    };
66
67    let (left_null, right_null) = match NULLS_FIRST {
68        true => (Ordering::Less, Ordering::Greater),
69        false => (Ordering::Greater, Ordering::Less),
70    };
71
72    match (l, r) {
73        (None, None) => Box::new(cmp),
74        (Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
75            true => left_null,
76            false => cmp(i, j),
77        }),
78        (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
79            true => right_null,
80            false => cmp(i, j),
81        }),
82        (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
83            (true, true) => Ordering::Equal,
84            (true, false) => left_null,
85            (false, true) => right_null,
86            (false, false) => cmp(i, j),
87        }),
88    }
89}
90
91fn compare_primitive<T: ArrowPrimitiveType>(
92    left: &dyn Array,
93    right: &dyn Array,
94    opts: SortOptions,
95) -> DynComparator
96where
97    T::Native: ArrowNativeTypeOp,
98{
99    let left = left.as_primitive::<T>();
100    let right = right.as_primitive::<T>();
101    let l_values = left.values().clone();
102    let r_values = right.values().clone();
103
104    compare(&left, &right, opts, move |i, j| {
105        l_values[i].compare(r_values[j])
106    })
107}
108
109fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
110    let left = left.as_boolean();
111    let right = right.as_boolean();
112
113    let l_values = left.values().clone();
114    let r_values = right.values().clone();
115
116    compare(left, right, opts, move |i, j| {
117        l_values.value(i).cmp(&r_values.value(j))
118    })
119}
120
121fn compare_bytes<T: ByteArrayType>(
122    left: &dyn Array,
123    right: &dyn Array,
124    opts: SortOptions,
125) -> DynComparator {
126    let left = left.as_bytes::<T>();
127    let right = right.as_bytes::<T>();
128
129    let l = left.clone();
130    let r = right.clone();
131    compare(left, right, opts, move |i, j| {
132        let l: &[u8] = l.value(i).as_ref();
133        let r: &[u8] = r.value(j).as_ref();
134        l.cmp(r)
135    })
136}
137
138fn compare_byte_view<T: ByteViewType>(
139    left: &dyn Array,
140    right: &dyn Array,
141    opts: SortOptions,
142) -> DynComparator {
143    let left = left.as_byte_view::<T>();
144    let right = right.as_byte_view::<T>();
145
146    let l = left.clone();
147    let r = right.clone();
148    compare(left, right, opts, move |i, j| {
149        crate::cmp::compare_byte_view(&l, i, &r, j)
150    })
151}
152
153fn compare_dict<K: ArrowDictionaryKeyType>(
154    left: &dyn Array,
155    right: &dyn Array,
156    opts: SortOptions,
157) -> Result<DynComparator, ArrowError> {
158    let left = left.as_dictionary::<K>();
159    let right = right.as_dictionary::<K>();
160
161    let c_opts = child_opts(opts);
162    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
163    let left_keys = left.keys().values().clone();
164    let right_keys = right.keys().values().clone();
165
166    let f = compare(left, right, opts, move |i, j| {
167        let l = left_keys[i].as_usize();
168        let r = right_keys[j].as_usize();
169        cmp(l, r)
170    });
171    Ok(f)
172}
173
174fn compare_list<O: OffsetSizeTrait>(
175    left: &dyn Array,
176    right: &dyn Array,
177    opts: SortOptions,
178) -> Result<DynComparator, ArrowError> {
179    let left = left.as_list::<O>();
180    let right = right.as_list::<O>();
181
182    let c_opts = child_opts(opts);
183    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
184
185    let l_o = left.offsets().clone();
186    let r_o = right.offsets().clone();
187    let f = compare(left, right, opts, move |i, j| {
188        let l_end = l_o[i + 1].as_usize();
189        let l_start = l_o[i].as_usize();
190
191        let r_end = r_o[j + 1].as_usize();
192        let r_start = r_o[j].as_usize();
193
194        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
195            match cmp(i, j) {
196                Ordering::Equal => continue,
197                r => return r,
198            }
199        }
200        (l_end - l_start).cmp(&(r_end - r_start))
201    });
202    Ok(f)
203}
204
205fn compare_fixed_list(
206    left: &dyn Array,
207    right: &dyn Array,
208    opts: SortOptions,
209) -> Result<DynComparator, ArrowError> {
210    let left = left.as_fixed_size_list();
211    let right = right.as_fixed_size_list();
212
213    let c_opts = child_opts(opts);
214    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
215
216    let l_size = left.value_length().to_usize().unwrap();
217    let r_size = right.value_length().to_usize().unwrap();
218    let size_cmp = l_size.cmp(&r_size);
219
220    let f = compare(left, right, opts, move |i, j| {
221        let l_start = i * l_size;
222        let l_end = l_start + l_size;
223        let r_start = j * r_size;
224        let r_end = r_start + r_size;
225        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
226            match cmp(i, j) {
227                Ordering::Equal => continue,
228                r => return r,
229            }
230        }
231        size_cmp
232    });
233    Ok(f)
234}
235
236fn compare_map(
237    left: &dyn Array,
238    right: &dyn Array,
239    opts: SortOptions,
240) -> Result<DynComparator, ArrowError> {
241    let left = left.as_map();
242    let right = right.as_map();
243
244    let c_opts = child_opts(opts);
245    let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
246
247    let l_o = left.offsets().clone();
248    let r_o = right.offsets().clone();
249    let f = compare(left, right, opts, move |i, j| {
250        let l_end = l_o[i + 1].as_usize();
251        let l_start = l_o[i].as_usize();
252
253        let r_end = r_o[j + 1].as_usize();
254        let r_start = r_o[j].as_usize();
255
256        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
257            match cmp(i, j) {
258                Ordering::Equal => continue,
259                r => return r,
260            }
261        }
262        (l_end - l_start).cmp(&(r_end - r_start))
263    });
264    Ok(f)
265}
266
267fn compare_struct(
268    left: &dyn Array,
269    right: &dyn Array,
270    opts: SortOptions,
271) -> Result<DynComparator, ArrowError> {
272    let left = left.as_struct();
273    let right = right.as_struct();
274
275    if left.columns().len() != right.columns().len() {
276        return Err(ArrowError::InvalidArgumentError(
277            "Cannot compare StructArray with different number of columns".to_string(),
278        ));
279    }
280
281    let c_opts = child_opts(opts);
282    let columns = left.columns().iter().zip(right.columns());
283    let comparators = columns
284        .map(|(l, r)| make_comparator(l, r, c_opts))
285        .collect::<Result<Vec<_>, _>>()?;
286
287    let f = compare(left, right, opts, move |i, j| {
288        for cmp in &comparators {
289            match cmp(i, j) {
290                Ordering::Equal => continue,
291                r => return r,
292            }
293        }
294        Ordering::Equal
295    });
296    Ok(f)
297}
298
299fn compare_union(
300    left: &dyn Array,
301    right: &dyn Array,
302    opts: SortOptions,
303) -> Result<DynComparator, ArrowError> {
304    let left = left.as_union();
305    let right = right.as_union();
306
307    let (left_fields, left_mode) = match left.data_type() {
308        DataType::Union(fields, mode) => (fields, mode),
309        _ => unreachable!(),
310    };
311    let (right_fields, right_mode) = match right.data_type() {
312        DataType::Union(fields, mode) => (fields, mode),
313        _ => unreachable!(),
314    };
315
316    if left_fields != right_fields {
317        return Err(ArrowError::InvalidArgumentError(format!(
318            "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
319            left_fields, right_fields
320        )));
321    }
322
323    if left_mode != right_mode {
324        return Err(ArrowError::InvalidArgumentError(format!(
325            "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
326            left_mode, right_mode
327        )));
328    }
329
330    let c_opts = child_opts(opts);
331
332    let mut field_comparators = HashMap::with_capacity(left_fields.len());
333
334    for (type_id, _field) in left_fields.iter() {
335        let left_child = left.child(type_id);
336        let right_child = right.child(type_id);
337        let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
338
339        field_comparators.insert(type_id, cmp);
340    }
341
342    let left_type_ids = left.type_ids().clone();
343    let right_type_ids = right.type_ids().clone();
344
345    let left_offsets = left.offsets().cloned();
346    let right_offsets = right.offsets().cloned();
347
348    let f = compare(left, right, opts, move |i, j| {
349        let left_type_id = left_type_ids[i];
350        let right_type_id = right_type_ids[j];
351
352        // first, compare by type_id
353        match left_type_id.cmp(&right_type_id) {
354            Ordering::Equal => {
355                // second, compare by values
356                let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
357                let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
358
359                let cmp = field_comparators
360                    .get(&left_type_id)
361                    .expect("type id not found in field_comparators");
362
363                cmp(left_offset, right_offset)
364            }
365            other => other,
366        }
367    });
368    Ok(f)
369}
370
371/// Returns a comparison function that compares two values at two different positions
372/// between the two arrays.
373///
374/// For comparing arrays element-wise, see also the vectorised kernels in [`crate::cmp`].
375///
376/// If `nulls_first` is true `NULL` values will be considered less than any non-null value,
377/// otherwise they will be considered greater.
378///
379/// # Basic Usage
380///
381/// ```
382/// # use std::cmp::Ordering;
383/// # use arrow_array::Int32Array;
384/// # use arrow_ord::ord::make_comparator;
385/// # use arrow_schema::SortOptions;
386/// #
387/// let array1 = Int32Array::from(vec![1, 2]);
388/// let array2 = Int32Array::from(vec![3, 4]);
389///
390/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
391/// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2)
392/// assert_eq!(cmp(0, 1), Ordering::Less);
393///
394/// let array1 = Int32Array::from(vec![Some(1), None]);
395/// let array2 = Int32Array::from(vec![None, Some(2)]);
396/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
397///
398/// assert_eq!(cmp(0, 1), Ordering::Less); // Some(1) vs Some(2)
399/// assert_eq!(cmp(1, 1), Ordering::Less); // None vs Some(2)
400/// assert_eq!(cmp(1, 0), Ordering::Equal); // None vs None
401/// assert_eq!(cmp(0, 0), Ordering::Greater); // Some(1) vs None
402/// ```
403///
404/// # Postgres-compatible Nested Comparison
405///
406/// Whilst SQL prescribes ternary logic for nulls, that is comparing a value against a NULL yields
407/// a NULL, many systems, including postgres, instead apply a total ordering to comparison of
408/// nested nulls. That is nulls within nested types are either greater than any value (postgres),
409/// or less than any value (Spark).
410///
411/// In particular
412///
413/// ```ignore
414/// { a: 1, b: null } == { a: 1, b: null } => true
415/// { a: 1, b: null } == { a: 1, b: 1 } => false
416/// { a: 1, b: null } == null => null
417/// null == null => null
418/// ```
419///
420/// This could be implemented as below
421///
422/// ```
423/// # use arrow_array::{Array, BooleanArray};
424/// # use arrow_buffer::NullBuffer;
425/// # use arrow_ord::cmp;
426/// # use arrow_ord::ord::make_comparator;
427/// # use arrow_schema::{ArrowError, SortOptions};
428/// fn eq(a: &dyn Array, b: &dyn Array) -> Result<BooleanArray, ArrowError> {
429///     if !a.data_type().is_nested() {
430///         return cmp::eq(&a, &b); // Use faster vectorised kernel
431///     }
432///
433///     let cmp = make_comparator(a, b, SortOptions::default())?;
434///     let len = a.len().min(b.len());
435///     let values = (0..len).map(|i| cmp(i, i).is_eq()).collect();
436///     let nulls = NullBuffer::union(a.nulls(), b.nulls());
437///     Ok(BooleanArray::new(values, nulls))
438/// }
439/// ````
440pub fn make_comparator(
441    left: &dyn Array,
442    right: &dyn Array,
443    opts: SortOptions,
444) -> Result<DynComparator, ArrowError> {
445    use arrow_schema::DataType::*;
446
447    macro_rules! primitive_helper {
448        ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
449            Ok(compare_primitive::<$t>($left, $right, $nulls_first))
450        };
451    }
452    downcast_primitive! {
453        left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
454        (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
455        (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
456        (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
457        (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
458        (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
459        (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
460        (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
461        (FixedSizeBinary(_), FixedSizeBinary(_)) => {
462            let left = left.as_fixed_size_binary();
463            let right = right.as_fixed_size_binary();
464
465            let l = left.clone();
466            let r = right.clone();
467            Ok(compare(left, right, opts, move |i, j| {
468                l.value(i).cmp(r.value(j))
469            }))
470        },
471        (List(_), List(_)) => compare_list::<i32>(left, right, opts),
472        (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
473        (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
474        (Struct(_), Struct(_)) => compare_struct(left, right, opts),
475        (Dictionary(l_key, _), Dictionary(r_key, _)) => {
476             macro_rules! dict_helper {
477                ($t:ty, $left:expr, $right:expr, $opts: expr) => {
478                     compare_dict::<$t>($left, $right, $opts)
479                 };
480             }
481            downcast_integer! {
482                 l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
483                 _ => unreachable!()
484             }
485        },
486        (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
487        (Null, Null) => Ok(Box::new(|_, _| Ordering::Equal)),
488        (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
489        (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
490            true => format!("The data type type {lhs:?} has no natural order"),
491            false => "Can't compare arrays of different types".to_string(),
492        }))
493    }
494}
495
496#[cfg(test)]
497mod tests {
498    use super::*;
499    use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
500    use arrow_buffer::{IntervalDayTime, OffsetBuffer, ScalarBuffer, i256};
501    use arrow_schema::{DataType, Field, Fields, UnionFields};
502    use half::f16;
503    use std::sync::Arc;
504
505    #[test]
506    fn test_fixed_size_binary() {
507        let items = vec![vec![1u8], vec![2u8]];
508        let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
509
510        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
511
512        assert_eq!(Ordering::Less, cmp(0, 1));
513    }
514
515    #[test]
516    fn test_fixed_size_binary_fixed_size_binary() {
517        let items = vec![vec![1u8]];
518        let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
519        let items = vec![vec![2u8]];
520        let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
521
522        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
523
524        assert_eq!(Ordering::Less, cmp(0, 0));
525    }
526
527    #[test]
528    fn test_i32() {
529        let array = Int32Array::from(vec![1, 2]);
530
531        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
532
533        assert_eq!(Ordering::Less, (cmp)(0, 1));
534    }
535
536    #[test]
537    fn test_i32_i32() {
538        let array1 = Int32Array::from(vec![1]);
539        let array2 = Int32Array::from(vec![2]);
540
541        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
542
543        assert_eq!(Ordering::Less, cmp(0, 0));
544    }
545
546    #[test]
547    fn test_f16() {
548        let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
549
550        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
551
552        assert_eq!(Ordering::Less, cmp(0, 1));
553    }
554
555    #[test]
556    fn test_f64() {
557        let array = Float64Array::from(vec![1.0, 2.0]);
558
559        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
560
561        assert_eq!(Ordering::Less, cmp(0, 1));
562    }
563
564    #[test]
565    fn test_f64_nan() {
566        let array = Float64Array::from(vec![1.0, f64::NAN]);
567
568        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
569
570        assert_eq!(Ordering::Less, cmp(0, 1));
571        assert_eq!(Ordering::Equal, cmp(1, 1));
572    }
573
574    #[test]
575    fn test_f64_zeros() {
576        let array = Float64Array::from(vec![-0.0, 0.0]);
577
578        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
579
580        assert_eq!(Ordering::Less, cmp(0, 1));
581        assert_eq!(Ordering::Greater, cmp(1, 0));
582    }
583
584    #[test]
585    fn test_interval_day_time() {
586        let array = IntervalDayTimeArray::from(vec![
587            // 0 days, 1 second
588            IntervalDayTimeType::make_value(0, 1000),
589            // 1 day, 2 milliseconds
590            IntervalDayTimeType::make_value(1, 2),
591            // 90M milliseconds (which is more than is in 1 day)
592            IntervalDayTimeType::make_value(0, 90_000_000),
593        ]);
594
595        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
596
597        assert_eq!(Ordering::Less, cmp(0, 1));
598        assert_eq!(Ordering::Greater, cmp(1, 0));
599
600        // somewhat confusingly, while 90M milliseconds is more than 1 day,
601        // it will compare less as the comparison is done on the underlying
602        // values not field by field
603        assert_eq!(Ordering::Greater, cmp(1, 2));
604        assert_eq!(Ordering::Less, cmp(2, 1));
605    }
606
607    #[test]
608    fn test_interval_year_month() {
609        let array = IntervalYearMonthArray::from(vec![
610            // 1 year, 0 months
611            IntervalYearMonthType::make_value(1, 0),
612            // 0 years, 13 months
613            IntervalYearMonthType::make_value(0, 13),
614            // 1 year, 1 month
615            IntervalYearMonthType::make_value(1, 1),
616        ]);
617
618        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
619
620        assert_eq!(Ordering::Less, cmp(0, 1));
621        assert_eq!(Ordering::Greater, cmp(1, 0));
622
623        // the underlying representation is months, so both quantities are the same
624        assert_eq!(Ordering::Equal, cmp(1, 2));
625        assert_eq!(Ordering::Equal, cmp(2, 1));
626    }
627
628    #[test]
629    fn test_interval_month_day_nano() {
630        let array = IntervalMonthDayNanoArray::from(vec![
631            // 100 days
632            IntervalMonthDayNanoType::make_value(0, 100, 0),
633            // 1 month
634            IntervalMonthDayNanoType::make_value(1, 0, 0),
635            // 100 day, 1 nanoseconds
636            IntervalMonthDayNanoType::make_value(0, 100, 2),
637        ]);
638
639        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
640
641        assert_eq!(Ordering::Less, cmp(0, 1));
642        assert_eq!(Ordering::Greater, cmp(1, 0));
643
644        // somewhat confusingly, while 100 days is more than 1 month in all cases
645        // it will compare less as the comparison is done on the underlying
646        // values not field by field
647        assert_eq!(Ordering::Greater, cmp(1, 2));
648        assert_eq!(Ordering::Less, cmp(2, 1));
649    }
650
651    #[test]
652    fn test_decimali32() {
653        let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
654            .into_iter()
655            .collect::<Decimal32Array>()
656            .with_precision_and_scale(8, 6)
657            .unwrap();
658
659        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
660        assert_eq!(Ordering::Less, cmp(1, 0));
661        assert_eq!(Ordering::Greater, cmp(0, 2));
662    }
663
664    #[test]
665    fn test_decimali64() {
666        let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
667            .into_iter()
668            .collect::<Decimal64Array>()
669            .with_precision_and_scale(16, 6)
670            .unwrap();
671
672        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
673        assert_eq!(Ordering::Less, cmp(1, 0));
674        assert_eq!(Ordering::Greater, cmp(0, 2));
675    }
676
677    #[test]
678    fn test_decimali128() {
679        let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
680            .into_iter()
681            .collect::<Decimal128Array>()
682            .with_precision_and_scale(23, 6)
683            .unwrap();
684
685        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
686        assert_eq!(Ordering::Less, cmp(1, 0));
687        assert_eq!(Ordering::Greater, cmp(0, 2));
688    }
689
690    #[test]
691    fn test_decimali256() {
692        let array = vec![
693            Some(i256::from_i128(5_i128)),
694            Some(i256::from_i128(2_i128)),
695            Some(i256::from_i128(3_i128)),
696        ]
697        .into_iter()
698        .collect::<Decimal256Array>()
699        .with_precision_and_scale(53, 6)
700        .unwrap();
701
702        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
703        assert_eq!(Ordering::Less, cmp(1, 0));
704        assert_eq!(Ordering::Greater, cmp(0, 2));
705    }
706
707    #[test]
708    fn test_dict() {
709        let data = vec!["a", "b", "c", "a", "a", "c", "c"];
710        let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
711
712        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
713
714        assert_eq!(Ordering::Less, cmp(0, 1));
715        assert_eq!(Ordering::Equal, cmp(3, 4));
716        assert_eq!(Ordering::Greater, cmp(2, 3));
717    }
718
719    #[test]
720    fn test_multiple_dict() {
721        let d1 = vec!["a", "b", "c", "d"];
722        let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
723        let d2 = vec!["e", "f", "g", "a"];
724        let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
725
726        let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
727
728        assert_eq!(Ordering::Less, cmp(0, 0));
729        assert_eq!(Ordering::Equal, cmp(0, 3));
730        assert_eq!(Ordering::Greater, cmp(1, 3));
731    }
732
733    #[test]
734    fn test_primitive_dict() {
735        let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
736        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
737        let array1 = DictionaryArray::new(keys, Arc::new(values));
738
739        let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
740        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
741        let array2 = DictionaryArray::new(keys, Arc::new(values));
742
743        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
744
745        assert_eq!(Ordering::Less, cmp(0, 0));
746        assert_eq!(Ordering::Less, cmp(0, 3));
747        assert_eq!(Ordering::Equal, cmp(3, 3));
748        assert_eq!(Ordering::Greater, cmp(3, 1));
749        assert_eq!(Ordering::Greater, cmp(3, 2));
750    }
751
752    #[test]
753    fn test_float_dict() {
754        let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
755        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
756        let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
757
758        let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
759        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
760        let array2 = DictionaryArray::new(keys, Arc::new(values));
761
762        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
763
764        assert_eq!(Ordering::Less, cmp(0, 0));
765        assert_eq!(Ordering::Less, cmp(0, 3));
766        assert_eq!(Ordering::Equal, cmp(3, 3));
767        assert_eq!(Ordering::Greater, cmp(3, 1));
768        assert_eq!(Ordering::Greater, cmp(3, 2));
769    }
770
771    #[test]
772    fn test_timestamp_dict() {
773        let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
774        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
775        let array1 = DictionaryArray::new(keys, Arc::new(values));
776
777        let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
778        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
779        let array2 = DictionaryArray::new(keys, Arc::new(values));
780
781        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
782
783        assert_eq!(Ordering::Less, cmp(0, 0));
784        assert_eq!(Ordering::Less, cmp(0, 3));
785        assert_eq!(Ordering::Equal, cmp(3, 3));
786        assert_eq!(Ordering::Greater, cmp(3, 1));
787        assert_eq!(Ordering::Greater, cmp(3, 2));
788    }
789
790    #[test]
791    fn test_interval_dict() {
792        let v1 = IntervalDayTime::new(0, 1);
793        let v2 = IntervalDayTime::new(0, 2);
794        let v3 = IntervalDayTime::new(12, 2);
795
796        let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
797        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
798        let array1 = DictionaryArray::new(keys, Arc::new(values));
799
800        let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
801        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
802        let array2 = DictionaryArray::new(keys, Arc::new(values));
803
804        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
805
806        assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3
807        assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1
808        assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1
809        assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2
810        assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2
811    }
812
813    #[test]
814    fn test_duration_dict() {
815        let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
816        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
817        let array1 = DictionaryArray::new(keys, Arc::new(values));
818
819        let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
820        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
821        let array2 = DictionaryArray::new(keys, Arc::new(values));
822
823        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
824
825        assert_eq!(Ordering::Less, cmp(0, 0));
826        assert_eq!(Ordering::Less, cmp(0, 3));
827        assert_eq!(Ordering::Equal, cmp(3, 3));
828        assert_eq!(Ordering::Greater, cmp(3, 1));
829        assert_eq!(Ordering::Greater, cmp(3, 2));
830    }
831
832    #[test]
833    fn test_decimal_dict() {
834        let values = Decimal128Array::from(vec![1, 0, 2, 5]);
835        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
836        let array1 = DictionaryArray::new(keys, Arc::new(values));
837
838        let values = Decimal128Array::from(vec![2, 3, 4, 5]);
839        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
840        let array2 = DictionaryArray::new(keys, Arc::new(values));
841
842        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
843
844        assert_eq!(Ordering::Less, cmp(0, 0));
845        assert_eq!(Ordering::Less, cmp(0, 3));
846        assert_eq!(Ordering::Equal, cmp(3, 3));
847        assert_eq!(Ordering::Greater, cmp(3, 1));
848        assert_eq!(Ordering::Greater, cmp(3, 2));
849    }
850
851    #[test]
852    fn test_decimal256_dict() {
853        let values = Decimal256Array::from(vec![
854            i256::from_i128(1),
855            i256::from_i128(0),
856            i256::from_i128(2),
857            i256::from_i128(5),
858        ]);
859        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
860        let array1 = DictionaryArray::new(keys, Arc::new(values));
861
862        let values = Decimal256Array::from(vec![
863            i256::from_i128(2),
864            i256::from_i128(3),
865            i256::from_i128(4),
866            i256::from_i128(5),
867        ]);
868        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
869        let array2 = DictionaryArray::new(keys, Arc::new(values));
870
871        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
872
873        assert_eq!(Ordering::Less, cmp(0, 0));
874        assert_eq!(Ordering::Less, cmp(0, 3));
875        assert_eq!(Ordering::Equal, cmp(3, 3));
876        assert_eq!(Ordering::Greater, cmp(3, 1));
877        assert_eq!(Ordering::Greater, cmp(3, 2));
878    }
879
880    fn test_bytes_impl<T: ByteArrayType>() {
881        let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
882        let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
883        let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
884
885        assert_eq!(Ordering::Less, cmp(0, 1));
886        assert_eq!(Ordering::Greater, cmp(0, 2));
887        assert_eq!(Ordering::Equal, cmp(1, 1));
888    }
889
890    #[test]
891    fn test_bytes() {
892        test_bytes_impl::<Utf8Type>();
893        test_bytes_impl::<LargeUtf8Type>();
894        test_bytes_impl::<BinaryType>();
895        test_bytes_impl::<LargeBinaryType>();
896    }
897
898    #[test]
899    fn test_lists() {
900        let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
901        a.extend([
902            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
903            Some(vec![
904                Some(vec![Some(1), Some(2), Some(3)]),
905                Some(vec![Some(1)]),
906            ]),
907            Some(vec![]),
908        ]);
909        let a = a.finish();
910        let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
911        b.extend([
912            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
913            Some(vec![
914                Some(vec![Some(1), Some(2), None]),
915                Some(vec![Some(1)]),
916            ]),
917            Some(vec![
918                Some(vec![Some(1), Some(2), Some(3), Some(4)]),
919                Some(vec![Some(1)]),
920            ]),
921            None,
922        ]);
923        let b = b.finish();
924
925        let opts = SortOptions {
926            descending: false,
927            nulls_first: true,
928        };
929        let cmp = make_comparator(&a, &b, opts).unwrap();
930        assert_eq!(cmp(0, 0), Ordering::Equal);
931        assert_eq!(cmp(0, 1), Ordering::Less);
932        assert_eq!(cmp(0, 2), Ordering::Less);
933        assert_eq!(cmp(1, 2), Ordering::Less);
934        assert_eq!(cmp(1, 3), Ordering::Greater);
935        assert_eq!(cmp(2, 0), Ordering::Less);
936
937        let opts = SortOptions {
938            descending: true,
939            nulls_first: true,
940        };
941        let cmp = make_comparator(&a, &b, opts).unwrap();
942        assert_eq!(cmp(0, 0), Ordering::Equal);
943        assert_eq!(cmp(0, 1), Ordering::Less);
944        assert_eq!(cmp(0, 2), Ordering::Less);
945        assert_eq!(cmp(1, 2), Ordering::Greater);
946        assert_eq!(cmp(1, 3), Ordering::Greater);
947        assert_eq!(cmp(2, 0), Ordering::Greater);
948
949        let opts = SortOptions {
950            descending: true,
951            nulls_first: false,
952        };
953        let cmp = make_comparator(&a, &b, opts).unwrap();
954        assert_eq!(cmp(0, 0), Ordering::Equal);
955        assert_eq!(cmp(0, 1), Ordering::Greater);
956        assert_eq!(cmp(0, 2), Ordering::Greater);
957        assert_eq!(cmp(1, 2), Ordering::Greater);
958        assert_eq!(cmp(1, 3), Ordering::Less);
959        assert_eq!(cmp(2, 0), Ordering::Greater);
960
961        let opts = SortOptions {
962            descending: false,
963            nulls_first: false,
964        };
965        let cmp = make_comparator(&a, &b, opts).unwrap();
966        assert_eq!(cmp(0, 0), Ordering::Equal);
967        assert_eq!(cmp(0, 1), Ordering::Greater);
968        assert_eq!(cmp(0, 2), Ordering::Greater);
969        assert_eq!(cmp(1, 2), Ordering::Less);
970        assert_eq!(cmp(1, 3), Ordering::Less);
971        assert_eq!(cmp(2, 0), Ordering::Less);
972    }
973
974    #[test]
975    fn test_struct() {
976        let fields = Fields::from(vec![
977            Field::new("a", DataType::Int32, true),
978            Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
979        ]);
980
981        let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
982        let mut b = ListBuilder::new(Int32Builder::new());
983        b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
984        let b = b.finish();
985
986        let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
987        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
988        let s1 = StructArray::new(fields.clone(), values, nulls);
989
990        let a = Int32Array::from(vec![None, Some(2), None]);
991        let mut b = ListBuilder::new(Int32Builder::new());
992        b.extend([None, None, Some(vec![])]);
993        let b = b.finish();
994
995        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
996        let s2 = StructArray::new(fields.clone(), values, None);
997
998        let opts = SortOptions {
999            descending: false,
1000            nulls_first: true,
1001        };
1002        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1003        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1004        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1005        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1006        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1007        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1008        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1009        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1010
1011        let opts = SortOptions {
1012            descending: true,
1013            nulls_first: true,
1014        };
1015        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1016        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1017        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1018        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1019        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1020        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1021        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1022        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1023
1024        let opts = SortOptions {
1025            descending: true,
1026            nulls_first: false,
1027        };
1028        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1029        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1030        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1031        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1032        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1033        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1034        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1035        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1036
1037        let opts = SortOptions {
1038            descending: false,
1039            nulls_first: false,
1040        };
1041        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1042        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1043        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1044        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1045        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1046        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1047        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1048        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1049    }
1050
1051    #[test]
1052    fn test_map() {
1053        // Create first map array demonstrating key priority over values:
1054        // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
1055        let string_builder = StringBuilder::new();
1056        let int_builder = Int32Builder::new();
1057        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1058
1059        // {"a": 100, "b": 1} - high value for "a", low value for "b"
1060        map1_builder.keys().append_value("a");
1061        map1_builder.values().append_value(100);
1062        map1_builder.keys().append_value("b");
1063        map1_builder.values().append_value(1);
1064        map1_builder.append(true).unwrap();
1065
1066        // {"b": 999, "c": 1} - very high value for "b", low value for "c"
1067        map1_builder.keys().append_value("b");
1068        map1_builder.values().append_value(999);
1069        map1_builder.keys().append_value("c");
1070        map1_builder.values().append_value(1);
1071        map1_builder.append(true).unwrap();
1072
1073        // {}
1074        map1_builder.append(true).unwrap();
1075
1076        // {"x": 1}
1077        map1_builder.keys().append_value("x");
1078        map1_builder.values().append_value(1);
1079        map1_builder.append(true).unwrap();
1080
1081        let map1 = map1_builder.finish();
1082
1083        // Create second map array:
1084        // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
1085        let string_builder = StringBuilder::new();
1086        let int_builder = Int32Builder::new();
1087        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1088
1089        // {"a": 1, "c": 999} - low value for "a", high value for "c"
1090        map2_builder.keys().append_value("a");
1091        map2_builder.values().append_value(1);
1092        map2_builder.keys().append_value("c");
1093        map2_builder.values().append_value(999);
1094        map2_builder.append(true).unwrap();
1095
1096        // {"b": 1, "d": 999} - low value for "b", high value for "d"
1097        map2_builder.keys().append_value("b");
1098        map2_builder.values().append_value(1);
1099        map2_builder.keys().append_value("d");
1100        map2_builder.values().append_value(999);
1101        map2_builder.append(true).unwrap();
1102
1103        // {"a": 1}
1104        map2_builder.keys().append_value("a");
1105        map2_builder.values().append_value(1);
1106        map2_builder.append(true).unwrap();
1107
1108        // None
1109        map2_builder.append(false).unwrap();
1110
1111        let map2 = map2_builder.finish();
1112
1113        let opts = SortOptions {
1114            descending: false,
1115            nulls_first: true,
1116        };
1117        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1118
1119        // Test that keys have priority over values:
1120        // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1121        // First entries match (a:100 vs a:1), but 100 > 1, so Greater
1122        assert_eq!(cmp(0, 0), Ordering::Greater);
1123
1124        // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1125        // First entries match (b:999 vs b:1), but 999 > 1, so Greater
1126        assert_eq!(cmp(1, 1), Ordering::Greater);
1127
1128        // Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 1}
1129        assert_eq!(cmp(0, 1), Ordering::Less);
1130
1131        // Empty map vs non-empty
1132        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1133
1134        // Non-null vs null
1135        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1136
1137        // Key priority test: "x" > "a", regardless of values
1138        assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999}
1139
1140        // Empty vs non-empty
1141        assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
1142
1143        let opts = SortOptions {
1144            descending: true,
1145            nulls_first: true,
1146        };
1147        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1148
1149        // With descending=true, value comparison is reversed
1150        assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed)
1151        assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed)
1152        assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 999, "c": 1} (key order reversed)
1153        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1154        assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
1155
1156        let opts = SortOptions {
1157            descending: false,
1158            nulls_first: false,
1159        };
1160        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1161
1162        // Same key priority behavior with nulls_first=false
1163        assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1164        assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1165        assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last)
1166        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1167    }
1168
1169    #[test]
1170    fn test_map_vs_list_consistency() {
1171        // Create map arrays and convert them to list arrays to verify comparison consistency
1172        // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
1173        let string_builder = StringBuilder::new();
1174        let int_builder = Int32Builder::new();
1175        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1176
1177        // {"a": 1, "b": 2}
1178        map1_builder.keys().append_value("a");
1179        map1_builder.values().append_value(1);
1180        map1_builder.keys().append_value("b");
1181        map1_builder.values().append_value(2);
1182        map1_builder.append(true).unwrap();
1183
1184        // {"x": 10}
1185        map1_builder.keys().append_value("x");
1186        map1_builder.values().append_value(10);
1187        map1_builder.append(true).unwrap();
1188
1189        // {}
1190        map1_builder.append(true).unwrap();
1191
1192        // {"c": 3}
1193        map1_builder.keys().append_value("c");
1194        map1_builder.values().append_value(3);
1195        map1_builder.append(true).unwrap();
1196
1197        let map1 = map1_builder.finish();
1198
1199        // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
1200        let string_builder = StringBuilder::new();
1201        let int_builder = Int32Builder::new();
1202        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1203
1204        // {"a": 1, "b": 2}
1205        map2_builder.keys().append_value("a");
1206        map2_builder.values().append_value(1);
1207        map2_builder.keys().append_value("b");
1208        map2_builder.values().append_value(2);
1209        map2_builder.append(true).unwrap();
1210
1211        // {"y": 20}
1212        map2_builder.keys().append_value("y");
1213        map2_builder.values().append_value(20);
1214        map2_builder.append(true).unwrap();
1215
1216        // {"d": 4}
1217        map2_builder.keys().append_value("d");
1218        map2_builder.values().append_value(4);
1219        map2_builder.append(true).unwrap();
1220
1221        // None
1222        map2_builder.append(false).unwrap();
1223
1224        let map2 = map2_builder.finish();
1225
1226        // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs)
1227        let list1: ListArray = map1.clone().into();
1228        let list2: ListArray = map2.clone().into();
1229
1230        let test_cases = [
1231            SortOptions {
1232                descending: false,
1233                nulls_first: true,
1234            },
1235            SortOptions {
1236                descending: true,
1237                nulls_first: true,
1238            },
1239            SortOptions {
1240                descending: false,
1241                nulls_first: false,
1242            },
1243            SortOptions {
1244                descending: true,
1245                nulls_first: false,
1246            },
1247        ];
1248
1249        for opts in test_cases {
1250            let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
1251            let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
1252
1253            // Test all possible index combinations
1254            for i in 0..map1.len() {
1255                for j in 0..map2.len() {
1256                    let map_result = map_cmp(i, j);
1257                    let list_result = list_cmp(i, j);
1258                    assert_eq!(
1259                        map_result, list_result,
1260                        "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
1261                    );
1262                }
1263            }
1264        }
1265    }
1266
1267    #[test]
1268    fn test_dense_union() {
1269        // create a dense union array with Int32 (type_id = 0) and Utf8 (type_id=1)
1270        // the values are: [1, "b", 2, "a", 3]
1271        //  type_ids are: [0,  1,  0,  1,  0]
1272        //   offsets are: [0, 0, 1, 1, 2] from [1, 2, 3] and ["b", "a"]
1273        let int_array = Int32Array::from(vec![1, 2, 3]);
1274        let str_array = StringArray::from(vec!["b", "a"]);
1275
1276        let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1277        let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
1278
1279        let union_fields = [
1280            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1281            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1282        ]
1283        .into_iter()
1284        .collect::<UnionFields>();
1285
1286        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1287
1288        let array1 =
1289            UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
1290
1291        // create a second array: [2, "a", 1, "c"]
1292        //          type ids are: [0,  1,  0,  1]
1293        //           offsets are: [0, 0, 1, 1] from [2, 1] and ["a", "c"]
1294        let int_array2 = Int32Array::from(vec![2, 1]);
1295        let str_array2 = StringArray::from(vec!["a", "c"]);
1296        let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1297        let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
1298
1299        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1300
1301        let array2 =
1302            UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
1303
1304        let opts = SortOptions {
1305            descending: false,
1306            nulls_first: true,
1307        };
1308
1309        // comparing
1310        // [1, "b", 2, "a", 3]
1311        // [2, "a", 1, "c"]
1312        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1313
1314        // array1[0] = (type_id=0, value=1)
1315        // array2[0] = (type_id=0, value=2)
1316        assert_eq!(cmp(0, 0), Ordering::Less); // 1 < 2
1317
1318        // array1[0] = (type_id=0, value=1)
1319        // array2[1] = (type_id=1, value="a")
1320        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1321
1322        // array1[1] = (type_id=1, value="b")
1323        // array2[1] = (type_id=1, value="a")
1324        assert_eq!(cmp(1, 1), Ordering::Greater); // "b" > "a"
1325
1326        // array1[2] = (type_id=0, value=2)
1327        // array2[0] = (type_id=0, value=2)
1328        assert_eq!(cmp(2, 0), Ordering::Equal); // 2 == 2
1329
1330        // array1[3] = (type_id=1, value="a")
1331        // array2[1] = (type_id=1, value="a")
1332        assert_eq!(cmp(3, 1), Ordering::Equal); // "a" == "a"
1333
1334        // array1[1] = (type_id=1, value="b")
1335        // array2[3] = (type_id=1, value="c")
1336        assert_eq!(cmp(1, 3), Ordering::Less); // "b" < "c"
1337
1338        let opts_desc = SortOptions {
1339            descending: true,
1340            nulls_first: true,
1341        };
1342        let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
1343
1344        assert_eq!(cmp_desc(0, 0), Ordering::Greater); // 1 > 2 (reversed)
1345        assert_eq!(cmp_desc(0, 1), Ordering::Greater); // type_id 0 < 1, reversed to Greater
1346        assert_eq!(cmp_desc(1, 1), Ordering::Less); // "b" < "a" (reversed)
1347    }
1348
1349    #[test]
1350    fn test_sparse_union() {
1351        // create a sparse union array with Int32 (type_id=0) and Utf8 (type_id=1)
1352        // values: [1, "b", 3]
1353        // note, in sparse unions, child arrays have the same length as the union
1354        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1355        let str_array = StringArray::from(vec![None, Some("b"), None]);
1356        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1357
1358        let union_fields = [
1359            (0, Arc::new(Field::new("a", DataType::Int32, false))),
1360            (1, Arc::new(Field::new("b", DataType::Utf8, false))),
1361        ]
1362        .into_iter()
1363        .collect::<UnionFields>();
1364
1365        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1366
1367        let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
1368
1369        let opts = SortOptions::default();
1370        let cmp = make_comparator(&array, &array, opts).unwrap();
1371
1372        // array[0] = (type_id=0, value=1), array[2] = (type_id=0, value=3)
1373        assert_eq!(cmp(0, 2), Ordering::Less); // 1 < 3
1374        // array[0] = (type_id=0, value=1), array[1] = (type_id=1, value="b")
1375        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1376    }
1377
1378    #[test]
1379    #[should_panic(expected = "index out of bounds")]
1380    fn test_union_out_of_bounds() {
1381        // create a dense union array with 3 elements
1382        let int_array = Int32Array::from(vec![1, 2]);
1383        let str_array = StringArray::from(vec!["a"]);
1384
1385        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1386        let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
1387
1388        let union_fields = [
1389            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1390            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1391        ]
1392        .into_iter()
1393        .collect::<UnionFields>();
1394
1395        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1396
1397        let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
1398
1399        let opts = SortOptions::default();
1400        let cmp = make_comparator(&array, &array, opts).unwrap();
1401
1402        // oob
1403        cmp(0, 3);
1404    }
1405
1406    #[test]
1407    fn test_union_incompatible_fields() {
1408        // create first union with Int32 and Utf8
1409        let int_array1 = Int32Array::from(vec![1, 2]);
1410        let str_array1 = StringArray::from(vec!["a", "b"]);
1411
1412        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1413        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1414
1415        let union_fields1 = [
1416            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1417            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1418        ]
1419        .into_iter()
1420        .collect::<UnionFields>();
1421
1422        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1423
1424        let array1 =
1425            UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
1426
1427        // create second union with Int32 and Float64 (incompatible with first)
1428        let int_array2 = Int32Array::from(vec![3, 4]);
1429        let float_array2 = Float64Array::from(vec![1.0, 2.0]);
1430
1431        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1432        let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1433
1434        let union_fields2 = [
1435            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1436            (1, Arc::new(Field::new("C", DataType::Float64, false))),
1437        ]
1438        .into_iter()
1439        .collect::<UnionFields>();
1440
1441        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
1442
1443        let array2 =
1444            UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
1445
1446        let opts = SortOptions::default();
1447
1448        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1449            make_comparator(&array1, &array2, opts)
1450        else {
1451            panic!("expected error when making comparator of incompatible union arrays");
1452        };
1453
1454        assert_eq!(
1455            &out,
1456            "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
1457        );
1458    }
1459
1460    #[test]
1461    fn test_union_incompatible_modes() {
1462        // create first union as Dense with Int32 and Utf8
1463        let int_array1 = Int32Array::from(vec![1, 2]);
1464        let str_array1 = StringArray::from(vec!["a", "b"]);
1465
1466        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1467        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1468
1469        let union_fields1 = [
1470            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1471            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1472        ]
1473        .into_iter()
1474        .collect::<UnionFields>();
1475
1476        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1477
1478        let array1 =
1479            UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
1480                .unwrap();
1481
1482        // create second union as Sparse with same fields (Int32 and Utf8)
1483        let int_array2 = Int32Array::from(vec![Some(3), None]);
1484        let str_array2 = StringArray::from(vec![None, Some("c")]);
1485
1486        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1487
1488        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1489
1490        let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
1491
1492        let opts = SortOptions::default();
1493
1494        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1495            make_comparator(&array1, &array2, opts)
1496        else {
1497            panic!("expected error when making comparator of union arrays with different modes");
1498        };
1499
1500        assert_eq!(
1501            &out,
1502            "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
1503        );
1504    }
1505
1506    #[test]
1507    fn test_null_array_cmp() {
1508        let a = NullArray::new(3);
1509        let b = NullArray::new(3);
1510        let cmp = make_comparator(&a, &b, SortOptions::default()).unwrap();
1511
1512        assert_eq!(cmp(0, 0), Ordering::Equal);
1513        assert_eq!(cmp(0, 1), Ordering::Equal);
1514        assert_eq!(cmp(2, 0), Ordering::Equal);
1515    }
1516}