Skip to main content

arrow_ord/
ord.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! Contains functions and function factories to compare arrays.
19
20use arrow_array::cast::AsArray;
21use arrow_array::types::*;
22use arrow_array::*;
23use arrow_buffer::{ArrowNativeType, NullBuffer};
24use arrow_schema::{ArrowError, DataType, SortOptions};
25use std::{cmp::Ordering, collections::HashMap};
26
27fn compare_run_end_encoded<R: RunEndIndexType>(
28    left: &dyn Array,
29    right: &dyn Array,
30    opts: SortOptions,
31) -> Result<DynComparator, ArrowError> {
32    let left = left.as_run::<R>();
33    let right = right.as_run::<R>();
34
35    let c_opts = child_opts(opts);
36    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
37
38    let l_run_ends = left.run_ends().clone();
39    let r_run_ends = right.run_ends().clone();
40
41    let f = compare(left, right, opts, move |i, j| {
42        let l_physical = l_run_ends.get_physical_index(i);
43        let r_physical = r_run_ends.get_physical_index(j);
44        cmp(l_physical, r_physical)
45    });
46    Ok(f)
47}
48
49/// Compare the values at two arbitrary indices in two arrays.
50pub type DynComparator = Box<dyn Fn(usize, usize) -> Ordering + Send + Sync>;
51
52/// If parent sort order is descending we need to invert the value of nulls_first so that
53/// when the parent is sorted based on the produced ranks, nulls are still ordered correctly
54fn child_opts(opts: SortOptions) -> SortOptions {
55    SortOptions {
56        descending: false,
57        nulls_first: opts.nulls_first != opts.descending,
58    }
59}
60
61fn compare<A, F>(l: &A, r: &A, opts: SortOptions, cmp: F) -> DynComparator
62where
63    A: Array + Clone,
64    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
65{
66    let l = l.logical_nulls().filter(|x| x.null_count() > 0);
67    let r = r.logical_nulls().filter(|x| x.null_count() > 0);
68    match (opts.nulls_first, opts.descending) {
69        (true, true) => compare_impl::<true, true, _>(l, r, cmp),
70        (true, false) => compare_impl::<true, false, _>(l, r, cmp),
71        (false, true) => compare_impl::<false, true, _>(l, r, cmp),
72        (false, false) => compare_impl::<false, false, _>(l, r, cmp),
73    }
74}
75
76fn compare_impl<const NULLS_FIRST: bool, const DESCENDING: bool, F>(
77    l: Option<NullBuffer>,
78    r: Option<NullBuffer>,
79    cmp: F,
80) -> DynComparator
81where
82    F: Fn(usize, usize) -> Ordering + Send + Sync + 'static,
83{
84    let cmp = move |i, j| match DESCENDING {
85        true => cmp(i, j).reverse(),
86        false => cmp(i, j),
87    };
88
89    let (left_null, right_null) = match NULLS_FIRST {
90        true => (Ordering::Less, Ordering::Greater),
91        false => (Ordering::Greater, Ordering::Less),
92    };
93
94    match (l, r) {
95        (None, None) => Box::new(cmp),
96        (Some(l), None) => Box::new(move |i, j| match l.is_null(i) {
97            true => left_null,
98            false => cmp(i, j),
99        }),
100        (None, Some(r)) => Box::new(move |i, j| match r.is_null(j) {
101            true => right_null,
102            false => cmp(i, j),
103        }),
104        (Some(l), Some(r)) => Box::new(move |i, j| match (l.is_null(i), r.is_null(j)) {
105            (true, true) => Ordering::Equal,
106            (true, false) => left_null,
107            (false, true) => right_null,
108            (false, false) => cmp(i, j),
109        }),
110    }
111}
112
113fn compare_primitive<T: ArrowPrimitiveType>(
114    left: &dyn Array,
115    right: &dyn Array,
116    opts: SortOptions,
117) -> DynComparator {
118    let left = left.as_primitive::<T>();
119    let right = right.as_primitive::<T>();
120    let l_values = left.values().clone();
121    let r_values = right.values().clone();
122
123    compare(&left, &right, opts, move |i, j| {
124        l_values[i].compare(r_values[j])
125    })
126}
127
128fn compare_boolean(left: &dyn Array, right: &dyn Array, opts: SortOptions) -> DynComparator {
129    let left = left.as_boolean();
130    let right = right.as_boolean();
131
132    let l_values = left.values().clone();
133    let r_values = right.values().clone();
134
135    compare(left, right, opts, move |i, j| {
136        l_values.value(i).cmp(&r_values.value(j))
137    })
138}
139
140fn compare_bytes<T: ByteArrayType>(
141    left: &dyn Array,
142    right: &dyn Array,
143    opts: SortOptions,
144) -> DynComparator {
145    let left = left.as_bytes::<T>();
146    let right = right.as_bytes::<T>();
147
148    let l = left.clone();
149    let r = right.clone();
150    compare(left, right, opts, move |i, j| {
151        let l: &[u8] = l.value(i).as_ref();
152        let r: &[u8] = r.value(j).as_ref();
153        l.cmp(r)
154    })
155}
156
157fn compare_byte_view<T: ByteViewType>(
158    left: &dyn Array,
159    right: &dyn Array,
160    opts: SortOptions,
161) -> DynComparator {
162    let left = left.as_byte_view::<T>();
163    let right = right.as_byte_view::<T>();
164
165    let l = left.clone();
166    let r = right.clone();
167    compare(left, right, opts, move |i, j| {
168        crate::cmp::compare_byte_view(&l, i, &r, j)
169    })
170}
171
172fn compare_dict<K: ArrowDictionaryKeyType>(
173    left: &dyn Array,
174    right: &dyn Array,
175    opts: SortOptions,
176) -> Result<DynComparator, ArrowError> {
177    let left = left.as_dictionary::<K>();
178    let right = right.as_dictionary::<K>();
179
180    let c_opts = child_opts(opts);
181    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
182    let left_keys = left.keys().values().clone();
183    let right_keys = right.keys().values().clone();
184
185    let f = compare(left, right, opts, move |i, j| {
186        let l = left_keys[i].as_usize();
187        let r = right_keys[j].as_usize();
188        cmp(l, r)
189    });
190    Ok(f)
191}
192
193fn compare_list<O: OffsetSizeTrait>(
194    left: &dyn Array,
195    right: &dyn Array,
196    opts: SortOptions,
197) -> Result<DynComparator, ArrowError> {
198    let left = left.as_list::<O>();
199    let right = right.as_list::<O>();
200
201    let c_opts = child_opts(opts);
202    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
203
204    let l_o = left.offsets().clone();
205    let r_o = right.offsets().clone();
206    let f = compare(left, right, opts, move |i, j| {
207        let l_end = l_o[i + 1].as_usize();
208        let l_start = l_o[i].as_usize();
209
210        let r_end = r_o[j + 1].as_usize();
211        let r_start = r_o[j].as_usize();
212
213        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
214            match cmp(i, j) {
215                Ordering::Equal => continue,
216                r => return r,
217            }
218        }
219        (l_end - l_start).cmp(&(r_end - r_start))
220    });
221    Ok(f)
222}
223
224fn compare_fixed_list(
225    left: &dyn Array,
226    right: &dyn Array,
227    opts: SortOptions,
228) -> Result<DynComparator, ArrowError> {
229    let left = left.as_fixed_size_list();
230    let right = right.as_fixed_size_list();
231
232    let c_opts = child_opts(opts);
233    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
234
235    let l_size = left.value_length().to_usize().unwrap();
236    let r_size = right.value_length().to_usize().unwrap();
237    let size_cmp = l_size.cmp(&r_size);
238
239    let f = compare(left, right, opts, move |i, j| {
240        let l_start = i * l_size;
241        let l_end = l_start + l_size;
242        let r_start = j * r_size;
243        let r_end = r_start + r_size;
244        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
245            match cmp(i, j) {
246                Ordering::Equal => continue,
247                r => return r,
248            }
249        }
250        size_cmp
251    });
252    Ok(f)
253}
254
255fn compare_list_view<O: OffsetSizeTrait>(
256    left: &dyn Array,
257    right: &dyn Array,
258    opts: SortOptions,
259) -> Result<DynComparator, ArrowError> {
260    let left = left.as_list_view::<O>();
261    let right = right.as_list_view::<O>();
262
263    let c_opts = child_opts(opts);
264    let cmp = make_comparator(left.values().as_ref(), right.values().as_ref(), c_opts)?;
265
266    let l_offsets = left.offsets().clone();
267    let l_sizes = left.sizes().clone();
268    let r_offsets = right.offsets().clone();
269    let r_sizes = right.sizes().clone();
270
271    let f = compare(left, right, opts, move |i, j| {
272        let l_start = l_offsets[i].as_usize();
273        let l_len = l_sizes[i].as_usize();
274        let l_end = l_start + l_len;
275
276        let r_start = r_offsets[j].as_usize();
277        let r_len = r_sizes[j].as_usize();
278        let r_end = r_start + r_len;
279
280        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
281            match cmp(i, j) {
282                Ordering::Equal => continue,
283                r => return r,
284            }
285        }
286        l_len.cmp(&r_len)
287    });
288    Ok(f)
289}
290
291fn compare_map(
292    left: &dyn Array,
293    right: &dyn Array,
294    opts: SortOptions,
295) -> Result<DynComparator, ArrowError> {
296    let left = left.as_map();
297    let right = right.as_map();
298
299    let c_opts = child_opts(opts);
300    let cmp = make_comparator(left.entries(), right.entries(), c_opts)?;
301
302    let l_o = left.offsets().clone();
303    let r_o = right.offsets().clone();
304    let f = compare(left, right, opts, move |i, j| {
305        let l_end = l_o[i + 1].as_usize();
306        let l_start = l_o[i].as_usize();
307
308        let r_end = r_o[j + 1].as_usize();
309        let r_start = r_o[j].as_usize();
310
311        for (i, j) in (l_start..l_end).zip(r_start..r_end) {
312            match cmp(i, j) {
313                Ordering::Equal => continue,
314                r => return r,
315            }
316        }
317        (l_end - l_start).cmp(&(r_end - r_start))
318    });
319    Ok(f)
320}
321
322fn compare_struct(
323    left: &dyn Array,
324    right: &dyn Array,
325    opts: SortOptions,
326) -> Result<DynComparator, ArrowError> {
327    let left = left.as_struct();
328    let right = right.as_struct();
329
330    if left.columns().len() != right.columns().len() {
331        return Err(ArrowError::InvalidArgumentError(
332            "Cannot compare StructArray with different number of columns".to_string(),
333        ));
334    }
335
336    let c_opts = child_opts(opts);
337    let columns = left.columns().iter().zip(right.columns());
338    let comparators = columns
339        .map(|(l, r)| make_comparator(l, r, c_opts))
340        .collect::<Result<Vec<_>, _>>()?;
341
342    let f = compare(left, right, opts, move |i, j| {
343        for cmp in &comparators {
344            match cmp(i, j) {
345                Ordering::Equal => continue,
346                r => return r,
347            }
348        }
349        Ordering::Equal
350    });
351    Ok(f)
352}
353
354fn compare_union(
355    left: &dyn Array,
356    right: &dyn Array,
357    opts: SortOptions,
358) -> Result<DynComparator, ArrowError> {
359    let left = left.as_union();
360    let right = right.as_union();
361
362    let (left_fields, left_mode) = match left.data_type() {
363        DataType::Union(fields, mode) => (fields, mode),
364        _ => unreachable!(),
365    };
366    let (right_fields, right_mode) = match right.data_type() {
367        DataType::Union(fields, mode) => (fields, mode),
368        _ => unreachable!(),
369    };
370
371    if left_fields != right_fields {
372        return Err(ArrowError::InvalidArgumentError(format!(
373            "Cannot compare UnionArrays with different fields: left={:?}, right={:?}",
374            left_fields, right_fields
375        )));
376    }
377
378    if left_mode != right_mode {
379        return Err(ArrowError::InvalidArgumentError(format!(
380            "Cannot compare UnionArrays with different modes: left={:?}, right={:?}",
381            left_mode, right_mode
382        )));
383    }
384
385    let c_opts = child_opts(opts);
386
387    let mut field_comparators = HashMap::with_capacity(left_fields.len());
388
389    for (type_id, _field) in left_fields.iter() {
390        let left_child = left.child(type_id);
391        let right_child = right.child(type_id);
392        let cmp = make_comparator(left_child.as_ref(), right_child.as_ref(), c_opts)?;
393
394        field_comparators.insert(type_id, cmp);
395    }
396
397    let left_type_ids = left.type_ids().clone();
398    let right_type_ids = right.type_ids().clone();
399
400    let left_offsets = left.offsets().cloned();
401    let right_offsets = right.offsets().cloned();
402
403    let f = compare(left, right, opts, move |i, j| {
404        let left_type_id = left_type_ids[i];
405        let right_type_id = right_type_ids[j];
406
407        // first, compare by type_id
408        match left_type_id.cmp(&right_type_id) {
409            Ordering::Equal => {
410                // second, compare by values
411                let left_offset = left_offsets.as_ref().map(|o| o[i] as usize).unwrap_or(i);
412                let right_offset = right_offsets.as_ref().map(|o| o[j] as usize).unwrap_or(j);
413
414                let cmp = field_comparators
415                    .get(&left_type_id)
416                    .expect("type id not found in field_comparators");
417
418                cmp(left_offset, right_offset)
419            }
420            other => other,
421        }
422    });
423    Ok(f)
424}
425
426/// Returns a comparison function that compares two values at two different positions
427/// between the two arrays.
428///
429/// For comparing arrays element-wise, see also the vectorised kernels in [`crate::cmp`].
430///
431/// If `nulls_first` is true `NULL` values will be considered less than any non-null value,
432/// otherwise they will be considered greater.
433///
434/// # Basic Usage
435///
436/// ```
437/// # use std::cmp::Ordering;
438/// # use arrow_array::Int32Array;
439/// # use arrow_ord::ord::make_comparator;
440/// # use arrow_schema::SortOptions;
441/// #
442/// let array1 = Int32Array::from(vec![1, 2]);
443/// let array2 = Int32Array::from(vec![3, 4]);
444///
445/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
446/// // 1 (index 0 of array1) is smaller than 4 (index 1 of array2)
447/// assert_eq!(cmp(0, 1), Ordering::Less);
448///
449/// let array1 = Int32Array::from(vec![Some(1), None]);
450/// let array2 = Int32Array::from(vec![None, Some(2)]);
451/// let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
452///
453/// assert_eq!(cmp(0, 1), Ordering::Less); // Some(1) vs Some(2)
454/// assert_eq!(cmp(1, 1), Ordering::Less); // None vs Some(2)
455/// assert_eq!(cmp(1, 0), Ordering::Equal); // None vs None
456/// assert_eq!(cmp(0, 0), Ordering::Greater); // Some(1) vs None
457/// ```
458///
459/// # Postgres-compatible Nested Comparison
460///
461/// Whilst SQL prescribes ternary logic for nulls, that is comparing a value against a NULL yields
462/// a NULL, many systems, including postgres, instead apply a total ordering to comparison of
463/// nested nulls. That is nulls within nested types are either greater than any value (postgres),
464/// or less than any value (Spark).
465///
466/// In particular
467///
468/// ```ignore
469/// { a: 1, b: null } == { a: 1, b: null } => true
470/// { a: 1, b: null } == { a: 1, b: 1 } => false
471/// { a: 1, b: null } == null => null
472/// null == null => null
473/// ```
474///
475/// This could be implemented as below
476///
477/// ```
478/// # use arrow_array::{Array, BooleanArray};
479/// # use arrow_buffer::NullBuffer;
480/// # use arrow_ord::cmp;
481/// # use arrow_ord::ord::make_comparator;
482/// # use arrow_schema::{ArrowError, SortOptions};
483/// fn eq(a: &dyn Array, b: &dyn Array) -> Result<BooleanArray, ArrowError> {
484///     if !a.data_type().is_nested() {
485///         return cmp::eq(&a, &b); // Use faster vectorised kernel
486///     }
487///
488///     let cmp = make_comparator(a, b, SortOptions::default())?;
489///     let len = a.len().min(b.len());
490///     let values = (0..len).map(|i| cmp(i, i).is_eq()).collect();
491///     let nulls = NullBuffer::union(a.nulls(), b.nulls());
492///     Ok(BooleanArray::new(values, nulls))
493/// }
494/// ````
495pub fn make_comparator(
496    left: &dyn Array,
497    right: &dyn Array,
498    opts: SortOptions,
499) -> Result<DynComparator, ArrowError> {
500    use arrow_schema::DataType::*;
501
502    macro_rules! primitive_helper {
503        ($t:ty, $left:expr, $right:expr, $nulls_first:expr) => {
504            Ok(compare_primitive::<$t>($left, $right, $nulls_first))
505        };
506    }
507    downcast_primitive! {
508        left.data_type(), right.data_type() => (primitive_helper, left, right, opts),
509        (Boolean, Boolean) => Ok(compare_boolean(left, right, opts)),
510        (Utf8, Utf8) => Ok(compare_bytes::<Utf8Type>(left, right, opts)),
511        (LargeUtf8, LargeUtf8) => Ok(compare_bytes::<LargeUtf8Type>(left, right, opts)),
512        (Utf8View, Utf8View) => Ok(compare_byte_view::<StringViewType>(left, right, opts)),
513        (Binary, Binary) => Ok(compare_bytes::<BinaryType>(left, right, opts)),
514        (LargeBinary, LargeBinary) => Ok(compare_bytes::<LargeBinaryType>(left, right, opts)),
515        (BinaryView, BinaryView) => Ok(compare_byte_view::<BinaryViewType>(left, right, opts)),
516        (FixedSizeBinary(_), FixedSizeBinary(_)) => {
517            let left = left.as_fixed_size_binary();
518            let right = right.as_fixed_size_binary();
519
520            let l = left.clone();
521            let r = right.clone();
522            Ok(compare(left, right, opts, move |i, j| {
523                l.value(i).cmp(r.value(j))
524            }))
525        },
526        (List(_), List(_)) => compare_list::<i32>(left, right, opts),
527        (LargeList(_), LargeList(_)) => compare_list::<i64>(left, right, opts),
528        (ListView(_), ListView(_)) => compare_list_view::<i32>(left, right, opts),
529        (LargeListView(_), LargeListView(_)) => compare_list_view::<i64>(left, right, opts),
530        (FixedSizeList(_, _), FixedSizeList(_, _)) => compare_fixed_list(left, right, opts),
531        (Struct(_), Struct(_)) => compare_struct(left, right, opts),
532        (Dictionary(l_key, _), Dictionary(r_key, _)) => {
533             macro_rules! dict_helper {
534                ($t:ty, $left:expr, $right:expr, $opts: expr) => {
535                     compare_dict::<$t>($left, $right, $opts)
536                 };
537             }
538            downcast_integer! {
539                 l_key.as_ref(), r_key.as_ref() => (dict_helper, left, right, opts),
540                 _ => unreachable!()
541             }
542        },
543        (RunEndEncoded(l_run_ends, _), RunEndEncoded(r_run_ends, _)) => {
544            macro_rules! run_end_helper {
545                ($t:ty, $left:expr, $right:expr, $opts:expr) => {
546                    compare_run_end_encoded::<$t>($left, $right, $opts)
547                };
548            }
549            downcast_run_end_index! {
550                l_run_ends.data_type(), r_run_ends.data_type() => (run_end_helper, left, right, opts),
551                _ => Err(ArrowError::InvalidArgumentError(format!(
552                    "Cannot compare RunEndEncoded arrays with different run ends types: left={:?}, right={:?}",
553                    l_run_ends.data_type(),
554                    r_run_ends.data_type()
555                )))
556            }
557        },
558        (Map(_, _), Map(_, _)) => compare_map(left, right, opts),
559        (Null, Null) => Ok(Box::new(|_, _| Ordering::Equal)),
560        (Union(_, _), Union(_, _)) => compare_union(left, right, opts),
561        (lhs, rhs) => Err(ArrowError::InvalidArgumentError(match lhs == rhs {
562            true => format!("The data type type {lhs:?} has no natural order"),
563            false => "Can't compare arrays of different types".to_string(),
564        }))
565    }
566}
567
568#[cfg(test)]
569mod tests {
570    use super::*;
571    use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder, StringBuilder};
572    use arrow_buffer::{IntervalDayTime, NullBuffer, OffsetBuffer, ScalarBuffer, i256};
573    use arrow_schema::{DataType, Field, Fields, UnionFields};
574    use half::f16;
575    use std::sync::Arc;
576
577    #[test]
578    fn test_fixed_size_binary() {
579        let items = vec![vec![1u8], vec![2u8]];
580        let array = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
581
582        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
583
584        assert_eq!(Ordering::Less, cmp(0, 1));
585    }
586
587    #[test]
588    fn test_fixed_size_binary_fixed_size_binary() {
589        let items = vec![vec![1u8]];
590        let array1 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
591        let items = vec![vec![2u8]];
592        let array2 = FixedSizeBinaryArray::try_from_iter(items.into_iter()).unwrap();
593
594        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
595
596        assert_eq!(Ordering::Less, cmp(0, 0));
597    }
598
599    #[test]
600    fn test_i32() {
601        let array = Int32Array::from(vec![1, 2]);
602
603        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
604
605        assert_eq!(Ordering::Less, (cmp)(0, 1));
606    }
607
608    #[test]
609    fn test_i32_i32() {
610        let array1 = Int32Array::from(vec![1]);
611        let array2 = Int32Array::from(vec![2]);
612
613        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
614
615        assert_eq!(Ordering::Less, cmp(0, 0));
616    }
617
618    #[test]
619    fn test_f16() {
620        let array = Float16Array::from(vec![f16::from_f32(1.0), f16::from_f32(2.0)]);
621
622        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
623
624        assert_eq!(Ordering::Less, cmp(0, 1));
625    }
626
627    #[test]
628    fn test_f64() {
629        let array = Float64Array::from(vec![1.0, 2.0]);
630
631        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
632
633        assert_eq!(Ordering::Less, cmp(0, 1));
634    }
635
636    #[test]
637    fn test_f64_nan() {
638        let array = Float64Array::from(vec![1.0, f64::NAN]);
639
640        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
641
642        assert_eq!(Ordering::Less, cmp(0, 1));
643        assert_eq!(Ordering::Equal, cmp(1, 1));
644    }
645
646    #[test]
647    fn test_f64_zeros() {
648        let array = Float64Array::from(vec![-0.0, 0.0]);
649
650        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
651
652        assert_eq!(Ordering::Less, cmp(0, 1));
653        assert_eq!(Ordering::Greater, cmp(1, 0));
654    }
655
656    #[test]
657    fn test_interval_day_time() {
658        let array = IntervalDayTimeArray::from(vec![
659            // 0 days, 1 second
660            IntervalDayTimeType::make_value(0, 1000),
661            // 1 day, 2 milliseconds
662            IntervalDayTimeType::make_value(1, 2),
663            // 90M milliseconds (which is more than is in 1 day)
664            IntervalDayTimeType::make_value(0, 90_000_000),
665        ]);
666
667        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
668
669        assert_eq!(Ordering::Less, cmp(0, 1));
670        assert_eq!(Ordering::Greater, cmp(1, 0));
671
672        // somewhat confusingly, while 90M milliseconds is more than 1 day,
673        // it will compare less as the comparison is done on the underlying
674        // values not field by field
675        assert_eq!(Ordering::Greater, cmp(1, 2));
676        assert_eq!(Ordering::Less, cmp(2, 1));
677    }
678
679    #[test]
680    fn test_interval_year_month() {
681        let array = IntervalYearMonthArray::from(vec![
682            // 1 year, 0 months
683            IntervalYearMonthType::make_value(1, 0),
684            // 0 years, 13 months
685            IntervalYearMonthType::make_value(0, 13),
686            // 1 year, 1 month
687            IntervalYearMonthType::make_value(1, 1),
688        ]);
689
690        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
691
692        assert_eq!(Ordering::Less, cmp(0, 1));
693        assert_eq!(Ordering::Greater, cmp(1, 0));
694
695        // the underlying representation is months, so both quantities are the same
696        assert_eq!(Ordering::Equal, cmp(1, 2));
697        assert_eq!(Ordering::Equal, cmp(2, 1));
698    }
699
700    #[test]
701    fn test_interval_month_day_nano() {
702        let array = IntervalMonthDayNanoArray::from(vec![
703            // 100 days
704            IntervalMonthDayNanoType::make_value(0, 100, 0),
705            // 1 month
706            IntervalMonthDayNanoType::make_value(1, 0, 0),
707            // 100 day, 1 nanoseconds
708            IntervalMonthDayNanoType::make_value(0, 100, 2),
709        ]);
710
711        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
712
713        assert_eq!(Ordering::Less, cmp(0, 1));
714        assert_eq!(Ordering::Greater, cmp(1, 0));
715
716        // somewhat confusingly, while 100 days is more than 1 month in all cases
717        // it will compare less as the comparison is done on the underlying
718        // values not field by field
719        assert_eq!(Ordering::Greater, cmp(1, 2));
720        assert_eq!(Ordering::Less, cmp(2, 1));
721    }
722
723    #[test]
724    fn test_decimali32() {
725        let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
726            .into_iter()
727            .collect::<Decimal32Array>()
728            .with_precision_and_scale(8, 6)
729            .unwrap();
730
731        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
732        assert_eq!(Ordering::Less, cmp(1, 0));
733        assert_eq!(Ordering::Greater, cmp(0, 2));
734    }
735
736    #[test]
737    fn test_decimali64() {
738        let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
739            .into_iter()
740            .collect::<Decimal64Array>()
741            .with_precision_and_scale(16, 6)
742            .unwrap();
743
744        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
745        assert_eq!(Ordering::Less, cmp(1, 0));
746        assert_eq!(Ordering::Greater, cmp(0, 2));
747    }
748
749    #[test]
750    fn test_decimali128() {
751        let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
752            .into_iter()
753            .collect::<Decimal128Array>()
754            .with_precision_and_scale(23, 6)
755            .unwrap();
756
757        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
758        assert_eq!(Ordering::Less, cmp(1, 0));
759        assert_eq!(Ordering::Greater, cmp(0, 2));
760    }
761
762    #[test]
763    fn test_decimali256() {
764        let array = vec![
765            Some(i256::from_i128(5_i128)),
766            Some(i256::from_i128(2_i128)),
767            Some(i256::from_i128(3_i128)),
768        ]
769        .into_iter()
770        .collect::<Decimal256Array>()
771        .with_precision_and_scale(53, 6)
772        .unwrap();
773
774        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
775        assert_eq!(Ordering::Less, cmp(1, 0));
776        assert_eq!(Ordering::Greater, cmp(0, 2));
777    }
778
779    #[test]
780    fn test_dict() {
781        let data = vec!["a", "b", "c", "a", "a", "c", "c"];
782        let array = data.into_iter().collect::<DictionaryArray<Int16Type>>();
783
784        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
785
786        assert_eq!(Ordering::Less, cmp(0, 1));
787        assert_eq!(Ordering::Equal, cmp(3, 4));
788        assert_eq!(Ordering::Greater, cmp(2, 3));
789    }
790
791    #[test]
792    fn test_multiple_dict() {
793        let d1 = vec!["a", "b", "c", "d"];
794        let a1 = d1.into_iter().collect::<DictionaryArray<Int16Type>>();
795        let d2 = vec!["e", "f", "g", "a"];
796        let a2 = d2.into_iter().collect::<DictionaryArray<Int16Type>>();
797
798        let cmp = make_comparator(&a1, &a2, SortOptions::default()).unwrap();
799
800        assert_eq!(Ordering::Less, cmp(0, 0));
801        assert_eq!(Ordering::Equal, cmp(0, 3));
802        assert_eq!(Ordering::Greater, cmp(1, 3));
803    }
804
805    #[test]
806    fn test_primitive_dict() {
807        let values = Int32Array::from(vec![1_i32, 0, 2, 5]);
808        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
809        let array1 = DictionaryArray::new(keys, Arc::new(values));
810
811        let values = Int32Array::from(vec![2_i32, 3, 4, 5]);
812        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
813        let array2 = DictionaryArray::new(keys, Arc::new(values));
814
815        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
816
817        assert_eq!(Ordering::Less, cmp(0, 0));
818        assert_eq!(Ordering::Less, cmp(0, 3));
819        assert_eq!(Ordering::Equal, cmp(3, 3));
820        assert_eq!(Ordering::Greater, cmp(3, 1));
821        assert_eq!(Ordering::Greater, cmp(3, 2));
822    }
823
824    #[test]
825    fn test_float_dict() {
826        let values = Float32Array::from(vec![1.0, 0.5, 2.1, 5.5]);
827        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
828        let array1 = DictionaryArray::try_new(keys, Arc::new(values)).unwrap();
829
830        let values = Float32Array::from(vec![1.2, 3.2, 4.0, 5.5]);
831        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
832        let array2 = DictionaryArray::new(keys, Arc::new(values));
833
834        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
835
836        assert_eq!(Ordering::Less, cmp(0, 0));
837        assert_eq!(Ordering::Less, cmp(0, 3));
838        assert_eq!(Ordering::Equal, cmp(3, 3));
839        assert_eq!(Ordering::Greater, cmp(3, 1));
840        assert_eq!(Ordering::Greater, cmp(3, 2));
841    }
842
843    #[test]
844    fn test_timestamp_dict() {
845        let values = TimestampSecondArray::from(vec![1, 0, 2, 5]);
846        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
847        let array1 = DictionaryArray::new(keys, Arc::new(values));
848
849        let values = TimestampSecondArray::from(vec![2, 3, 4, 5]);
850        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
851        let array2 = DictionaryArray::new(keys, Arc::new(values));
852
853        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
854
855        assert_eq!(Ordering::Less, cmp(0, 0));
856        assert_eq!(Ordering::Less, cmp(0, 3));
857        assert_eq!(Ordering::Equal, cmp(3, 3));
858        assert_eq!(Ordering::Greater, cmp(3, 1));
859        assert_eq!(Ordering::Greater, cmp(3, 2));
860    }
861
862    #[test]
863    fn test_interval_dict() {
864        let v1 = IntervalDayTime::new(0, 1);
865        let v2 = IntervalDayTime::new(0, 2);
866        let v3 = IntervalDayTime::new(12, 2);
867
868        let values = IntervalDayTimeArray::from(vec![Some(v1), Some(v2), None, Some(v3)]);
869        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
870        let array1 = DictionaryArray::new(keys, Arc::new(values));
871
872        let values = IntervalDayTimeArray::from(vec![Some(v3), Some(v2), None, Some(v1)]);
873        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
874        let array2 = DictionaryArray::new(keys, Arc::new(values));
875
876        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
877
878        assert_eq!(Ordering::Less, cmp(0, 0)); // v1 vs v3
879        assert_eq!(Ordering::Equal, cmp(0, 3)); // v1 vs v1
880        assert_eq!(Ordering::Greater, cmp(3, 3)); // v3 vs v1
881        assert_eq!(Ordering::Greater, cmp(3, 1)); // v3 vs v2
882        assert_eq!(Ordering::Greater, cmp(3, 2)); // v3 vs v2
883    }
884
885    #[test]
886    fn test_duration_dict() {
887        let values = DurationSecondArray::from(vec![1, 0, 2, 5]);
888        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
889        let array1 = DictionaryArray::new(keys, Arc::new(values));
890
891        let values = DurationSecondArray::from(vec![2, 3, 4, 5]);
892        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
893        let array2 = DictionaryArray::new(keys, Arc::new(values));
894
895        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
896
897        assert_eq!(Ordering::Less, cmp(0, 0));
898        assert_eq!(Ordering::Less, cmp(0, 3));
899        assert_eq!(Ordering::Equal, cmp(3, 3));
900        assert_eq!(Ordering::Greater, cmp(3, 1));
901        assert_eq!(Ordering::Greater, cmp(3, 2));
902    }
903
904    #[test]
905    fn test_decimal_dict() {
906        let values = Decimal128Array::from(vec![1, 0, 2, 5]);
907        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
908        let array1 = DictionaryArray::new(keys, Arc::new(values));
909
910        let values = Decimal128Array::from(vec![2, 3, 4, 5]);
911        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
912        let array2 = DictionaryArray::new(keys, Arc::new(values));
913
914        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
915
916        assert_eq!(Ordering::Less, cmp(0, 0));
917        assert_eq!(Ordering::Less, cmp(0, 3));
918        assert_eq!(Ordering::Equal, cmp(3, 3));
919        assert_eq!(Ordering::Greater, cmp(3, 1));
920        assert_eq!(Ordering::Greater, cmp(3, 2));
921    }
922
923    #[test]
924    fn test_decimal256_dict() {
925        let values = Decimal256Array::from(vec![
926            i256::from_i128(1),
927            i256::from_i128(0),
928            i256::from_i128(2),
929            i256::from_i128(5),
930        ]);
931        let keys = Int8Array::from_iter_values([0, 0, 1, 3]);
932        let array1 = DictionaryArray::new(keys, Arc::new(values));
933
934        let values = Decimal256Array::from(vec![
935            i256::from_i128(2),
936            i256::from_i128(3),
937            i256::from_i128(4),
938            i256::from_i128(5),
939        ]);
940        let keys = Int8Array::from_iter_values([0, 1, 1, 3]);
941        let array2 = DictionaryArray::new(keys, Arc::new(values));
942
943        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
944
945        assert_eq!(Ordering::Less, cmp(0, 0));
946        assert_eq!(Ordering::Less, cmp(0, 3));
947        assert_eq!(Ordering::Equal, cmp(3, 3));
948        assert_eq!(Ordering::Greater, cmp(3, 1));
949        assert_eq!(Ordering::Greater, cmp(3, 2));
950    }
951
952    fn test_bytes_impl<T: ByteArrayType>() {
953        let offsets = OffsetBuffer::from_lengths([3, 3, 1]);
954        let a = GenericByteArray::<T>::new(offsets, b"abcdefa".into(), None);
955        let cmp = make_comparator(&a, &a, SortOptions::default()).unwrap();
956
957        assert_eq!(Ordering::Less, cmp(0, 1));
958        assert_eq!(Ordering::Greater, cmp(0, 2));
959        assert_eq!(Ordering::Equal, cmp(1, 1));
960    }
961
962    #[test]
963    fn test_bytes() {
964        test_bytes_impl::<Utf8Type>();
965        test_bytes_impl::<LargeUtf8Type>();
966        test_bytes_impl::<BinaryType>();
967        test_bytes_impl::<LargeBinaryType>();
968    }
969
970    fn assert_cmp_cases<A: Array>(
971        array1: &A,
972        array2: &A,
973        opts: SortOptions,
974        cases: &[(usize, usize, Ordering)],
975    ) {
976        let cmp = make_comparator(array1, array2, opts).unwrap();
977        for (left, right, expected) in cases {
978            assert_eq!(cmp(*left, *right), *expected);
979        }
980    }
981
982    #[test]
983    fn test_lists() {
984        let mut a = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
985        a.extend([
986            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
987            Some(vec![
988                Some(vec![Some(1), Some(2), Some(3)]),
989                Some(vec![Some(1)]),
990            ]),
991            Some(vec![]),
992        ]);
993        let a = a.finish();
994        let mut b = ListBuilder::new(ListBuilder::new(Int32Builder::new()));
995        b.extend([
996            Some(vec![Some(vec![Some(1), Some(2), None]), Some(vec![None])]),
997            Some(vec![
998                Some(vec![Some(1), Some(2), None]),
999                Some(vec![Some(1)]),
1000            ]),
1001            Some(vec![
1002                Some(vec![Some(1), Some(2), Some(3), Some(4)]),
1003                Some(vec![Some(1)]),
1004            ]),
1005            None,
1006        ]);
1007        let b = b.finish();
1008
1009        // Ascending with nulls first.
1010        assert_cmp_cases(
1011            &a,
1012            &b,
1013            SortOptions {
1014                descending: false,
1015                nulls_first: true,
1016            },
1017            &[
1018                (0, 0, Ordering::Equal),
1019                (0, 1, Ordering::Less),
1020                (0, 2, Ordering::Less),
1021                (1, 2, Ordering::Less),
1022                (1, 3, Ordering::Greater),
1023                (2, 0, Ordering::Less),
1024            ],
1025        );
1026
1027        // Descending with nulls first.
1028        assert_cmp_cases(
1029            &a,
1030            &b,
1031            SortOptions {
1032                descending: true,
1033                nulls_first: true,
1034            },
1035            &[
1036                (0, 0, Ordering::Equal),
1037                (0, 1, Ordering::Less),
1038                (0, 2, Ordering::Less),
1039                (1, 2, Ordering::Greater),
1040                (1, 3, Ordering::Greater),
1041                (2, 0, Ordering::Greater),
1042            ],
1043        );
1044
1045        // Descending with nulls last.
1046        assert_cmp_cases(
1047            &a,
1048            &b,
1049            SortOptions {
1050                descending: true,
1051                nulls_first: false,
1052            },
1053            &[
1054                (0, 0, Ordering::Equal),
1055                (0, 1, Ordering::Greater),
1056                (0, 2, Ordering::Greater),
1057                (1, 2, Ordering::Greater),
1058                (1, 3, Ordering::Less),
1059                (2, 0, Ordering::Greater),
1060            ],
1061        );
1062
1063        // Ascending with nulls last.
1064        assert_cmp_cases(
1065            &a,
1066            &b,
1067            SortOptions {
1068                descending: false,
1069                nulls_first: false,
1070            },
1071            &[
1072                (0, 0, Ordering::Equal),
1073                (0, 1, Ordering::Greater),
1074                (0, 2, Ordering::Greater),
1075                (1, 2, Ordering::Less),
1076                (1, 3, Ordering::Less),
1077                (2, 0, Ordering::Less),
1078            ],
1079        );
1080    }
1081
1082    fn list_view_array<O: OffsetSizeTrait>(
1083        values: Vec<i32>,
1084        offsets: &[usize],
1085        sizes: &[usize],
1086        valid: Option<&[bool]>,
1087    ) -> GenericListViewArray<O> {
1088        let offsets = offsets
1089            .iter()
1090            .map(|v| O::from_usize(*v).unwrap())
1091            .collect::<ScalarBuffer<O>>();
1092        let sizes = sizes
1093            .iter()
1094            .map(|v| O::from_usize(*v).unwrap())
1095            .collect::<ScalarBuffer<O>>();
1096        let field = Arc::new(Field::new_list_field(DataType::Int32, true));
1097        let values = Int32Array::from(values);
1098        let nulls = valid.map(NullBuffer::from);
1099        GenericListViewArray::new(field, offsets, sizes, Arc::new(values), nulls)
1100    }
1101
1102    fn test_list_view_comparisons<O: OffsetSizeTrait>() {
1103        let array = list_view_array::<O>(
1104            vec![1, 2, 3, 4, 5],
1105            &[0, 2, 1, 0, 3],
1106            &[2, 2, 2, 0, 2],
1107            Some(&[true, true, true, true, false]),
1108        );
1109
1110        // Ascending with nulls first (non-monotonic offsets and empty list).
1111        assert_cmp_cases(
1112            &array,
1113            &array,
1114            SortOptions {
1115                descending: false,
1116                nulls_first: true,
1117            },
1118            &[
1119                (0, 2, Ordering::Less),    // [1,2] < [2,3]
1120                (1, 2, Ordering::Greater), // [3,4] > [2,3]
1121                (3, 0, Ordering::Less),    // [] < [1,2]
1122                (4, 0, Ordering::Less),    // null < [1,2]
1123            ],
1124        );
1125
1126        // Ascending with nulls last.
1127        assert_cmp_cases(
1128            &array,
1129            &array,
1130            SortOptions {
1131                descending: false,
1132                nulls_first: false,
1133            },
1134            &[
1135                (0, 2, Ordering::Less),
1136                (1, 2, Ordering::Greater),
1137                (3, 0, Ordering::Less),
1138                (4, 0, Ordering::Greater), // null last
1139            ],
1140        );
1141
1142        // Descending with nulls first.
1143        assert_cmp_cases(
1144            &array,
1145            &array,
1146            SortOptions {
1147                descending: true,
1148                nulls_first: true,
1149            },
1150            &[
1151                (0, 2, Ordering::Greater),
1152                (1, 2, Ordering::Less),
1153                (3, 0, Ordering::Greater),
1154                (4, 0, Ordering::Less),
1155            ],
1156        );
1157
1158        // Descending with nulls last.
1159        assert_cmp_cases(
1160            &array,
1161            &array,
1162            SortOptions {
1163                descending: true,
1164                nulls_first: false,
1165            },
1166            &[
1167                (0, 2, Ordering::Greater),
1168                (1, 2, Ordering::Less),
1169                (3, 0, Ordering::Greater),
1170                (4, 0, Ordering::Greater),
1171            ],
1172        );
1173    }
1174
1175    #[test]
1176    fn test_list_view() {
1177        test_list_view_comparisons::<i32>();
1178    }
1179
1180    #[test]
1181    fn test_large_list_view() {
1182        test_list_view_comparisons::<i64>();
1183    }
1184
1185    #[test]
1186    fn test_struct() {
1187        let fields = Fields::from(vec![
1188            Field::new("a", DataType::Int32, true),
1189            Field::new_list("b", Field::new_list_field(DataType::Int32, true), true),
1190        ]);
1191
1192        let a = Int32Array::from(vec![Some(1), Some(2), None, None]);
1193        let mut b = ListBuilder::new(Int32Builder::new());
1194        b.extend([Some(vec![Some(1), Some(2)]), Some(vec![None]), None, None]);
1195        let b = b.finish();
1196
1197        let nulls = Some(NullBuffer::from_iter([true, true, true, false]));
1198        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1199        let s1 = StructArray::new(fields.clone(), values, nulls);
1200
1201        let a = Int32Array::from(vec![None, Some(2), None]);
1202        let mut b = ListBuilder::new(Int32Builder::new());
1203        b.extend([None, None, Some(vec![])]);
1204        let b = b.finish();
1205
1206        let values = vec![Arc::new(a) as _, Arc::new(b) as _];
1207        let s2 = StructArray::new(fields.clone(), values, None);
1208
1209        let opts = SortOptions {
1210            descending: false,
1211            nulls_first: true,
1212        };
1213        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1214        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1215        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1216        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1217        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1218        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1219        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1220        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1221
1222        let opts = SortOptions {
1223            descending: true,
1224            nulls_first: true,
1225        };
1226        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1227        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1228        assert_eq!(cmp(0, 0), Ordering::Greater); // (1, [1, 2]) cmp (None, None)
1229        assert_eq!(cmp(1, 1), Ordering::Greater); // (2, [None]) cmp (2, None)
1230        assert_eq!(cmp(2, 2), Ordering::Less); // (None, None) cmp (None, [])
1231        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, [])
1232        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1233        assert_eq!(cmp(3, 0), Ordering::Less); // None cmp (None, None)
1234
1235        let opts = SortOptions {
1236            descending: true,
1237            nulls_first: false,
1238        };
1239        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1240        assert_eq!(cmp(0, 1), Ordering::Greater); // (1, [1, 2]) cmp (2, None)
1241        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1242        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1243        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1244        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1245        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1246        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1247
1248        let opts = SortOptions {
1249            descending: false,
1250            nulls_first: false,
1251        };
1252        let cmp = make_comparator(&s1, &s2, opts).unwrap();
1253        assert_eq!(cmp(0, 1), Ordering::Less); // (1, [1, 2]) cmp (2, None)
1254        assert_eq!(cmp(0, 0), Ordering::Less); // (1, [1, 2]) cmp (None, None)
1255        assert_eq!(cmp(1, 1), Ordering::Less); // (2, [None]) cmp (2, None)
1256        assert_eq!(cmp(2, 2), Ordering::Greater); // (None, None) cmp (None, [])
1257        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, [])
1258        assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, None)
1259        assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
1260    }
1261
1262    #[test]
1263    fn test_map() {
1264        // Create first map array demonstrating key priority over values:
1265        // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
1266        let string_builder = StringBuilder::new();
1267        let int_builder = Int32Builder::new();
1268        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1269
1270        // {"a": 100, "b": 1} - high value for "a", low value for "b"
1271        map1_builder.keys().append_value("a");
1272        map1_builder.values().append_value(100);
1273        map1_builder.keys().append_value("b");
1274        map1_builder.values().append_value(1);
1275        map1_builder.append(true).unwrap();
1276
1277        // {"b": 999, "c": 1} - very high value for "b", low value for "c"
1278        map1_builder.keys().append_value("b");
1279        map1_builder.values().append_value(999);
1280        map1_builder.keys().append_value("c");
1281        map1_builder.values().append_value(1);
1282        map1_builder.append(true).unwrap();
1283
1284        // {}
1285        map1_builder.append(true).unwrap();
1286
1287        // {"x": 1}
1288        map1_builder.keys().append_value("x");
1289        map1_builder.values().append_value(1);
1290        map1_builder.append(true).unwrap();
1291
1292        let map1 = map1_builder.finish();
1293
1294        // Create second map array:
1295        // [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
1296        let string_builder = StringBuilder::new();
1297        let int_builder = Int32Builder::new();
1298        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1299
1300        // {"a": 1, "c": 999} - low value for "a", high value for "c"
1301        map2_builder.keys().append_value("a");
1302        map2_builder.values().append_value(1);
1303        map2_builder.keys().append_value("c");
1304        map2_builder.values().append_value(999);
1305        map2_builder.append(true).unwrap();
1306
1307        // {"b": 1, "d": 999} - low value for "b", high value for "d"
1308        map2_builder.keys().append_value("b");
1309        map2_builder.values().append_value(1);
1310        map2_builder.keys().append_value("d");
1311        map2_builder.values().append_value(999);
1312        map2_builder.append(true).unwrap();
1313
1314        // {"a": 1}
1315        map2_builder.keys().append_value("a");
1316        map2_builder.values().append_value(1);
1317        map2_builder.append(true).unwrap();
1318
1319        // None
1320        map2_builder.append(false).unwrap();
1321
1322        let map2 = map2_builder.finish();
1323
1324        let opts = SortOptions {
1325            descending: false,
1326            nulls_first: true,
1327        };
1328        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1329
1330        // Test that keys have priority over values:
1331        // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1332        // First entries match (a:100 vs a:1), but 100 > 1, so Greater
1333        assert_eq!(cmp(0, 0), Ordering::Greater);
1334
1335        // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1336        // First entries match (b:999 vs b:1), but 999 > 1, so Greater
1337        assert_eq!(cmp(1, 1), Ordering::Greater);
1338
1339        // Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 1}
1340        assert_eq!(cmp(0, 1), Ordering::Less);
1341
1342        // Empty map vs non-empty
1343        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1344
1345        // Non-null vs null
1346        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1347
1348        // Key priority test: "x" > "a", regardless of values
1349        assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 999}
1350
1351        // Empty vs non-empty
1352        assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
1353
1354        let opts = SortOptions {
1355            descending: true,
1356            nulls_first: true,
1357        };
1358        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1359
1360        // With descending=true, value comparison is reversed
1361        assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 1, "c": 999} (reversed)
1362        assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 1, "d": 999} (reversed)
1363        assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs {"b": 999, "c": 1} (key order reversed)
1364        assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
1365        assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
1366
1367        let opts = SortOptions {
1368            descending: false,
1369            nulls_first: false,
1370        };
1371        let cmp = make_comparator(&map1, &map2, opts).unwrap();
1372
1373        // Same key priority behavior with nulls_first=false
1374        assert_eq!(cmp(0, 0), Ordering::Greater); // {"a": 100, "b": 1} vs {"a": 1, "c": 999}
1375        assert_eq!(cmp(1, 1), Ordering::Greater); // {"b": 999, "c": 1} vs {"b": 1, "d": 999}
1376        assert_eq!(cmp(3, 3), Ordering::Less); // {"x": 1} < None (nulls last)
1377        assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
1378    }
1379
1380    #[test]
1381    fn test_map_vs_list_consistency() {
1382        // Create map arrays and convert them to list arrays to verify comparison consistency
1383        // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
1384        let string_builder = StringBuilder::new();
1385        let int_builder = Int32Builder::new();
1386        let mut map1_builder = MapBuilder::new(None, string_builder, int_builder);
1387
1388        // {"a": 1, "b": 2}
1389        map1_builder.keys().append_value("a");
1390        map1_builder.values().append_value(1);
1391        map1_builder.keys().append_value("b");
1392        map1_builder.values().append_value(2);
1393        map1_builder.append(true).unwrap();
1394
1395        // {"x": 10}
1396        map1_builder.keys().append_value("x");
1397        map1_builder.values().append_value(10);
1398        map1_builder.append(true).unwrap();
1399
1400        // {}
1401        map1_builder.append(true).unwrap();
1402
1403        // {"c": 3}
1404        map1_builder.keys().append_value("c");
1405        map1_builder.values().append_value(3);
1406        map1_builder.append(true).unwrap();
1407
1408        let map1 = map1_builder.finish();
1409
1410        // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
1411        let string_builder = StringBuilder::new();
1412        let int_builder = Int32Builder::new();
1413        let mut map2_builder = MapBuilder::new(None, string_builder, int_builder);
1414
1415        // {"a": 1, "b": 2}
1416        map2_builder.keys().append_value("a");
1417        map2_builder.values().append_value(1);
1418        map2_builder.keys().append_value("b");
1419        map2_builder.values().append_value(2);
1420        map2_builder.append(true).unwrap();
1421
1422        // {"y": 20}
1423        map2_builder.keys().append_value("y");
1424        map2_builder.values().append_value(20);
1425        map2_builder.append(true).unwrap();
1426
1427        // {"d": 4}
1428        map2_builder.keys().append_value("d");
1429        map2_builder.values().append_value(4);
1430        map2_builder.append(true).unwrap();
1431
1432        // None
1433        map2_builder.append(false).unwrap();
1434
1435        let map2 = map2_builder.finish();
1436
1437        // Convert map arrays to list arrays (Map entries are struct arrays with key-value pairs)
1438        let list1: ListArray = map1.clone().into();
1439        let list2: ListArray = map2.clone().into();
1440
1441        let test_cases = [
1442            SortOptions {
1443                descending: false,
1444                nulls_first: true,
1445            },
1446            SortOptions {
1447                descending: true,
1448                nulls_first: true,
1449            },
1450            SortOptions {
1451                descending: false,
1452                nulls_first: false,
1453            },
1454            SortOptions {
1455                descending: true,
1456                nulls_first: false,
1457            },
1458        ];
1459
1460        for opts in test_cases {
1461            let map_cmp = make_comparator(&map1, &map2, opts).unwrap();
1462            let list_cmp = make_comparator(&list1, &list2, opts).unwrap();
1463
1464            // Test all possible index combinations
1465            for i in 0..map1.len() {
1466                for j in 0..map2.len() {
1467                    let map_result = map_cmp(i, j);
1468                    let list_result = list_cmp(i, j);
1469                    assert_eq!(
1470                        map_result, list_result,
1471                        "Map comparison and List comparison should be equal for indices ({i}, {j}) with opts {opts:?}. Map: {map_result:?}, List: {list_result:?}"
1472                    );
1473                }
1474            }
1475        }
1476    }
1477
1478    #[test]
1479    fn test_dense_union() {
1480        // create a dense union array with Int32 (type_id = 0) and Utf8 (type_id=1)
1481        // the values are: [1, "b", 2, "a", 3]
1482        //  type_ids are: [0,  1,  0,  1,  0]
1483        //   offsets are: [0, 0, 1, 1, 2] from [1, 2, 3] and ["b", "a"]
1484        let int_array = Int32Array::from(vec![1, 2, 3]);
1485        let str_array = StringArray::from(vec!["b", "a"]);
1486
1487        let type_ids = [0, 1, 0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1488        let offsets = [0, 0, 1, 1, 2].into_iter().collect::<ScalarBuffer<i32>>();
1489
1490        let union_fields = [
1491            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1492            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1493        ]
1494        .into_iter()
1495        .collect::<UnionFields>();
1496
1497        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1498
1499        let array1 =
1500            UnionArray::try_new(union_fields.clone(), type_ids, Some(offsets), children).unwrap();
1501
1502        // create a second array: [2, "a", 1, "c"]
1503        //          type ids are: [0,  1,  0,  1]
1504        //           offsets are: [0, 0, 1, 1] from [2, 1] and ["a", "c"]
1505        let int_array2 = Int32Array::from(vec![2, 1]);
1506        let str_array2 = StringArray::from(vec!["a", "c"]);
1507        let type_ids2 = [0, 1, 0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1508        let offsets2 = [0, 0, 1, 1].into_iter().collect::<ScalarBuffer<i32>>();
1509
1510        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1511
1512        let array2 =
1513            UnionArray::try_new(union_fields, type_ids2, Some(offsets2), children2).unwrap();
1514
1515        let opts = SortOptions {
1516            descending: false,
1517            nulls_first: true,
1518        };
1519
1520        // comparing
1521        // [1, "b", 2, "a", 3]
1522        // [2, "a", 1, "c"]
1523        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1524
1525        // array1[0] = (type_id=0, value=1)
1526        // array2[0] = (type_id=0, value=2)
1527        assert_eq!(cmp(0, 0), Ordering::Less); // 1 < 2
1528
1529        // array1[0] = (type_id=0, value=1)
1530        // array2[1] = (type_id=1, value="a")
1531        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1532
1533        // array1[1] = (type_id=1, value="b")
1534        // array2[1] = (type_id=1, value="a")
1535        assert_eq!(cmp(1, 1), Ordering::Greater); // "b" > "a"
1536
1537        // array1[2] = (type_id=0, value=2)
1538        // array2[0] = (type_id=0, value=2)
1539        assert_eq!(cmp(2, 0), Ordering::Equal); // 2 == 2
1540
1541        // array1[3] = (type_id=1, value="a")
1542        // array2[1] = (type_id=1, value="a")
1543        assert_eq!(cmp(3, 1), Ordering::Equal); // "a" == "a"
1544
1545        // array1[1] = (type_id=1, value="b")
1546        // array2[3] = (type_id=1, value="c")
1547        assert_eq!(cmp(1, 3), Ordering::Less); // "b" < "c"
1548
1549        let opts_desc = SortOptions {
1550            descending: true,
1551            nulls_first: true,
1552        };
1553        let cmp_desc = make_comparator(&array1, &array2, opts_desc).unwrap();
1554
1555        assert_eq!(cmp_desc(0, 0), Ordering::Greater); // 1 > 2 (reversed)
1556        assert_eq!(cmp_desc(0, 1), Ordering::Greater); // type_id 0 < 1, reversed to Greater
1557        assert_eq!(cmp_desc(1, 1), Ordering::Less); // "b" < "a" (reversed)
1558    }
1559
1560    #[test]
1561    fn test_sparse_union() {
1562        // create a sparse union array with Int32 (type_id=0) and Utf8 (type_id=1)
1563        // values: [1, "b", 3]
1564        // note, in sparse unions, child arrays have the same length as the union
1565        let int_array = Int32Array::from(vec![Some(1), None, Some(3)]);
1566        let str_array = StringArray::from(vec![None, Some("b"), None]);
1567        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1568
1569        let union_fields = [
1570            (0, Arc::new(Field::new("a", DataType::Int32, false))),
1571            (1, Arc::new(Field::new("b", DataType::Utf8, false))),
1572        ]
1573        .into_iter()
1574        .collect::<UnionFields>();
1575
1576        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1577
1578        let array = UnionArray::try_new(union_fields, type_ids, None, children).unwrap();
1579
1580        let opts = SortOptions::default();
1581        let cmp = make_comparator(&array, &array, opts).unwrap();
1582
1583        // array[0] = (type_id=0, value=1), array[2] = (type_id=0, value=3)
1584        assert_eq!(cmp(0, 2), Ordering::Less); // 1 < 3
1585        // array[0] = (type_id=0, value=1), array[1] = (type_id=1, value="b")
1586        assert_eq!(cmp(0, 1), Ordering::Less); // type_id 0 < 1
1587    }
1588
1589    #[test]
1590    #[should_panic(expected = "index out of bounds")]
1591    fn test_union_out_of_bounds() {
1592        // create a dense union array with 3 elements
1593        let int_array = Int32Array::from(vec![1, 2]);
1594        let str_array = StringArray::from(vec!["a"]);
1595
1596        let type_ids = [0, 1, 0].into_iter().collect::<ScalarBuffer<i8>>();
1597        let offsets = [0, 0, 1].into_iter().collect::<ScalarBuffer<i32>>();
1598
1599        let union_fields = [
1600            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1601            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1602        ]
1603        .into_iter()
1604        .collect::<UnionFields>();
1605
1606        let children = vec![Arc::new(int_array) as ArrayRef, Arc::new(str_array)];
1607
1608        let array = UnionArray::try_new(union_fields, type_ids, Some(offsets), children).unwrap();
1609
1610        let opts = SortOptions::default();
1611        let cmp = make_comparator(&array, &array, opts).unwrap();
1612
1613        // oob
1614        cmp(0, 3);
1615    }
1616
1617    #[test]
1618    fn test_union_incompatible_fields() {
1619        // create first union with Int32 and Utf8
1620        let int_array1 = Int32Array::from(vec![1, 2]);
1621        let str_array1 = StringArray::from(vec!["a", "b"]);
1622
1623        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1624        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1625
1626        let union_fields1 = [
1627            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1628            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1629        ]
1630        .into_iter()
1631        .collect::<UnionFields>();
1632
1633        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1634
1635        let array1 =
1636            UnionArray::try_new(union_fields1, type_ids1, Some(offsets1), children1).unwrap();
1637
1638        // create second union with Int32 and Float64 (incompatible with first)
1639        let int_array2 = Int32Array::from(vec![3, 4]);
1640        let float_array2 = Float64Array::from(vec![1.0, 2.0]);
1641
1642        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1643        let offsets2 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1644
1645        let union_fields2 = [
1646            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1647            (1, Arc::new(Field::new("C", DataType::Float64, false))),
1648        ]
1649        .into_iter()
1650        .collect::<UnionFields>();
1651
1652        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(float_array2)];
1653
1654        let array2 =
1655            UnionArray::try_new(union_fields2, type_ids2, Some(offsets2), children2).unwrap();
1656
1657        let opts = SortOptions::default();
1658
1659        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1660            make_comparator(&array1, &array2, opts)
1661        else {
1662            panic!("expected error when making comparator of incompatible union arrays");
1663        };
1664
1665        assert_eq!(
1666            &out,
1667            "Cannot compare UnionArrays with different fields: left=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"B\", data_type: Utf8 })], right=[(0, Field { name: \"A\", data_type: Int32 }), (1, Field { name: \"C\", data_type: Float64 })]"
1668        );
1669    }
1670
1671    #[test]
1672    fn test_union_incompatible_modes() {
1673        // create first union as Dense with Int32 and Utf8
1674        let int_array1 = Int32Array::from(vec![1, 2]);
1675        let str_array1 = StringArray::from(vec!["a", "b"]);
1676
1677        let type_ids1 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1678        let offsets1 = [0, 0].into_iter().collect::<ScalarBuffer<i32>>();
1679
1680        let union_fields1 = [
1681            (0, Arc::new(Field::new("A", DataType::Int32, false))),
1682            (1, Arc::new(Field::new("B", DataType::Utf8, false))),
1683        ]
1684        .into_iter()
1685        .collect::<UnionFields>();
1686
1687        let children1 = vec![Arc::new(int_array1) as ArrayRef, Arc::new(str_array1)];
1688
1689        let array1 =
1690            UnionArray::try_new(union_fields1.clone(), type_ids1, Some(offsets1), children1)
1691                .unwrap();
1692
1693        // create second union as Sparse with same fields (Int32 and Utf8)
1694        let int_array2 = Int32Array::from(vec![Some(3), None]);
1695        let str_array2 = StringArray::from(vec![None, Some("c")]);
1696
1697        let type_ids2 = [0, 1].into_iter().collect::<ScalarBuffer<i8>>();
1698
1699        let children2 = vec![Arc::new(int_array2) as ArrayRef, Arc::new(str_array2)];
1700
1701        let array2 = UnionArray::try_new(union_fields1, type_ids2, None, children2).unwrap();
1702
1703        let opts = SortOptions::default();
1704
1705        let Result::Err(ArrowError::InvalidArgumentError(out)) =
1706            make_comparator(&array1, &array2, opts)
1707        else {
1708            panic!("expected error when making comparator of union arrays with different modes");
1709        };
1710
1711        assert_eq!(
1712            &out,
1713            "Cannot compare UnionArrays with different modes: left=Dense, right=Sparse"
1714        );
1715    }
1716
1717    #[test]
1718    fn test_null_array_cmp() {
1719        let a = NullArray::new(3);
1720        let b = NullArray::new(3);
1721        let cmp = make_comparator(&a, &b, SortOptions::default()).unwrap();
1722
1723        assert_eq!(cmp(0, 0), Ordering::Equal);
1724        assert_eq!(cmp(0, 1), Ordering::Equal);
1725        assert_eq!(cmp(2, 0), Ordering::Equal);
1726    }
1727
1728    #[test]
1729    fn test_run_end_encoded_int32() {
1730        // Create RunEndEncoded arrays:
1731        // array1: [1, 1, 2, 2, 2, 3]
1732        // run_ends1: [2, 5, 6], values1: [1, 2, 3]
1733        let run_ends1 = Int32Array::from(vec![2, 5, 6]);
1734        let values1 = Int32Array::from(vec![1, 2, 3]);
1735        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1736
1737        // array2: [1, 2, 2, 3, 3, 3]
1738        // run_ends2: [1, 3, 6], values2: [1, 2, 3]
1739        let run_ends2 = Int32Array::from(vec![1, 3, 6]);
1740        let values2 = Int32Array::from(vec![1, 2, 3]);
1741        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1742
1743        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1744
1745        // array1[0] = 1, array2[0] = 1
1746        assert_eq!(cmp(0, 0), Ordering::Equal);
1747        // array1[0] = 1, array2[1] = 2
1748        assert_eq!(cmp(0, 1), Ordering::Less);
1749        // array1[2] = 2, array2[1] = 2
1750        assert_eq!(cmp(2, 1), Ordering::Equal);
1751        // array1[5] = 3, array2[5] = 3
1752        assert_eq!(cmp(5, 5), Ordering::Equal);
1753        // array1[1] = 1, array2[2] = 2
1754        assert_eq!(cmp(1, 2), Ordering::Less);
1755        // array1[4] = 2, array2[4] = 3
1756        assert_eq!(cmp(4, 4), Ordering::Less);
1757    }
1758
1759    #[test]
1760    fn test_run_end_encoded_with_nulls() {
1761        // Create RunEndEncoded arrays with nulls:
1762        // array1: [1, 1, null, null, 2]
1763        // run_ends1: [2, 4, 5], values1: [1, null, 2]
1764        let run_ends1 = Int32Array::from(vec![2, 4, 5]);
1765        let values1 = Int32Array::from(vec![Some(1), None, Some(2)]);
1766        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1767
1768        // array2: [null, 1, 1, 2, null]
1769        // run_ends2: [1, 3, 4, 5], values2: [null, 1, 2, null]
1770        let run_ends2 = Int32Array::from(vec![1, 3, 4, 5]);
1771        let values2 = Int32Array::from(vec![None, Some(1), Some(2), None]);
1772        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1773
1774        let opts = SortOptions::default();
1775        let cmp = make_comparator(&array1, &array2, opts).unwrap();
1776
1777        // array1[0] = 1, array2[1] = 1
1778        assert_eq!(cmp(0, 1), Ordering::Equal);
1779        // array1[2] = null, array2[0] = null
1780        assert_eq!(cmp(2, 0), Ordering::Equal);
1781        // array1[0] = 1, array2[0] = null (nulls first by default)
1782        assert_eq!(cmp(0, 0), Ordering::Greater);
1783        // array1[2] = null, array2[1] = 1
1784        assert_eq!(cmp(2, 1), Ordering::Less);
1785    }
1786
1787    #[test]
1788    fn test_run_end_encoded_int16() {
1789        // Test with Int16 run ends
1790        let run_ends1 = Int16Array::from(vec![3_i16, 5, 6]);
1791        let values1 = StringArray::from(vec!["a", "b", "c"]);
1792        let array1 = RunArray::<Int16Type>::try_new(&run_ends1, &values1).unwrap();
1793
1794        let run_ends2 = Int16Array::from(vec![2_i16, 4, 6]);
1795        let values2 = StringArray::from(vec!["a", "b", "c"]);
1796        let array2 = RunArray::<Int16Type>::try_new(&run_ends2, &values2).unwrap();
1797
1798        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1799
1800        // array1: [a, a, a, b, b, c]
1801        // array2: [a, a, b, b, c, c]
1802        assert_eq!(cmp(0, 0), Ordering::Equal); // a vs a
1803        assert_eq!(cmp(2, 2), Ordering::Less); // a vs b
1804        assert_eq!(cmp(3, 2), Ordering::Equal); // b vs b
1805        assert_eq!(cmp(5, 4), Ordering::Equal); // c vs c
1806    }
1807
1808    #[test]
1809    fn test_run_end_encoded_int64() {
1810        // Test with Int64 run ends
1811        let run_ends1 = Int64Array::from(vec![2_i64, 4, 6]);
1812        let values1 = Int64Array::from(vec![10_i64, 20, 30]);
1813        let array1 = RunArray::<Int64Type>::try_new(&run_ends1, &values1).unwrap();
1814
1815        let run_ends2 = Int64Array::from(vec![3_i64, 5, 6]);
1816        let values2 = Int64Array::from(vec![10_i64, 20, 30]);
1817        let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1818
1819        let cmp = make_comparator(&array1, &array2, SortOptions::default()).unwrap();
1820
1821        // array1: [10, 10, 20, 20, 30, 30]
1822        // array2: [10, 10, 10, 20, 20, 30]
1823        assert_eq!(cmp(0, 0), Ordering::Equal); // 10 vs 10
1824        assert_eq!(cmp(1, 2), Ordering::Equal); // 10 vs 10
1825        assert_eq!(cmp(2, 3), Ordering::Equal); // 20 vs 20
1826        assert_eq!(cmp(4, 4), Ordering::Greater); // 30 vs 20
1827    }
1828
1829    #[test]
1830    fn test_run_end_encoded_sliced() {
1831        // Create a RunEndEncoded array and slice it:
1832        // original: [1, 1, 2, 2, 2, 3, 3, 4]
1833        // run_ends: [2, 5, 7, 8], values: [1, 2, 3, 4]
1834        let run_ends = Int32Array::from(vec![2, 5, 7, 8]);
1835        let values = Int32Array::from(vec![1, 2, 3, 4]);
1836        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1837
1838        // slice1 = array[1..5] => [1, 2, 2, 2]
1839        let slice1 = array.slice(1, 4);
1840        // slice2 = array[3..7] => [2, 2, 3, 3]
1841        let slice2 = array.slice(3, 4);
1842
1843        let cmp = make_comparator(&slice1, &slice2, SortOptions::default()).unwrap();
1844
1845        // slice1[0]=1, slice2[0]=2
1846        assert_eq!(cmp(0, 0), Ordering::Less);
1847        // slice1[1]=2, slice2[0]=2
1848        assert_eq!(cmp(1, 0), Ordering::Equal);
1849        // slice1[3]=2, slice2[2]=3
1850        assert_eq!(cmp(3, 2), Ordering::Less);
1851        // slice1[1]=2, slice2[3]=3
1852        assert_eq!(cmp(1, 3), Ordering::Less);
1853
1854        // Compare a sliced array with an unsliced array
1855        let run_ends2 = Int32Array::from(vec![2, 4]);
1856        let values2 = Int32Array::from(vec![1, 2]);
1857        let array2 = RunArray::<Int32Type>::try_new(&run_ends2, &values2).unwrap();
1858
1859        let cmp = make_comparator(&slice1, &array2, SortOptions::default()).unwrap();
1860
1861        // slice1[0]=1, array2[0]=1
1862        assert_eq!(cmp(0, 0), Ordering::Equal);
1863        // slice1[1]=2, array2[1]=1
1864        assert_eq!(cmp(1, 1), Ordering::Greater);
1865        // slice1[3]=2, array2[3]=2
1866        assert_eq!(cmp(3, 3), Ordering::Equal);
1867    }
1868
1869    #[test]
1870    fn test_run_end_encoded_sliced_with_nulls() {
1871        // Create a RunEndEncoded array with nulls:
1872        // original: [1, 1, null, null, 2, 2, null, 3]
1873        // run_ends: [2, 4, 6, 7, 8], values: [1, null, 2, null, 3]
1874        let run_ends = Int32Array::from(vec![2, 4, 6, 7, 8]);
1875        let values = Int32Array::from(vec![Some(1), None, Some(2), None, Some(3)]);
1876        let array = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();
1877
1878        // slice1 = array[1..6] => [1, null, null, 2, 2]
1879        let slice1 = array.slice(1, 5);
1880        // slice2 = array[3..8] => [null, 2, 2, null, 3]
1881        let slice2 = array.slice(3, 5);
1882
1883        let opts = SortOptions::default(); // nulls_first=true, descending=false
1884        let cmp = make_comparator(&slice1, &slice2, opts).unwrap();
1885
1886        // slice1[0]=1, slice2[0]=null
1887        assert_eq!(cmp(0, 0), Ordering::Greater);
1888        // slice1[1]=null, slice2[0]=null
1889        assert_eq!(cmp(1, 0), Ordering::Equal);
1890        // slice1[1]=null, slice2[1]=2
1891        assert_eq!(cmp(1, 1), Ordering::Less);
1892        // slice1[3]=2, slice2[1]=2
1893        assert_eq!(cmp(3, 1), Ordering::Equal);
1894        // slice1[4]=2, slice2[4]=3
1895        assert_eq!(cmp(4, 4), Ordering::Less);
1896        // slice1[3]=2, slice2[3]=null
1897        assert_eq!(cmp(3, 3), Ordering::Greater);
1898    }
1899
1900    #[test]
1901    fn test_run_end_encoded_different_types() {
1902        // Test with different run end types - should fail
1903        let run_ends1 = Int32Array::from(vec![2, 4, 6]);
1904        let values1 = Int32Array::from(vec![1, 2, 3]);
1905        let array1 = RunArray::<Int32Type>::try_new(&run_ends1, &values1).unwrap();
1906
1907        let run_ends2 = Int64Array::from(vec![2_i64, 4, 6]);
1908        let values2 = Int64Array::from(vec![1_i64, 2, 3]);
1909        let array2 = RunArray::<Int64Type>::try_new(&run_ends2, &values2).unwrap();
1910
1911        let result = make_comparator(&array1, &array2, SortOptions::default());
1912        assert!(result.is_err());
1913        let err = match result {
1914            Err(e) => e.to_string(),
1915            Ok(_) => panic!("Expected error"),
1916        };
1917        assert!(err.contains("Cannot compare RunEndEncoded arrays"));
1918    }
1919}